In [1]:
import numpy as np
import pandas as pd
from pyBKT.models import Model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Create the BKT model with a fixed seed so repeated runs are comparable.
# num_fits=1 presumably limits fitting to a single initialization -- confirm
# against the pyBKT documentation.
model = Model(seed=60, num_fits=1)

In [3]:
# Download the Under16-CS dataset into the current working directory ('.').
dataset_url = "https://raw.githubusercontent.com/lishaparmar13/H5P-Enhanced-BKT/main/Under16-CS.csv"
model.fetch_dataset(dataset_url, '.')

In [4]:
# Load the downloaded CSV and preview the first five rows.
# NOTE(review): the 'ï»¿Row' header in the preview looks like a byte-order mark
# decoded as Latin-1 -- confirm the file's real encoding before relying on
# that column name.
df1 = pd.read_csv(r"Under16-CS.csv", encoding='latin')
df1.head(5)

Unnamed: 0,ï»¿Row,Anon Student Id,Age,Problem Hierarchy,question_id,Step Start Time,Step End Time,Correct First Attempt,Step Duration (sec),Answer Type,KC(Default),Opportunity (Default),skill_id,template_id
0,2,2,13,1,3,11-07-2023 07:28,11-07-2023 07:29,0,37,Drag & Drop,Algebra,1,1,1
1,17,17,13,1,3,18-07-2023 04:30,18-07-2023 04:31,0,48,Drag & Drop,Algebra,1,1,1
2,19,19,13,1,3,19-07-2023 16:54,19-07-2023 16:56,0,120,Drag & Drop,Algebra,1,1,1
3,2,2,13,1,5,11-07-2023 07:31,11-07-2023 07:31,1,23,Drag & Drop,Fractions,1,3,1
4,17,17,13,1,5,18-07-2023 04:33,18-07-2023 04:35,1,60,Drag & Drop,Fractions,1,3,1


In [5]:
# Fit one BKT model per skill on the complete dataset.
model.fit(data=df1)

In [6]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
# NOTE(review): rows are split at random, so a single student's attempts can
# land in both partitions -- confirm this is intended for a sequence model.
split_frames = train_test_split(df1, test_size=0.2, random_state=42)
train_df, test_df = split_frames

In [7]:
# Refit on the training partition and list every skill that received parameters.
model.fit(data=train_df)
fitted_skill_names = '\n'.join(model.coef_.keys())
print("Fitted Skills:\n%s" % fitted_skill_names)

Fitted Skills:
Ratio and Proportion
Numbers and Geometry and Percentages
Algebra and Fractions
Algebra
Numbers and Geometry and Fractions
Geometry
Numbers and Algebra and Fractions
Fractions
Numbers and Percentages and Ratio and Proportion


In [8]:
# Refit on the full dataset, then score on that same data. Both figures are
# therefore in-sample (training) metrics.
model.fit(data=df1)

# With no metric argument, evaluate() uses its default (RMSE); AUC is
# requested by name.
training_rmse = model.evaluate(data=df1)
training_auc = model.evaluate(data=df1, metric='auc')

print("Training RMSE: %f" % training_rmse)
print("Training AUC: %f" % training_auc)

Training RMSE: 0.467707
Training AUC: 0.644788


In [9]:
# Custom evaluation metric, passed to model.evaluate as a callable.
def mae(true_vals, pred_vals):
    """Return the mean absolute error between two equal-length sequences.

    Both inputs are converted to float NumPy arrays before subtracting, so
    plain lists and tuples are accepted, unsigned-integer inputs cannot wrap
    around, and pandas Series are compared by position rather than by index
    label.

    Args:
        true_vals: Observed values (array-like of numbers).
        pred_vals: Predicted values (array-like of numbers).

    Returns:
        The mean of ``|true_vals - pred_vals|`` as a NumPy float.
    """
    true_arr = np.asarray(true_vals, dtype=float)
    pred_arr = np.asarray(pred_vals, dtype=float)
    return np.mean(np.abs(true_arr - pred_arr))

# Score the fitted model with the custom MAE callable instead of a built-in
# metric name.
training_mae = model.evaluate(data=df1, metric=mae)
print("Training MAE: %f" % training_mae)

Training MAE: 0.437504


In [10]:
# Start again from an unfitted model, using the same seed as before.
model = Model(seed=60, num_fits=1)

In [11]:
# Fit on the training partition (no skill filter is passed, so every skill
# in train_df is considered) and predict on the held-out partition.
model.fit(data=train_df)
preds = model.predict(data=test_df)

# Show the identifying columns next to the two prediction columns.
preview_columns = ['Anon Student Id', 'KC(Default)', 'Correct First Attempt',
                   'correct_predictions', 'state_predictions']
preds[preview_columns].head(10)

Unnamed: 0,Anon Student Id,KC(Default),Correct First Attempt,correct_predictions,state_predictions
150,1,Numbers and Percentages and Ratio and Proportion,1,0.5,0.44215
15,2,Ratio and Proportion,1,0.5,0.84397
18,2,Algebra and Fractions,1,0.5,0.75807
24,2,Numbers and Algebra and Fractions,1,0.5,0.03291
161,4,Algebra,0,0.41815,0.36338
111,5,Ratio and Proportion,1,0.5,0.84397
122,8,Algebra and Fractions,0,0.5,0.75807
112,8,Ratio and Proportion,0,0.5,0.84397
153,11,Numbers and Percentages and Ratio and Proportion,0,0.5,0.44215
123,11,Algebra and Fractions,1,0.5,0.75807


In [12]:
# List the skills that have at least one correctness prediction different
# from exactly 0.5.
# NOTE(review): a prediction of exactly 0.5 is what BKT yields when guess and
# slip are both 0.5, whatever the mastery state -- confirm whether that is why
# most skills are missing from this result.
informative_rows = preds['correct_predictions'] != 0.5
preds.loc[informative_rows, 'KC(Default)'].unique()

array(['Algebra'], dtype=object)

In [13]:
# Default-metric (RMSE) score of the current model over the full dataset.
# NOTE(review): the model above was fitted on train_df, and df1 also contains
# those training rows, so this is not a purely held-out score -- evaluate on
# test_df if that is what is wanted.
model.evaluate(data=df1)

0.49743755955713354

In [14]:
# Request several metrics in one call; results come back in the same order.
requested_metrics = ['rmse', 'accuracy', 'auc']
model.evaluate(data=df1, metric=requested_metrics)

[0.49743755955713354, 0.68, 0.5933075933075933]

In [15]:
# Five-fold cross-validation. No skills are named, so each skill is
# cross-validated on its own; folds, model variant, seed and metric are all
# optional arguments.
model.crossvalidate(data=df1, folds=5)

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.30619
Fractions,0.5
Numbers and Geometry and Percentages,0.5
Numbers and Geometry and Fractions,0.5
Ratio and Proportion,0.5
Algebra and Fractions,0.5
Geometry,0.5
Numbers and Algebra and Fractions,0.5
Numbers and Percentages and Ratio and Proportion,0.5


In [16]:
model = Model(seed=60, num_fits=1)

# Change the skill or the metric here to rerun the comparison on another target.
skill, metric = 'Algebra', 'rmse'

# Baseline: the standard BKT model, cross-validated on the chosen skill only.
simple_cv = model.crossvalidate(data=df1, skills=skill, metric=metric)
simple_cv

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.30619


In [17]:
# Same skill and metric as the baseline, with the multigs variant enabled.
model = Model(seed=60, num_fits=1)
multigs_cv = model.crossvalidate(data=df1, skills=skill, metric=metric,
                                 multigs=True)
multigs_cv

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.30619


In [18]:
model = Model(seed=60, num_fits=1)
skill = 'Algebra'

# Multilearn variant keyed on the "question_id" column, with forgetting enabled.
multilearn_cv = model.crossvalidate(
    data=df1,
    skills=skill,
    multilearn="question_id",
    forgets=True,
    metric=metric,
)
multilearn_cv

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.30619


In [19]:
# Both variants below share the skill, the metric and a 3-fold split.
shared_cv_args = dict(data=df1, skills=skill, metric=metric, folds=3)

# The multiprior model generates different priors based on the first
# response of each student.
model = Model(seed=60, num_fits=1)
multiprior_cv = model.crossvalidate(multiprior=True, **shared_cv_args)

# Fresh model for the multipair variant so the two runs stay independent.
model = Model(seed=60, num_fits=1)
multipair_cv = model.crossvalidate(multipair=True, **shared_cv_args)

# Stack the results: first row is multiprior, second row is multipair.
pd.concat([multiprior_cv, multipair_cv], axis=0)

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.32141
Algebra,0.36518


In [20]:
model = Model(seed=60, num_fits=1)

# Combo model: forgetting switched on together with the multilearn and
# multiguess/slip variants.
combo_cv = model.crossvalidate(
    data=df1,
    skills=skill,
    forgets=True,
    multilearn=True,
    multigs=True,
    metric=metric,
)
combo_cv

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Algebra,0.30619


In [21]:
model = Model(seed=60, num_fits=1)
skill = 'Geometry'

# Pre-initialize the prior for this skill with an extremely small value
# before any fitting takes place, then echo the stored parameters.
initial_params = {skill: {'prior': 1e-40}}
model.coef_ = initial_params
model.coef_

{'Geometry': {'prior': 1e-40}}

In [22]:
# Fit starting from the pre-initialized parameters, with multigs enabled,
# then compute the AUC on the same data used for fitting.
model.fit(data=df1, multigs=True)
low_prior_auc = model.evaluate(data=df1, metric='auc')

# model.coef_ is indexed by skill name, then by parameter name.
fitted_prior = model.coef_[skill]['prior']
print("Fitted Prior Value: %f" % fitted_prior)
print("Training AUC: %f" % low_prior_auc)

Fitted Prior Value: 0.000000
Training AUC: 0.644788


In [23]:
# Restart from a more reasonable initial prior of 0.5 and fit again.
model.coef_ = {skill: {'prior': 0.5}}
model.fit(data=df1, multigs=True)
normal_prior_auc = model.evaluate(data=df1, metric='auc')

# Report the fitted prior and the training AUC.
fitted_prior = model.coef_[skill]['prior']
print("Fitted Prior Value: %f" % fitted_prior)
print("Training AUC: %f" % normal_prior_auc)

Fitted Prior Value: 0.745866
Training AUC: 0.644788


In [24]:
# Fit the combined variant (forgets + multilearn + multigs) on the selected
# skill only, then tabulate the learned parameters.
model.fit(data=df1, skills=skill, forgets=True, multilearn=True, multigs=True)
model.params()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
skill,param,class,Unnamed: 3_level_1
Geometry,prior,default,0.53355
Geometry,learns,6,1.0
Geometry,guesses,6,0.5
Geometry,slips,6,0.5
Geometry,forgets,6,0.0
