In [None]:
import numpy as np
import pandas as pd
from pyBKT.models import Model
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Create the BKT model with a fixed seed and a single fit per skill.
model = Model(num_fits=1, seed=60)

In [None]:
# Fetch the Under16 CSV from GitHub; the second argument ('.') is the target location.
model.fetch_dataset(
    "https://raw.githubusercontent.com/lishaparmar13/H5P-Enhanced-BKT/main/Under16-SS.csv",
    ".",
)

In [None]:
# Load the downloaded CSV (Latin-1 encoded) and preview its last five rows.
df4 = pd.read_csv("Under16-SS.csv", encoding='latin')
df4.tail()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
train_df, test_df = train_test_split(
    df4,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fit the model on the complete DataFrame.
model.fit(data=df4)

In [None]:
# Fit directly from a CSV on disk, then list every skill that received parameters.
# NOTE(review): 'Sepcolumn.csv' is not fetched or written by any cell visible here —
# confirm the file is present before running this cell on a fresh kernel.
model.fit(data_path='Sepcolumn.csv')
skill_listing = '\n'.join(model.coef_.keys())
print("Fitted Skills:\n%s" % skill_listing)

In [None]:
# Refit on the full DataFrame, then score it twice on that same data:
# first with the default metric (RMSE), then with AUC requested explicitly.
model.fit(data=df4)

training_rmse = model.evaluate(data=df4)
print("Training RMSE: %f" % training_rmse)

training_auc = model.evaluate(metric='auc', data=df4)
print("Training AUC: %f" % training_auc)

In [None]:
# Define a custom metric that can be passed to model.evaluate(metric=...).
def mae(true_vals, pred_vals):
  """Calculate the mean absolute error between two equal-length sequences.

  Both inputs are converted to float NumPy arrays before subtracting, so
  plain lists, boolean arrays and pandas Series are all accepted and are
  compared element by position (Series are NOT re-aligned on their index
  labels, which would otherwise produce NaN for mismatched indexes).

  Args:
    true_vals: observed values (array-like).
    pred_vals: predicted values (array-like, same length as true_vals).

  Returns:
    The mean of |true_vals - pred_vals|.
  """
  observed = np.asarray(true_vals, dtype=float)
  predicted = np.asarray(pred_vals, dtype=float)
  return np.mean(np.abs(observed - predicted))

# Score the fitted model on the full DataFrame using the custom MAE callable.
training_mae = model.evaluate(metric=mae, data=df4)
print("Training MAE: %f" % training_mae)

In [None]:
# Start from a fresh model, train on the training split and predict on the held-out split.
model = Model(num_fits=1, seed=60)
model.fit(data=train_df)
preds = model.predict(data=test_df)

# Show the first ten rows: identifying columns alongside both prediction columns.
shown_columns = ['Anon Student Id', 'KC(Default)', 'Correct First Attempt',
                 'correct_predictions', 'state_predictions']
preds[shown_columns].head(10)

In [None]:
# Sanity check: list the skills whose predicted correctness differs from 0.5.
# A fitted skill can in principle predict exactly 0.5, but that is unlikely,
# so the result indicates which skills the model actually produced
# predictions for. (The fit call before this cell does not pass `skills=`.)
has_real_prediction = preds['correct_predictions'] != 0.5
preds.loc[has_real_prediction, 'KC(Default)'].unique()

In [None]:
# Evaluate with the default metric (RMSE) over the full DataFrame.
# NOTE(review): no manually calculated RMSE appears earlier in this notebook
# to compare against, and df4 contains the rows of train_df — confirm whether
# test_df was the intended evaluation set.
model.evaluate(data=df4)

In [None]:
# Request several metrics in one call by passing a list of metric names.
model.evaluate(
    metric=['rmse', 'accuracy', 'auc', 'mean_absolute_error'],
    data=df4,
)

In [None]:
# Five-fold cross-validation on a fresh model.
# `folds` is optional, as are the model variant, the seed and the
# cross-validated metric. Without `skills=`, every skill is
# cross-validated separately.
model = Model(num_fits=1, seed=60)
model.crossvalidate(folds=5, data=df4)

In [None]:
# Cross-validate a single skill with a single metric on a fresh model.
# Edit the two assignments below to try another skill or metric.
model = Model(num_fits=1, seed=60)
skill = 'Algebra'
metric = 'rmse'

simple_cv = model.crossvalidate(
    data=df4,
    metric=metric,
    skills=skill,
)
simple_cv

In [None]:
# Same cross-validation as before, now with the multigs variant enabled.
model = Model(num_fits=1, seed=60)
multigs_cv = model.crossvalidate(
    data=df4,
    metric=metric,
    skills=skill,
    multigs=True,
)
multigs_cv

In [None]:
# Cross-validate the multilearn variant, keyed on the "question_id" column.
model = Model(seed = 60, num_fits = 1)
skill = 'Algebra'

# Pass the DataFrame as `data=` like every other call in this notebook,
# rather than positionally, so the call does not depend on the order of
# crossvalidate's data / data_path parameters.
multilearn_cv = model.crossvalidate(data = df4, skills = skill,
                                    multilearn = "question_id", metric = metric)
multilearn_cv

In [None]:
# Arguments shared by both three-fold cross-validation runs below.
shared_cv_args = dict(data=df4, skills=skill, metric=metric, folds=3)

# Multiprior variant: priors differ based on each student's first response.
model = Model(num_fits=1, seed=60)
multiprior_cv = model.crossvalidate(multiprior=True, **shared_cv_args)

# Multipair variant, run on its own fresh model.
model = Model(num_fits=1, seed=60)
multipair_cv = model.crossvalidate(multipair=True, **shared_cv_args)

# Stack both result tables row-wise for comparison.
pd.concat([multiprior_cv, multipair_cv])

In [None]:
# Combo model: enable forgets together with the multilearn and
# multiguess/slip variants, and cross-validate on a fresh model.
model = Model(num_fits=1, seed=60)
combo_cv = model.crossvalidate(
    data=df4,
    skills=skill,
    metric=metric,
    multigs=True,
    multilearn=True,
    forgets=True,
)
combo_cv

In [None]:
# Fresh model for the parameter-initialisation experiments that follow.
model = Model(num_fits=1, seed=60)

In [None]:
# Pre-initialise the prior of one skill to a near-zero value, then display coef_.
skill = 'Geometry'
model.coef_ = {skill: dict(prior=1e-40)}
model.coef_

In [None]:
# Fit (with multigs) starting from the pre-initialised parameters and
# measure AUC on the same DataFrame.
model.fit(multigs=True, data=df4)
low_prior_auc = model.evaluate(metric='auc', data=df4)

# model.coef_ is indexed by skill name first, then by parameter name.
fitted_low_prior = model.coef_[skill]['prior']
print("Fitted Prior Value: %f" % fitted_low_prior)
print("Training AUC: %f" % low_prior_auc)

In [None]:
# Repeat the experiment with a more reasonable starting prior of 0.5.
model.coef_ = {skill: dict(prior=0.5)}
model.fit(multigs=True, data=df4)
normal_prior_auc = model.evaluate(metric='auc', data=df4)

# Report the fitted prior and the training AUC.
fitted_normal_prior = model.coef_[skill]['prior']
print("Fitted Prior Value: %f" % fitted_normal_prior)
print("Training AUC: %f" % normal_prior_auc)

In [None]:
# Fit the combo variant (forgets + multilearn + multigs) for the selected
# skill only, then display the fitted parameter table.
model.fit(
    data=df4,
    skills=skill,
    multigs=True,
    multilearn=True,
    forgets=True,
)
params = model.params()
params

In [None]:
# Indexing past lexsort depth triggers warnings here. They are harmless,
# so all warnings are silenced from this point on.
import warnings
warnings.simplefilter('ignore')

# Plot guesses, learns, forgets and slips for each class of the selected
# skill, reading from the `params` table produced by the previous cell.
plt.figure(figsize=(12, 6))
for param_name, legend_label in (('guesses', 'Guesses'),
                                 ('learns', 'Learns'),
                                 ('forgets', 'Forgets'),
                                 ('slips', 'Slips')):
    plt.plot(params.loc[(skill, param_name)], label=legend_label)
plt.xlabel('Template ID')
plt.ylabel('Rate')
plt.title('BKT Parameters per Template ID Class for Under16 Dataset')
plt.legend();

In [None]:
# Configure pandas to display every row and column without truncation.
# `pd` is already imported in the notebook's first cell, so it is not
# re-imported here.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "no limit" on column width. The previous value of 1 capped
# every column at one character, so string cells were cut down to a
# '...' placeholder.
pd.set_option('display.max_colwidth', None)


In [None]:
# Fit the combo variant (forgets + multilearn + multigs) without a skill
# filter, then display the full parameter table.
model.fit(
    data=df4,
    multigs=True,
    multilearn=True,
    forgets=True,
)
params = model.params()
params