# First model test: standard BKT , BKT+Forgets:

In [1]:
# Import all required packages including pyBKT.models.Model!
import numpy as np
import pandas as pd
from pyBKT.models import Model
import matplotlib.pyplot as plt


In [2]:
# Take no action when the exception occurs
np.seterr(divide='ignore', invalid='ignore');

The train data for the next model* is from : 
https://github.com/CAHLR/pyBKT-examples/blob/master/data/builder_train_preprocessed.csv

The test data for the next model* is from : 
https://github.com/CAHLR/pyBKT-examples/blob/master/data/builder_test_preprocessed.csv

In [6]:
# one time run 
# model.fetch_dataset('https://raw.githubusercontent.com/CAHLR/pyBKT-examples/master/data/as.csv', '.')
# model.fetch_dataset('https://raw.githubusercontent.com/CAHLR/pyBKT-examples/master/data/ct.csv', '.')

In [3]:
as_df= pd.read_csv('as.csv', encoding = 'latin', low_memory = False) 

ct_df = pd.read_csv('ct.csv', encoding = 'latin')

In [3]:

builder_train_preprocessed = pd.read_csv('builder_train_preprocessed.csv')
builder_train_preprocessed

Unnamed: 0,user_id,skill_name,correct
0,1,7,1
1,7,82,0
2,7,82,0
3,7,82,0
4,7,82,0
...,...,...,...
407962,145,123,0
407963,145,123,1
407964,145,123,1
407965,145,123,1


In [4]:
builder_test_preprocessed = pd.read_csv('builder_test_preprocessed.csv')
builder_test_preprocessed

Unnamed: 0,user_id,skill_name,correct
0,4,51,0
1,4,51,1
2,4,51,1
3,4,51,1
4,9,82,0
...,...,...,...
117562,116,123,1
117563,116,123,0
117564,116,123,0
117565,116,123,1


In [8]:
# Evaluate the AUC of the model on the testing data.


model = Model(seed = 42, num_fits = 20)
model.fit(data_path = "builder_train_preprocessed.csv")
print("AUC of Standard BKT:", model.evaluate(data_path = "builder_test_preprocessed.csv", metric="auc"))
model2 = Model(seed = 42, num_fits = 20)
model2.fit(data_path = "builder_train_preprocessed.csv", forgets=True)
print("AUC of BKT+Forgets:", model2.evaluate(data_path = "builder_test_preprocessed.csv", metric="auc"))

AUC of Standard BKT: 0.7594614282799547
AUC of BKT+Forgets: 0.8246734641237574


### Predections 

In [6]:
#predection of BKT+ Standard

preds_df = model.predict(data_path = "builder_test_preprocessed.csv")
preds_df

Unnamed: 0,user_id,skill_name,correct,correct_predictions,state_predictions
33,1,82,1,0.64476,0.75734
34,1,82,0,0.73042,0.91563
797,1,30,0,0.62922,0.55329
4832,1,59,1,0.62106,0.86219
16515,1,32,1,0.72522,0.56716
...,...,...,...,...,...
99566,8214,123,1,0.67525,1.00000
99567,8214,123,1,0.67525,1.00000
99568,8214,123,1,0.67525,1.00000
99569,8214,123,1,0.67525,1.00000


In [7]:
#predection of BKT+ Forgets
preds_df = model2.predict(data_path = "builder_test_preprocessed.csv")
preds_df

Unnamed: 0,user_id,skill_name,correct,correct_predictions,state_predictions
33,1,82,1,0.67874,0.83995
34,1,82,0,0.77384,0.97012
797,1,30,0,0.67297,0.80827
4832,1,59,1,0.63352,0.84018
16515,1,32,1,0.73291,0.70054
...,...,...,...,...,...
99566,8214,123,1,0.70546,0.96361
99567,8214,123,1,0.70546,0.96361
99568,8214,123,1,0.70546,0.96361
99569,8214,123,1,0.70546,0.96361


In [8]:
#see the test data : 

test_one_student= builder_test_preprocessed[builder_test_preprocessed['user_id']==4]
test_one_student.sort_values(by=['skill_name'], inplace=True)
test_one_student


Unnamed: 0,user_id,skill_name,correct
79816,4,0,1
79815,4,0,1
48064,4,1,0
66812,4,6,0
48456,4,7,0
...,...,...,...
115132,4,99,1
115130,4,99,1
115131,4,99,1
115133,4,99,1


In [9]:
# Count the frequency that a skill occurs for one student =4 
test_one_student['skill_name'].value_counts()

34     8
47     7
82     6
97     5
51     4
99     4
49     4
48     4
55     4
37     4
9      3
96     3
18     3
52     3
58     2
0      2
32     2
14     2
7      2
85     1
84     1
38     1
44     1
1      1
29     1
24     1
17     1
11     1
8      1
6      1
123    1
Name: skill_name, dtype: int64

In [19]:
#result for one student on the model that was trainded with forget
preds_df_forget= model2.predict(data = test_one_student)
newdf=preds_df_forget.copy()
preds_df_forget[preds_df_forget['skill_name']=='34']

Unnamed: 0,user_id,skill_name,correct,correct_predictions,state_predictions
65848,4,34,1,0.88082,0.97022
65849,4,34,1,0.85922,0.93734
65847,4,34,1,0.85502,0.93095
26337,4,34,1,0.85419,0.92968
26336,4,34,1,0.85402,0.92942
26335,4,34,1,0.85398,0.92937
26334,4,34,0,0.85398,0.92936
65846,4,34,0,0.70171,0.69757


In [20]:
#result for one student on the model that was trainded without forget

preds_df= model.predict(data = test_one_student)

preds_df[preds_df['skill_name']=='34']

Unnamed: 0,user_id,skill_name,correct,correct_predictions,state_predictions
65848,4,34,1,0.85266,0.17214
65849,4,34,1,0.85911,0.14802
65847,4,34,1,0.86399,0.12978
26337,4,34,1,0.86763,0.11617
26336,4,34,1,0.87032,0.10611
26335,4,34,1,0.8723,0.09873
26334,4,34,0,0.87374,0.09335
65846,4,34,0,0.82119,0.28979


In [21]:
newdf.rename(columns = {'correct_predictions':'correct_predictions_forgets',
                        'state_predictions':'state_predictions_forgets'}, inplace = True)
newdf['correct_predictionsBKT']=preds_df['correct_predictions']
newdf['state_predictionsBKT']=preds_df['state_predictions']

In [23]:
print('Results for one student with Std bkt and BKT with forget\n')
newdf

Results for one student with Std bkt and BKT with forget



Unnamed: 0,user_id,skill_name,correct,correct_predictions_forgets,state_predictions_forgets,correct_predictionsBKT,state_predictionsBKT
79816,4,0,1,0.68446,0.73522,0.72757,0.61112
79815,4,0,1,0.79953,0.90875,0.76869,0.73226
48064,4,1,0,0.43470,0.55652,0.45080,0.58217
66812,4,6,0,0.70032,0.85331,0.68817,0.87392
48456,4,7,0,0.58937,0.60977,0.62044,0.10958
...,...,...,...,...,...,...,...
115132,4,99,1,0.48909,0.53851,0.52184,0.52790
115130,4,99,1,0.75888,0.87432,0.73480,0.93261
115131,4,99,1,0.75432,0.86864,0.76667,0.99317
115133,4,99,1,0.75437,0.86870,0.76991,0.99934


In [24]:
print("Check for one student: ")
print("Standard BKT, auc:", model.evaluate(data=test_one_student, metric="auc"))
print("BKT+Forgets, auc:", model2.evaluate(data = test_one_student, metric="auc"))
print("")
print("Standard BKT, rmse:", model.evaluate(data=test_one_student, metric="rmse"))

print("BKT+Forgets, rmse:", model2.evaluate(data = test_one_student, metric="rmse"))

evalAUC_standard= model.evaluate(data=test_one_student, metric="auc")
evalAUC_BKT_Forgets= model2.evaluate(data = test_one_student, metric="auc")

evalrmse_standard= model.evaluate(data=test_one_student, metric="rmse")
evalrmse_BKT_Forgets= model2.evaluate(data = test_one_student, metric="rmse")

evlaluation_one_Student = pd.DataFrame({"Standard BKT": [evalAUC_standard, evalrmse_standard],
                    "BKT+Forgets": [evalAUC_BKT_Forgets, evalrmse_BKT_Forgets]})
evlaluation_one_Student.set_axis(['auc', 'rmse'], axis='index',inplace=True)
evlaluation_one_Student

Check for one student: 
Standard BKT, auc: 0.7734567901234568
BKT+Forgets, auc: 0.7969135802469135

Standard BKT, rmse: 0.42628246975895767
BKT+Forgets, rmse: 0.4183779484122843


Unnamed: 0,Standard BKT,BKT+Forgets
auc,0.77346,0.79691
rmse,0.42628,0.41838


In [85]:
newdf

Unnamed: 0,user_id,skill_name,correct,correct_predictions_forgets,state_predictions_forgets,correct_predictions,state_predictions
79816,4,0,1,0.69876,0.74428,0.72420,0.63777
79815,4,0,1,0.79348,0.90320,0.76905,0.76322
48064,4,1,0,0.43481,0.55922,0.44927,0.56902
66812,4,6,0,0.70132,0.85539,0.68800,0.86997
48456,4,7,0,0.58881,0.60895,0.62041,0.10965
...,...,...,...,...,...,...,...
115132,4,99,1,0.49011,0.49519,0.52094,0.60221
115130,4,99,1,0.75110,0.78215,0.75206,0.97257
115131,4,99,1,0.74845,0.77923,0.76837,0.99869
115133,4,99,1,0.74847,0.77925,0.76915,0.99994


In [25]:
# Let's check for one skill 

newdf[newdf['skill_name']=='34']

Unnamed: 0,user_id,skill_name,correct,correct_predictions_forgets,state_predictions_forgets,correct_predictionsBKT,state_predictionsBKT
65848,4,34,1,0.88082,0.97022,0.85266,0.17214
65849,4,34,1,0.85922,0.93734,0.85911,0.14802
65847,4,34,1,0.85502,0.93095,0.86399,0.12978
26337,4,34,1,0.85419,0.92968,0.86763,0.11617
26336,4,34,1,0.85402,0.92942,0.87032,0.10611
26335,4,34,1,0.85398,0.92937,0.8723,0.09873
26334,4,34,0,0.85398,0.92936,0.87374,0.09335
65846,4,34,0,0.70171,0.69757,0.82119,0.28979


In [26]:

# check parameters for skill =34 for every model

paramsForgets= model2.params()['value']
paramsForgets
print("Skill=34\n\nparameters for BKT+Forget:\n", paramsForgets.loc[pd.IndexSlice['34']])

paramsStandard= model.params()['value']
paramsStandard
paramsStandard.loc[pd.IndexSlice['34']]

print("\nparameters for BKT Standard:\n", paramsStandard.loc[pd.IndexSlice['34']])

Skill=34

parameters for BKT+Forget:
 param    class  
prior    default   0.97022
learns   default   0.27267
guesses  default   0.24345
slips    default   0.09962
forgets  default   0.05715
Name: value, dtype: float64

parameters for BKT Standard:
 param    class  
prior    default   0.17214
learns   default   0.02359
guesses  default   0.89870
slips    default   0.36877
forgets  default   0.00000
Name: value, dtype: float64


# Prior Per Student 

## Student-specific parameters

In [3]:
# The multiprior model generates different priors based on the first 
# response of each student.

In [7]:
skill = 'Calculations with Similar Figures'
metric = 'auc'
Model(num_fits = 10, seed=2020)
multiprior_cv = model.crossvalidate(data_path = 'as.csv', skills = skill,
                                    multiprior = True, metric = metric,
                                    folds = 3)
BKT_cv = model.crossvalidate(data_path = 'as.csv', skills = skill,
                                    metric = metric,
                                   folds = 3)
pd.concat([multiprior_cv, BKT_cv], axis = 0)

Unnamed: 0_level_0,auc
skill,Unnamed: 1_level_1
Calculations with Similar Figures,0.6048
Calculations with Similar Figures,0.59946


In [10]:
model.fit(data_path = 'as.csv', multiprior = True)

print(model.params())

                                              value
skill                       param   class          
Circle Graph                prior   default 0.00000
                            learns  1       0.04770
                                    2       0.49465
                                    Default 0.10168
                            guesses default 0.22273
...                                             ...
Recognize Quadratic Pattern guesses default 0.00001
                            slips   default 0.04841
                            forgets 1       0.00000
                                    2       0.00000
                                    Default 0.00000

[997 rows x 1 columns]


In [12]:
model.fit(data_path = 'as.csv', multiprior = True,skills=skill)

print(model.params())

                                                    value
skill                             param   class          
Calculations with Similar Figures prior   default 0.00000
                                  learns  1       0.14472
                                          2       0.58241
                                          Default 0.27394
                                  guesses default 0.28368
                                  slips   default 0.25138
                                  forgets 1       0.00000
                                          2       0.00000
                                          Default 0.00000


In [16]:
model.fit(data_path = 'ct.csv', multiprior = True)

print(model.params())

preds = model.predict(data_path = 'ct.csv')
preds[['Anon Student Id', 'KC(Default)', 'Correct First Attempt', 
       'correct_predictions', 'state_predictions']].head(10)

                                                         value
skill                                  param   class          
Plot non-terminating improper fraction prior   default 0.00000
                                       learns  1       0.01435
                                               2       0.45802
                                               Default 0.20021
                                       guesses default 0.30617
...                                                        ...
Finding the intersection, SIF          guesses default 0.34157
                                       slips   default 0.18526
                                       forgets 1       0.00000
                                               2       0.00000
                                               Default 0.00000

[108 rows x 1 columns]


Unnamed: 0,Anon Student Id,KC(Default),Correct First Attempt,correct_predictions,state_predictions
773,0I891Gg,Plot non-terminating improper fraction,0,0.31551,0.01435
774,0I891Gg,Plot imperfect radical,0,0.11601,0.02548
775,0I891Gg,Plot terminating proper fraction,0,0.43342,0.02652
776,0I891Gg,Plot pi,1,0.79935,0.6995
777,0I891Gg,Plot terminating proper fraction,1,0.46703,0.1083
778,0I891Gg,Plot whole number,1,0.89727,0.23583
779,0I891Gg,Plot imperfect radical,0,0.22243,0.16811
780,0I891Gg,Plot non-terminating improper fraction,1,0.44443,0.21242
781,0I891Gg,Plot whole number,1,0.91618,0.39869
782,0I891Gg,Plot decimal - thousandths,0,0.21505,0.17364


# Item Difficulty Effect Model 

## Reference: 
https://www.researchgate.net/publication/225182390_KT-IDEM_Introducing_Item_Difficulty_to_the_Knowledge_Tracing_Model


In [None]:
# KtIdem.py

In [10]:
import sys
sys.path.append('../')
import numpy as np
from pyBKT.models import Model
np.seterr(divide='ignore', invalid='ignore')

skills = ["Percent Of", "Addition and Subtraction Integers", "Conversion of Fraction Decimals Percents", "Volume Rectangular Prism", "Venn Diagram", "Equation Solving Two or Fewer Steps", "Volume Cylinder", "Multiplication and Division Integers", "Area Rectangle", "Addition and Subtraction Fractions", ]

model = Model(seed = 0, num_fits = 20)
print("BKT")
#print(model.crossvalidate(data_path = "data/as.csv", skills = skills, metric = "auc"))
print(model.crossvalidate(data_path = "as.csv", skills = skills, metric = "rmse"))
print()
print("KT-IDEM")
#print(model.crossvalidate(data_path = "data/as.csv", skills = skills, multigs = True, metric = "auc"))
print(model.crossvalidate(data_path = "as.csv", skills = skills, multigs = True, metric = "rmse"))
print()

BKT
                                            rmse
skill                                           
Percent Of                               0.38326
Multiplication and Division Integers     0.40320
Equation Solving Two or Fewer Steps      0.45629
Addition and Subtraction Integers        0.41075
Addition and Subtraction Fractions       0.42161
Conversion of Fraction Decimals Percents 0.45583
Venn Diagram                             0.33191
Volume Rectangular Prism                 0.23364
Volume Cylinder                          0.29401
Area Rectangle                           0.14943

KT-IDEM
                                            rmse
skill                                           
Percent Of                               0.42834
Multiplication and Division Integers     0.41398
Equation Solving Two or Fewer Steps      0.47104
Addition and Subtraction Integers        0.42195
Addition and Subtraction Fractions       0.45459
Conversion of Fraction Decimals Percents 0.46094
Venn Di

In [24]:
import sys

import numpy as np
from pyBKT.models import Model
np.seterr(divide='ignore', invalid='ignore')

model.fit(data_path = 'as.csv', skills='Multiplication and Division Integers',multigs = True)


print("Fitted Skills:\n%s" % '\n'.join(model.coef_.keys()))

print(model.params())


Fitted Skills:
Multiplication and Division Integers
                                                       value
skill                                param   class          
Multiplication and Division Integers prior   default 0.92840
                                     learns  default 0.02777
                                     guesses 29924   0.12548
                                             29925   0.08288
                                             29926   0.14118
                                             30015   0.13587
                                             30016   0.37498
                                             30017   0.08115
                                             30726   0.08119
                                             31001   0.03519
                                             54495   0.02801
                                             54506   0.10297
                                     slips   29924   0.14551
                                 

In [43]:
as_df= pd.read_csv('as.csv', encoding = 'latin', low_memory = False) 
as_df.info()
# ct_df = pd.read_csv('ct.csv', encoding = 'latin')
# ct_df.info()
as_df[as_df["skill_name"]=="Multiplication and Division Integers"]['order_id'].unique()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525534 entries, 0 to 525533
Data columns (total 30 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   order_id              525534 non-null  int64  
 1   assignment_id         525534 non-null  int64  
 2   user_id               525534 non-null  int64  
 3   assistment_id         525534 non-null  int64  
 4   problem_id            525534 non-null  int64  
 5   original              525534 non-null  int64  
 6   correct               525534 non-null  int64  
 7   attempt_count         525534 non-null  int64  
 8   ms_first_response     525534 non-null  int64  
 9   tutor_mode            525534 non-null  object 
 10  answer_type           525534 non-null  object 
 11  sequence_id           525534 non-null  int64  
 12  student_class_id      525534 non-null  int64  
 13  position              525534 non-null  int64  
 14  type                  525534 non-null  object 
 15  

array([23944780, 23944797, 23944811, ..., 38214068, 38214194, 38214203])

In [32]:
# try to see the class for slip/guess is for students or question  

new = as_df[as_df["assistment_id"].astype("str").str.contains("29925")& as_df["skill_name"].str.contains("Multiplication and Division Integers")]
new.sort_values("problem_id").head()

Unnamed: 0,order_id,assignment_id,user_id,assistment_id,problem_id,original,correct,attempt_count,ms_first_response,tutor_mode,...,hint_count,hint_total,overlap_time,template_id,answer_id,answer_text,first_action,bottom_hint,opportunity,opportunity_original


In [22]:
ct_df[ct_df["Problem Name"].str.contains("RATIONAL1")].head()

Unnamed: 0.1,Unnamed: 0,Row,Anon Student Id,Problem Hierarchy,Problem Name,Problem View,Step Name,Step Start Time,First Transaction Time,Correct Transaction Time,Step End Time,Step Duration (sec),Correct Step Duration (sec),Error Step Duration (sec),Correct First Attempt,Incorrects,Hints,Corrects,KC(Default),Opportunity(Default)
36,5977,7765,3cjD21W,"Unit RATIONAL-IRRATIONAL-NUMBERS, Section RATI...",RATIONAL1-299,1,RationalNumberline1,2007-02-09 13:51:12.0,2007-02-09 13:51:26.0,2007-02-09 13:51:26.0,2007-02-09 13:51:26.0,14.0,14.0,,1,0,0,1,Plot whole number,1
37,5985,7773,3cjD21W,"Unit RATIONAL-IRRATIONAL-NUMBERS, Section RATI...",RATIONAL1-155,1,RationalNumberline1,2007-02-09 13:53:37.0,2007-02-09 13:53:50.0,2007-02-09 13:54:07.0,2007-02-09 13:54:07.0,25.0,,25.0,0,3,1,1,Plot terminating proper fraction,1
38,5986,7774,3cjD21W,"Unit RATIONAL-IRRATIONAL-NUMBERS, Section RATI...",RATIONAL1-193,1,RationalNumberline1,2007-02-09 13:54:10.0,2007-02-09 13:54:29.0,2007-02-09 13:54:29.0,2007-02-09 13:54:29.0,19.0,19.0,,1,0,0,1,Plot imperfect radical,1
39,5987,7775,3cjD21W,"Unit RATIONAL-IRRATIONAL-NUMBERS, Section RATI...",RATIONAL1-248,1,RationalNumberline1,2007-02-09 13:54:33.0,2007-02-09 13:54:44.0,2007-02-09 13:54:44.0,2007-02-09 13:54:44.0,11.0,11.0,,1,0,0,1,Plot terminating proper fraction,2
40,5988,7776,3cjD21W,"Unit RATIONAL-IRRATIONAL-NUMBERS, Section RATI...",RATIONAL1-117,1,RationalNumberline1,2007-02-09 13:54:46.0,2007-02-09 13:54:57.0,2007-02-09 13:54:57.0,2007-02-09 13:54:57.0,11.0,11.0,,1,0,0,1,Plot non-terminating improper fraction,1


In [110]:

print("BKT")
#print(model.crossvalidate(data_path = "data/as.csv", skills = skills, metric = "auc"))
print(model.crossvalidate(data_path = "builder_test_preprocessed.csv", metric = "rmse"))
print()
print("KT-IDEM")
print(model.crossvalidate(data_path = "builder_test_preprocessed.csv", multigs = 'skill_name', metric = "rmse"))

BKT
         rmse
skill        
82    0.43202
30    0.44888
59    0.47846
32    0.42905
53    0.47058
...       ...
100       NaN
105       NaN
108       NaN
116       NaN
64        NaN

[120 rows x 1 columns]

KT-IDEM
         rmse
skill        
82    0.43181
30    0.44901
59    0.47756
32    0.43021
53    0.46876
...       ...
100       NaN
105       NaN
108       NaN
116       NaN
64        NaN

[120 rows x 1 columns]


In [111]:

print("BKT")
#print(model.crossvalidate(data_path = "data/as.csv", skills = skills, metric = "auc"))
print(model.crossvalidate(data_path = "builder_test_preprocessed.csv", metric = "rmse"))
print()
print("KT-IDEM")
print(model.crossvalidate(data_path = "builder_test_preprocessed.csv", multigs = 'user_id', metric = "rmse"))

BKT
         rmse
skill        
82    0.43257
30    0.44889
59    0.47697
32    0.42945
53    0.46921
...       ...
100       NaN
105       NaN
108       NaN
116       NaN
64        NaN

[120 rows x 1 columns]

KT-IDEM
         rmse
skill        
82    0.50000
30    0.50000
59    0.50000
32    0.50000
53    0.50000
...       ...
100       NaN
105       NaN
108       NaN
116       NaN
64        NaN

[120 rows x 1 columns]


In [50]:
import numpy as np
import matplotlib.pyplot as plt
from pyBKT.models import Model
from pyBKT.generate import SimulatedDataSet

# Define the parameters for the BKT model
params = {
    'num_kcs': 3,
    'num_items': 20,
    'pi_0': np.array([0.2, 0.1, 0.3]),
    'trans_mat': np.array([
        [0.7, 0.3, 0.1],
        [0.2, 0.8, 0.2],
        [0.1, 0.2, 0.7]
    ]),
    'emission_mat': np.array([
        [0.9, 0.1],
        [0.1, 0.9],
        [0.9, 0.1]
    ])
}

# Generate simulated student learning data
data = SimulatedDataSet(seed=1, params=params)

# Fit the BKT model to the data
model = Model(seed=1)
model.fit(data.data)

# Predict student knowledge at each time step
predictions = model.predict(data.data)

# Calculate the Root Mean Squared Error (RMSE) between the predicted and actual knowledge
rmse = np.sqrt(np.mean((predictions - data.data["correct"]) ** 2))
print(f"RMSE: {rmse}")

# Plot the predicted knowledge for each skill over time
for kc_idx in range(params['num_kcs']):
    plt.plot(predictions[:, kc_idx], label=f"Skill {kc_idx+1}")
plt.legend()
plt.title("Predicted Knowledge Over Time")
plt.xlabel("Time")
plt.ylabel("Probability of Knowing")
plt.show()


ImportError: cannot import name 'SimulatedDataSet' from 'pyBKT.generate' (/home/tharaa/Desktop/BKTstuff/bktenv/lib/python3.10/site-packages/pyBKT/generate/__init__.py)

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from pyBKT.models import Model
from pyBKT.generate.synthetic_data import synthetic_data

# Define the parameters for the BKT model
params = {
    'num_kcs': 3,
    'num_items': 20,
    'pi_0': np.array([0.2, 0.1, 0.3]),
    'trans_mat': np.array([
        [0.7, 0.3, 0.1],
        [0.2, 0.8, 0.2],
        [0.1, 0.2, 0.7]
    ]),
    'emission_mat': np.array([
        [0.9, 0.1],
        [0.1, 0.9],
        [0.9, 0.1]
    ])
}

# Generate synthetic student learning data
data = synthetic_data(seed=1, num_students=100, num_items=params['num_items'], 
                      num_kcs=params['num_kcs'], params=params)

# Fit the BKT model to the data
model = Model(seed=1)
model.fit(data)

# Predict student knowledge at each time step
predictions = model.predict(data)

# Calculate the Root Mean Squared Error (RMSE) between the predicted and actual knowledge
rmse = np.sqrt(np.mean((predictions - data["activity"]) ** 2))
print(f"RMSE: {rmse}")

# Plot the predicted knowledge for each skill


TypeError: synthetic_data() got an unexpected keyword argument 'seed'