In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import csv
import os
import sys

## Load data

In [2]:
data_dir = './data'

student_log_paths = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.startswith('student_log')]
training_label_path = os.path.join(data_dir, 'training_label.csv')
validation_test_label = os.path.join(data_dir, 'validation_test_label.csv')

dfs = []

for path in student_log_paths:
    temp = pd.read_csv(path)
    dfs.append(temp)
student_df = pd.concat(dfs)

training_label_df = pd.read_csv(training_label_path)
validation_test_label_df = pd.read_csv(validation_test_label)

  interactivity=interactivity, compiler=compiler, result=result)


## Remove duplicates

In [3]:
#student_df = student_df.drop_duplicates(['ITEST_id'])
#training_label_df = training_label_df.drop_duplicates(['ITEST_id'])

In [4]:
print("student_df.shape:", student_df.shape) 
print("training_label_df.shape:", training_label_df.shape)
print("validation_test_label_df.shape:", validation_test_label_df.shape)

student_df.shape: (942816, 77)
training_label_df.shape: (514, 5)
validation_test_label_df.shape: (172, 4)


In [5]:
skill = student_df.skill.unique()
skill_id = range(len(skill))
skill_id_map = dict(zip(skill, skill_id))

In [6]:
print(np.mean(training_label_df['isSTEM']))

0.31906614785992216


In [7]:
student_specific_columns = ["AveKnow",
                            "AveCarelessness",
                            "AveCorrect",
                            "NumActions",
                            "AveResBored",
                            "AveResEngcon",
                            "AveResConf",
                            "AveResFrust",
                            "AveResOfftask",
                            "AveResGaming"]

In [8]:
required_cols = ['ITEST_id'] + student_specific_columns
student_specific_df = student_df[required_cols].drop_duplicates()

# Static features

In [9]:
# combined_df = pd.merge(left=training_label_df, right=student_specific_df, how='left', left_on='ITEST_id', right_on='ITEST_id')
combined_df_train = training_label_df.merge(student_specific_df, left_on='ITEST_id', right_on='ITEST_id', how='left')
combined_df_train['AveCorrect'] = combined_df_train['AveCorrect_x']
del combined_df_train['AveCorrect_x']
del combined_df_train['AveCorrect_y']
X_static = combined_df_train[student_specific_columns].values
y_static = combined_df_train['isSTEM'].values

In [10]:
combined_df_test = pd.merge(left=validation_test_label_df, right=student_specific_df, how='left')
X_target = combined_df_test[student_specific_columns].values

In [11]:
combined_df_train.head()

Unnamed: 0,ITEST_id,SchoolId,MCAS,isSTEM,AveKnow,AveCarelessness,NumActions,AveResBored,AveResEngcon,AveResConf,AveResFrust,AveResOfftask,AveResGaming,AveCorrect
0,9,2,32,1,0.185138,0.099734,504,0.277149,0.644744,0.098078,0.162771,0.213378,0.005554,0.438492
1,27,1,21,0,0.142031,0.069297,129,0.330226,0.551367,0.122658,0.09542,0.34809,0.0369,0.348837
2,33,2,52,0,0.459813,0.202787,169,0.260426,0.650769,0.059971,0.061834,0.296286,0.010954,0.686391
3,35,2,34,0,0.255164,0.158848,993,0.222796,0.650079,0.069987,0.164347,0.153147,0.2368,0.379658
4,37,3,-999,0,0.071909,0.046183,121,0.326384,0.653445,0.125325,0.094714,0.451467,0.016241,0.305785


# Import model of DKT
If you want to change the model with different regularization parameters
Simply change
```python
network_config['lambda_o']
network_config['lambda_w1']
network_config['lambda_w2']
```
to the desired value

---
To use the original DKT model without any regularization:
```python
network_config['lambda_o'] = 0.0
network_config['lambda_w1'] = 0.0
network_config['lambda_w2'] = 0.0
```

To use the original DKT model with regularization:
```python
network_config['lambda_o'] = 0.1
network_config['lambda_w1'] = 0.3
network_config['lambda_w2'] = 3.0
```

# Obtain student's DKT feature

# DKT feature 

In [12]:
students_dk_df = pd.read_csv('./data/students_knowledge_states_dkt.csv')
combined_df = pd.merge(left=student_specific_df, right=students_dk_df, how='left')
train_df = training_label_df.merge(combined_df, left_on='ITEST_id', right_on='ITEST_id', how='left')
test_df = validation_test_label_df.merge(combined_df, left_on='ITEST_id', right_on='ITEST_id', how='left')
train_df['AveCorrect'] = train_df['AveCorrect_x']
del train_df['AveCorrect_x']
del train_df['AveCorrect_y']
test_df['AveCorrect'] = test_df['AveCorrect_x']
del test_df['AveCorrect_x']
del test_df['AveCorrect_y']

In [13]:
features_cols = list(train_df.columns.drop(['ITEST_id', 'isSTEM', 'SchoolId', 'MCAS']))
X_dkt = train_df[features_cols].values
y_dkt = train_df['isSTEM'].values
#X_test_dkt = test_df[features_cols].values

In [14]:
train_df[features_cols].head()

Unnamed: 0,AveKnow,AveCarelessness,NumActions,AveResBored,AveResEngcon,AveResConf,AveResFrust,AveResOfftask,AveResGaming,kt_0,...,kt_91,kt_92,kt_93,kt_94,kt_95,kt_96,kt_97,kt_98,kt_99,AveCorrect
0,0.185138,0.099734,504,0.277149,0.644744,0.098078,0.162771,0.213378,0.005554,0.056223,...,0.604331,0.811209,0.816579,0.72706,0.834404,0.784057,0.69899,0.686475,0.825749,0.438492
1,0.142031,0.069297,129,0.330226,0.551367,0.122658,0.09542,0.34809,0.0369,0.078004,...,0.588781,0.685849,0.66814,0.613515,0.57864,0.679307,0.593397,0.615776,0.589999,0.348837
2,0.459813,0.202787,169,0.260426,0.650769,0.059971,0.061834,0.296286,0.010954,0.032441,...,0.562058,0.64735,0.886021,0.810916,0.849762,0.765824,0.713093,0.765192,0.872928,0.686391
3,0.255164,0.158848,993,0.222796,0.650079,0.069987,0.164347,0.153147,0.2368,0.032341,...,0.565016,0.586461,0.676752,0.629468,0.601491,0.670198,0.592042,0.556765,0.653874,0.379658
4,0.071909,0.046183,121,0.326384,0.653445,0.125325,0.094714,0.451467,0.016241,0.073857,...,0.433965,0.515282,0.592892,0.605307,0.456578,0.573977,0.553958,0.523287,0.539461,0.305785


# DKT+ feature 

In [15]:
students_dk_df = pd.read_csv('./data/students_knowledge_states_dktp.csv')
combined_df = pd.merge(left=student_specific_df, right=students_dk_df, how='left')
train_df = training_label_df.merge(combined_df, left_on='ITEST_id', right_on='ITEST_id', how='left')
#test_df = validation_test_label_df.merge(combined_df, left_on='ITEST_id', right_on='ITEST_id', how='left')
train_df['AveCorrect'] = train_df['AveCorrect_x']
del train_df['AveCorrect_x']
del train_df['AveCorrect_y']
#test_df['AveCorrect'] = test_df['AveCorrect_x']
#del test_df['AveCorrect_x']
#del test_df['AveCorrect_y']

In [16]:
features_cols = list(train_df.columns.drop(['ITEST_id', 'isSTEM', 'SchoolId', 'MCAS']))
X_dktp = train_df[features_cols].values
y_dktp = train_df['isSTEM'].values
#X_test_dktp = test_df[features_cols].values

# Training the static machine learning model

In [17]:
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (auc, roc_curve, mean_squared_error, make_scorer, accuracy_score, precision_score, recall_score,
average_precision_score, f1_score)

from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [18]:
lr_param_grid = {
    'penalty': ['l1', 'l2']
}

In [19]:
svm_param_grid = {
    'C': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0], 
    'gamma': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0], 
    'kernel': ['rbf']
    
}

In [20]:
gbdt_param_grid = {
    'n_estimators': [200, 250, 300, 350, 400, 450, 500, 550], 
    'max_depth': [2, 3, 4, 5],
    'min_samples_leaf': [2, 3, 4, 5, 6, 7]
}

In [21]:
import itertools
mlp_layer_search_list = list(itertools.product([16, 64, 128, 256]))
mlp_layer_search_list += list(itertools.product([16, 64, 128, 256], [16, 64, 128, 256]))
mlp_layer_search_list += list(itertools.product([16, 64, 128, 256], [16, 64, 128, 256], [16, 64, 128, 256]))
mlp_param_grid = {
    'hidden_layer_sizes': mlp_layer_search_list,
    'early_stopping': [True]
}

In [22]:
lda_param_grid = {'solver':['svd', 'lsqr', 'eigen'], 'tol':[1e-10, 3e-10, 1e-9, 3e-9, 1e-8, 3e-8] }

qda_param_grid = {'tol':[1e-10, 3e-10, 1e-9, 3e-9, 1e-8, 3e-8]}

## Define customized scorer 

In [23]:
def auc_score(y_true, y_pred):
    fpr, tpr, thresholds = roc_curve(y_true, y_pred, pos_label=1)
    auc_ = auc(fpr, tpr)
    return auc_
auc_scorer = make_scorer(auc_score, greater_is_better = True)

In [24]:
def rmse_score(y_true, y_pred):
    rmse_ = np.sqrt(mean_squared_error(y_true, y_pred))
    return rmse_
rmse_scorer = make_scorer(rmse_score, greater_is_better = False)

In [25]:
#Evaluation metric of ASSISTments DM Competition
def admc_score(y_true, y_pred):
    # auc
    auc_ = auc_score(y_true, y_pred)
    
    # rmse
    rmse_ = rmse_score(y_true, y_pred)
    
    
    return auc_ + (1 - rmse_)

admc_scorer = make_scorer(admc_score, greater_is_better=True)

#  Experiment

## Define lists

In [26]:
algorithm_dict = {'LR': LogisticRegression(), 'SVM': SVC(), 'ANN':MLPClassifier(), 'GBDT': GradientBoostingClassifier(), 
                 'LDA': LinearDiscriminantAnalysis(), 'QDA': QuadraticDiscriminantAnalysis() }

#algorithm_dict = {'LDA': LinearDiscriminantAnalysis(), 'QDA': QuadraticDiscriminantAnalysis()}
feature_list = ['static', 'DKT', 'DKT+']
param_dict = {'LR': lr_param_grid, 'SVM': svm_param_grid, 'ANN': mlp_param_grid, 'GBDT': gbdt_param_grid,
              'LDA': lda_param_grid, 'QDA': qda_param_grid}



## Define cross-validation for multiple metrics

In [27]:
'''
cross validate the input estimator using stratified  n_fold shuffle split,
return the mean and std of each metric in dictionary format

params:
scoring_dict : with metric name (e.g. 'AUC') as keys and 
callable scoring function as values, which has the signature
of (y_true, y_pred) and return scores

return:
score_dict: with metric name (e.g. 'AUC') as keys and 
mean +/- 2*std string as values
'''

def multi_metrics_cv(estimator, X, y, scoring_method_dict, n_fold = 10):
    sss = StratifiedShuffleSplit(n_splits=5, random_state=42)
    
    for train_index, test_index in sss.split(X, y):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        
        ros = RandomOverSampler(random_state=42)
        X_train, y_train = ros.fit_sample(X_train, y_train)
        estimator.fit(X_train, y_train)
        
        
        y_pred = estimator.predict(X_test)
        
        score_list_dict =  {'AUC': [], 
                'RMSE': [], 
                'AUC + (1-RMSE)':[], 
                'Accuracy':[], 
                'Precision':[], 
                'Recall': [] ,
                'F1':[],
                'AP': []}
        score_dict = {}
        
        for metric in scoring_method_dict:
            
            score = scoring_method_dict[metric](y_test, y_pred)
            
            score_list_dict[metric].append(score)
    
    for metric in scoring_method_dict:
        mean = np.mean(score_list_dict[metric])
        std = np.std(score_list_dict[metric])
        val_str = "{:.3f} $\pm$ {:.3f}".format(mean, std)
        score_dict[metric] = val_str
        

    return score_dict

## Perform test on each clf with best parameters

In [28]:
#get experimental result of each clf on feature
#repeate 5 times and store the average +- 2*std
#e.g. score_dict['AUC'] = ['0.9 +- 0.01']
def eval_model2(clf, X, y):
    
    #A dictionary containing the name string or callable of scoring  methods 
    scoring_method_dict = {
        'AUC': auc_score,
        'RMSE': rmse_score, 
        'AUC + (1-RMSE)': admc_score,
        'Accuracy': accuracy_score, 
        'Precision': precision_score, 
        'Recall': recall_score, 
        'AP': precision_score, 
        'F1': f1_score
    }
    
    # finding the best parameters
    clf.fit(X,y)
   

    
    # get the best estimator
    estimator = clf.best_estimator_

    score_dict = multi_metrics_cv(estimator = estimator, X=X, y=y, scoring_method_dict = scoring_method_dict, n_fold=5)
    
    return score_dict

##  Perform parameter search and test on each feature set on 4 clfs 


In [29]:
#Do parameter search and get experimental result on given feature (string)
#return a dictionary containing the scores of each clf
from imblearn.over_sampling import RandomOverSampler 
def test_feature2(feature):
    
    cv = StratifiedShuffleSplit(n_splits=5, random_state=42)
    
    res_dict = {'name': [], 
                'AUC': [], 
                'RMSE': [], 
                'Accuracy':[], 
                'AUC + (1-RMSE)':[], 
                'Precision':[], 
                'Recall': [] ,
                'AP': [], 
                'F1':[]}
    best_param_dict = {'name': [], 'params': []}
    
    
    if feature == 'static':
        X_train = X_static
        y_train = y_static
        
    elif feature == 'DKT':
        X_train = X_dkt
        y_train = y_dkt
        
        
    elif feature == 'DKT+':
        X_train = X_dktp
        y_train = y_dktp
        
        
    else:
        print('Please select correct feature!')
        return 
    
    
    for algorithm in algorithm_dict:
        print(algorithm)
        
        #Parameter search
        clf = GridSearchCV(estimator=algorithm_dict[algorithm],
                  param_grid=param_dict[algorithm],
                  scoring=admc_scorer,
                  refit = True,
                  cv=cv,
                  verbose=1,
                  return_train_score=True)
        
        
        #estimator = clf.best_estimator_
        
        score_dict = eval_model2(clf=clf, X=X_train, y=y_train)
        
        #Append the score into dictionaries
        res_dict['name'].append(algorithm)
        best_param_dict['name'].append(algorithm)
        best_param_dict['params'].append(clf.best_params_)
        
        
        for score in score_dict:
            res_dict[score].append(score_dict[score])
    
    return res_dict, best_param_dict

In [None]:
import warnings
warnings.filterwarnings('ignore')

## Write result tabel of approach 2 to csv files

In [None]:
from sklearn.model_selection import GridSearchCV
for feature in feature_list:
    
    res_dict, best_param_dict = test_feature2(feature)
    
    #Convert the model name according to the features used (Optional)
    suffix = ('' if feature=='static' else ('-' + feature))
    
    
    for i in range(0,  len(res_dict['name']), 1):
        new_name = res_dict['name'][i] + suffix
        res_dict['name'][i] = new_name
        best_param_dict['name'][i] = new_name
    ####################################################
    
   
    
    res_df = pd.DataFrame(data = res_dict)
    
    print('{:s} result'.format(feature))
    col = list(res_df.columns)
    col = [col[-1]] + col[:-1]
    res_df = res_df[col]
    
    display(res_df)
    
    res_df.to_csv('./experiment/{:s}_score_2.csv'.format(feature), index=False, header=True)
    
    best_param_df = pd.DataFrame(data = best_param_dict)
    
    print('{:s} best_params'.format(feature))
    display(best_param_df)
         
    best_param_df.to_csv('./experiment/{:s}_best_params_2.csv'.format(feature, index=False, header=True))

LR
Fitting 5 folds for each of 2 candidates, totalling 10 fits
SVM
Fitting 5 folds for each of 121 candidates, totalling 605 fits


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done 605 out of 605 | elapsed:   15.7s finished


ANN
Fitting 5 folds for each of 84 candidates, totalling 420 fits


[Parallel(n_jobs=1)]: Done 420 out of 420 | elapsed:   11.1s finished


GBDT
Fitting 5 folds for each of 192 candidates, totalling 960 fits


[Parallel(n_jobs=1)]: Done 960 out of 960 | elapsed:  5.4min finished


LDA
Fitting 5 folds for each of 18 candidates, totalling 90 fits
QDA
Fitting 5 folds for each of 6 candidates, totalling 30 fits
static result


[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.0s finished


Unnamed: 0,name,AP,AUC,AUC + (1-RMSE),Accuracy,F1,Precision,RMSE,Recall
0,LR,0.500 $\pm$ 0.000,0.606 $\pm$ 0.000,1.034 $\pm$ 0.000,0.673 $\pm$ 0.000,0.452 $\pm$ 0.000,0.500 $\pm$ 0.000,0.572 $\pm$ 0.000,0.412 $\pm$ 0.000
1,SVM,0.923 $\pm$ 0.000,0.839 $\pm$ 0.000,1.499 $\pm$ 0.000,0.885 $\pm$ 0.000,0.800 $\pm$ 0.000,0.923 $\pm$ 0.000,0.340 $\pm$ 0.000,0.706 $\pm$ 0.000
2,ANN,0.327 $\pm$ 0.000,0.500 $\pm$ 0.000,0.680 $\pm$ 0.000,0.327 $\pm$ 0.000,0.493 $\pm$ 0.000,0.327 $\pm$ 0.000,0.820 $\pm$ 0.000,1.000 $\pm$ 0.000
3,GBDT,0.750 $\pm$ 0.000,0.796 $\pm$ 0.000,1.380 $\pm$ 0.000,0.827 $\pm$ 0.000,0.727 $\pm$ 0.000,0.750 $\pm$ 0.000,0.416 $\pm$ 0.000,0.706 $\pm$ 0.000
4,LDA,0.421 $\pm$ 0.000,0.578 $\pm$ 0.000,0.958 $\pm$ 0.000,0.615 $\pm$ 0.000,0.444 $\pm$ 0.000,0.421 $\pm$ 0.000,0.620 $\pm$ 0.000,0.471 $\pm$ 0.000
5,QDA,0.458 $\pm$ 0.000,0.638 $\pm$ 0.000,1.033 $\pm$ 0.000,0.635 $\pm$ 0.000,0.537 $\pm$ 0.000,0.458 $\pm$ 0.000,0.604 $\pm$ 0.000,0.647 $\pm$ 0.000


static best_params


Unnamed: 0,name,params
0,LR,{'penalty': 'l1'}
1,SVM,"{'C': 3.0, 'gamma': 30.0, 'kernel': 'rbf'}"
2,ANN,"{'early_stopping': True, 'hidden_layer_sizes':..."
3,GBDT,"{'max_depth': 4, 'min_samples_leaf': 4, 'n_est..."
4,LDA,"{'solver': 'lsqr', 'tol': 1e-10}"
5,QDA,{'tol': 1e-10}


LR
Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.1s finished


SVM
Fitting 5 folds for each of 121 candidates, totalling 605 fits


[Parallel(n_jobs=1)]: Done 605 out of 605 | elapsed:   53.8s finished


ANN
Fitting 5 folds for each of 84 candidates, totalling 420 fits


[Parallel(n_jobs=1)]: Done 420 out of 420 | elapsed:   15.1s finished


GBDT
Fitting 5 folds for each of 192 candidates, totalling 960 fits
