In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

### Load data with predictions

In [2]:
# Load the tab-separated bkt_default prediction file.
bkt_default = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_BKT_uniqueQuestions/Predict/bkt_default_predict.txt',
    sep='\t',
)

In [3]:
# Load the tab-separated bkt_human prediction file.
bkt_human = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_BKT_uniqueQuestions/Predict/bkt_human_predict.txt',
    sep='\t',
)

In [4]:
# Load the tab-separated bkt_random prediction file.
bkt_random = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_BKT_uniqueQuestions/Predict/bkt_random_predict.txt',
    sep='\t',
)

In [5]:
# Load the tab-separated random prediction file.
# NOTE(review): the name `random` would shadow the stdlib module if it were imported later.
random = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_BKT_uniqueQuestions/Predict/random_predict.txt',
    sep='\t',
)

In [6]:
# Load the tab-separated streak prediction file.
streak = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_BKT_uniqueQuestions/Predict/streak_predict.txt',
    sep='\t',
)

### How many KCs in each model

In [7]:
# Display label -> prediction DataFrame, one entry per model being compared.
# Insertion order determines the column order of the summary tables built from it.
dic = {
    'Default BKT': bkt_default,
    'BKT (human-parameters)': bkt_human,
    'BKT (random_parameters)': bkt_random,
    'Random': random,
    'Streak': streak,
}

In [8]:
def numberKC(dic, colname):
    """Count rows per distinct value of ``colname`` in every model's frame.

    Parameters
    ----------
    dic : dict
        Maps a display label to a DataFrame that contains ``colname``.
    colname : str
        Column whose values are counted (e.g. a KC column).

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of ``colname`` (sorted), one integer
        column per key of ``dic`` in insertion order. A value that never
        occurs in a given frame is reported as 0 rather than NaN, so the
        columns stay integer-typed.

    Raises
    ------
    ValueError
        If ``dic`` is empty (raised by ``pd.concat``).
    """
    # value_counts() ignores missing values, so NaN entries are not counted.
    counts = {label: frame[colname].value_counts() for label, frame in dic.items()}
    res = pd.concat(counts, axis=1)
    res.columns = list(counts)
    # Aligning on the union of values introduces NaN where a value is absent
    # from a frame; an absent value has a count of zero.
    res = res.fillna(0).astype(int)
    # Sort once at the end: the union of individually sorted indexes is not
    # guaranteed to be sorted when the frames hold different values.
    return res.sort_index()

In [9]:
# Row counts per 'KC (ptype_selection)' value, one column per model in dic
numberKC(dic, 'KC (ptype_selection)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
AD JCommTable4.R0C0,270,384,242,2400,705.0
AD JCommTable4.R1C0,270,384,242,2401,705.0
AD JCommTable5.R0C0,270,384,242,2400,705.0
AD JCommTable5.R1C0,270,384,242,2400,705.0
AD JCommTable6.R0C0,270,384,242,2401,705.0
AD JCommTable6.R1C0,270,384,242,2401,705.0
AD JCommTable8.R0C0,270,384,242,2400,705.0
AD done,270,384,242,2401,705.0
AS JCommTable4.R0C0,10,16,11,44,19.0
AS JCommTable4.R1C0,13,26,29,37,18.0


In [10]:
# Row counts per 'KC (Selected)' value, one column per model in dic
numberKC(dic, 'KC (Selected)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
JCommTable4.R0C0,291,420,261,2476,739
JCommTable4.R1C0,299,438,296,2468,738
JCommTable5.R0C0,283,400,259,2466,723
JCommTable5.R1C0,275,389,245,2436,706
JCommTable6.R0C0,624,864,611,7201,1610
JCommTable6.R1C0,624,864,611,7201,1610
JCommTable8.R0C0,411,548,372,2818,892
done,624,864,611,7201,1610


### Find the max Opportunity (ptype_selection) and related Prediction

For each student and KC, the Prediction on the row with the highest Opportunity (ptype_selection) turns out to also be the highest Prediction, i.e. max(Prediction).

In [11]:
def findMaxOppPreKC(df, kc_col='KC (ptype_selection)', opp_col='Opportunity (ptype_selection)',
                    student_col='Anon Student Id', pred_col='Prediction'):
    """Summarise, per KC, the rows where each student reached their highest opportunity.

    For every (KC, student) pair the row with the largest ``opp_col`` value is
    selected (the first such row when there are ties); those rows are then
    grouped by KC and aggregated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``kc_col``, ``opp_col``, ``student_col`` and ``pred_col``.
        Rows are looked up by index label, so the index should be unique.
    kc_col, opp_col, student_col, pred_col : str, optional
        Column names; the defaults are the columns this function originally
        hard-coded, so ``findMaxOppPreKC(df)`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        Indexed by KC (sorted), with two-level columns: ``mean`` and ``count``
        for both ``opp_col`` and ``pred_col``.
    """
    # Index label of the max-opportunity row for each (KC, student) pair.
    last_row_labels = df.groupby([kc_col, student_col])[opp_col].idxmax()
    last_rows = df.loc[last_row_labels]
    summary = last_rows.groupby(kc_col).agg({
        opp_col: ['mean', 'count'],
        pred_col: ['mean', 'count'],
    })
    return summary.sort_index()

In [12]:
# Per-KC mean/count of Opportunity and Prediction at each student's max opportunity (bkt_default)
findMaxOppPreKC(bkt_default)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,9.0,30,0.8609054,30
AD JCommTable4.R1C0,9.0,30,0.9371797,30
AD JCommTable5.R0C0,9.0,30,0.8024993,30
AD JCommTable5.R1C0,9.0,30,0.9712656,30
AD JCommTable6.R0C0,9.0,30,0.5706834,30
AD JCommTable6.R1C0,9.0,30,0.498437,30
AD JCommTable8.R0C0,9.0,30,0.8333484,30
AD done,9.0,30,0.9753635,30
AS JCommTable4.R0C0,1.25,8,9.353422e-07,8
AS JCommTable4.R1C0,1.3,10,1.469771e-06,10


In [13]:
# Per-KC mean/count of Opportunity and Prediction at each student's max opportunity (bkt_human)
findMaxOppPreKC(bkt_human)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,12.8,30,0.9643051,30
AD JCommTable4.R1C0,12.8,30,0.9882408,30
AD JCommTable5.R0C0,12.8,30,0.945666,30
AD JCommTable5.R1C0,12.8,30,0.9982779,30
AD JCommTable6.R0C0,12.8,30,0.6424637,30
AD JCommTable6.R1C0,12.8,30,0.5656916,30
AD JCommTable8.R0C0,12.8,30,0.8591965,30
AD done,12.8,30,0.9991551,30
AS JCommTable4.R0C0,1.230769,13,1.125595e-06,13
AS JCommTable4.R1C0,1.529412,17,1.79227e-06,17


In [14]:
# Per-KC mean/count of Opportunity and Prediction at each student's max opportunity (bkt_random)
findMaxOppPreKC(bkt_random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,8.066667,30,0.8149312,30
AD JCommTable4.R1C0,8.066667,30,0.9121383,30
AD JCommTable5.R0C0,8.066667,30,0.7414604,30
AD JCommTable5.R1C0,8.066667,30,0.9582551,30
AD JCommTable6.R0C0,8.066667,30,0.5551415,30
AD JCommTable6.R1C0,8.066667,30,0.4842127,30
AD JCommTable8.R0C0,8.066667,30,0.828834,30
AD done,8.066667,30,0.9659146,30
AS JCommTable4.R0C0,1.222222,9,9.610359e-07,9
AS JCommTable4.R1C0,1.45,20,1.586684e-06,20


In [15]:
# Per-KC mean/count of Opportunity and Prediction at each student's max opportunity (random)
findMaxOppPreKC(random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,80.0,30,1.0,30
AD JCommTable4.R1C0,80.033333,30,1.0,30
AD JCommTable5.R0C0,80.0,30,1.0,30
AD JCommTable5.R1C0,80.0,30,1.0,30
AD JCommTable6.R0C0,80.033333,30,0.9779038,30
AD JCommTable6.R1C0,80.033333,30,0.9484559,30
AD JCommTable8.R0C0,80.0,30,0.9595491,30
AD done,80.033333,30,1.0,30
AS JCommTable4.R0C0,2.095238,21,1.198609e-06,21
AS JCommTable4.R1C0,1.48,25,1.980457e-06,25


In [16]:
# Per-KC mean/count of Opportunity and Prediction at each student's max opportunity (streak)
findMaxOppPreKC(streak)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,23.5,30,0.9981995,30
AD JCommTable4.R1C0,23.5,30,0.9995933,30
AD JCommTable5.R0C0,23.5,30,0.9971901,30
AD JCommTable5.R1C0,23.5,30,0.9999821,30
AD JCommTable6.R0C0,23.5,30,0.7750829,30
AD JCommTable6.R1C0,23.5,30,0.698286,30
AD JCommTable8.R0C0,23.5,30,0.899449,30
AD done,23.5,30,0.9999943,30
AS JCommTable4.R0C0,1.583333,12,1.279004e-06,12
AS JCommTable4.R1C0,1.384615,13,2.078763e-06,13


### How many questions of each type were given to each student in each model

In [17]:
def ptypeNumEachStudentConcat(df):
    """Average number of distinct problems per student, by problem type and model.

    Parameters
    ----------
    df : dict
        Maps a display label to a DataFrame with 'Anon Student Id' and
        'Problem Name' columns. (The parameter keeps its historical name;
        it is a dict of frames, not a single frame.)

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Problem Type' (sorted), one column per key of ``df`` in
        insertion order. Each cell is the mean, over students, of the number
        of distinct 'Problem Name' values of that type the student has.
        Students with no problem of a type do not contribute to that type's
        mean; a type absent from a model is NaN.

    Side effects
    ------------
    Adds (or overwrites) a 'Problem Type' column on every frame in ``df``.

    Raises
    ------
    ValueError
        If ``df`` is empty (raised by ``pd.concat``).
    """
    means = {}
    # Iterate the argument, not the module-level `dic`, so the function works
    # on whatever mapping the caller passes in.
    for label, frame in df.items():
        # 'Problem Type' is the first two characters of 'Problem Name'.
        frame['Problem Type'] = frame['Problem Name'].str[0:2]
        problems = frame[['Anon Student Id', 'Problem Name', 'Problem Type']].drop_duplicates()
        # Distinct problems per (type, student). Selecting the count column
        # explicitly keeps the string student-id column out of the mean,
        # which pandas >= 2.0 would otherwise reject with a TypeError.
        per_student = problems.groupby(['Problem Type', 'Anon Student Id'])['Problem Name'].count()
        means[label] = per_student.groupby(level='Problem Type').mean()
    res = pd.concat(means, axis=1)
    res.columns = list(means)
    return res.sort_index()

In [18]:
# Mean number of distinct problems per student, by problem type, one column per model in dic
ptypeNumEachStudentConcat(dic)

Unnamed: 0_level_0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
Problem Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AD,9.0,12.8,8.066667,80,20.133333
AS,5.733333,6.233333,5.0,80,14.2
M,6.066667,9.766667,7.3,80,12.9
