In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

### Load data with predictions

In [2]:
# Tab-separated prediction file for the 'Default BKT' condition.
bkt_default = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseBKT/Predict/bkt_default_predict.txt',
    sep='\t',
)

In [3]:
# Tab-separated prediction file for the 'BKT (human-parameters)' condition.
bkt_human = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseBKT/Predict/bkt_human_predict.txt',
    sep='\t',
)

In [4]:
# Tab-separated prediction file for the 'BKT (random_parameters)' condition.
bkt_random = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseBKT/Predict/bkt_random_predict.txt',
    sep='\t',
)

In [5]:
# Tab-separated prediction file for the 'Random' condition.
random = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseBKT/Predict/random_predict.txt',
    sep='\t',
)

In [6]:
# Tab-separated prediction file for the 'Streak' condition.
streak = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseBKT/Predict/streak_predict.txt',
    sep='\t',
)

### How many KCs in each model

In [7]:
# Display label -> prediction DataFrame; the labels become the column
# headers of the comparison tables built from this mapping.
dic = {
    'Default BKT': bkt_default,
    'BKT (human-parameters)': bkt_human,
    'BKT (random_parameters)': bkt_random,
    'Random': random,
    'Streak': streak,
}

In [8]:
def numberKC(dic, colname):
    """Tabulate how often each value of ``colname`` occurs in every dataset.

    Parameters
    ----------
    dic : dict
        Maps a display label to a DataFrame that contains ``colname``.
    colname : str
        Name of the column whose distinct values are counted.

    Returns
    -------
    pandas.DataFrame
        Rows are the distinct values of ``colname``; there is one column
        per label in ``dic`` (in the dict's iteration order) holding the
        number of rows with that value in the corresponding DataFrame.
    """
    labels = list(dic)
    # One single-column frame of counts per dataset, ordered by value.
    per_dataset = [
        pd.DataFrame(dic[label][colname].value_counts()).sort_index()
        for label in labels
    ]
    # Place the per-dataset counts side by side, aligned on the value index.
    table = pd.concat(per_dataset, axis = 1)
    table.columns = labels
    return table

In [9]:
# Row counts per KC under the 'KC (ptype_selection)' column, one column per dataset in `dic`
numberKC(dic, colname='KC (ptype_selection)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
AD JCommTable4.R0C0,141,198,206,2400,858
AD JCommTable4.R1C0,141,198,206,2401,858
AD JCommTable5.R0C0,141,198,206,2400,858
AD JCommTable5.R1C0,141,198,206,2400,858
AD JCommTable6.R0C0,141,198,206,2401,858
AD JCommTable6.R1C0,141,198,206,2401,858
AD JCommTable8.R0C0,141,198,206,2400,858
AD done,141,198,206,2401,858
AS JCommTable4.R0C0,27,27,26,44,28
AS JCommTable4.R1C0,28,32,28,37,20


In [10]:
# Row counts per KC under the 'KC (Selected)' column, one column per dataset in `dic`
numberKC(dic, colname='KC (Selected)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
JCommTable4.R0C0,192,262,253,2476,910
JCommTable4.R1C0,196,265,265,2468,898
JCommTable5.R0C0,184,243,243,2466,886
JCommTable5.R1C0,170,238,234,2436,862
JCommTable6.R0C0,413,575,616,7201,2038
JCommTable6.R1C0,413,575,616,7201,2038
JCommTable8.R0C0,229,314,314,2818,1088
done,413,575,616,7201,2038


### Find the max Opportunity (ptype_selection) and related Prediction

For each KC and student, the Prediction on the row with the maximum Opportunity (ptype_selection) turns out to also be the maximum Prediction.

In [11]:
def findMaxOppPreKC(df,
                    kc_col='KC (ptype_selection)',
                    opp_col='Opportunity (ptype_selection)',
                    student_col='Anon Student Id',
                    pred_col='Prediction'):
    """Summarise, per KC, each student's row with the highest opportunity count.

    For every (KC, student) pair the row with the largest ``opp_col`` value is
    selected; those rows are then grouped by KC and the mean and count of the
    opportunity and prediction columns are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``kc_col``, ``student_col``, ``opp_col`` and ``pred_col``.
        Row labels are used to look the selected rows back up, so they should
        be unique (as they are for a frame freshly read with ``pd.read_csv``).
    kc_col, opp_col, student_col, pred_col : str, optional
        Column names. The defaults reproduce the original hard-coded
        'ptype_selection' analysis; pass e.g. ``kc_col='KC (Selected)'`` to
        run the same summary on another KC model.

    Returns
    -------
    pandas.DataFrame
        Indexed by KC (sorted), with two-level columns
        ``(opp_col | pred_col, 'mean' | 'count')``.
    """
    # Rows without an opportunity value can never be a group's maximum.
    # Dropping them up front keeps idxmax from producing a missing label
    # (and .loc from failing) when a whole group has no opportunity values.
    scored = df.dropna(subset=[opp_col])

    # Row label of the max-opportunity row for every (KC, student) pair.
    max_opp_labels = scored.groupby([kc_col, student_col])[opp_col].idxmax()
    max_opp_rows = scored.loc[max_opp_labels]

    summary = (
        max_opp_rows
        .groupby(kc_col)
        .agg({opp_col: ['mean', 'count'], pred_col: ['mean', 'count']})
        .sort_index()
    )
    return summary

In [12]:
# Per-KC summary of the max-opportunity rows for the 'Default BKT' data
findMaxOppPreKC(df=bkt_default)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,4.7,30,0.4165495,30
AD JCommTable4.R1C0,4.7,30,0.5641032,30
AD JCommTable5.R0C0,4.7,30,0.315281,30
AD JCommTable5.R1C0,4.7,30,0.6156606,30
AD JCommTable6.R0C0,4.7,30,0.3966181,30
AD JCommTable6.R1C0,4.7,30,0.3105599,30
AD JCommTable8.R0C0,4.7,30,0.6375568,30
AD done,4.7,30,0.7572034,30
AS JCommTable4.R0C0,1.421053,19,4.823311e-22,19
AS JCommTable4.R1C0,1.555556,18,1.794343e-20,18


In [13]:
# Per-KC summary of the max-opportunity rows for the 'BKT (human-parameters)' data
findMaxOppPreKC(df=bkt_human)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,6.6,30,0.6257977,30
AD JCommTable4.R1C0,6.6,30,0.7620987,30
AD JCommTable5.R0C0,6.6,30,0.5170969,30
AD JCommTable5.R1C0,6.6,30,0.8313856,30
AD JCommTable6.R0C0,6.6,30,0.5059282,30
AD JCommTable6.R1C0,6.6,30,0.4113111,30
AD JCommTable8.R0C0,6.6,30,0.7258383,30
AD done,6.6,30,0.9060729,30
AS JCommTable4.R0C0,1.5,18,6.662776e-22,18
AS JCommTable4.R1C0,1.52381,21,2.507017e-20,21


In [14]:
# Per-KC summary of the max-opportunity rows for the 'BKT (random_parameters)' data
findMaxOppPreKC(df=bkt_random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,6.866667,30,0.5718137,30
AD JCommTable4.R1C0,6.866667,30,0.7080559,30
AD JCommTable5.R0C0,6.866667,30,0.4713879,30
AD JCommTable5.R1C0,6.866667,30,0.7720077,30
AD JCommTable6.R0C0,6.866667,30,0.4808526,30
AD JCommTable6.R1C0,6.866667,30,0.3884614,30
AD JCommTable8.R0C0,6.866667,30,0.7040138,30
AD done,6.866667,30,0.8725162,30
AS JCommTable4.R0C0,1.625,16,6.053959e-22,16
AS JCommTable4.R1C0,1.4,20,2.297569e-20,20


In [15]:
# Per-KC summary of the max-opportunity rows for the 'Random' data
findMaxOppPreKC(df=random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,80.0,30,1.0,30
AD JCommTable4.R1C0,80.033333,30,1.0,30
AD JCommTable5.R0C0,80.0,30,1.0,30
AD JCommTable5.R1C0,80.0,30,1.0,30
AD JCommTable6.R0C0,80.033333,30,0.9782281,30
AD JCommTable6.R1C0,80.033333,30,0.9560007,30
AD JCommTable8.R0C0,80.0,30,0.9710388,30
AD done,80.033333,30,1.0,30
AS JCommTable4.R0C0,2.095238,21,1.007559e-21,21
AS JCommTable4.R1C0,1.48,25,3.524746e-20,25


In [16]:
# Per-KC summary of the max-opportunity rows for the 'Streak' data
findMaxOppPreKC(df=streak)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,28.6,30,0.9981875,30
AD JCommTable4.R1C0,28.6,30,0.9996651,30
AD JCommTable5.R0C0,28.6,30,0.995853,30
AD JCommTable5.R1C0,28.6,30,0.9999952,30
AD JCommTable6.R0C0,28.6,30,0.819074,30
AD JCommTable6.R1C0,28.6,30,0.7375056,30
AD JCommTable8.R0C0,28.6,30,0.893359,30
AD done,28.6,30,0.9999977,30
AS JCommTable4.R0C0,2.0,14,1.151932e-21,14
AS JCommTable4.R1C0,1.333333,15,4.1687739999999996e-20,15


### How many questions of each type were given to each student in each model

In [17]:
def ptypeNumEachStudentConcat(df):
    """Average number of distinct problems per student, by problem type.

    Parameters
    ----------
    df : dict
        Maps a display label to a DataFrame with 'Anon Student Id' and
        'Problem Name' columns. (The parameter keeps its historical name
        ``df`` even though a dict of DataFrames is expected.)

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Problem Type' (the first two characters of
        'Problem Name'), with one column per label in ``df``. Each cell is
        the mean, over students, of the number of distinct problems of that
        type the student has rows for.

    Notes
    -----
    Side effect: a 'Problem Type' column is added to (or overwritten on)
    every DataFrame in ``df``, in place.
    """
    mean_arr, names = [], []
    # Iterate over the mapping that was passed in rather than a notebook global.
    for key, frame in df.items():
        # Create the 'Problem Type' column from the 2-character name prefix.
        frame['Problem Type'] = frame['Problem Name'].str[0:2]
        # One row per distinct (student, problem) pair.
        problem = frame[['Anon Student Id', 'Problem Name', 'Problem Type']].drop_duplicates()
        # Number of distinct problems each student saw, per problem type.
        problem_cnt = problem.groupby(['Problem Type', 'Anon Student Id'])['Problem Name'].count()
        # Average only the counts. The student-id column is never fed to
        # mean(), so this works for string ids on pandas >= 2.0 and always
        # yields exactly one column per dataset.
        cnt_mean = problem_cnt.groupby(level='Problem Type').mean()
        mean_arr.append(cnt_mean)
        names.append(key)
    res = pd.concat(mean_arr, axis = 1)
    res.columns = names
    return res

In [18]:
# Mean number of distinct problems per student, by problem type, for every dataset in `dic`
ptypeNumEachStudentConcat(df=dic)

Unnamed: 0_level_0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
Problem Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AD,4.533333,6.5,6.566667,80,24.6
AS,4.066667,5.866667,7.133333,80,18.833333
M,4.7,6.333333,6.066667,80,16.133333
