In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

### Load data with prediction

In [2]:
bkt_default = pd.read_csv('E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseKC/Predict/bkt_default_predict.txt', sep = '\t')

In [3]:
bkt_human = pd.read_csv('E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseKC/Predict/bkt_human_predict.txt', sep = '\t')

In [4]:
bkt_random = pd.read_csv('E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseKC/Predict/bkt_random_predict.txt', sep = '\t')

In [5]:
random = pd.read_csv('E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseKC/Predict/random_predict.txt', sep = '\t')

In [6]:
streak = pd.read_csv('E:/Project/AL/Data/WhereWhenHowNoFoa_80_reviseKC/Predict/streak_predict.txt', sep = '\t')

### How many KCs in each model

In [7]:
# Dictionary for mapping
dic = {'Default BKT': bkt_default, 'BKT (human-parameters)': bkt_human, 'BKT (random_parameters)': bkt_random, 'Random': random, 'Streak': streak}

In [8]:
def numberKC(dic, colname):
    arr = []
    names = []
    for key in dic:
        arr.append(pd.DataFrame(dic[key][colname].value_counts()).sort_index())
        names.append(key)
    res = pd.concat(arr, axis = 1)
    res.columns = names
    return res

In [9]:
# ptype_selection
numberKC(dic, 'KC (ptype_selection)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
AD JCommTable4.R0C0,305,409,310,2400,705.0
AD JCommTable4.R1C0,305,409,310,2401,705.0
AD JCommTable5.R0C0,305,409,310,2400,705.0
AD JCommTable5.R1C0,305,409,310,2400,705.0
AD JCommTable6.R0C0,305,409,310,2401,705.0
AD JCommTable6.R1C0,305,409,310,2401,705.0
AD JCommTable8.R0C0,305,409,310,2400,705.0
AD done,305,409,310,2401,705.0
AS JCommTable4.R0C0,23,36,22,44,19.0
AS JCommTable4.R1C0,32,30,35,37,18.0


In [10]:
# selected
numberKC(dic, 'KC (Selected)')

Unnamed: 0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
JCommTable4.R0C0,350,476,362,2476,739
JCommTable4.R1C0,367,473,373,2468,738
JCommTable5.R0C0,366,465,369,2466,723
JCommTable5.R1C0,330,449,345,2436,706
JCommTable6.R0C0,841,1176,928,7201,1610
JCommTable6.R1C0,841,1176,928,7201,1610
JCommTable8.R0C0,433,557,443,2818,892
done,841,1176,928,7201,1610


### Find the max Opportunity (ptype_selection) and related Prediction

The related prediction of max(Opportunity (ptype_selection)) turns out to be max(Prediction).

In [11]:
def findMaxOppPreKC(df):
    maxOppKCidx = df.loc[df.groupby(['KC (ptype_selection)', 'Anon Student Id'])['Opportunity (ptype_selection)'].idxmax()] # slice data by row index where max(Opportunity)
    maxOppPre = maxOppKCidx.groupby('KC (ptype_selection)').agg({'Opportunity (ptype_selection)': ['mean', 'count'], 'Prediction': ['mean', 'count']}).sort_index()
    # save the data
    return maxOppPre

In [12]:
findMaxOppPreKC(bkt_default)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,10.166667,30,0.9017617,30
AD JCommTable4.R1C0,10.166667,30,0.9605387,30
AD JCommTable5.R0C0,10.166667,30,0.8390437,30
AD JCommTable5.R1C0,10.166667,30,0.9868365,30
AD JCommTable6.R0C0,10.166667,30,0.5970175,30
AD JCommTable6.R1C0,10.166667,30,0.5131991,30
AD JCommTable8.R0C0,10.166667,30,0.7954862,30
AD done,10.166667,30,0.9954019,30
AS JCommTable4.R0C0,1.533333,15,1.516644e-13,15
AS JCommTable4.R1C0,1.454545,22,3.943112e-17,22


In [13]:
findMaxOppPreKC(bkt_human)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,13.633333,30,0.9644704,30
AD JCommTable4.R1C0,13.633333,30,0.9879462,30
AD JCommTable5.R0C0,13.633333,30,0.937621,30
AD JCommTable5.R1C0,13.633333,30,0.997252,30
AD JCommTable6.R0C0,13.633333,30,0.6591868,30
AD JCommTable6.R1C0,13.633333,30,0.5729629,30
AD JCommTable8.R0C0,13.633333,30,0.8265204,30
AD done,13.633333,30,0.999096,30
AS JCommTable4.R0C0,1.894737,19,1.618546e-13,19
AS JCommTable4.R1C0,1.304348,23,4.2913030000000007e-17,23


In [14]:
findMaxOppPreKC(bkt_random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,10.333333,30,0.8980731,30
AD JCommTable4.R1C0,10.333333,30,0.9581446,30
AD JCommTable5.R0C0,10.333333,30,0.8347107,30
AD JCommTable5.R1C0,10.333333,30,0.9849205,30
AD JCommTable6.R0C0,10.333333,30,0.5981475,30
AD JCommTable6.R1C0,10.333333,30,0.5154709,30
AD JCommTable8.R0C0,10.333333,30,0.7943037,30
AD done,10.333333,30,0.9945955,30
AS JCommTable4.R0C0,1.294118,17,1.558864e-13,17
AS JCommTable4.R1C0,1.521739,23,3.9540580000000004e-17,23


In [15]:
findMaxOppPreKC(random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,80.0,30,1.0,30
AD JCommTable4.R1C0,80.033333,30,1.0,30
AD JCommTable5.R0C0,80.0,30,1.0,30
AD JCommTable5.R1C0,80.0,30,1.0,30
AD JCommTable6.R0C0,80.033333,30,0.9811069,30
AD JCommTable6.R1C0,80.033333,30,0.9555364,30
AD JCommTable8.R0C0,80.0,30,0.9734336,30
AD done,80.033333,30,1.0,30
AS JCommTable4.R0C0,2.095238,21,1.608893e-13,21
AS JCommTable4.R1C0,1.48,25,4.1988060000000003e-17,25


In [16]:
findMaxOppPreKC(streak)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,23.5,30,0.9982386,30
AD JCommTable4.R1C0,23.5,30,0.9996456,30
AD JCommTable5.R0C0,23.5,30,0.996396,30
AD JCommTable5.R1C0,23.5,30,0.9999797,30
AD JCommTable6.R0C0,23.5,30,0.7794043,30
AD JCommTable6.R1C0,23.5,30,0.6966641,30
AD JCommTable8.R0C0,23.5,30,0.8804252,30
AD done,23.5,30,0.9999945,30
AS JCommTable4.R0C0,1.583333,12,1.792074e-13,12
AS JCommTable4.R1C0,1.384615,13,4.6114650000000006e-17,13


### How many questions each type were given to each student in each model

In [17]:
def ptypeNumEachStudentConcat(df):
    mean_arr, names = [], []
    for key in dic:
        dic[key]['Problem Type'] = dic[key]['Problem Name'].str[0:2] # creat 'Problem Name' column
        problem = dic[key][['Anon Student Id', 'Problem Name', 'Problem Type']].drop_duplicates()
        problem_cnt = problem.groupby(['Problem Type', 'Anon Student Id'], as_index = False).count()
        cnt_mean = problem_cnt.groupby('Problem Type').mean()
        mean_arr.append(cnt_mean)
        names.append(key)
    res = pd.concat(mean_arr, axis = 1)
    res.columns = names
    return res

In [18]:
ptypeNumEachStudentConcat(dic)

Unnamed: 0_level_0,Default BKT,BKT (human-parameters),BKT (random_parameters),Random,Streak
Problem Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AD,9.5,12.433333,9.7,80,20.133333
AS,9.1,11.633333,9.533333,80,14.2
M,7.933333,12.233333,9.833333,80,12.9
