In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt

### Load the data with predictions

In [2]:
# Read the tab-separated bkt_predict.txt file into the `bkt` frame.
bkt = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80/bkt_predict.txt',
    sep='\t',
)

In [3]:
# Read the tab-separated random_predict.txt file into the `random` frame.
random = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80/random_predict.txt',
    sep='\t',
)

In [4]:
# Read the tab-separated streak_predict.txt file into the `streak` frame.
streak = pd.read_csv(
    'E:/Project/AL/Data/WhereWhenHowNoFoa_80/streak_predict.txt',
    sep='\t',
)

### How many times each KC occurs in each model

In [5]:
# Lookup table: model label -> the DataFrame loaded for that model.
dic = dict(BKT=bkt, Random=random, Streak=streak)

In [6]:
def numberKC(dic, colname):
    """Count how often each value of `colname` occurs in every frame of `dic`.

    Parameters
    ----------
    dic : dict
        Mapping of label -> DataFrame; every frame must contain `colname`.
    colname : str
        Column whose distinct values are counted.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of `colname`, one column per key of
        `dic` (in iteration order) holding that frame's occurrence counts.
    """
    labels = list(dic)
    # One single-column count table per frame, each ordered by value.
    per_frame_counts = [
        pd.DataFrame(dic[label][colname].value_counts()).sort_index()
        for label in labels
    ]
    combined = pd.concat(per_frame_counts, axis=1)
    combined.columns = labels
    return combined

In [7]:
# Occurrence counts of each 'KC (ptype_selection)' value, one column per model in `dic`.
numberKC(dic, 'KC (ptype_selection)')

Unnamed: 0,BKT,Random,Streak
AD JCommTable4.R0C0,217,2473,950
AD JCommTable4.R1C0,192,2447,909
AD JCommTable5.R0C0,236,2536,1003
AD JCommTable5.R1C0,206,2456,917
AD JCommTable6.R0C0,324,2797,1068
AD JCommTable6.R1C0,335,3006,1159
AD JCommTable8.R0C0,174,2410,883
AD done,192,2428,965
AS JCommTable4.R0C0,29,45,30
AS JCommTable4.R1C0,34,37,23


In [8]:
# Occurrence counts of each 'KC (Selected)' value, one column per model in `dic`.
numberKC(dic, 'KC (Selected)')

Unnamed: 0,BKT,Random,Streak
JCommTable4.R0C0,276,2550,1006
JCommTable4.R1C0,252,2514,954
JCommTable5.R0C0,290,2603,1032
JCommTable5.R1C0,241,2493,921
JCommTable6.R0C0,768,7741,2340
JCommTable6.R1C0,776,7931,2453
JCommTable8.R0C0,277,2828,1113
done,601,7292,2170


### Find the max Opportunity (ptype_selection) and related Prediction

For each student and KC, the Prediction on the row with the maximum Opportunity (ptype_selection) turns out to be the maximum Prediction as well.

In [9]:
def findMaxOppPreKC(df):
    """Summarise, per KC, each student's highest-opportunity row.

    For every ('KC (ptype_selection)', 'Anon Student Id') pair the row with
    the largest 'Opportunity (ptype_selection)' is selected. Those rows are
    then grouped by KC, and the mean and count of both the opportunity and
    the 'Prediction' column are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'KC (ptype_selection)', 'Anon Student Id',
        'Opportunity (ptype_selection)' and 'Prediction'.

    Returns
    -------
    pandas.DataFrame
        Indexed by KC (sorted), with two-level columns
        (column name, 'mean' | 'count').
    """
    kc_col = 'KC (ptype_selection)'
    opp_col = 'Opportunity (ptype_selection)'
    stats = ['mean', 'count']

    # Index label of the max-opportunity row for each (KC, student) pair.
    top_row_labels = df.groupby([kc_col, 'Anon Student Id'])[opp_col].idxmax()
    top_rows = df.loc[top_row_labels]

    summary = top_rows.groupby(kc_col).agg({opp_col: stats, 'Prediction': stats})
    return summary.sort_index()

In [10]:
# Per-KC mean/count of each student's maximum opportunity and its Prediction, for the `bkt` frame.
findMaxOppPreKC(bkt)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,7.233333,30,0.4723846,30
AD JCommTable4.R1C0,6.4,30,0.622809,30
AD JCommTable5.R0C0,7.866667,30,0.3630378,30
AD JCommTable5.R1C0,6.866667,30,0.681475,30
AD JCommTable6.R0C0,10.8,30,0.4496009,30
AD JCommTable6.R1C0,11.166667,30,0.3900955,30
AD JCommTable8.R0C0,5.8,30,0.6714373,30
AD done,6.4,30,0.6849642,30
AS JCommTable4.R0C0,1.26087,23,2.664685e-08,23
AS JCommTable4.R1C0,1.36,25,4.999391e-08,25


In [11]:
# Per-KC mean/count of each student's maximum opportunity and its Prediction, for the `random` frame.
findMaxOppPreKC(random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,82.433333,30,1.0,30
AD JCommTable4.R1C0,81.566667,30,1.0,30
AD JCommTable5.R0C0,84.533333,30,1.0,30
AD JCommTable5.R1C0,81.866667,30,1.0,30
AD JCommTable6.R0C0,93.233333,30,0.9839323,30
AD JCommTable6.R1C0,100.2,30,0.9655203,30
AD JCommTable8.R0C0,80.333333,30,0.9676011,30
AD done,80.933333,30,1.0,30
AS JCommTable4.R0C0,2.142857,21,4.743222e-08,21
AS JCommTable4.R1C0,1.48,25,8.742531e-08,25


In [12]:
# Per-KC mean/count of each student's maximum opportunity and its Prediction, for the `streak` frame.
findMaxOppPreKC(streak)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,31.666667,30,0.9939265,30
AD JCommTable4.R1C0,30.3,30,0.9988394,30
AD JCommTable5.R0C0,33.433333,30,0.9901413,30
AD JCommTable5.R1C0,30.566667,30,0.9999344,30
AD JCommTable6.R0C0,35.6,30,0.8330129,30
AD JCommTable6.R1C0,38.633333,30,0.7764343,30
AD JCommTable8.R0C0,29.433333,30,0.8927448,30
AD done,32.166667,30,0.9999439,30
AS JCommTable4.R0C0,2.142857,14,5.276147e-08,14
AS JCommTable4.R1C0,1.533333,15,1.02856e-07,15


### How many questions of each type were given to each student in each model

In [13]:
def ptypeNumEachStudentConcat(df):
    """Average number of distinct problems of each type seen per student, per model.

    Parameters
    ----------
    df : dict
        Mapping of model label -> DataFrame. The parameter name is kept for
        backward compatibility even though it receives a dict. Each frame
        must contain 'Anon Student Id' and 'Problem Name' columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Problem Type' (the first two characters of
        'Problem Name'), with one column per key of `df`. Each cell is the
        mean, over students, of the number of distinct problem names of that
        type the student has rows for.

    Notes
    -----
    As a side effect, a 'Problem Type' column is added to (or overwritten
    on) every frame in `df`. This is kept deliberately so that code relying
    on that column after this call continues to work.
    """
    mean_arr, names = [], []
    # Iterate over the mapping that was passed in, not the module-level `dic`.
    for key, frame in df.items():
        # Create the 'Problem Type' column from the problem-name prefix.
        frame['Problem Type'] = frame['Problem Name'].str[0:2]
        problem = frame[['Anon Student Id', 'Problem Name', 'Problem Type']].drop_duplicates()
        # Distinct problems per (type, student).
        per_student = problem.groupby(['Problem Type', 'Anon Student Id'])['Problem Name'].count()
        # Average only the numeric count. Averaging the whole frame would
        # also try to average the string 'Anon Student Id' column, which
        # raises TypeError on pandas >= 2.0.
        mean_arr.append(per_student.groupby(level='Problem Type').mean())
        names.append(key)
    res = pd.concat(mean_arr, axis=1)
    res.columns = names
    return res

In [14]:
# `dic` maps model label -> DataFrame; the result has one column per model.
ptypeNumEachStudentConcat(dic)

Unnamed: 0_level_0,BKT,Random,Streak
Problem Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AD,5.266667,80,24.6
AS,5.466667,80,18.833333
M,5.766667,80,16.133333
