In [2]:
import pandas as pd
import numpy as np
import matplotlib as plt

### How many rows per KC in each model

In [3]:
# Predictions exported for the BKT model (tab-separated text file).
bkt = pd.read_csv(
    'E:/Project/AL/Data/SamePerType/bkt_predict.txt',
    sep='\t',
)

In [4]:
# Predictions exported for the Random model (tab-separated text file).
random = pd.read_csv(
    'E:/Project/AL/Data/SamePerType/random_predict.txt',
    sep='\t',
)

In [5]:
# Predictions exported for the Streak model (tab-separated text file).
streak = pd.read_csv(
    'E:/Project/AL/Data/SamePerType/streak_predict.txt',
    sep='\t',
)

In [6]:
# Map each model's display name to its prediction table; the helper
# functions below iterate over this mapping to build per-model columns.
dic = dict(BKT=bkt, Random=random, Streak=streak)

In [7]:
def numberKC(dic, colname):
    """Count rows per distinct value of ``colname`` for every model.

    Parameters
    ----------
    dic : dict
        Maps a model name to its DataFrame; each frame must contain ``colname``.
    colname : str
        Column whose distinct values (KC labels) are counted.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of ``colname`` (sorted by label) and one
        column per model, in the iteration order of ``dic``. A value that does
        not occur in a given model is NaN in that model's column. An empty
        ``dic`` yields an empty DataFrame.
    """
    names = list(dic)
    counts = [dic[name][colname].value_counts() for name in names]
    if not counts:
        # pd.concat raises on an empty list; an empty table is more useful here.
        return pd.DataFrame()
    # keys= labels each model's column directly instead of renaming afterwards.
    res = pd.concat(counts, axis=1, keys=names)
    # concat does not sort the union of the per-model indexes, so sort once on
    # the combined table; otherwise the row order depends on which model a
    # label first appears in whenever the models do not share the same labels.
    return res.sort_index()

In [8]:
# Row counts per 'KC (ptype_selection)' label, one column per model
numberKC(dic, colname='KC (ptype_selection)')

Unnamed: 0,BKT,Random,Streak
AD JCommTable4.R0C0,242,1290,1057
AD JCommTable4.R1C0,223,1231,1021
AD JCommTable5.R0C0,280,1382,1197
AD JCommTable5.R1C0,217,1346,1119
AD JCommTable6.R0C0,365,1597,1290
AD JCommTable6.R1C0,406,1881,1428
AD JCommTable8.R0C0,197,1200,995
AD done,231,1218,1077
AS JCommTable4.R0C0,22,60,13
AS JCommTable4.R1C0,38,47,40


In [9]:
# Row counts per 'KC (Selected)' label, one column per model
numberKC(dic, colname='KC (Selected)')

Unnamed: 0,BKT,Random,Streak
JCommTable4.R0C0,293,1393,1105
JCommTable4.R1C0,343,1337,1109
JCommTable5.R0C0,311,1420,1254
JCommTable5.R1C0,269,1430,1173
JCommTable6.R0C0,775,4108,2284
JCommTable6.R1C0,822,4380,2408
JCommTable8.R0C0,311,1475,1167
done,631,3682,2034


### Find the max Opportunity (ptype_selection) and related Prediction

For each KC and student, the prediction on the row with the maximum Opportunity (ptype_selection) turns out to be the maximum Prediction.

In [10]:
def findMaxOppPreKC(df):
    """Summarise, per KC, the row where each student reached the highest opportunity.

    For every (KC, student) pair the row with the largest
    'Opportunity (ptype_selection)' is selected; those rows are then grouped
    by KC and the mean and count of both the opportunity and the
    'Prediction' columns are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'KC (ptype_selection)', 'Anon Student Id',
        'Opportunity (ptype_selection)' and 'Prediction'.

    Returns
    -------
    pandas.DataFrame
        Indexed by KC (sorted), with two-level columns
        (column name, 'mean' / 'count').
    """
    kc_col = 'KC (ptype_selection)'
    opp_col = 'Opportunity (ptype_selection)'

    # Row label of the highest-opportunity row for each (KC, student) pair.
    per_pair = df.groupby([kc_col, 'Anon Student Id'])[opp_col]
    top_labels = per_pair.idxmax()
    top_rows = df.loc[top_labels]

    summary = top_rows.groupby(kc_col).agg({
        opp_col: ['mean', 'count'],
        'Prediction': ['mean', 'count'],
    })
    return summary.sort_index()

In [11]:
# Per-KC summary of each student's highest-opportunity row, BKT model
findMaxOppPreKC(df=bkt)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,8.066667,30,0.4647255,30
AD JCommTable4.R1C0,7.433333,30,0.5033648,30
AD JCommTable5.R0C0,9.333333,30,0.412516,30
AD JCommTable5.R1C0,7.233333,30,0.4796262,30
AD JCommTable6.R0C0,12.166667,30,0.361208,30
AD JCommTable6.R1C0,13.533333,30,0.31946,30
AD JCommTable8.R0C0,6.566667,30,0.7071822,30
AD done,7.7,30,0.8007223,30
AS JCommTable4.R0C0,1.294118,17,8.893298e-13,17
AS JCommTable4.R1C0,1.809524,21,7.790908000000001e-17,21


In [12]:
# Per-KC summary of each student's highest-opportunity row, Random model
findMaxOppPreKC(df=random)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,43.0,30,0.9936609,30
AD JCommTable4.R1C0,41.033333,30,0.9927079,30
AD JCommTable5.R0C0,46.066667,30,0.9770985,30
AD JCommTable5.R1C0,44.866667,30,0.9744625,30
AD JCommTable6.R0C0,53.233333,30,0.894145,30
AD JCommTable6.R1C0,62.7,30,0.8733145,30
AD JCommTable8.R0C0,40.0,30,0.9706023,30
AD done,40.6,30,1.0,30
AS JCommTable4.R0C0,2.727273,22,1.190042e-12,22
AS JCommTable4.R1C0,1.958333,24,1.113683e-16,24


In [13]:
# Per-KC summary of each student's highest-opportunity row, Streak model
findMaxOppPreKC(df=streak)

Unnamed: 0_level_0,Opportunity (ptype_selection),Opportunity (ptype_selection),Prediction,Prediction
Unnamed: 0_level_1,mean,count,mean,count
KC (ptype_selection),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
AD JCommTable4.R0C0,35.233333,30,0.976299,30
AD JCommTable4.R1C0,34.033333,30,0.978244,30
AD JCommTable5.R0C0,39.9,30,0.9590885,30
AD JCommTable5.R1C0,37.3,30,0.954725,30
AD JCommTable6.R0C0,43.0,30,0.8525678,30
AD JCommTable6.R1C0,47.6,30,0.8058393,30
AD JCommTable8.R0C0,33.166667,30,0.9625427,30
AD done,35.9,30,0.9999953,30
AS JCommTable4.R0C0,1.857143,7,1.452328e-12,7
AS JCommTable4.R1C0,1.6,25,1.364981e-16,25


### How many questions of each type were given to each student in each model

In [15]:
def ptypeNumEachStudentConcat(df):
    """Average number of distinct problems per student, by problem type and model.

    The problem type is the first two characters of 'Problem Name'. For each
    model, distinct (student, problem) pairs are counted per
    (problem type, student) and those counts are averaged over students.

    Parameters
    ----------
    df : dict
        Maps a model name to its DataFrame; each frame must contain the
        columns 'Anon Student Id' and 'Problem Name'. (The parameter keeps its
        historical name ``df`` although it is a mapping of frames.)

    Returns
    -------
    pandas.DataFrame
        One row per problem type (sorted) and one column per model, in the
        iteration order of ``df``. A type absent from a model is NaN there.
        An empty mapping yields an empty DataFrame.

    Notes
    -----
    A 'Problem Type' column is added to (or overwritten in) every input frame
    in place, as in the original implementation.
    """
    mean_arr, names = [], []
    # Iterate over the argument rather than a notebook-level global so the
    # result depends only on what the caller passes in.
    for key, frame in df.items():
        # Create the 'Problem Type' column from the problem-name prefix.
        frame['Problem Type'] = frame['Problem Name'].str[0:2]
        problem = frame[['Anon Student Id', 'Problem Name', 'Problem Type']].drop_duplicates()
        # Distinct problems seen by each student within each type.
        problem_cnt = problem.groupby(['Problem Type', 'Anon Student Id'])['Problem Name'].count()
        # Average only the counts; the student-id column must not take part in
        # the mean (it is typically a string and cannot be averaged).
        cnt_mean = problem_cnt.groupby(level='Problem Type').mean()
        mean_arr.append(cnt_mean)
        names.append(key)
    if not mean_arr:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame()
    res = pd.concat(mean_arr, axis=1, keys=names)
    # concat leaves the union of the per-model indexes unsorted.
    return res.sort_index()

In [16]:
# Mean number of distinct problems per student, by problem type and model
ptypeNumEachStudentConcat(df=dic)

Unnamed: 0_level_0,BKT,Random,Streak
Problem Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AD,6.166667,40,22.333333
AS,4.533333,40,13.666667
M,6.1,40,12.1
