In [45]:
# Required
import json

import pandas as pd
import numpy as np
from scipy.optimize import minimize

In [46]:
# Package options
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [47]:
# ASSISTment Data
df = pd.DataFrame()
data = pd.read_csv('../skill_builder_data.csv')

In [48]:
# Prereq Graph
f = open('prereq2.json')
prereqs = json.load(f)

In [49]:
'''
Initialize alpha (grasping power), 
f_s (takeaway fractions), 
counter_f_s (index for skill to takeaway fractions)
'''
def initialize(n, all_skills):
    
    alpha_vector = np.zeros(n+1)
    
    f_s = {}
    counter_f_s = {}
    counter = 0
    for each in all_skills:
        f_s[each] = 0.0
        counter_f_s[each] = counter
        counter += 1
    return alpha_vector, f_s, counter_f_s

In [50]:
'''
Sigmoid function
1/(1+e^{-w'x})
'''
def sigmoid_function(weight, x):
    return 1/(1 + np.exp(-np.dot(weight, x)))

In [51]:
'''
Update Rule for Takeaway Fraction
F_s(T_i, n) = F_s(T_i, n-1) + (1-F_s[T_i, n-1])*alpha(T_i, n)
'''
def update_f_s(f_s, skill_name, alpha):
    f_s[skill_name] = f_s[skill_name] + (1-f_s[skill_name])*alpha
    return f_s

In [52]:
'''
Construct the feature vector
'''
def construct_feature_vector(n, f_s, counter_f_s, skill_name, prereqs):
    # initilalize feature vector
    feature_vector = np.zeros(n+1)
    
    # bias term
    feature_vector[n] = 1
    
    # add F_s(skill_name)
    feature_vector[counter_f_s[skill_name]] = f_s[skill_name]
    
    # add F_s for all prereqs
    for each_skill in prereqs[skill_name]:
        if(each_skill in f_s):
            feature_vector[counter_f_s[each_skill]] = f_s[each_skill]
    return feature_vector

In [53]:
def run_per_student(weight_vector, alpha_vector, f_s, counter_f_s, n_skills, per_student_frame):
    for index, row in per_student_frame.iterrows():
        # skill boiler
        skill_name = row['skill_name']
        skill_index = counter_f_s[skill_name]
    
        # construct feature vector
        feature_vector = construct_feature_vector(n_skills, f_s, counter_f_s, 
                                                  skill_name, prereqs)
        
        # alpha vector update for T_i
        alpha_vector[skill_index] = sigmoid_function(weight_vector, feature_vector)
        
        # Takeaway fraction update
        f_s = update_f_s(f_s, skill_name, alpha_vector[skill_index])
    return f_s

In [54]:
def run_all_students(weight_vector, user_ids, all_skills, sign=1.0):
    students_to_f_s = {}
    n_skills = len(all_skills)
    for each_user in user_ids:
        per_student = data.loc[data['user_id'] == each_user]
        per_student = per_student[pd.notnull(per_student['skill_name'])]
        per_student_frame = per_student.sort_values(by=['order_id'])

        alpha_vector, f_s, counter_f_s = initialize(len(all_skills), all_skills)
        all_f_s = run_per_student(weight_vector, alpha_vector, f_s, counter_f_s, n_skills, per_student_frame)
        s = 1
        for each in all_f_s.values():
            s = s + each
        students_to_f_s[each_user] = s
    
    mle = 1.
    for each_ks in students_to_f_s.values():
        mle = mle+np.log(each_ks)
    return mle

In [69]:
all_skills = data['skill_name'].unique()
user_ids = data['user_id'].unique()[:20]

In [70]:
w0 = np.zeros(len(all_skills)+1)
opt = minimize(run_all_students, w0, method='BFGS', args=(user_ids, all_skills), options={'disp':True})

Optimization terminated successfully.
         Current function value: 1.000005
         Iterations: 1
         Function evaluations: 456
         Gradient evaluations: 4


In [71]:
np.argmin(opt.x)

111

In [72]:
opt.x[111]

-21.17319164081335

In [73]:
alpha_vector, f_s, counter_f_s = initialize(len(all_skills), all_skills)

In [74]:
for k, v in counter_f_s.items():
    print(opt.x[v], k)

(0.0, nan)
(-0.052755590753834965, 'Circumference ')
(-0.043782573474287205, 'Scientific Notation')
(-0.12524167096401762, 'Rounding')
(-0.34427293818645616, 'Multiplication Fractions')
(0.0, 'Finding Slope From Situation')
(-0.052823247811550685, 'Probability of Two Distinct Events')
(-0.021472658692527569, 'Surface Area Rectangular Prism')
(-0.033913100180006123, 'Counting Methods')
(0.0, 'Percents')
(-0.097180906276421536, 'Exponents')
(-0.056256843490623626, 'Interior Angles Figures with More than 3 Sides')
(0.0, 'Percent Discount')
(0.0, 'Area Triangle')
(-0.060637637977716687, 'Solving for a variable')
(0.0, 'Recognize Quadratic Pattern')
(-0.037820295263089124, 'Fraction Of')
(0.0, 'Interpreting Coordinate Graphs ')
(0.0, 'Parts of a Polyomial, Terms, Coefficient, Monomial, Exponent, Variable')
(-0.072198537714890848, 'Scale Factor')
(-0.039207264946261443, 'Estimation')
(0.0, 'Reading a Ruler or Scale')
(-0.019806603646277891, 'Write Linear Equation from Situation')
(-0.0369492