In [None]:
from __future__ import division

import pickle
import os

from sklearn import metrics
import numpy as np

from lentil import evaluate
from lentil import models

import mem

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
import matplotlib as mpl
# Save figures at print resolution.
mpl.rc('savefig', dpi=300)
# Render all figure text through LaTeX.
mpl.rc('text', usetex=True)
# amsfonts supplies \mathbb, which the axis labels further down rely on.
# Raw string: the backslash must reach LaTeX untouched. A non-raw
# '\usepackage' is parsed as a (broken) unicode escape under Python 3 or
# `unicode_literals`.
mpl.rc('text.latex', preamble=r'\usepackage{amsfonts}')

In [None]:
import matplotlib.lines as mlines

In [None]:
import logging
# Lower the root logger's threshold to DEBUG.
logging.getLogger().setLevel(logging.DEBUG)

In [None]:
# Load the pickled interaction history used by every cell below.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at a
# file you produced yourself or otherwise trust.
with open(os.path.join('data', 'mnemosyne_history.pkl'), 'rb') as f:
    history = pickle.load(f)

Set up the IRT benchmark models and memory models

In [None]:
def build_1pl_irt_model(history, filtered_history, split_history=None):
    """
    Fit a one-parameter logistic (1PL) IRT model

    :param history: accepted but not used by this builder
    :param filtered_history: passed to the model constructor as its data
    :param split_history: accepted but not used by this builder
    :return: the model instance, after ``fit()`` has been called on it
    """
    irt_options = dict(select_regularization_constant=True, name_of_user_id='user_id')
    fitted = models.OneParameterLogisticModel(filtered_history, **irt_options)
    fitted.fit()
    return fitted

def build_2pl_irt_model(history, filtered_history, split_history=None):
    """
    Fit a two-parameter logistic (2PL) IRT model

    :param history: accepted but not used by this builder
    :param filtered_history: passed to the model constructor as its data
    :param split_history: accepted but not used by this builder
    :return: the model instance, after ``fit()`` has been called on it
    """
    irt_options = dict(select_regularization_constant=True, name_of_user_id='user_id')
    fitted = models.TwoParameterLogisticModel(filtered_history, **irt_options)
    fitted.fit()
    return fitted

def build_student_biased_coin_model(history, filtered_history, split_history=None):
    """
    Fit the per-student biased-coin benchmark ('0PL IRT (students)')

    :param history: first positional argument of the model constructor
    :param filtered_history: second positional argument of the model constructor
    :param split_history: accepted but not used by this builder
    :return: the model instance, after ``fit()`` has been called on it
    """
    coin = models.StudentBiasedCoinModel(
        history,
        filtered_history,
        name_of_user_id='user_id',
    )
    coin.fit()
    return coin

def build_assessment_biased_coin_model(history, filtered_history, split_history=None):
    """
    Fit the per-assessment biased-coin benchmark ('0PL IRT (assessments)')

    :param history: first positional argument of the model constructor
    :param filtered_history: second positional argument of the model constructor
    :param split_history: accepted but not used by this builder
    :return: the model instance, after ``fit()`` has been called on it
    """
    coin = models.AssessmentBiasedCoinModel(
        history,
        filtered_history,
    )
    coin.fit()
    return coin

def meta_build_efc_model(
    strength_model='deck', using_delay=True,
    using_global_difficulty=False, debug_mode_on=True):
    """
    Create a builder function for one configuration of ``mem.EFCModel``

    :param strength_model: forwarded to ``mem.EFCModel`` (this notebook uses
        'deck', 'nreps' and None)
    :param using_delay: forwarded to ``mem.EFCModel``
    :param using_global_difficulty: forwarded to ``mem.EFCModel``; also selects
        the optimiser settings handed to ``fit``
    :param debug_mode_on: forwarded to ``mem.EFCModel``
    :return: a function ``(history, filtered_history, split_history=None)``
        that constructs the model on ``filtered_history``, fits it and returns it
    """
    model_options = dict(
        strength_model=strength_model,
        using_delay=using_delay,
        using_global_difficulty=using_global_difficulty,
        debug_mode_on=debug_mode_on)

    # The global-difficulty variant is fitted with a much smaller learning rate
    # and a tighter tolerance than the per-item variant.
    if using_global_difficulty:
        fit_options = dict(learning_rate=0.5, ftol=1e-7)
    else:
        fit_options = dict(learning_rate=5000., ftol=1e-4)

    def build_efc_model(history, filtered_history, split_history=None):
        efc = mem.EFCModel(filtered_history, **model_options)
        efc.fit(**fit_options)
        return efc

    return build_efc_model

def meta_build_logistic_regression_model(C=1.0):
    """
    Create a builder function for ``mem.LogisticRegressionModel``

    :param C: value passed as ``C`` to the model's ``fit`` method
    :return: a function ``(history, filtered_history, split_history=None)``
        that constructs the model on ``filtered_history``, fits it and returns it
    """
    def build_logistic_regression_model(history, filtered_history, split_history=None):
        lr_model = mem.LogisticRegressionModel(filtered_history)
        lr_model.fit(C=C)
        return lr_model

    return build_logistic_regression_model

In [None]:
# Maps a display name to a builder callable with the signature
# (history, filtered_history, split_history=None) that returns a fitted model.
#
# EFC names follow 'EFC <difficulty>/<delay>/<strength>', as read off the
# arguments below:
#   difficulty: G -> using_global_difficulty=True, I -> using_global_difficulty=False
#   delay:      D -> using_delay=True,             1 -> using_delay=False
#   strength:   N -> strength_model='nreps', Q -> strength_model='deck',
#               1 -> strength_model=None
# 'LR C=<value>' entries differ only in the C passed to the model's fit().
model_builders = {
    '0PL IRT (students)' : build_student_biased_coin_model,
    '0PL IRT (assessments)' : build_assessment_biased_coin_model,
    '1PL IRT' : build_1pl_irt_model,
    #'2PL IRT' : build_2pl_irt_model,
    'EFC G/D/N' : meta_build_efc_model(strength_model='nreps', using_delay=True, using_global_difficulty=True),
    'EFC G/D/1' : meta_build_efc_model(strength_model=None, using_delay=True, using_global_difficulty=True),
    'EFC G/1/N' : meta_build_efc_model(strength_model='nreps', using_delay=False, using_global_difficulty=True),
    'EFC G/D/Q' : meta_build_efc_model(strength_model='deck', using_delay=True, using_global_difficulty=True),
    'EFC G/1/Q' : meta_build_efc_model(strength_model='deck', using_delay=False, using_global_difficulty=True),
    'EFC I/D/N' : meta_build_efc_model(strength_model='nreps', using_delay=True, using_global_difficulty=False),
    'EFC I/D/1' : meta_build_efc_model(strength_model=None, using_delay=True, using_global_difficulty=False),
    'EFC I/1/N' : meta_build_efc_model(strength_model='nreps', using_delay=False, using_global_difficulty=False),
    'EFC I/D/Q' : meta_build_efc_model(strength_model='deck', using_delay=True, using_global_difficulty=False),
    'EFC I/1/Q' : meta_build_efc_model(strength_model='deck', using_delay=False, using_global_difficulty=False),
    'LR C=1' : meta_build_logistic_regression_model(C=1.),
    'LR C=0.1' : meta_build_logistic_regression_model(C=0.1),
    'LR C=10' : meta_build_logistic_regression_model(C=10.)
}

In [None]:
# List the registered model names, one per line, preceded by their count.
print("Number of models = %d" % len(model_builders))
print('\n'.join(model_builders))

Perform the evaluations

In [None]:
# Evaluate every builder in model_builders on the history with num_folds=10.
# The returned object is queried below for training / validation / test AUCs
# and for the per-entry validation data (results.val_ixn_data).
# NOTE(review): random_truncations=True presumably evaluates on randomly
# truncated histories -- confirm against lentil.evaluate.
results = evaluate.cross_validated_auc(
    model_builders,
    history,
    num_folds=10,
    random_truncations=True)

In [None]:
# dump results to file
# (binary mode is required: HIGHEST_PROTOCOL writes a binary pickle)
with open(os.path.join('results', 'mnemosyne_lesion_analysis.pkl'), 'wb') as f:
    pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# load results from file, replacing current results
# Everything below this cell therefore reflects the pickle on disk, not
# necessarily the cross-validation run of this session.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open(os.path.join('results', 'mnemosyne_lesion_analysis.pkl'), 'rb') as f:
    results = pickle.load(f)

In [None]:
# Shorthand for the history's data table; its 'deck' column is read further
# down to bin validation interactions.
df = history.data

First, select the regularization constant for the logistic regression benchmark model

In [None]:
# One row per logistic-regression regularization constant.
print('Train AUC\tTest AUC\tValidation AUC\t\tModel')
for k in ['LR C=0.1', 'LR C=1', 'LR C=10']:
    try:
        train_auc = results.training_auc_mean(k)
        val_auc = results.validation_auc_mean(k)
        val_auc_stderr = results.validation_auc_stderr(k)
        test_auc = results.test_auc(k)
    except KeyError:
        # Nothing stored under this name: leave its row out of the table.
        continue
    else:
        print('%0.3f\t\t%0.3f\t\t%0.3f +/- %0.3f\t\t%s' % (
            train_auc, test_auc, val_auc, val_auc_stderr, k.replace('Model ', '')))

In [None]:
# recall_prob_eqns and model_names are parallel lists: entry i of one describes
# entry i of the other. The plots below refer to a model by its 1-based
# position in these lists ("Model N" is model_names[N-1]).

# LaTeX form of each model's recall probability, used for tick labels and legends.
recall_prob_eqns = [
    r'$\phi(\theta_j)$',
    r'$\phi(-\beta_i)$',
    r'$\phi(\theta_j - \beta_i)$',
    r'$\phi(\beta \cdot x)$',
    r'$\exp{(-\theta \cdot d_{ij} / n_{ij})}$',
    r'$\exp{(-\theta \cdot d_{ij})}$',
    r'$\exp{(-\theta / n_{ij})}$',
    r'$\exp{(-\theta \cdot d_{ij} / q_{ij})}$',
    r'$\exp{(-\theta / q_{ij})}$',
    r'$\exp{(-\theta_i \cdot d_{ij} / n_{ij})}$',
    r'$\exp{(-\theta_i \cdot d_{ij})}$',
    r'$\exp{(-\theta_i / n_{ij})}$',
    r'$\exp{(-\theta_i \cdot d_{ij} / q_{ij})}$',
    r'$\exp{(-\theta_i / q_{ij})}$'
]

# Keys of model_builders in reporting order. Of the three logistic-regression
# variants only 'LR C=1' is listed here.
model_names = [
    '0PL IRT (students)',
    '0PL IRT (assessments)',
    '1PL IRT',
    'LR C=1',
    'EFC G/D/N',
    'EFC G/D/1',
    'EFC G/1/N',
    'EFC G/D/Q',
    'EFC G/1/Q',
    'EFC I/D/N',
    'EFC I/D/1',
    'EFC I/1/N',
    'EFC I/D/Q',
    'EFC I/1/Q'
]

# Guard against the two lists drifting out of step.
assert len(recall_prob_eqns) == len(model_names)

Dump all the evaluation results into a table

In [None]:
# One row per model, in the order given by model_names.
print('Train AUC\tTest AUC\tValidation AUC\t\tModel')
for k in model_names:
    try:
        train_auc = results.training_auc_mean(k)
        val_auc = results.validation_auc_mean(k)
        val_auc_stderr = results.validation_auc_stderr(k)
        test_auc = results.test_auc(k)
    except KeyError:
        # Nothing stored under this name: leave its row out of the table.
        continue
    else:
        print('%0.3f\t\t%0.3f\t\t%0.3f +/- %0.3f\t\t%s' % (
            train_auc, test_auc, val_auc, val_auc_stderr, k))

Make a boxplot and scatterplot for the validation/test AUCs

In [None]:
fig, ax1 = plt.subplots()

# 1-based model numbers (positions in model_names / recall_prob_eqns): the six
# EFC variants that use the delay term, differing in difficulty scope (G/I)
# and strength model (N/1/Q).
model_idxes = [5, 6, 8, 10, 11, 13]

# Top x-axis: the model numbers.
sns.set_style('dark')
ax2 = ax1.twiny()
ax2.set_xlabel('Model')
ax2.set_xticks(range(1, len(model_idxes) + 1))
ax2.set_xticklabels(model_idxes)
ax2.set_xlim([0.5, len(model_idxes) + .5])

# Bottom x-axis: one box per model over its validation AUCs, with the test AUC
# overlaid as an orange dot at the same x position.
sns.set_style('darkgrid')
ax1.set_xlabel(r'$\log{(\mathbb{P}[recall])}$')
ax1.boxplot([results.validation_aucs(model_names[i-1]) for i in model_idxes])
ax1.scatter(
    range(1, len(model_idxes) + 1),
    [results.test_auc(model_names[i-1]) for i in model_idxes],
    color='orange', s=100)

# Tick labels show only the exponent of each recall-probability equation
# (the axis label is the log of the recall probability), so strip "\exp" and
# the parentheses. Raw string: '\e' is not a valid string escape and must be
# matched as a literal backslash followed by 'exp'.
ax1.set_xticks(range(1, len(model_idxes) + 1))
ax1.set_xticklabels(
    [recall_prob_eqns[i-1].replace(r'\exp', '').replace(
            '(', '').replace(')', '') for i in model_idxes])
ax1.set_xlim([0.5, len(model_idxes) + .5])

# Proxy artists for the legend; boxplot/scatter were not given labels.
orange_circle = mlines.Line2D([], [], color='orange', marker='o', label='Test')
red_line = mlines.Line2D([], [], color='red', marker='_', label='Validation')
plt.legend(handles=[red_line, orange_circle], loc='best')

ax1.set_ylabel('AUC')

plt.savefig(os.path.join('figures', 'mnemosyne', 'auc-boxplots-memory-strengths.pdf'), bbox_inches='tight')
plt.show()

In [None]:
# Every model in model_names, addressed by its 1-based model number. Derived
# from the list length so the figure stays complete if a model is added or
# removed (previously hard-coded as range(1, 15)).
model_idxes = range(1, len(model_names) + 1)

# One box per model over its validation AUCs, with the test AUC overlaid as an
# orange dot at the same x position.
plt.xlabel('Model')
plt.boxplot([results.validation_aucs(model_names[i-1]) for i in model_idxes])
plt.scatter(
    range(1, len(model_idxes) + 1),
    [results.test_auc(model_names[i-1]) for i in model_idxes],
    color='orange', s=100)

plt.xlim([0.5, len(model_idxes) + .5])

# Proxy artists for the legend; boxplot/scatter were not given labels.
orange_circle = mlines.Line2D([], [], color='orange', marker='o', label='Test')
red_line = mlines.Line2D([], [], color='red', marker='_', label='Validation')
plt.legend(handles=[red_line, orange_circle], loc='best')

plt.ylabel('AUC')

plt.savefig(os.path.join('figures', 'mnemosyne', 'auc-boxplots-all.pdf'), bbox_inches='tight')
plt.show()

Compute validation AUCs for separate bins of data (separated by deck)

In [None]:
def compute_auc(y_trues, probas_pred):
    """
    Area under the ROC curve, or NaN when it cannot be computed

    :param y_trues: outcome labels; 1 is treated as the positive class
    :param probas_pred: predicted scores aligned with ``y_trues``; must not
        contain NaN
    :return: the AUC, or ``np.nan`` if scikit-learn raises while computing it
    :raises AssertionError: if any entry of ``probas_pred`` is NaN
    """
    assert not any(np.isnan(x) for x in probas_pred)
    try:
        fpr, tpr, _ = metrics.roc_curve(y_trues, probas_pred, pos_label=1)
        return metrics.auc(fpr, tpr)
    except Exception:
        # Best effort: callers pass arbitrary slices of the data and expect NaN
        # for slices where the AUC cannot be computed. Catch Exception rather
        # than using a bare `except:` so that KeyboardInterrupt / SystemExit
        # still stop a long-running cell.
        return np.nan

In [None]:
# One array per entry of results.val_ixn_data (presumably one per
# cross-validation fold): the 'deck' values of the rows selected by that
# entry's idxes. Each entry unpacks to (idxes, y_trues, probas_pred).
# NOTE(review): if df is a pandas DataFrame, `.ix` is deprecated and removed in
# pandas >= 1.0; whether `.loc` or `.iloc` is the right replacement depends on
# whether idxes holds index labels or positions -- confirm before changing.
decks_of_val_ixns = [df['deck'].ix[idxes].values for idxes, y_trues, probas_pred in results.val_ixn_data]

In [None]:
# Histogram of deck values pooled over all validation sets.
# 9 equal-width bins over [1, 10] give bin edges 1, 2, ..., 10; np.histogram
# treats every bin as half-open except the last, which includes its right edge.
num_bins = 9
rg = (1, 10)
hist, bin_edges = np.histogram([y for x in decks_of_val_ixns for y in x], bins=num_bins, range=rg)

In [None]:
# s_of_row_idx[row_idx][bin][entry] is the validation AUC of model number
# row_idx (1-based position in model_names), restricted to the interactions of
# one results.val_ixn_data entry whose deck value falls into that bin.
#
# Bin membership matches np.histogram: a value q belongs to [x, y), except in
# the last bin (bidx == len(bin_edges) - 2), which also accepts q == y.
# The same filter is applied to the labels and to the model's predictions
# (probas_pred[m]) so the two stay aligned. compute_auc yields NaN when the
# AUC cannot be computed for a slice.
#
# NOTE(review): 15 is hard-coded as len(model_names) + 1.
s_of_row_idx = {}
for row_idx in xrange(1, 15):
    m = model_names[row_idx - 1]
    s_of_row_idx[row_idx] = [[compute_auc(
                [p for p, q in zip(y_trues, vf) if q>=x and (q<y or (bidx==len(bin_edges)-2 and q==y))], 
                [p for p, q in zip(probas_pred[m], vf) if q>=x and (q<y or (bidx==len(bin_edges)-2 and q==y))]) \
                              for (_, y_trues, probas_pred), vf in zip(results.val_ixn_data, decks_of_val_ixns)] \
                             for bidx, (x, y) in enumerate(zip(bin_edges[:-1], bin_edges[1:]))]

In [None]:
# Bin midpoints, used as x-coordinates for the per-bin AUC curves.
t = [(x+y)/2 for x, y in zip(bin_edges[:-1], bin_edges[1:])]
# Line styling keyed by 1-based model number (position in model_names).
# Colour groups the models: black = 1PL IRT, blue = EFC G/./N,
# deepskyblue = EFC G/./Q, red = EFC I/./N, orange = EFC I/./Q.
color_of_row = {3: 'black', 5: 'blue', 7: 'blue', 8: 'deepskyblue', 9: 'deepskyblue', 10: 'red', 12: 'red', 13: 'orange', 14: 'orange'}
# Dashed lines mark the EFC variants built with using_delay=False (the '/1/' names).
linestyle_of_row = {3: '-', 5: '-', 7: '--', 8: '-', 9: '--', 10: '-', 12: '--', 13: '-', 14: '--'}

In [None]:
fig, ax1 = plt.subplots()

# Right-hand axis: grey histogram of the number of validation interactions per
# deck bin. align='edge' is explicit because the x values are the bins' LEFT
# edges; newer matplotlib centres bars on x by default, which would shift the
# histogram by half a bin relative to the AUC curves.
sns.set_style('dark')
ax2 = ax1.twinx()
ax2.bar(
    bin_edges[:-1], hist, [y-x for x, y in zip(bin_edges[:-1], bin_edges[1:])],
    align='edge', color='gray', alpha=0.5, linewidth=0)
ax2.set_ylabel('Frequency (number of interactions)')

# Left-hand axis: mean validation AUC per deck bin for the selected models
# (1-based model numbers), with standard-error bars.
sns.set_style('darkgrid')
lines = []
for row_idx in [3, 5, 7, 8, 9, 10, 12, 13, 14]:
    m = model_names[row_idx-1]
    s1 = s_of_row_idx[row_idx]
    l1 = ax1.errorbar(
        t, [np.nanmean(z) for z in s1], label=r'Model %d: %s' % (row_idx, recall_prob_eqns[row_idx-1]),
        # nanmean/nanstd ignore NaN AUCs, so the standard error must divide by
        # the number of non-NaN values rather than by len(z).
        yerr=[np.nanstd(z)/np.sqrt(np.count_nonzero(~np.isnan(z))) for z in s1],
        linestyle=linestyle_of_row[row_idx],
        color=color_of_row[row_idx])
    lines.append(l1)
ax1.set_xlabel('Deck ($q_{ij}$)')
ax1.set_ylabel('Validation AUC')
ax1.set_ylim([0.1, 0.9])

# Two legends side by side below the plot. A second plt.legend call replaces
# the first, so the first is re-attached to the axes as a plain artist.
first_legend = plt.legend(handles=lines[:5], loc='lower center', bbox_to_anchor=(0.25, -0.5))
plt.gca().add_artist(first_legend)
plt.legend(handles=lines[5:], loc='lower center', bbox_to_anchor=(0.75, -0.5))

plt.savefig(os.path.join('figures', 'mnemosyne', 'auc-vs-deck.pdf'), bbox_inches='tight')
plt.show()