In [1]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
import locale
from datetime import datetime
import statistics as stat

%matplotlib inline

In [2]:
# Read the tab-separated interaction log stored under data/assistments17.
dat = pd.read_csv('data/assistments17/preprocessed_data.csv', sep='\t')
# Number of distinct timestamp values (a NaN, if present, is counted once).
print(dat['timestamp'].nunique(dropna=False))
# Bare expression: evaluated, but not displayed because it is not the cell's last line.
dat.iloc[0]
# Rows belonging to user 0 on skill 206.
target_dat = dat.loc[dat['user_id'].eq(0) & dat['skill_id'].eq(206)]

665161


In [3]:
# Distinct skill ids, then distinct user ids, in the loaded log
# (a NaN, if present, is counted once).
print(dat['skill_id'].nunique(dropna=False))
print(dat['user_id'].nunique(dropna=False))

411
1708


In [4]:
def calc_m(B_s, B_i, B_si, ts, ds, cur_t, types):
    """Compute the activation ``m`` at time ``cur_t`` from past presentations.

    Each presentation ``i`` contributes ``(cur_t - ts[i] + 1) ** -ds[i]``; the
    contributions are summed, log-transformed and shifted by the three offsets.

    Parameters
    ----------
    B_s, B_i, B_si : float
        Additive offsets applied to the activation.
    ts : sequence of numbers
        Presentation times, in the same unit as ``cur_t``.
    ds : sequence of numbers
        Decay exponents; ``ds[i]`` belongs to ``ts[i]``. May be longer than
        ``ts`` (extra entries are ignored) but must not be shorter.
    cur_t : number
        Time at which the activation is evaluated.
    types : sequence of bool, NumPy array, or None
        Optional per-presentation flags, one per entry of ``ts``. A truthy
        entry keeps the full contribution, a falsy entry halves it. ``None``
        or an empty sequence leaves every contribution at full weight.

    Returns
    -------
    float
        ``B_s + B_i + B_si + log(sum of weighted contributions)``.
    """
    # The +1 keeps the age positive for a presentation that happens at cur_t.
    ages = [cur_t - t + 1 for t in ts]
    tds = [age ** -ds[idx] for idx, age in enumerate(ages)]

    # Explicit None/length check: a bare `if types:` raises ValueError for
    # NumPy arrays with more than one element.
    if types is not None and len(types) > 0:
        weights = [1 if flag else 0.5 for flag in types]
        tds = np.multiply(weights, tds)

    return B_s + B_i + B_si + np.log(np.sum(tds))
    

def calc_ms(B_s, B_i, B_si, c, alpha, ts, types=None):
    """Compute the activation and decay exponent at every presentation.

    For presentation ``k`` the decay exponent is ``alpha`` when ``k == 0`` and
    ``c * exp(ms[k-1]) + alpha`` otherwise. The activation ``ms[k]`` is then
    evaluated by ``calc_m`` at time ``ts[k]`` over presentations ``0..k``.
    NOTE(review): this looks like an activation-dependent decay model in the
    style of ACT-R spacing models; confirm against the intended reference.

    Parameters
    ----------
    B_s, B_i, B_si : float
        Additive offsets forwarded to ``calc_m``.
    c : float
        Decay scale parameter.
    alpha : float
        Minimum rate of decay.
    ts : sequence of numbers
        Presentation times in chronological order.
    types : iterable of bool or None, optional
        Per-presentation flags forwarded (prefix by prefix) to ``calc_m``.
        Lists, tuples and NumPy arrays are accepted. ``None`` or an empty
        iterable means no flags are passed on.

    Returns
    -------
    (list, list)
        ``ms`` (activations) and ``ds`` (decay exponents), each with one
        entry per element of ``ts``.
    """
    # Copy `types` into a plain list so the emptiness check below never hits
    # NumPy's ambiguous array truthiness, and so the slices passed down are lists.
    if types is not None:
        types = list(types)

    ms = []
    ds = []
    for k in range(len(ts)):
        # The decay of the k-th trace depends on the activation just before it.
        dk = alpha if k == 0 else c * np.exp(ms[k-1]) + alpha
        ds.append(dk)
        m = calc_m(B_s, B_i, B_si, ts[:k+1], ds, ts[k], types[:k+1] if types else None)
        ms.append(m)
    return ms, ds

def calc_precall(T, s, m):
    """Map an activation ``m`` to a recall probability with a logistic curve.

    ``T`` is the recall threshold and ``s`` the noise scale; the result is
    ``1 / (1 + exp((T - m) / s))``, i.e. 0.5 when ``m == T``.
    """
    scaled_gap = (T - m) / s
    return 1 / (1 + np.exp(scaled_gap))

def test(B_s, B_i, B_si, c, alpha, ts, ds, cur_t, types=None):
    """Return the activation at ``cur_t`` given the presentation history.

    Delegates straight to ``calc_m``. ``c`` and ``alpha`` are accepted so the
    signature parallels ``calc_ms``, but they are not used here.
    """
    return calc_m(B_s, B_i, B_si, ts, ds, cur_t, types)

In [5]:
# Constants
B_s = B_i = B_si = 0  # Additive offsets in the activation (see calc_m)
alpha = 0.2  # Minimum rate of decay
c = 0.42  # Decay scale parameter
s = 0.26  # Noise in activation
T = 0  # Recall threshold

In [6]:
def pp(ms, ds, ts):
    """Print one line per presentation: activation, decay and recall probability.

    The recall probability is computed with ``calc_precall`` using the
    module-level ``T`` and ``s``. ``ts`` only bounds the number of rows
    (rows are produced by zipping the three sequences); its values are not
    printed.
    """
    row = 'm={:.5f}, d={:.5f}, precall={:.5f}'
    for activation, decay, _ in zip(ms, ds, ts):
        print(row.format(activation, decay, calc_precall(T, s, activation)))

def dtos(days):
    """Convert a duration in days to seconds.

    Parameters
    ----------
    days : number, list or tuple of numbers
        A single duration or a sequence of durations, in days.

    Returns
    -------
    number or list
        ``days * 86400`` for a single value; a new list of converted values
        for a list or tuple.
    """
    seconds_per_day = 86400
    # Tuples must be handled here too: `tuple * int` repeats the tuple
    # instead of scaling its elements.
    if isinstance(days, (list, tuple)):
        return [d * seconds_per_day for d in days]
    return days * seconds_per_day

def mtos(minutes):
    """Convert a duration in minutes to seconds.

    Parameters
    ----------
    minutes : number, list or tuple of numbers
        A single duration or a sequence of durations, in minutes.

    Returns
    -------
    number or list
        ``minutes * 60`` for a single value; a new list of converted values
        for a list or tuple.
    """
    seconds_per_minute = 60
    # Tuples must be handled here too: `tuple * int` repeats the tuple
    # instead of scaling its elements.
    if isinstance(minutes, (list, tuple)):
        return [m * seconds_per_minute for m in minutes]
    return minutes * seconds_per_minute

def tos(day, hour, minute):
    """Collapse a (day, hour, minute) offset into a single count of minutes.

    Note that the result is in minutes (1440 per day, 60 per hour), not
    seconds, unlike ``dtos`` and ``mtos``.
    """
    minutes_per_day = 1440
    minutes_per_hour = 60
    total = day * minutes_per_day
    total = total + hour * minutes_per_hour
    total = total + minute
    return total

# Massed schedule: nine presentations one minute apart (minutes 0 through 8).
ts = [tos(0, 0, minute) for minute in range(9)]
ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts)
# Activation at the final test, 60 days and 45 minutes after the first presentation.
tm = test(B_s, B_i, B_si, c, alpha, ts, ds, tos(60, 0, 45))
print('==== massed ====')
pp(ms, ds, ts)
print('test: m={}, p_recall={}'.format(tm, calc_precall(T, s, tm)))

# Spaced schedule: nine presentations twelve hours apart (day 0, 00:00 through day 4, 00:00).
ts = [tos(half_day // 2, 12 * (half_day % 2), 0) for half_day in range(9)]

ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts)
# Activation at the final test, 64 days after the first presentation.
tm = test(B_s, B_i, B_si, c, alpha, ts, ds, tos(64, 0, 0))
print('==== spaced ====')
pp(ms, ds, ts)
print('test: m={}, p_recall={}'.format(tm, calc_precall(T, s, tm)))

==== massed ====
m=0.00000, d=0.20000, precall=0.50000
m=0.62623, d=0.62000, precall=0.91748
m=0.89748, d=0.98563, precall=0.96929
m=1.01845, d=1.23043, precall=0.98049
m=1.06918, d=1.36294, precall=0.98389
m=1.08860, d=1.42345, precall=0.98503
m=1.09519, d=1.44744, precall=0.98540
m=1.09701, d=1.45569, precall=0.98550
m=1.09730, d=1.45799, precall=0.98552
test: m=-2.2649021437033894, p_recall=0.0001647095704148555
==== spaced ====
m=0.00000, d=0.20000, precall=0.50000
m=0.23758, d=0.62000, precall=0.71377
m=0.22346, d=0.73263, precall=0.70255
m=0.21057, d=0.72517, precall=0.69208
m=0.20307, d=0.71844, precall=0.68590
m=0.19815, d=0.71456, precall=0.68181
m=0.19464, d=0.71204, precall=0.67888
m=0.19202, d=0.71025, precall=0.67667
m=0.18998, d=0.70891, precall=0.67496
test: m=-2.2588365869813782, p_recall=0.0001685966069126347


In [7]:
# Ten presentations one minute apart (minutes 0 through 9).
ts = [tos(0, 0, minute) for minute in range(10)]

# Alternate study (False) and test (True) trials, starting with study.
types = [bool(position % 2) for position in range(10)]

ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts, types)
# NOTE(review): this run is evaluated at day 1 + 45 min, whereas the
# all-study run later in this cell is evaluated at exactly day 1; confirm
# the 45-minute offset is intended, since it confounds the comparison.
tm = test(B_s, B_i, B_si, c, alpha, ts, ds, tos(1, 0, 45), types)
print('==== mixed ====')
pp(ms, ds, ts)
print('test: m={:.5f}, p_recall={:.10f}'.format(tm, calc_precall(T, s, tm)))

# All ten presentations are study trials (False); `ts` is reused from the run above.
types = [False] * 10
ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts, types)
# Activation at the final test, exactly one day after the first presentation.
tm = test(B_s, B_i, B_si, c, alpha, ts, ds, tos(1, 0, 0), types)
print('==== study ====')
pp(ms, ds, ts)
print('test: m={:.5f}, p_recall={:.10f}'.format(tm, calc_precall(T, s, tm)))

==== mixed ====
m=-0.69315, d=0.20000, precall=0.06501
m=0.36136, d=0.41000, precall=0.80057
m=0.50319, d=0.80282, precall=0.87384
m=0.83417, d=0.89468, precall=0.96115
m=0.77642, d=1.16722, precall=0.95195
m=0.96603, d=1.11295, precall=0.97624
m=0.85279, d=1.30355, precall=0.96374
m=1.00500, d=1.18540, precall=0.97948
m=0.87710, d=1.34740, precall=0.96686
m=1.01893, d=1.20964, precall=0.98052
test: m=-1.77338, p_recall=0.0010897723
==== study ====
m=-0.69315, d=0.20000, precall=0.06501
m=-0.06691, d=0.41000, precall=0.43601
m=0.24505, d=0.59282, precall=0.71960
m=0.42470, d=0.73663, precall=0.83664
m=0.53437, d=0.84223, precall=0.88648
m=0.60386, d=0.91668, precall=0.91073
m=0.64940, d=0.96825, precall=0.92398
m=0.68028, d=1.00404, precall=0.93191
m=0.70200, d=1.02926, precall=0.93703
m=0.71785, d=1.04747, precall=0.94053
test: m=-1.86944, p_recall=0.0007534106
