In [263]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
import locale
from datetime import datetime
import statistics as stat

%matplotlib inline

In [257]:
# Load the preprocessed interaction log (tab-separated) for the assistments12
# dataset. The assistments09 export lives at
# 'data/assistments09/preprocessed_data.csv'; swap the path to analyse it
# instead -- reading both only wastes I/O because the second read replaces
# the first.
dat = pd.read_csv('data/assistments12/preprocessed_data.csv', sep='\t')
print(len(dat['timestamp'].unique()))
# Rows for one learner on one skill, kept for ad-hoc inspection.
target_dat = dat[(dat['user_id'] == 0) & (dat['skill_id'] == 206)]

2340432


In [258]:
# Number of distinct skills, then number of distinct learners.
for column in ('skill_id', 'user_id'):
    print(len(dat[column].unique()))

265
22589


In [276]:
def calc_m(B_s, B_i, B_si, ts, ds, cur_t, types):
    """Compute the memory activation at time ``cur_t``.

    Every past exposure at time ``t`` contributes ``(cur_t - t + 1) ** -d``,
    where ``d`` is that exposure's decay rate. The activation is the natural
    log of the summed contributions plus the three additive offsets.

    Args:
        B_s, B_i, B_si: additive offsets added to the log term.
        ts: exposure timestamps, in the same unit as ``cur_t``.
        ds: decay rates; ``ds[i]`` is used for ``ts[i]``. May be longer than
            ``ts`` (extra entries are ignored).
        cur_t: time at which the activation is evaluated. Expected to be no
            earlier than any entry of ``ts`` -- TODO confirm with callers.
        types: optional per-exposure flags (list, tuple, NumPy array, ...).
            A truthy flag keeps the exposure's full contribution, a falsy one
            halves it. ``None`` or an empty sequence disables the weighting.

    Returns:
        The activation as a NumPy float.
    """
    # The +1 makes an exposure that happens exactly at cur_t have age 1.
    ages = [cur_t - t + 1 for t in ts]
    tds = [age ** -ds[idx] for idx, age in enumerate(ages)]

    # Explicit None/length test: a bare `if types:` raises ValueError for
    # NumPy arrays and pandas Series with more than one element.
    if types is not None and len(types) > 0:
        weights = [1 if flag else 0.5 for flag in types]
        tds = np.multiply(weights, tds)

    return B_s + B_i + B_si + np.log(np.sum(tds))
    

def calc_ms(B_s, B_i, B_si, c, alpha, ts, types=None):
    """Walk through the exposures in order and record activation and decay.

    The first exposure decays at ``alpha``; exposure ``k > 0`` decays at
    ``c * exp(m[k-1]) + alpha``, where ``m[k-1]`` is the activation recorded
    for the previous exposure.

    NOTE(review): ``m[k-1]`` is the activation evaluated at ``ts[k-1]`` (i.e.
    right at the previous exposure), not re-evaluated at ``ts[k]`` -- confirm
    this is the intended definition.

    Args:
        B_s, B_i, B_si: additive offsets forwarded to ``calc_m``.
        c: scale applied to ``exp`` of the previous activation.
        alpha: decay floor.
        ts: exposure timestamps in chronological order.
        types: optional per-exposure flags forwarded (prefix by prefix) to
            ``calc_m``; a falsy value means no weighting.

    Returns:
        ``(ms, ds)``: two lists with one activation / decay per exposure.
    """
    activations, decays = [], []
    for k in range(len(ts)):
        if k == 0:
            decay = alpha
        else:
            decay = c * np.exp(activations[k - 1]) + alpha
        decays.append(decay)

        upto = k + 1
        seen_types = types[:upto] if types else None
        activations.append(
            calc_m(B_s, B_i, B_si, ts[:upto], decays, ts[k], seen_types)
        )
    return activations, decays

def calc_precall(T, s, m):
    """Map an activation ``m`` to a recall probability.

    Logistic function of ``(m - T) / s``: returns 0.5 when ``m == T`` and
    approaches 1 as ``m`` grows.

    Args:
        T: recall threshold.
        s: scale of the logistic (smaller means a sharper transition).
        m: activation value.
    """
    odds_against = np.exp((T - m) / s)
    return 1 / (1 + odds_against)

def test(B_s, B_i, B_si, c, alpha, ts, ds, cur_t, types=None):
    """Return the activation at probe time ``cur_t`` for a finished schedule.

    Thin wrapper around ``calc_m`` using the already-computed decay rates
    ``ds``. ``c`` and ``alpha`` are accepted but not used here.
    """
    return calc_m(B_s, B_i, B_si, ts, ds, cur_t, types)

In [277]:
# Model constants
B_s = B_i = B_si = 0  # additive activation offsets, all set to zero
alpha = 0.2  # Minimum rate of decay
c = 0.42  # Decay scale parameter
s = 0.26  # Noise in activation
T = 0  # Recall threshold

In [288]:
def pp(ms, ds, ts):
    """Print one comma-separated row per exposure.

    Columns: timestamp, activation, decay rate, recall probability. The
    recall probability is computed with the module-level ``T`` and ``s``.
    """
    for t, m, d in zip(ts, ms, ds):
        p_recall = calc_precall(T, s, m)
        print('{},{:.5f},{:.5f},{:.5f}'.format(t, m, d, p_recall))

def dtos(days):
    """Convert a duration in days to seconds.

    Args:
        days: a single number, or a list/tuple of numbers.

    Returns:
        ``days * 86400`` for a single value; a new list of converted values
        for a list or tuple.
    """
    seconds_per_day = 86400
    # Tuples must be handled here too: `tuple * int` repeats the tuple
    # instead of scaling its elements.
    if isinstance(days, (list, tuple)):
        return [d * seconds_per_day for d in days]
    return days * seconds_per_day

def mtos(minutes):
    """Convert a duration in minutes to seconds.

    Args:
        minutes: a single number, or a list/tuple of numbers.

    Returns:
        ``minutes * 60`` for a single value; a new list of converted values
        for a list or tuple.
    """
    seconds_per_minute = 60
    # Tuples must be handled here too: `tuple * int` repeats the tuple
    # instead of scaling its elements.
    if isinstance(minutes, (list, tuple)):
        return [m * seconds_per_minute for m in minutes]
    return minutes * seconds_per_minute

def tos(day, hour, minute):
    """Flatten a (day, hour, minute) offset into a single number of minutes.

    Despite the name, the result is in minutes, not seconds (unlike ``dtos``
    and ``mtos``, which return seconds).
    """
    minutes_per_day = 1440
    minutes_per_hour = 60
    return day * minutes_per_day + hour * minutes_per_hour + minute

# Nine exposures packed one minute apart versus nine exposures spread twelve
# hours apart; each schedule is followed by a single probe about 60 days later.
schedules = [
    (
        '==== massed ====',
        [tos(0, 0, minute) for minute in range(9)],
        tos(60, 0, 45),
    ),
    (
        '==== spaced ====',
        [tos(half_day // 2, 12 * (half_day % 2), 0) for half_day in range(9)],
        tos(64, 0, 0),
    ),
]

for header, ts, test_t in schedules:
    ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts)
    tm = test(B_s, B_i, B_si, c, alpha, ts, ds, test_t)
    print(header)
    pp(ms, ds, ts)
    print('test: m={}, p_recall={}'.format(tm, calc_precall(T, s, tm)))

==== massed ====
0,0.00000,0.20000,0.50000
1,0.62623,0.62000,0.91748
2,0.89748,0.98563,0.96929
3,1.01845,1.23043,0.98049
4,1.06918,1.36294,0.98389
5,1.08860,1.42345,0.98503
6,1.09519,1.44744,0.98540
7,1.09701,1.45569,0.98550
8,1.09730,1.45799,0.98552
test: m=-2.2649021437033894, p_recall=0.0001647095704148555
==== spaced ====
0,0.00000,0.20000,0.50000
720,0.23758,0.62000,0.71377
1440,0.22346,0.73263,0.70255
2160,0.21057,0.72517,0.69208
2880,0.20307,0.71844,0.68590
3600,0.19815,0.71456,0.68181
4320,0.19464,0.71204,0.67888
5040,0.19202,0.71025,0.67667
5760,0.18998,0.70891,0.67496
test: m=-2.2588365869813782, p_recall=0.0001685966069126347


In [289]:
# Ten exposures one minute apart, run under two labelings of the same schedule.
ts = [tos(0, 0, minute) for minute in range(10)]

# In a `types` list, True = test; False exposures get half weight in calc_m.
# NOTE(review): the two conditions are probed at different times
# (day 1 + 45 min vs. day 1 + 0 min) -- confirm this offset is intended.
conditions = [
    ('==== mixed ====', [False, True] * 5, tos(1, 0, 45)),
    ('==== study ====', [False] * 10, tos(1, 0, 0)),
]

for header, types, test_t in conditions:
    ms, ds = calc_ms(B_s, B_i, B_si, c, alpha, ts, types)
    tm = test(B_s, B_i, B_si, c, alpha, ts, ds, test_t, types)
    print(header)
    pp(ms, ds, ts)
    print('test: m={:.5f}, p_recall={:.10f}'.format(tm, calc_precall(T, s, tm)))

==== mixed ====
0,-0.69315,0.20000,0.06501
1,0.36136,0.41000,0.80057
2,0.50319,0.80282,0.87384
3,0.83417,0.89468,0.96115
4,0.77642,1.16722,0.95195
5,0.96603,1.11295,0.97624
6,0.85279,1.30355,0.96374
7,1.00500,1.18540,0.97948
8,0.87710,1.34740,0.96686
9,1.01893,1.20964,0.98052
test: m=-1.77338, p_recall=0.0010897723
==== study ====
0,-0.69315,0.20000,0.06501
1,-0.06691,0.41000,0.43601
2,0.24505,0.59282,0.71960
3,0.42470,0.73663,0.83664
4,0.53437,0.84223,0.88648
5,0.60386,0.91668,0.91073
6,0.64940,0.96825,0.92398
7,0.68028,1.00404,0.93191
8,0.70200,1.02926,0.93703
9,0.71785,1.04747,0.94053
test: m=-1.86944, p_recall=0.0007534106
