In [1]:
%matplotlib qt4
from __future__ import division

import math

from models import tools, optimize, models, filters
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['text.usetex'] = False
mpl.rcParams['text.latex.unicode'] = False
mpl.rcParams['axes.color_cycle'] = ['#02A5F4', 'orange', 'green']

In [55]:
data = tools.load_data(limit=600000, offset=1000000)
data = data[filters.sequentize(data)]

Loaded 566952 answers.


In [56]:
len(data)

111471

# Calibration

In [20]:
def produce_logger(SuperClass):

    class Logger(SuperClass):
    
        def __init__(self, *args, **kwargs):
            super(Logger, self).__init__(*args, **kwargs)
            self.timing = []

        def update(self, answer):
            item = self.items[answer.user_id, answer.place_id]
            if item.practices:
                diff = tools.time_diff(answer.inserted, item.last_inserted)
                self.timing += [(diff, answer.is_correct, self.predict(answer))]
            super(Logger, self).update(answer)
    
    return Logger

PFAWithLogging = produce_logger(models.PFAModel)
PFAGongWithLogging = produce_logger(models.PFAGong)
PFATimingWithLogging = produce_logger(models.PFATiming)
PFASpacingWithLogging = produce_logger(models.PFASpacing)
PFAForgettingWithLogging = produce_logger(models.PFAForgetting)
PFAStaircaseWithLogging = produce_logger(models.PFAStaircase)

In [34]:
def time_effect_log(t, c=2.0, a=0.15):
    return c - a*np.log(t)

def time_effect_div(t, c=5):
    return c / (t+1)**0.2

def time_effect_exp(t, a=0.96, b=5.5e-6):
    return a * np.exp(-b * t)

In [22]:
def chunks(l, n):
    for i in xrange(0, len(l), n):
        yield l[i:i+n]

def interval_error(timings, interval_size=500, metric=tools.rmse):

    answers = sorted(timings, key=lambda p: p[0])

    def get_diffs_mean(chunk):
        return np.mean([diff for diff, _, _ in chunk])
    def get_answers_mean(chunk):
        return np.mean([pred - obs for _, obs, pred in chunk])
    def get_answers_value(chunk):
        predictions = [pred for _, obs, pred in chunk if np.isfinite(pred)]
        observation = [obs for _, obs, pred in chunk if np.isfinite(pred)]
        return metric(observation, predictions)

    return [
        (get_diffs_mean(chunk), get_answers_value(chunk))
        for chunk in chunks(answers, interval_size)
    ]

In [57]:
pfa = PFAWithLogging(models.EloModel())
pfa.train(data)

In [58]:
pfat = PFATimingWithLogging(models.EloModel(), time_effect_fun=time_effect_log)
pfat.train(data)

In [68]:
pfate = PFATimingWithLogging(models.EloModel(), time_effect_fun=time_effect_exp)
pfate.train(data)

In [60]:
pfatd = PFATimingWithLogging(models.EloModel(), time_effect_fun=time_effect_div)
pfatd.train(data)

In [61]:
pfag = PFAGongWithLogging(models.EloModel(), decay=0.8, gamma=2.1, delta=-1.2)
pfag.train(data)

In [92]:
pfaf = PFAForgettingWithLogging(models.EloModel(), time_effect_fun=lambda t: 1 - 0.072 * np.log(t))
pfaf.train(data)

In [63]:
pfast = PFAStaircaseWithLogging(models.EloModel(),
    gamma=2.2, delta=-0.9,
    staircase={
        (0, 60): 1.43,
        (60, 90): 1.17,
        (90, 150): 1.01,
        (150, 300): 0.93,
        (300, 600): 0.82,
        (600, 60*30): 0.78,
        (60*30, 60*60*3): 0.76,
        (60*60*3, 60*60*24): 0.63,
        (60*60*24, 60*60*24*5): 0.42,
        (60*60*24*5, np.inf): 0.12,
    }
)
pfast.train(data)

In [101]:
metric = lambda y_true, y_pred: (np.mean(y_true) - np.mean(y_pred))

intervals1 = interval_error(pfat.timing, interval_size=12000, metric=metric)
intervals2 = interval_error(pfate.timing, interval_size=12000, metric=metric)
intervals3 = interval_error(pfatd.timing, interval_size=12000, metric=metric)

plt.figure(num=None, figsize=(8, 6), dpi=120)
plt.axhline(0, color='#888888', linestyle='--')

p1 = plt.plot([x[0] for x in intervals1], [x[1] for x in intervals1], '-')
p2 = plt.plot([x[0] for x in intervals2], [x[1] for x in intervals2], '-')
p3 = plt.plot([x[0] for x in intervals3], [x[1] for x in intervals3], '-')

plt.xscale('log')
plt.ylabel('Correctness minus prediction')
plt.xlabel('Time from previous attempt in seconds')
plt.xlim([min([x[0] for x in intervals1]), max([x[0] for x in intervals1])])
plt.legend([p1[0], p2[0], p3[0]], ('PFAE log', 'PFAE exp', 'PFAE poly'), loc=4)

plt.show()
plt.tight_layout()

In [75]:
intervals1 = interval_error(pfag.timing, interval_size=9000)

In [76]:
intervals2 = interval_error(pfaf.timing, interval_size=9000)

In [77]:
intervals3 = interval_error(pfast.timing, interval_size=9000)

In [78]:
p1 = plt.plot([x[0] for x in intervals1], [x[1] for x in intervals1], '.-')
p2 = plt.plot([x[0] for x in intervals2], [x[1] for x in intervals2], '.-', color='#59d700')
p3 = plt.plot([x[0] for x in intervals3], [x[1] for x in intervals3], '.-', color='orange')
plt.xscale('log')
plt.ylabel('RMSE')
plt.xlabel('time from previous attempt (seconds)')
plt.xlim([min([x[0] for x in intervals2]) - 20, max([x[0] for x in intervals2]) + 100000])
plt.legend([p2[0], p1[0], p3[0]], ('PFA Gong + Forgetting', 'PFAE + Forgetting', 'PFAE + Staircase',))
plt.show()

In [15]:
pfaf1 = PFAForgettingWithLogging(models.EloModel(), time_effect_fun=lambda t: 1.2 - 0.08*np.log(t))
pfaf1.train(data)

In [6]:
pfaf2 = PFAForgettingWithLogging(models.EloModel(), time_effect_fun=lambda t: 1.4 - 0.1*np.log(t))
pfaf2.train(data)

In [64]:
pfaf3 = PFAForgettingWithLogging(models.EloModel(), time_effect_fun=time_effect_d80)
pfaf3.train(data)

In [16]:
intervals1 = interval_error(pfaf1.timing, interval_size=9000)
intervals2 = interval_error(pfaf2.timing, interval_size=9000)
intervals3 = interval_error(pfaf3.timing, interval_size=9000)

In [17]:
p1 = plt.plot([x[0] for x in intervals1], [x[1] for x in intervals1], '.-')
p2 = plt.plot([x[0] for x in intervals2], [x[1] for x in intervals2], '.-', color='#59d700')
p3 = plt.plot([x[0] for x in intervals3], [x[1] for x in intervals3], '.-', color='orange')
plt.xscale('log')
plt.ylabel('RMSE')
plt.xlabel('time from previous attempt (seconds)')
plt.xlim([min([x[0] for x in intervals1]) - 20, max([x[0] for x in intervals1]) + 100000])
plt.legend([p1[0], p2[0], p3[0]], (r'$1.2\ -\ 0.1\ \log({t})$',
                                   r'$1.4\ -\ 0.1\ \log({t})$',
                                   r'$1.6\ -\ 0.1\ \log({t})$'))
plt.show()

In [26]:
plots = []
params = [(x, y) for x in [1.1, 1.2, 1.3] for y in [0.08, 0.09]]
intervals = []
metric = lambda y_true, y_pred: (np.mean(y_pred) - np.mean(y_true))

time_effect_template = lambda a, b: (lambda t: a - b*np.log(t))

for time_effect in [time_effect_template(*args) for args in params]:
    pfa = PFAForgettingWithLogging(models.EloModel(), time_effect_fun=time_effect)
    pfa.train(data)
    intervals += [interval_error(pfa.timing, interval_size=1000, metric=metric)]
    print len(intervals), 'done'

for interval in intervals:
    plots += [plt.plot([x[0] for x in interval], [x[1] for x in interval], '.-')]

plt.xscale('log')
plt.ylabel('observed - predicted')
plt.xlabel('time from previous attempt (seconds)')
plt.xlim([min([x[0] for x in intervals[0]]) - 20, max([x[0] for x in intervals[0]]) + 100000])
plt.legend([p[0] for p in plots], map(lambda x: 'a={},b={}'.format(*x), params))
plt.show()

1 done
2 done
3 done
4 done
5 done
6 done


# Different Approach

In [10]:
data = tools.add_spacing(data)

In [12]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = {i: None for i in zip(ranges, ranges[1:] + [np.inf])}

for interval in intervals.keys():
    lower, upper = interval
    data_slice = data[(data['spacing'] > lower) & (data['spacing'] < upper)].copy()
    
    if data_slice.empty:
        continue
        
    print interval, len(data_slice)
    
    pfat = models.PFATiming(models.EloModel(), time_effect_fun=lambda t: t/80)
    pfat_test = PerformanceTest(pfat, data_slice)
    pfat_test.run()
    
    intervals[interval] = pfat_test.results['train'].off
    
intervals = sorted([(np.mean(interval), value) for interval, value in intervals.items()], key=lambda x: x[0])

(600, 1800) 5403
(150, 300) 12653
(90, 150) 9562
(86400, 259200) 2470
(300, 600) 6758
(259200, 2592000) 355
(10800, 86400) 4430
(60, 90) 6124
(1800, 10800) 4449
(0, 60) 4970


In [19]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = {i: None for i in zip(ranges, ranges[1:] + [np.inf])}

for interval in intervals.keys():
    lower, upper = interval
    data_slice = data[(data['spacing'] > lower) & (data['spacing'] <= upper)]
    
    if len(data_slice) > 0:
        correct = len(data_slice[data_slice['is_correct'] == 1]) / len(data_slice)
        intervals[interval] = correct
        
intervals.pop((2592000, np.inf))

In [20]:
intervals = sorted([(np.mean(interval), value) for interval, value in intervals.items()], key=lambda x: x[0])
plt.plot([x[0] for x in intervals], [x[1] for x in intervals])
plt.xscale('log')
plt.show()

In [18]:
ind = np.arange(len(intervals))    # the x locations for the groups
width = 0.50       # the width of the bars: can also be len(x) sequence

correctness = [intervals[i] * 100 for i in sorted(intervals)]
incorrectness = [(1 - intervals[i]) * 100 for i in sorted(intervals)]

p1 = plt.bar(ind, correctness, width, color='#7FFF24')
p2 = plt.bar(ind, incorrectness, width, color='#ff512e', bottom=correctness)

plt.ylabel('%')
plt.xticks(ind+width/2., ('60 s', '90 s', '150 s', '5 m', '10 m',
                          '30 m', '3 h', '24 h', '3 d', '30 d'))
plt.yticks(np.arange(0, 101, 10))
plt.legend((p1[0], p2[0]), ('correct', 'incorrect'), loc=4)

plt.show()

In [6]:
items = {}

for _, row in data.iterrows():
    index = (row.user_id, row.place_id)
    answer = models.Answer(**row.to_dict())
    if index in items:
        items[index].append(answer)
    else:
        items[index] = [answer]

In [7]:
ranges = [0, 60, 90, 150, 300, 600, 1800, 10800, 86400, 259200, 2592000]
intervals = zip(ranges, ranges[1:] + [np.inf])

def get_interval(value, list_of_intervals):
    for lower, upper in list_of_intervals:
        if lower < value <= upper:
            return lower, upper

correct_before = {i: [] for i in intervals}
incorrect_before = {i: [] for i in intervals}

for index in items:
    answers = sorted(items[index], key=lambda x: x.inserted)
    for a1, a2 in zip(answers, answers[1:]):
        diff = tools.time_diff(a2.inserted, a1.inserted)
        interval = get_interval(diff, intervals)
        if interval is None:
            continue
        if a1.is_correct:
            correct_before[interval].append(a2.is_correct)
        else:
            incorrect_before[interval].append(a2.is_correct)        

In [9]:
correct_intervals = {i: np.mean(v) for i, v in correct_before.items()}
incorrect_intervals = {i: np.mean(v) for i, v in incorrect_before.items()}

In [17]:
ind = (np.arange(len(intervals)-1) -1.2) * 1.15   # the x locations for the groups
width = 0.4       # the width of the bars: can also be len(x) sequence

correctness = [correct_intervals[i] * 100 for i in sorted(intervals) if i != (2592000, np.inf)]
incorrectness = [incorrect_intervals[i] * 100 for i in sorted(intervals) if i != (2592000, np.inf)]

p1 = plt.bar(ind-0.25, correctness, width, color='#7FFF24')
p2 = plt.bar(ind+0.25, incorrectness, width, color='#ff512e')

plt.ylabel('%')
plt.xticks(ind+width/2., ('60 s', '90 s', '150 s', '5 m', '10 m',
                          '30 m', '3 h', '24 h', '3 d', '30 d'))
plt.yticks(np.arange(0, 101, 10))
plt.legend((p2[0], p1[0]), ('incorrect before', 'correct before'), loc=4)

plt.show()