In [1]:
%matplotlib qt4
from __future__ import division

from models import tools, optimize, models, filters
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Colour cycle shared by the plots in this notebook.
# NOTE(review): 'axes.color_cycle' was deprecated in matplotlib 1.5 in favour of
# 'axes.prop_cycle'; newer matplotlib releases may reject this key — confirm
# against the installed version before re-running.
mpl.rcParams['axes.color_cycle'] = ['#02A5F4', 'orange', 'green']

# The Naive Way

In [None]:
# Load the slice of answers used by the naive search (limit=40000, offset=2400000).
# `limit` acts as an upper bound: a later cell asks for 1000000 and the loader
# reports "Loaded 972028 answers."
data = tools.load_data(limit=40000, offset=2400000)

In [None]:
# Optimiser for the naive parameter search, bound to the data loaded above.
grad = optimize.NaiveDescent(data)

In [None]:
# First naive search; its gradients are plotted below as the "step size = 3" curve.
# NOTE(review): the two positional arguments are presumably the initial gamma and
# delta — confirm against optimize.NaiveDescent.search_pfa.
descent1 = grad.search_pfa(1.5, -2, step_size=3, maxiter=100, precision=0.005)

In [None]:
# Second search with different positional arguments (presumably another starting
# point — confirm against search_pfa) and step_size=2.5.
# NOTE(review): descent2 is not referenced anywhere else in the visible notebook.
descent2 = grad.search_pfa(5, 0.5, step_size=2.5, maxiter=100, precision=0.005)

In [None]:
# Search through search_pfag (not search_pfa) with step_size=20 and maxiter=36;
# its gradients are plotted below as the "step size = 20" curve.
descent3 = grad.search_pfag(1.5, -2, step_size=20, maxiter=36, precision=0.005)

In [None]:
# Run a performance test of a PFA model, constructed on top of an Elo model,
# with fixed gamma/delta on the data loaded above.
# NOTE(review): the gamma/delta literals look like values copied from an earlier
# descent run — confirm their provenance.
elo = models.EloModel()
pfa = models.PFAModel(elo, gamma=2.99622612646, delta=-0.476090204636)
pfa_test = PerformanceTest(pfa, data)
pfa_test.run()

In [None]:
# Display the 'train' entry of the performance-test results as the cell output.
pfa_test.results['train']

In [None]:
def annotate(descent, number, mark, xadd, yadd):
    """Label one iteration of a descent on the current matplotlib axes.

    descent    -- object exposing ``params`` and ``grads``; both are read
                  with ``.loc[number]``
    number     -- iteration label to annotate; also used as the x coordinate
    mark       -- format string passed to ``plt.plot`` for the marker
    xadd, yadd -- offset of the text box from the annotated point, in data
                  coordinates
    """
    params_at_step = descent.params.loc[number]
    grad_at_step = descent.grads.loc[number]

    # Both parameters are shown rounded to two decimals.
    gamma = round(params_at_step.gamma, 2)
    delta = round(params_at_step.delta, 2)
    label = r'$\gamma={}$, $\delta={}$'.format(gamma, delta)

    point = (number, grad_at_step)
    text_position = (number + xadd, grad_at_step + yadd)
    box_style = {'boxstyle': "round", 'fc': "w"}

    plt.annotate(label,
                 xy=point, xycoords='data',
                 xytext=text_position, textcoords='data',
                 bbox=box_style)
    plt.plot(number, grad_at_step, mark)
    
# Annotate one point on each curve with its gamma/delta values. The point is
# looked up with .loc inside annotate(), so each descent must contain that
# iteration label.
#annotate(descent1, 1, 'go', 0.8, -0.006)
#annotate(descent1, 10, 'go', 0.8, -0.006)
annotate(descent1, 34, 'go', -8, -0.009)

#annotate(descent3, 1, 'ro', 0.7, 0.004)
#annotate(descent3, 11, 'ro', 0.8, 0.004)
annotate(descent3, 20, 'ro', 0.8, 0.006)

plt.xlabel('number of iteration')
plt.ylabel('predicted - observed')

# Fixed view window for the figure.
plt.xlim([0, 35])
plt.ylim([-0.08, 0.03])

# Gradients of both searches; the returned line handles feed the legend below.
# NOTE(review): the two slices differ in length (35 vs. 36) — confirm this is intended.
line1, = plt.plot(descent1.grads[:35], 'g', label=r'step size = $3$')
line2, = plt.plot(descent3.grads[:36], 'r', label=r'step size = $20$')

plt.legend(handles=[line1, line2], loc='lower right')

plt.show()

# The Proper Way

In [5]:
# Re-import models.filters so that edits made to the module on disk take effect
# in the running kernel (development helper).
# NOTE(review): the bare reload() builtin exists only in Python 2; Python 3
# provides it as importlib.reload.
reload(filters)

<module 'models.filters' from '/home/pavel/Projects/thesis/models/filters.py'>

In [25]:
# Load the slice used for the staircase search (limit=1000000, offset=1500000).
# This rebinds `data`, replacing the smaller slice loaded for the naive search.
data = tools.load_data(limit=1000000, offset=1500000)

Loaded 972028 answers.


In [29]:
# Keep only the answers selected by filters.spaced_presentations.
# Presumably the filter returns a boolean mask (its result is used to index
# `data`) — confirm in models.filters.
data_spaced = data[filters.spaced_presentations(data)]
# Parenthesised form: prints the same text under Python 2 and is valid Python 3.
print(len(data_spaced))

78027


In [30]:
# Keep only the answers selected by filters.massed_presentations.
# Presumably the filter returns a boolean mask (its result is used to index
# `data`) — confirm in models.filters.
data_massed = data[filters.massed_presentations(data)]
# Parenthesised form: prints the same text under Python 2 and is valid Python 3.
print(len(data_massed))

75162


In [32]:
# One optimiser per subset, keyed by the label that is echoed during the search
# and reused in the plot legend. The second tuple element holds extra keyword
# arguments forwarded to search_staircase (none for either subset).
descents = {
    'Spaced Presentations': (optimize.GradientDescent(data_spaced), {}),
    'Massed Presentations': (optimize.GradientDescent(data_massed), {}),
}

In [33]:
# Run the staircase search for every subset and store each result under the
# subset's label. The label is echoed first so the iteration log that follows
# can be attributed to the right subset.
dresults = {}
for name, (descent, kwargs) in descents.items():
    tools.echo(name, clear=False)
    dresults[name] = descent.search_staircase(init_learn_rate=0.015,
                                              number_of_iter=20,
                                              **kwargs)

Spaced Presentations
   2.50000    0.80000        inf
   2.63886    1.22040    0.00072
   2.39013    1.30804    0.00048
   2.25814    1.35747    0.00043
   2.17414    1.35478    0.00042
   2.12339    1.32521    0.00041
   2.09250    1.28376    0.00040
   2.07337    1.23897    0.00039
   2.06142    1.19527    0.00039
   2.05396    1.15464    0.00038
   2.04938    1.11780    0.00037
   2.04662    1.08480    0.00037
   2.04501    1.05542    0.00036
   2.04410    1.02934    0.00035
   2.04357    1.00621    0.00035
   2.04323    0.98573    0.00034
   2.04293    0.96759    0.00034
   2.04259    0.95153    0.00033
   2.04216    0.93732    0.00033
   2.04161    0.92477    0.00032
   2.04092    0.91369    0.00032
Massed Presentations
   2.50000    0.80000        inf
   2.82707    1.70931   -0.00139
   2.80447    1.46832   -0.00189
   2.81617    1.27605   -0.00162
   2.81853    1.15835   -0.00154
   2.81909    1.08448   -0.00152
   2.82028    1.03651   -0.00152
   2.82209    1.00476   -0.00152
 

In [36]:
# Draw one curve per search result and remember (label, line handle, result).
plots = []
for name, dresult in dresults.items():
    # dresult.plot() is unpacked as a one-element sequence; that element is
    # used as the legend handle.
    p, = dresult.plot()
    plots.append((name, p, dresult))

# A legend is only added when more than one curve was drawn.
if len(plots) > 1:
    # Raw string: '\g' and '\d' are not valid escape sequences in a normal
    # string literal (newer Python versions warn about them).
    # NOTE(review): the minus sign in front of delta is hard-coded — confirm it
    # matches the sign convention of `best['delta']`.
    gamma_delta = r' ($\gamma = {0[gamma]:.3f}, \delta = -{0[delta]:.3f}$)'
    plt.legend([item[1] for item in plots],
               [n + gamma_delta.format(r.best) for n, p, r in plots])

### 7 Runs with Std Err

In [7]:
# Load several slices of the data set (limit 400000; offsets 400000, 1400000,
# 2400000, ...) and keep, per slice, only the answers selected by
# filters.massed_presentations. A staircase search is then run on every slice.
# NOTE(review): only the massed results are produced here; `results_spaced`,
# which later cells read, is not created anywhere in the visible notebook.
n_runs = 7

train_data = []
for i in range(n_runs):
    # Integer literals instead of the former 4e5 / 1e6 floats. The stray
    # "data = self._reader.read(nrows)" line in this cell's recorded output
    # looks like the tail of a warning about a non-integer row count —
    # presumably triggered by passing floats as limit/offset.
    limit, offset = 400000, i * 1000000 + 400000
    df = tools.load_data(limit=limit, offset=offset, echo_loaded=False)
    df = df[filters.massed_presentations(df)]
    # Store a copy so the kept frame is independent of the filtered selection.
    train_data.append(df.copy())
    tools.echo('[{}]: Loaded {} answers.'.format(i, len(df)), clear=False)

tools.echo('Data loaded.', clear=False)

results_massed = []
for i in range(n_runs):
    descent = optimize.GradientDescent(train_data[i])
    res = descent.search_staircase(init_learn_rate=0.02, number_of_iter=15,
                                   echo_iterations=False, random_factor=1,
                                   random_chance=20)
    results_massed.append(res)
    tools.echo('[{}]: done!'.format(i), clear=False)

[0]: Loaded 73626 answers.
[1]: Loaded 55088 answers.
[2]: Loaded 57805 answers.
[3]: Loaded 69797 answers.
[4]: Loaded 66576 answers.
[5]: Loaded 80221 answers.
[6]: Loaded 58058 answers.
Data loaded.
[0]: done!
[1]: done!
[2]: done!
[3]: done!
[4]: done!
[5]: done!
[6]: done!


  data = self._reader.read(nrows)


In [38]:
def prepare_plot_data(descent_results):
    """Average the final staircase of several descent results point by point.

    For every result the x values are the means of
    ``res.model.metadata['staircase_times'][i]`` for each ``i`` in
    ``res.intervals``; the y values are the values of the last entry of
    ``res.staircases``, ordered by key.

    Returns a tuple ``(x_axis, y_axis, e_vals)`` of equally long lists holding,
    per position, the mean x, the mean y and ``np.std(y) / 2`` across all
    results. The number of positions is taken from the first result. An empty
    ``descent_results`` yields three empty lists instead of an IndexError.

    NOTE(review): x follows the order of ``res.intervals`` while y follows the
    sorted staircase keys — this assumes both orders agree; confirm.
    NOTE(review): the surrounding section title mentions a standard error,
    which would normally be std / sqrt(n) rather than std / 2 — confirm the
    intended scaling.
    """
    x_matrix = []
    y_matrix = []
    for res in descent_results:
        stairs = sorted(res.staircases[-1].items(), key=lambda item: item[0])
        staircase_times = res.model.metadata['staircase_times']

        x_matrix.append([np.mean(staircase_times[i]) for i in res.intervals])
        y_matrix.append([value for _, value in stairs])

    # Nothing to average: avoid indexing x_matrix[0] below.
    if not x_matrix:
        return [], [], []

    x_axis = []
    y_axis = []
    e_vals = []
    for i in range(len(x_matrix[0])):
        # Build each column once and reuse it for every statistic.
        x_column = [row[i] for row in x_matrix]
        y_column = [row[i] for row in y_matrix]

        x_axis.append(np.mean(x_column))
        y_axis.append(np.mean(y_column))
        e_vals.append(np.std(y_column) / 2.)

    return x_axis, y_axis, e_vals

# Averaged staircase per subset; the error values are not plotted here, so the
# third return value is discarded.
# NOTE(review): this cell reads `results_spaced` (not defined in the visible
# notebook) and `spaced_gamma_delta` / `massed_gamma_delta`, which are only
# assigned in the cell that follows — on a fresh kernel that cell must run first.
x_spaced_axis, y_spaced_axis, _ = prepare_plot_data(results_spaced)
x_massed_axis, y_massed_axis, _ = prepare_plot_data(results_massed)

plt.figure(num=None, figsize=(8, 6), dpi=160)
#plt.errorbar(x_axis, y_axis, e_vals,
#             ecolor='orange', elinewidth='2',
#             linestyle='--', linewidth='2',
#             capthick='2', capsize=4,
#             color='#02A5F4', marker='o')
p1, p2 = plt.plot(x_spaced_axis, y_spaced_axis, 'o-',
                  x_massed_axis, y_massed_axis, 's-')

plt.xscale('log')
plt.xlabel('Time from previous attempt in seconds')
plt.ylabel('Increase in memory activation')
plt.xlim([30, 1e6])

# Raw strings: '\g' and '\d' are not valid escape sequences in a normal literal.
plt.legend([p1, p2], [
    r'Spaced $\gamma = {avg[0]:.2f}, \delta = {avg[1]:.2f}$'.format(**spaced_gamma_delta),
    r'Massed $\gamma = {avg[0]:.2f}, \delta = {avg[1]:.2f}$'.format(**massed_gamma_delta),
])

# Adjust the layout before showing the figure; after a blocking show() the
# adjustment would no longer reach the displayed figure.
plt.tight_layout()
plt.show()

In [39]:
def get_gamma_delta(descent_results):
    """Summarise the final gamma and delta over several descent results.

    Takes the last element of ``gammas`` and of ``deltas`` from every result,
    echoes their standard deviation and mean (three decimals) and returns
    ``{'std': [gamma_std, delta_std], 'avg': [gamma_mean, delta_mean]}``.
    """
    final_gammas = [res.gammas[-1] for res in descent_results]
    final_deltas = [res.deltas[-1] for res in descent_results]

    stds = [np.std(final_gammas), np.std(final_deltas)]
    avgs = [np.mean(final_gammas), np.mean(final_deltas)]

    tools.echo('std: gamma={:.3f}, delta={:.3f}'.format(*stds), clear=False)
    tools.echo('avg: gamma={:.3f}, delta={:.3f}'.format(*avgs), clear=False)

    return {'std': stds, 'avg': avgs}
# Per-subset summaries; their 'avg' entries are formatted into the legend of the
# spaced-vs-massed plot.
# NOTE(review): `results_spaced` is not defined in the visible part of the
# notebook — confirm where it is produced.
spaced_gamma_delta = get_gamma_delta(results_spaced)
massed_gamma_delta = get_gamma_delta(results_massed)