In [1]:
%matplotlib qt4
from __future__ import division

from models import tools, optimize, models, filters
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The Naive Way

In [None]:
# Load the answers used by the naive-descent experiments in this section.
# NOTE(review): `limit`/`offset` presumably select a window of the answer log;
# confirm the exact semantics in models.tools.load_data.
data = tools.load_data(limit=40000, offset=2400000)

In [None]:
# Optimizer object for the naive approach; the search_pfa / search_pfag
# calls in the following cells are all run on this instance.
grad = optimize.NaiveDescent(data)

In [None]:
# PFA search with step size 3; plotted below as the green 'step size = 3' curve.
# NOTE(review): the two positional arguments are presumably the starting
# gamma and delta -- confirm against NaiveDescent.search_pfa.
descent1 = grad.search_pfa(1.5, -2, step_size=3, maxiter=100, precision=0.005)

In [None]:
# Second PFA search with different positional arguments and step size 2.5.
# NOTE(review): `descent2` is not referenced by any other cell visible here.
descent2 = grad.search_pfa(5, 0.5, step_size=2.5, maxiter=100, precision=0.005)

In [None]:
# Search via search_pfag with step size 20, capped at 36 iterations;
# plotted below as the red 'step size = 20' curve.
descent3 = grad.search_pfag(1.5, -2, step_size=20, maxiter=36, precision=0.005)

In [None]:
# Evaluate a PFA model (constructed from an Elo model) with fixed parameters
# on `data`.
# NOTE(review): gamma/delta are hard-coded; presumably copied from the result
# of an earlier search -- record where they came from.
elo = models.EloModel()
pfa = models.PFAModel(elo, gamma=2.99622612646, delta=-0.476090204636)
pfa_test = PerformanceTest(pfa, data)
# run() must be called before `pfa_test.results` is inspected in the next cell.
pfa_test.run()

In [None]:
# Display the 'train' entry of the performance-test results.
pfa_test.results['train']

In [None]:
def annotate(descent, number, mark, xadd, yadd):
    """Mark one iteration of a descent run on the current plot and label it.

    Looks up row ``number`` in ``descent.params`` (for its ``gamma`` and
    ``delta``) and in ``descent.grads`` (for the y value), draws a text box
    showing both parameters rounded to two decimals, and plots a marker at
    ``(number, grads value)``.

    Args:
        descent: search result exposing ``params`` and ``grads``, both
            indexable with ``.loc[number]``.
        number: iteration label to annotate; also used as the x coordinate.
        mark: matplotlib format string for the marker (e.g. ``'go'``).
        xadd: horizontal offset of the text box from the point, in data units.
        yadd: vertical offset of the text box from the point, in data units.
    """
    params = descent.params.loc[number]
    value = descent.grads.loc[number]
    label = r'$\gamma={}$, $\delta={}$'.format(round(params.gamma, 2),
                                               round(params.delta, 2))
    box_style = {'boxstyle': "round", 'fc': "w"}
    plt.annotate(label,
                 xy=(number, value), xycoords='data',
                 xytext=(number + xadd, value + yadd), textcoords='data',
                 bbox=box_style)
    plt.plot(number, value, mark)
    
# Figure: value stored in `grads` per iteration for the two naive searches.
# Everything below goes through the pyplot state machine, so statement order
# determines what is drawn on top of what.

# Alternative annotation points for the step-size-3 run (left disabled).
#annotate(descent1, 1, 'go', 0.8, -0.006)
#annotate(descent1, 10, 'go', 0.8, -0.006)
# Label iteration 34 of the step-size-3 run with a green marker.
annotate(descent1, 34, 'go', -8, -0.009)

# Alternative annotation points for the step-size-20 run (left disabled).
#annotate(descent3, 1, 'ro', 0.7, 0.004)
#annotate(descent3, 11, 'ro', 0.8, 0.004)
# Label iteration 20 of the step-size-20 run with a red marker.
annotate(descent3, 20, 'ro', 0.8, 0.006)

plt.xlabel('number of iteration')
plt.ylabel('predicted - observed')

# Fixed view window for the figure.
plt.xlim([0, 35])
plt.ylim([-0.08, 0.03])

# The curves are drawn after the markers above.
# NOTE(review): the two slices differ in length (35 vs 36 entries); confirm
# this is intentional.
line1, = plt.plot(descent1.grads[:35], 'g', label=r'step size = $3$')
line2, = plt.plot(descent3.grads[:36], 'r', label=r'step size = $20$')

plt.legend(handles=[line1, line2], loc='lower right')

plt.show()

# The Proper Way

In [None]:
# Re-import models.tools so edits to the module are picked up without
# restarting the kernel. `reload` is a builtin only under Python 2.
reload(tools)

In [10]:
# Load a different window of answers for the gradient-descent section.
# This rebinds `data`, replacing the frame used by the naive section above.
data = tools.load_data(limit=100000, offset=1500000)

Loaded 81261 answers.


In [11]:
# Keep only the rows NOT selected by filters.classmates (the mask is negated).
# This rebinds `data`, so the unfiltered frame is no longer reachable under
# that name; reload it with the previous cell if it is needed again.
data = data[~filters.classmates(data)]
# Parenthesised form: prints the same text under Python 2 and is also valid
# Python 3, unlike the bare `print x` statement.
print(len(data))

63470


In [12]:
# Gradient-descent optimizer over the filtered `data` from the previous cell.
descent = optimize.GradientDescent(data)

In [13]:
# Run the staircase search on the non-classmates data. The call echoes one
# row of three numbers per step (see the output below).
# NOTE(review): the columns are presumably gamma, delta and an error/gradient
# measure -- confirm in GradientDescent.search_staircase.
non_classmates = descent.search_staircase(init_learn_rate=0.015, number_of_iter=20)

   2.50000    0.80000        inf
   2.21528    0.51492    0.00409
   2.24417    0.36329    0.00322
   2.26565    0.36220    0.00314
   2.28299    0.39082    0.00311
   2.29586    0.42207    0.00309
   2.30499    0.45019    0.00308
   2.31129    0.47447    0.00308
   2.31554    0.49537    0.00307
   2.31833    0.51349    0.00306
   2.32008    0.52934    0.00306
   2.32109    0.54333    0.00305
   2.32159    0.55576    0.00305
   2.32173    0.56689    0.00305
   2.32161    0.57689    0.00304
   2.32132    0.58592    0.00304
   2.32092    0.59411    0.00304
   2.32045    0.60156    0.00304
   2.31993    0.60835    0.00304
   2.31938    0.61456    0.00304
   2.31883    0.62025    0.00304


In [14]:
# Plot the search result; the call returns a list of Line2D objects, which is
# what the cell echoes as its output.
non_classmates.plot()

[<matplotlib.lines.Line2D at 0x7fe4cbb5d090>]

In [17]:
# Overlay the search curves for the two groups and label each with its best
# gamma/delta.
# NOTE(review): `classmates` is not defined in any cell visible here (the
# execution counter jumps from 14 to 17), so this cell fails on a fresh
# kernel. It presumably came from the same search run on the rows selected by
# filters.classmates -- restore that cell before relying on this figure.
classmates_p, = classmates.plot(color='orange')
non_classmates_p, = non_classmates.plot(color='#02A5F4')
# Raw string: `\g` and `\d` are not valid Python escapes, so the non-raw form
# only worked by accident and warns on newer interpreters. The runtime value
# is unchanged.
# NOTE(review): the minus sign in front of delta is hard-coded; presumably
# `best['delta']` holds the magnitude -- verify.
gamma_delta = r'($\gamma = {0[gamma]:.3f}, \delta = -{0[delta]:.3f}$)'
plt.legend([classmates_p, non_classmates_p],
           [('Classmates ' + gamma_delta).format(classmates.best),
            ('All the others ' + gamma_delta).format(non_classmates.best)])

<matplotlib.legend.Legend at 0x7fe4ca850350>

In [None]:
# Train the staircase search independently on several consecutive windows of
# the answer log so the resulting curves can be averaged afterwards.
N_CHUNKS = 10           # number of independent training windows
CHUNK_SIZE = 120000     # `limit` of each window; also the stride between windows
FIRST_OFFSET = 1000000  # offset of the first window

train_data = []
for i in range(N_CHUNKS):
    limit, offset = CHUNK_SIZE, FIRST_OFFSET + i * CHUNK_SIZE
    df = tools.load_data(limit=limit, offset=offset, echo_loaded=False)
    # Keep only the rows selected by filters.for_staircase.
    df = df[filters.for_staircase(df)]
    # Store a copy so each window is an independent frame rather than a view.
    train_data.append(df.copy())
    tools.echo('[{}]: Loaded {} answers.'.format(i, len(df)), clear=False)

tools.echo('Data loaded.')

# Iterate over what was actually loaded instead of repeating the window count,
# so the two loops cannot drift apart.
results = []
for i, chunk in enumerate(train_data):
    descent = optimize.GradientDescent(chunk)
    res = descent.search_staircase(init_learn_rate=0.01, number_of_iter=25,
                                   echo_iterations=False)
    results.append(res)
    tools.echo('[{}]: done!'.format(i), clear=False)

In [None]:
# Build one row per training run: x holds the mean time of each staircase
# interval, y holds the final staircase value of each interval.
x_matrix = []
y_matrix = []
for res in results:
    # Last staircase snapshot of the run, ordered by interval key.
    stairs = sorted(res.staircases[-1].items(), key=lambda item: item[0])
    staircase_times = res.model.metadata['staircase_times']

    # NOTE(review): x follows the order of `res.intervals` while y follows the
    # sorted staircase keys; the two orders are assumed to coincide -- verify.
    xi_axis = [np.mean(staircase_times[i]) for i in res.intervals]
    yi_axis = [value for interval, value in stairs]

    x_matrix.append(xi_axis)
    y_matrix.append(yi_axis)

# Aggregate column-wise across runs. zip(*rows) transposes the matrices and
# yields nothing for an empty `results`, instead of failing on x_matrix[0].
x_axis = [np.mean(column) for column in zip(*x_matrix)]
y_axis = [np.mean(column) for column in zip(*y_matrix)]
# Error bars span half a standard deviation in each direction.
e_vals = [np.std(column) / 2 for column in zip(*y_matrix)]

# Widths are passed as numbers; the string form '2' is not a documented value
# for these parameters.
plt.errorbar(x_axis, y_axis, e_vals,
             ecolor='orange', elinewidth=2,
             linestyle='--', linewidth=2,
             capthick=2, capsize=4,
             color='#02A5F4', marker='o')

plt.xscale('log')
plt.xlabel('Time from previous attempt in seconds.')
plt.ylabel('Memory activation')

plt.show()

In [None]:
# Average the final gamma and delta over all training runs.
gamma_mean = np.mean([res.gammas[-1] for res in results])
delta_mean = np.mean([res.deltas[-1] for res in results])
# The original names said "std" although the values are means; keep them bound
# so any later cell that still refers to them keeps working.
gamma_std, delta_std = gamma_mean, delta_mean
tools.echo('gamma={:.3f}, delta={:.3f}'.format(gamma_mean, delta_mean), clear=False)