In [1]:
%matplotlib qt4
from __future__ import division

from models import tools, optimize, models
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The Naive Way

In [29]:
# Load the window of answers used by the naive descent experiments below.
# limit/offset presumably mean "max rows" and "starting row" -- confirm in
# tools.load_data.
data = tools.load_data(limit=40000, offset=2400000)

Loaded 29981 answers.


In [19]:
# Build the NaiveDescent optimizer over the answers loaded above.
grad = optimize.NaiveDescent(data)

In [20]:
# Run 1: search from (1.5, -2) with step_size=3.
# The saved output reports gamma and delta, so the two positional values are
# presumably the initial gamma and delta -- confirm against search_pfa.
descent1 = grad.search_pfa(1.5, -2, step_size=3, maxiter=100, precision=0.005)

gamma: 3.18298528419; grad: -0.000741008113952
delta: -0.303539220688; grad: -0.000729927486385


In [46]:
# Run 2: same search from a different starting point (5, 0.5) with
# step_size=2.5.
descent2 = grad.search_pfa(5, 0.5, step_size=2.5, maxiter=100, precision=0.005)

gamma: 3.80556357186; grad: 0.000890172684958
delta: -0.678034006194; grad: 0.000878019898582


In [93]:
# Run 3: same starting point as run 1 but with step_size=20 and maxiter=36.
# NOTE(review): this calls search_pfag, whereas runs 1 and 2 call search_pfa --
# confirm the different method is intended and not a typo.
descent3 = grad.search_pfag(1.5, -2, step_size=20, maxiter=36, precision=0.005)

gamma: 1.85300525851; grad: 0.000260020806278
delta: -1.57543851455; grad: 0.000284481491125


In [37]:
# Evaluate a PFA model built on top of an Elo model with fixed gamma/delta.
# NOTE(review): these gamma/delta values do not match the output of any descent
# run saved in this notebook -- presumably copied from an earlier run; confirm.
elo = models.EloModel()
pfa = models.PFAModel(elo, gamma=2.99622612646, delta=-0.476090204636)
pfa_test = PerformanceTest(pfa, data)
pfa_test.run()

In [38]:
# Display the 'train' entry of the performance-test results.
# NOTE(review): the saved output below reports "Set Size: 50000" while the load
# cell saved above reports 29981 answers -- the outputs look like they come
# from different kernel states; re-run from a fresh kernel to confirm.
pfa_test.results['train']

RMSE: 0.361919204016
AUC: 0.787337458274
OFF: -6.24923069748e-05
Set Size: 50000

In [120]:
def annotate(descent, number, mark, xadd, yadd):
    """Mark one iteration of a descent run on the current plot and label it.

    Looks up row ``number`` in ``descent.params`` and ``descent.grads``, draws
    a marker at ``(number, gradient)`` and attaches a boxed text label showing
    gamma and delta rounded to two decimals.

    Args:
        descent: object exposing ``params`` and ``grads``, both indexable via
            ``.loc[number]``; the ``params`` row must have ``gamma`` and
            ``delta`` attributes.
        number: index label of the iteration to annotate (used as x value).
        mark: format string passed to ``plt.plot`` for the marker.
        xadd: horizontal offset of the label text from the marked point.
        yadd: vertical offset of the label text from the marked point.
    """
    params_at_step = descent.params.loc[number]
    grad_at_step = descent.grads.loc[number]

    gamma_text = round(params_at_step.gamma, 2)
    delta_text = round(params_at_step.delta, 2)
    label = r'$\gamma={}$, $\delta={}$'.format(gamma_text, delta_text)

    point = (number, grad_at_step)
    label_position = (number + xadd, grad_at_step + yadd)
    label_box = {'boxstyle': "round", 'fc': "w"}

    plt.annotate(label,
                 xy=point, xycoords='data',
                 xytext=label_position, textcoords='data',
                 bbox=label_box)
    plt.plot(number, grad_at_step, mark)
    
# Label one point on each run's curve. The commented-out calls are alternative
# annotation points that are currently disabled.
#annotate(descent1, 1, 'go', 0.8, -0.006)
#annotate(descent1, 10, 'go', 0.8, -0.006)
annotate(descent1, 34, 'go', -8, -0.009)

#annotate(descent3, 1, 'ro', 0.7, 0.004)
#annotate(descent3, 11, 'ro', 0.8, 0.004)
annotate(descent3, 20, 'ro', 0.8, 0.006)

plt.xlabel('number of iteration')
plt.ylabel('predicted - observed')

# Fix the visible window before the curves are drawn.
plt.xlim([0, 35])
plt.ylim([-0.08, 0.03])

# Gradient traces of run 1 (green) and run 3 (red); the legend labels repeat
# the step_size values passed to the corresponding searches.
line1, = plt.plot(descent1.grads[:35], 'g', label=r'step size = $3$')
line2, = plt.plot(descent3.grads[:36], 'r', label=r'step size = $20$')

plt.legend(handles=[line1, line2], loc='lower right')

plt.show()

# The Proper Way

In [37]:
# Re-import models.optimize so that edits made to the module since it was
# first imported are picked up by the cells below (Python 2 builtin reload).
reload(optimize)

<module 'models.optimize' from '/home/pavel/Projects/thesis/models/optimize.py'>

In [30]:
# Load a different, larger window of answers for the gradient-descent section.
# This rebinds `data`, replacing the window used in the naive section above.
data = tools.load_data(limit=100000, offset=2000000)

Loaded 85915 answers.


In [38]:
# Build the GradientDescent optimizer over the window loaded above.
descent = optimize.GradientDescent(data)

In [40]:
# Fit the staircase with 15 iterations. Each printed row is one iteration; the
# first two columns end at the Gamma and Delta that `result` reports below.
# The third column is presumably an error/gradient magnitude -- confirm in
# search_staircase.
result = descent.search_staircase(init_learn_rate=0.01, number_of_iter=15)

   2.50000    0.80000        inf
   1.95979    0.68000    0.00228
   2.07159    0.77587    0.00131
   2.13821    0.80102    0.00119
   2.17747    0.81221    0.00115
   2.20197    0.81786    0.00113
   2.21871    0.82009    0.00112
   2.23103    0.82021    0.00112
   2.24059    0.81909    0.00112
   2.24828    0.81726    0.00112
   2.25462    0.81502    0.00112
   2.25996    0.81256    0.00112
   2.26454    0.80998    0.00112
   2.26852    0.80734    0.00112
   2.27201    0.80468    0.00112
   2.27512    0.80203    0.00112


In [44]:
# Display the fitted result (iterations, gamma, delta and staircase values).
result

Iterations: 15
Gamma: 2.27512
Delta: 0.80203
Staircase:
       60       90      150      300      600     1800    10800    86400   604800      inf
   +1.457   +0.994   +0.854   +0.538   +0.660   +0.661   +0.510   +0.405   +0.268   +0.000

In [45]:
# Plot the fitted result in orange; the call returns a list of Line2D artists.
result.plot(color='orange')

[<matplotlib.lines.Line2D at 0x7f28ed767050>]

In [47]:
# Fit the staircase independently on ten consecutive windows of 100000 rows,
# starting at offset 1000000, and keep every fitted result for comparison.
results = []
for i in range(10):
    limit = 100000
    offset = 1000000 + i * 100000

    data = tools.load_data(limit=limit, offset=offset)
    # Report which window was just loaded.
    tools.echo('{} to {}'.format(offset, offset + limit), clear=False)

    descent = optimize.GradientDescent(data)
    res = descent.search_staircase(
        init_learn_rate=0.01,
        number_of_iter=25,
        echo_iterations=False,
    )
    results.append(res)

Loaded 80434 answers.
1000000 to 1100000
Loaded 75260 answers.
1100000 to 1200000
Loaded 80025 answers.
1200000 to 1300000
Loaded 82272 answers.
1300000 to 1400000
Loaded 85786 answers.
1400000 to 1500000
Loaded 81261 answers.
1500000 to 1600000
Loaded 89355 answers.
1600000 to 1700000
Loaded 88183 answers.
1700000 to 1800000
Loaded 87314 answers.
1800000 to 1900000
Loaded 82810 answers.
1900000 to 2000000


In [48]:
# Build one row per fitted result: x holds the mean time of each staircase
# interval, y holds the fitted staircase value of each interval.
x_matrix = []
y_matrix = []
for res in results:
    # Final staircase of this run as (interval, value) pairs ordered by interval.
    stairs = sorted(res.staircases[-1].items(), key=lambda x: x[0])
    staircase_times = res.model.metadata['staircase_times']

    # NOTE(review): x follows the order of res.intervals while y follows the
    # sorted staircase keys -- this assumes res.intervals is sorted the same
    # way; confirm.
    xi_axis = [np.mean(staircase_times[i]) for i in res.intervals]
    yi_axis = [value for interval, value in stairs]

    x_matrix.append(xi_axis)
    y_matrix.append(yi_axis)

# Aggregate across runs column-wise (one column per staircase interval).
x_axis = np.mean(x_matrix, axis=0).tolist()
y_axis = np.mean(y_matrix, axis=0).tolist()
# NOTE(review): the error bars show HALF the standard deviation across runs
# (the original halved every value before taking the std, which is the same
# thing) -- confirm that half, not full, std is what the figure should show.
e_vals = (np.std(y_matrix, axis=0) / 2).tolist()

# Widths are passed as numbers (the documented type) rather than strings.
plt.errorbar(x_axis, y_axis, yerr=e_vals,
             ecolor='orange', elinewidth=2,
             linestyle='--', linewidth=2,
             capthick=2, capsize=4,
             color='#02A5F4', marker='o')

plt.xscale('log')
plt.xlabel('Time from previous attempt in seconds.')
plt.ylabel('Memory activation')

plt.show()

In [49]:
# Spread of the final gamma and delta across the fitted windows. The echoed
# values are standard deviations, even though the message labels them simply
# "gamma" and "delta".
final_gammas = [run.gammas[-1] for run in results]
final_deltas = [run.deltas[-1] for run in results]

gamma_std = np.std(final_gammas)
delta_std = np.std(final_deltas)

summary = 'gamma={:.3f}, delta={:.3f}'.format(gamma_std, delta_std)
tools.echo(summary, clear=False)

gamma=0.199, delta=0.149
