In [1]:
%matplotlib qt4
from __future__ import division

from models import tools, optimize, models
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The Naive Way

In [29]:
# Load one window of answers (selected by limit/offset) into the notebook-global `data`.
# NOTE(review): this cell ran as In [29], i.e. after the NaiveDescent cell (In [19])
# that consumes `data`, so `grad` may have been built from a different load — confirm
# with Restart & Run All.
data = tools.load_data(limit=40000, offset=2400000)

Loaded 29981 answers.


In [19]:
# Wrap the currently loaded answers in the naive descent searcher.
# Note that `grad` is the searcher object itself, not a gradient value.
grad = optimize.NaiveDescent(data)

In [20]:
# Run 1: search_pfa with step_size=3. The two positional arguments are presumably the
# initial gamma and delta (the recorded output reports both) — confirm in
# optimize.NaiveDescent.search_pfa. This run is the green curve in the plot cell.
descent1 = grad.search_pfa(1.5, -2, step_size=3, maxiter=100, precision=0.005)

gamma: 3.18298528419; grad: -0.000741008113952
delta: -0.303539220688; grad: -0.000729927486385


In [46]:
# Run 2: same search from a different starting pair and with step_size=2.5.
# NOTE(review): `descent2` is not referenced by the plotting cell in this notebook.
descent2 = grad.search_pfa(5, 0.5, step_size=2.5, maxiter=100, precision=0.005)

gamma: 3.80556357186; grad: 0.000890172684958
delta: -0.678034006194; grad: 0.000878019898582


In [93]:
# Run 3: uses search_pfag (not search_pfa) with a much larger step_size=20 and a cap of
# 36 iterations. This run is the red curve in the plot cell.
# NOTE(review): how search_pfag differs from search_pfa is not visible here — confirm.
descent3 = grad.search_pfag(1.5, -2, step_size=20, maxiter=36, precision=0.005)

gamma: 1.85300525851; grad: 0.000260020806278
delta: -1.57543851455; grad: 0.000284481491125


In [37]:
# Evaluate a PFA model with fixed gamma/delta on the currently loaded `data`.
# PFAModel receives the Elo model instance as its first argument.
# NOTE(review): these gamma/delta literals do not match any of the descent outputs
# recorded in this notebook; presumably copied from an earlier run — confirm.
elo = models.EloModel()
pfa = models.PFAModel(elo, gamma=2.99622612646, delta=-0.476090204636)
pfa_test = PerformanceTest(pfa, data)
pfa_test.run()

In [38]:
# Display the 'train' entry of the results gathered by pfa_test.run().
# NOTE(review): the recorded output reports "Set Size: 50000" while the load cell's
# recorded output reports 29981 answers, so the saved outputs look stale — re-run.
pfa_test.results['train']

RMSE: 0.361919204016
AUC: 0.787337458274
OFF: -6.24923069748e-05
Set Size: 50000

In [120]:
def annotate(descent, number, mark, xadd, yadd, ax=None):
    """Mark one iteration of a descent run on the gradient plot and label it.

    Draws a marker at ``(number, descent.grads.loc[number])`` and attaches a
    boxed text label showing the gamma and delta found in
    ``descent.params.loc[number]``, each rounded to two decimals.

    Parameters
    ----------
    descent : object exposing ``params`` and ``grads``; both must support
        ``.loc[number]`` and the selected ``params`` row must have ``gamma``
        and ``delta`` attributes.
    number : index label of the iteration to mark; also used as the x value.
    mark : matplotlib format string for the marker, e.g. ``'go'``.
    xadd, yadd : offsets of the label from the marked point, in data
        coordinates.
    ax : optional matplotlib Axes to draw on. When omitted, the current axes
        (``plt.gca()``) are used, which is where ``plt.annotate`` and
        ``plt.plot`` draw, so existing five-argument calls behave as before.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    row = descent.params.loc[number]
    # Deliberately not called `grad`: that name is the notebook-level searcher object.
    grad_value = descent.grads.loc[number]

    label = r'$\gamma={}$, $\delta={}$'.format(round(row.gamma, 2), round(row.delta, 2))
    ax.annotate(label,
                xy=(number, grad_value), xycoords='data',
                xytext=(number + xadd, grad_value + yadd), textcoords='data',
                bbox=dict(boxstyle="round", fc="w"))
    ax.plot(number, grad_value, mark)
    
# Label one iterate on each curve with its (gamma, delta) pair. The offsets are in
# data coordinates. (Earlier drafts also marked iterations 1 and 10/11 of each run;
# only these two markers are kept.)
annotate(descent1, 34, 'go', xadd=-8, yadd=-0.009)
annotate(descent3, 20, 'ro', xadd=0.8, yadd=0.006)

# Axis labels and the fixed viewing window.
plt.xlabel('number of iteration')
plt.ylabel('predicted - observed')
plt.xlim(0, 35)
plt.ylim(-0.08, 0.03)

# Gradient traces of the two runs; each plot call yields exactly one line, whose
# handle is kept for the legend.
[line1] = plt.plot(descent1.grads[:35], 'g', label=r'step size = $3$')
[line2] = plt.plot(descent3.grads[:36], 'r', label=r'step size = $20$')
plt.legend(handles=[line1, line2], loc='lower right')

plt.show()

# The Proper Way

In [300]:
# Development aid: re-import models.optimize (Python 2 builtin `reload`) so edits to the
# module are picked up without restarting the kernel.
# NOTE(review): objects created before this call (e.g. `grad`) are not rebuilt by it.
reload(optimize)

<module 'models.optimize' from '/home/pavel/Projects/thesis/models/optimize.py'>

In [247]:
# Load a different, larger window of answers for the gradient-descent section.
# NOTE(review): this rebinds the same `data` name used by the naive section above, so
# re-running earlier cells afterwards would silently use this dataset.
data = tools.load_data(limit=80000, offset=8000000)

Loaded 60217 answers.


In [301]:
# Build the GradientDescent searcher over the answers loaded in this section.
descent = optimize.GradientDescent(data)

In [302]:
# Run the staircase search from the given initial gamma/delta and learning rate.
# The call prints one row per step; the first row of the recorded output repeats the
# initial gamma and delta. NOTE(review): the third printed column is presumably an
# error/gradient measure — confirm in optimize.GradientDescent.search_staircase.
result = descent.search_staircase(init_gamma=2.3, init_delta=0.8, init_learn_rate=0.02, number_of_iter=10)

   2.30000    0.80000        inf
   2.34590    0.63027    0.00302
   2.40387    0.65443    0.00189
   2.43757    0.67926    0.00186
   2.45930    0.68905    0.00189
   2.47563    0.69302    0.00193
   2.48867    0.69539    0.00196
   2.49937    0.69775    0.00198
   2.50821    0.70067    0.00199
   2.51552    0.70427    0.00200
   2.52154    0.70853    0.00199


In [303]:
# Bare expression: displays the result object's repr (iterations, gamma, delta and the
# staircase values, per the recorded output).
result

Iterations: 11
Gamma: 2.52154
Delta: 0.70853
Staircase:
       60       90      150      300      600     1800    10800    86400   259200      inf
   +1.285   +0.927   +0.631   +0.489   +0.393   +0.331   +0.252   -0.019   -0.205   +0.000

In [304]:
# Plot the search result in orange. The recorded output is the list of Line2D objects
# returned by the call; a trailing `;` would suppress that repr.
result.plot(color='orange')

[<matplotlib.lines.Line2D at 0x7fe9aac3af90>]