In [1]:
%matplotlib qt4
from __future__ import division

from models import tools, optimize, models
from models.tests import PerformanceTest

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The Naive Way

In [29]:
# Load the answer data used by every later cell.
# Presumably `limit` caps the number of rows and `offset` is the starting row
# (confirm against tools.load_data); the recorded output reports 29981 answers.
data = tools.load_data(limit=40000, offset=2400000)

Loaded 29981 answers.


In [19]:
# Build the naive optimiser over the loaded answers; the following cells call
# its search_pfa / search_pfag methods.
grad = optimize.NaiveDescent(data)

In [20]:
# Run 1 with step_size=3. The two positional values (1.5, -2) are presumably
# the starting gamma and delta, in the order of the printed results — confirm
# against search_pfa. The returned object is read later through its `.params`
# and `.grads` attributes when plotting.
descent1 = grad.search_pfa(1.5, -2, step_size=3, maxiter=100, precision=0.005)

gamma: 3.18298528419; grad: -0.000741008113952
delta: -0.303539220688; grad: -0.000729927486385


In [46]:
# Run 2: same search from different positional start values (5, 0.5) with
# step_size=2.5.
# NOTE(review): `descent2` is not used by any other cell visible in this
# notebook section — confirm it is still needed.
descent2 = grad.search_pfa(5, 0.5, step_size=2.5, maxiter=100, precision=0.005)

gamma: 3.80556357186; grad: 0.000890172684958
delta: -0.678034006194; grad: 0.000878019898582


In [93]:
# Run 3: same positional start values as run 1 but step_size=20 and maxiter=36;
# plotted against run 1 further below.
# NOTE(review): this calls `search_pfag`, whereas runs 1 and 2 call
# `search_pfa` — confirm the different method is intentional and not a typo.
descent3 = grad.search_pfag(1.5, -2, step_size=20, maxiter=36, precision=0.005)

gamma: 1.85300525851; grad: 0.000260020806278
delta: -1.57543851455; grad: 0.000284481491125


In [37]:
# Build a PFAModel (constructed with an EloModel instance as its first
# argument) and run PerformanceTest with it on the loaded answers.
elo = models.EloModel()
# NOTE(review): these gamma/delta values match none of the descent results
# printed above (3.18/-0.30, 3.81/-0.68, 1.85/-1.58) — presumably copied from
# another run; confirm their provenance.
pfa = models.PFAModel(elo, gamma=2.99622612646, delta=-0.476090204636)
pfa_test = PerformanceTest(pfa, data)
pfa_test.run()

In [38]:
# Display the 'train' entry of the performance-test results.
# NOTE(review): the recorded output reports "Set Size: 50000", while the load
# cell requests limit=40000 and its output reports 29981 answers — this output
# was likely produced with a different `data`; re-run from a fresh kernel.
pfa_test.results['train']

RMSE: 0.361919204016
AUC: 0.787337458274
OFF: -6.24923069748e-05
Set Size: 50000

In [120]:
def annotate(descent, number, mark, xadd, yadd):
    """Mark one iteration on the current plot and label it with its parameters.

    descent -- object exposing ``params`` and ``grads``; both are looked up
               with ``.loc[number]`` and the ``params`` row must provide
               ``gamma`` and ``delta`` attributes.
    number  -- index label of the iteration; also used as the x coordinate.
    mark    -- format string handed to ``plt.plot`` for the marker
               (the notebook passes 'go' and 'ro').
    xadd    -- horizontal offset of the label from the point, in data units.
    yadd    -- vertical offset of the label from the point, in data units.
    """
    params_here = descent.params.loc[number]
    grad_here = descent.grads.loc[number]

    # Both parameters are shown rounded to two decimals.
    label = r'$\gamma={}$, $\delta={}$'.format(round(params_here.gamma, 2),
                                               round(params_here.delta, 2))
    point = (number, grad_here)
    label_position = (number + xadd, grad_here + yadd)
    white_round_box = {'boxstyle': "round", 'fc': "w"}

    plt.annotate(label,
                 xy=point, xycoords='data',
                 xytext=label_position, textcoords='data',
                 bbox=white_round_box)
    plt.plot(number, grad_here, mark)
    
# Label one iterate per run with its (gamma, delta). The extra markers below
# were tried as well and are kept disabled so the figure stays readable:
#annotate(descent1, 1, 'go', 0.8, -0.006)
#annotate(descent1, 10, 'go', 0.8, -0.006)
#annotate(descent3, 1, 'ro', 0.7, 0.004)
#annotate(descent3, 11, 'ro', 0.8, 0.004)
annotate(descent1, number=34, mark='go', xadd=-8, yadd=-0.009)
annotate(descent3, number=20, mark='ro', xadd=0.8, yadd=0.006)

# Axis labels and a fixed viewport, set before the curves are drawn.
plt.xlabel('number of iteration')
plt.ylabel('predicted - observed')
plt.xlim(0, 35)
plt.ylim(-0.08, 0.03)

# One curve per run; each plot call must yield exactly one line handle,
# which is what the single-element unpacking enforces.
[line1] = plt.plot(descent1.grads[:35], 'g', label=r'step size = $3$')
[line2] = plt.plot(descent3.grads[:36], 'r', label=r'step size = $20$')

plt.legend(loc='lower right', handles=[line1, line2])
plt.show()

# The Proper Way

In [42]:
# Re-execute the `optimize` module in place (`reload` is the Python 2 builtin),
# presumably to pick up edits made since it was first imported. Objects created
# before the reload, such as `grad`, keep their old class definitions.
reload(optimize)
# Gradient-descent optimiser over the same loaded answers as the naive one.
descent = optimize.GradientDescent(data)

In [56]:
# Run the staircase search with its default arguments. The next cell treats the
# result as a mapping whose values each hold a 'staircase' mapping.
r = descent.search_staircase()

In [57]:
import decimal

# Print the staircase results as a text table: one line per entry of `r`
# (in key order), one column per entry of that entry's 'staircase' mapping
# (in key order), every value quantized to three decimal places.
# NOTE(review): judging by the recorded output, rows look like successive
# iterations and columns like staircase steps — confirm against
# search_staircase.
THREE_PLACES = decimal.Decimal('.000')  # built once instead of once per value

for run_key, run in sorted(r.items(), key=lambda item: item[0]):
    steps = sorted(run['staircase'].items(), key=lambda item: item[0])
    # The comprehension uses its own variable names: in Python 2 a list
    # comprehension leaks its loop variables, so reusing the outer loop's
    # name would overwrite it mid-iteration.
    # Decimal is built from str(value) so it starts from the float's short
    # decimal repr rather than its exact binary expansion.
    cells = [str(decimal.Decimal(str(value)).quantize(THREE_PLACES))
             for step_key, value in steps]
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('   '.join(cells))

0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000   0.000
2.374   1.647   1.097   1.050   1.005   0.339   0.731   0.149   -0.126   0.000   0.000
2.795   2.000   1.309   1.026   0.964   0.654   0.756   0.359   -0.107   0.000   0.000
3.092   2.229   1.415   1.073   1.014   0.802   0.795   0.462   -0.094   0.000   0.000
3.325   2.410   1.496   1.125   1.076   0.900   0.833   0.529   -0.084   0.000   0.000
3.519   2.563   1.566   1.172   1.135   0.974   0.867   0.578   -0.076   0.000   0.000
3.685   2.695   1.629   1.214   1.187   1.034   0.897   0.617   -0.069   0.000   0.000
3.831   2.813   1.686   1.251   1.235   1.083   0.924   0.649   -0.063   0.000   0.000
3.960   2.918   1.739   1.284   1.277   1.126   0.949   0.677   -0.058   0.000   0.000
4.076   3.014   1.787   1.313   1.315   1.164   0.971   0.701   -0.053   0.000   0.000
4.182   3.102   1.832   1.340   1.350   1.198   0.991   0.722   -0.049   0.000   0.000
