In [1]:
%matplotlib qt4
from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn as sk
import sklearn.metrics  # `import sklearn` alone does not load submodules; this makes sk.metrics available

from models import tools, optimize, models, filters
from models.tests import PerformanceTest

In [2]:
# Load one window of answers, then keep only what filters.sequentize selects.
# `data` is the frame every later cell evaluates on.
answers_window = tools.load_data(offset=1700000, limit=500000)
sequential_selector = filters.sequentize(answers_window)
data = answers_window[sequential_selector]
print(len(data))

Loaded 483062 answers.
140664


In [3]:
# Evaluate a plain EloModel through PerformanceTest (split_data=True) and
# display the 'train' entry of its results.
elo_model = models.EloModel()
elo_test = PerformanceTest(elo_model, data, split_data=True)
elo_test.run()
elo_test.results['train']

RMSE: 0.409553335789
AUC: 0.728133976982
LL: -12641.1566218
OFF: 0.00342525129833
CORRECT: 18929
ACCURACY: 0.759468785107
Set Size: 24924

In [4]:
# Same evaluation as the Elo cell, but for EloResponseTime (split_data=True);
# displays the 'train' entry of the results.
elo_response_time_model = models.EloResponseTime()
elot_test = PerformanceTest(elo_response_time_model, data, split_data=True)
elot_test.run()
elot_test.results['train']

RMSE: 0.414665115892
AUC: 0.718464163852
LL: -12967.4453475
OFF: -0.00945367770268
CORRECT: 18754
ACCURACY: 0.752447440218
Set Size: 24924

In [6]:
# PFAExt built around a fresh EloModel instance. The model must be bound to
# `pfae` here: the next line reads that name, and on a fresh kernel nothing
# else defines it.
pfae = models.PFAExt(models.EloModel(), gamma=2.3, delta=-0.8)
pfae_test = PerformanceTest(pfae, data)
pfae_test.run()
pfae_test.results['train']

RMSE: 0.349081007676
AUC: 0.794363936652
LL: -55059.5761948
OFF: -0.0309137416068
CORRECT: 116919
ACCURACY: 0.831193482341
Set Size: 140664

In [7]:
# PFAGong built around its own EloModel instance; gamma, delta and decay are
# passed straight through to the constructor.
pfag_elo = models.EloModel()
pfag = models.PFAGong(
    pfag_elo,
    gamma=1.5,
    delta=-0.2,
    decay=0.8,
)
pfag_test = PerformanceTest(pfag, data)
pfag_test.run()
pfag_test.results['train']

RMSE: 0.353317420482
AUC: 0.77273277106
LL: -62152.5584432
OFF: 0.0339080158358
CORRECT: 117430
ACCURACY: 0.83482625263
Set Size: 140664

In [8]:
def power_law_time_effect(t):
    """Return 2.507 / (t + 1) ** 0.166 for the given t."""
    return 2.507/(t+1)**0.166


# PFAExtTiming built around a fresh EloModel, using the function above as its
# time_effect_fun.
pfaet = models.PFAExtTiming(
    models.EloModel(),
    time_effect_fun=power_law_time_effect,
)
pfaet_test = PerformanceTest(pfaet, data)
pfaet_test.run()
pfaet_test.results['train']

RMSE: 0.342808321951
AUC: 0.801598404829
LL: -53287.4166581
OFF: 0.000135530189312
CORRECT: 118055
ACCURACY: 0.839269464824
Set Size: 140664

In [9]:
# PFAExtSpacing built around its own EloModel instance; iota and decay_rate
# are passed straight through to the constructor.
pfaes_elo = models.EloModel()
pfaes = models.PFAExtSpacing(
    pfaes_elo,
    iota=1.5,
    decay_rate=0.18,
)
pfaes_test = PerformanceTest(pfaes, data)
pfaes_test.run()
pfaes_test.results['train']

RMSE: 0.347316268585
AUC: 0.778816743654
LL: -55103.8329468
OFF: 0.012345210262
CORRECT: 117504
ACCURACY: 0.83823057333
Set Size: 140181

In [10]:
# Interval boundaries of the staircase; consecutive entries form the
# (lower, upper) keys, so the intervals are contiguous from 0 to infinity.
# NOTE(review): the 60*... products suggest the unit is seconds — confirm.
staircase_edges = [
    0, 60, 90, 150, 300, 600,
    60*30, 60*60*3, 60*60*24, 60*60*24*5,
    np.inf,
]
# One value per interval, in the same order as the boundaries above.
staircase_values = [1.43, 1.17, 1.01, 0.93, 0.82, 0.78, 0.76, 0.63, 0.42, 0.12]

staircase = dict(
    zip(zip(staircase_edges[:-1], staircase_edges[1:]), staircase_values)
)

# PFAExtStaircase built around a fresh EloModel, configured with the mapping
# above.
pfaest = models.PFAExtStaircase(models.EloModel(), staircase=staircase)
pfaest_test = PerformanceTest(pfaest, data)
pfaest_test.run()
pfaest_test.results['train']

RMSE: 0.343096498471
AUC: 0.799689008754
LL: -53409.9651533
OFF: 0.00795375163498
CORRECT: 118095
ACCURACY: 0.839553830404
Set Size: 140664

In [18]:
# (x, y) control points handed to tools.connect_points; the returned object is
# then used as the time_effect_fun of PFAExtTiming.
# NOTE(review): the final point (10e10, 0.2) rises above the preceding 0.12 —
# confirm this is intended and not a typo.
time_effect_points = [
    (0, 1.6),
    (45, 1.43),
    (75, 1.17),
    (120, 1.01),
    (225, 0.93),
    (1200, 0.78),
    (6300, 0.76),
    (48600, 0.63),
    (259200, 0.42),
    (432000, 0.12),
    (10e10, 0.2),
]
staircase2 = tools.connect_points(time_effect_points)

pfaest2 = models.PFAExtTiming(
    models.EloModel(),
    time_effect_fun=staircase2,
)
pfaest2_test = PerformanceTest(pfaest2, data)
pfaest2_test.run()
pfaest2_test.results['train']

RMSE: 0.342860171684
AUC: 0.801698405912
LL: -53288.4460063
OFF: 0.000379587042317
CORRECT: 118053
ACCURACY: 0.839255246545
Set Size: 140664

In [15]:
# ROC curve for the PFAGong run: observed outcomes against predicted values,
# with 1 as the positive label.
fpr, tpr, thresholds = sk.metrics.roc_curve(
    pfag_test.train_values['observed'],
    pfag_test.train_values['predicted'], pos_label=1)

# Open a dedicated figure: with a non-inline backend the current figure
# persists between cells, so a bare plt.plot would draw on top of whatever
# was plotted last.
plt.figure()
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve (PFAGong, train values)')
plt.plot(fpr, tpr)

[<matplotlib.lines.Line2D at 0x7fca9543cd50>]

In [18]:
# Split the PFAExt train values by observed outcome.
pfae_values = pfae_test.train_values
p = pfae_values[pfae_values['observed'] == 1]
n = pfae_values[pfae_values['observed'] == 0]

# 100 equal-width bins covering the whole [0, 1] range. The previous
# arange-based bins stopped at 0.99, so every prediction in [0.99, 1] was
# dropped, and the strict </> comparisons also lost values lying exactly on a
# bin boundary.
bin_edges = np.linspace(0, 1, 101)

# Materialised as a list because the plotting cell iterates over it again.
intervals = list(zip(bin_edges[:-1], bin_edges[1:]))

# np.histogram treats each bin as [lower, upper), except the last one, which
# also includes its upper edge; values outside the edges are ignored.
positives = np.histogram(p['predicted'], bins=bin_edges)[0].tolist()
negatives = np.histogram(n['predicted'], bins=bin_edges)[0].tolist()

In [19]:
# Midpoint of every bin, computed once and shared by both curves.
bin_centers = [np.mean(interval) for interval in intervals]

# Open a dedicated figure so these counts are not drawn on top of an earlier
# plot (with a non-inline backend the current figure persists between cells).
plt.figure()
plt.xlabel('Predicted value (bin midpoint)')
plt.ylabel('Count per bin')
plt.title('PFAExt predictions per bin: observed = 1 (green) vs. observed = 0 (red)')
plt.plot(bin_centers, positives, 'g-')
plt.plot(bin_centers, negatives, 'r-')

[<matplotlib.lines.Line2D at 0x7f1b0015b0d0>]