# Actor GP Model

In [1]:
import numpy as np
import os.path

from datetime import datetime
from thesis.playerkern import Game, init_db, ActorGPModel, TestSet

In [2]:
# Display numpy arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

# Time window of the experiment. Further down, games with
# BEGIN_TRAIN <= kickoff < BEGIN_TEST are used to fit the model, and games
# with BEGIN_TEST <= kickoff <= END_TEST are used to evaluate it.
# Only one of the tournament configurations below should be active at a time.

# EURO 2008.
BEGIN_TRAIN = datetime(year=2006, month=7, day=1)
BEGIN_TEST = datetime(year=2008, month=6, day=7)
END_TEST = datetime(year=2008, month=6, day=30)

# EURO 2012.
# BEGIN_TRAIN = datetime(2006, 7, 1)
# BEGIN_TEST = datetime(2012, 6, 8)
# END_TEST = datetime(2012, 7, 2)

# EURO 2016.
# BEGIN_TRAIN = datetime(2007, 7, 1)
# BEGIN_TEST = datetime(2016, 6, 10)
# END_TEST = datetime(2016, 7, 11)

Connect to the database.

In [3]:
# Path to the SQLite database file (relative, so it is resolved against the
# directory the notebook kernel is running in).
db_path = "data/kdb-2017-11-15-ft-corrected.db"

# Fail early with an explicit, descriptive error instead of a bare `assert`:
# assertions are removed when Python runs with -O and carry no message.
# `isfile` (rather than `exists`) also rejects a directory at that path.
if not os.path.isfile(db_path):
    raise FileNotFoundError(
        "Database file not found: {!r} (working directory: {!r})".format(
            db_path, os.getcwd()))

init_db("sqlite:///{}".format(db_path))

## Fitting the model

In [4]:
# Variances forwarded to the model as keyword arguments; they show up as the
# `actors` and `home_adv` variances in the printed GP summary after fitting.
hyperparams = dict(actors_var=0.045, home_adv_var=0.25)

# First positional argument of ActorGPModel; its exact meaning is defined by
# the model class (not visible in this notebook).
alpha = 0.6

model = ActorGPModel(alpha, home_advantage=True, **hyperparams)

In [5]:
%%time
games = Game.select().where(
    (BEGIN_TRAIN <= Game.kickoff_time)
    & (Game.kickoff_time < BEGIN_TEST))

model.fit(games)

print("Number of games: %s" % (model.featmat.shape[0],))
print("Number of features: %s" % (model.featmat.shape[1],))
print(model.gp)

Number of games: 5527
Number of features: 39472

Name : gp
Objective : 5388.536570804834
Number of Parameters : 2
Number of Optimization Parameters : 2
Updates : True
Parameters:
  [1mgp.                   [0;0m  |  value  |  constraints  |  priors
  [1msum.actors.variances  [0;0m  |  0.045  |      +ve      |        
  [1msum.home_adv.variances[0;0m  |   0.25  |      +ve      |        
CPU times: user 4min 27s, sys: 8.72 s, total: 4min 35s
Wall time: 1min 18s


## Evaluating the model's performance

In [6]:
# Held-out games: 'European Championship' fixtures kicking off within
# [BEGIN_TEST, END_TEST] (both bounds inclusive).
in_test_window = (
    (Game.kickoff_time >= BEGIN_TEST)
    & (Game.kickoff_time <= END_TEST))
is_euro = (Game.competition == 'European Championship')
test_set = TestSet.from_games(in_test_window & is_euro)

# Score the fitted model on the held-out games and print the summary.
evaluation = test_set.evaluate(model)
evaluation.print_summary()

number of samples: 31
0-1 loss: 17.000
log loss: 29.816


In [7]:
# Baseline for comparison with the model above: a constant predictor that
# ignores its input and always returns the same three equal values.
def uniform_prediction(x):
    """Return the triple (1/3, 1/3, 1/3) regardless of `x`."""
    return (1/3, 1/3, 1/3)

test_set.evaluate_fct(uniform_prediction).print_summary()

number of samples: 31
0-1 loss: 20.000
log loss: 34.057
