In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('../src/'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import torch
from torchvision import transforms
from torch.utils import data
from datetime import datetime
import itertools

from dataset.db_query import *
from dataset.dataset import SingleSeasonSingleLeagueTimeSlices, SingleSeasonSingleLeague
from dataset.util import pretty_print_match_ts, pretty_print_match
from dataset.train_valid_test_loader import *

import logging
logging.basicConfig(level=logging.DEBUG)


In [3]:
sqlpath = "../data/database.sqlite"


In [None]:

data = SingleSeasonSingleLeagueTimeSlices(sqlpath, LeagueTag.GER, SeasonTag.S13_14, 3)

#data = SingleSeasonSingleLeagueTime(sqlpath, LeagueTag.BEL, SeasonTag.S09_10)

In [None]:
data[99]

In [None]:
pretty_print_match_ts(data[99])

In [4]:
train_data = make_small_train_set(sqlpath)
#valid_data = make_small_valid_set(sqlpath)

INFO:root:got 1458 rows from db
INFO:root:got 183978 rows from db
INFO:root:got 306 rows (league: NET (id 13274), season: 2014/2015) from db
DEBUG:root:time for loading sql data 2.877454996109009
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:time for processing data 30.735285997390747
INFO:root:got 1458 rows from db
INFO:root:got 183978 rows from db
INFO:root:got 380 rows (league: SPA (id 21518), season: 2008/2009) from db
DEBUG:root:tim

DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root:player with id nan could not be found in players table
DEBUG:root

In [5]:
len(train_data)

686

In [6]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=False)
#valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=1, shuffle=False)


In [33]:
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F

class TeamEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_hidden_layers, dropout_prob=0.5, bidirectional=False):
        """ 
        Args:
            input_size (int): length of player vectors
            hidden_size (int): size of hidden layers
            num_hidden_layers (int): number of hidden layers in the recurrent part of the network
            dropout_prob (float): dropout probability
        """
        super(TeamEncoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size 
        self.num_layers = num_hidden_layers 
        self.dropout_prob = dropout_prob
        self.bidirectional = bidirectional
        
        # player vector projection embedding
        self.fc1 = nn.Linear(self.input_size, self.hidden_size, bias=True)

        self.rnn1 = nn.LSTM(
            input_size=self.hidden_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout_prob,
            batch_first=True,
            bidirectional=self.bidirectional)


    def forward(self, inp, hidden): 
        inp = F.relu(self.fc1(inp))
        inp, hidden = self.rnn1(inp, hidden)
        return inp, hidden
        
        
    def init_hidden(self, batch_size=1):
        """
        Note that a LSTM has two 'hidden' states (hidden, cell) thus we 
        give two values here.
        """
        num_directions = 2 if self.bidirectional else 1
        arg1 = num_directions * self.num_layers
        
        return torch.zeros(arg1, batch_size, self.hidden_size), torch.zeros(arg1, batch_size, self.hidden_size)


class SiamesePredictionNet(nn.Module):
    def __init__(self, input_size):
        super(SiamesePredictionNet, self).__init__()
        
        self.encoder = TeamEncoder(input_size, 128, 3)
        self.fc1 = nn.Linear(128, 1, bias=True)
        
        
    def forward(self, inp1, inp2):
        hidden1 = self.encoder.init_hidden()
        hidden2 = self.encoder.init_hidden()
        
        for x1 in inp1:
            x1 = torch.unsqueeze(x1, 0)
            y1, hidden1 = self.encoder(x1, hidden1)
            
        for x2 in inp2:
            x2 = torch.unsqueeze(x2, 0)
            y2, hidden2 = self.encoder(x2, hidden2)
        
        
        y1 = torch.squeeze(y1, dim=0)
        y2 = torch.squeeze(y2, dim=0)
        
        p1 = F.relu(y1 - y2)
        p2 = F.relu(y2 - y1)
        pd = F.relu(y1 - y2) - F.relu(y2 - y1)
        
        p1 = self.fc1(p1)
        p2 = self.fc1(p2)
        pd = self.fc1(pd)
        
        outp = torch.cat((p1, p2, pd), dim=1)
        return F.softmax(outp)


In [34]:
def train_one_epoch(model, loss_fn, optimizer, epoch_num):
    running_loss = 0.0
    for i, (match_dict, result) in enumerate(train_loader):
        optimizer.zero_grad()
        
        players_home = match_dict["players_home"]
        players_away = match_dict["players_away"]

        pred_result = model(players_home, players_away)
        
        error = loss_fn(pred_result, torch.unsqueeze(result.to(dtype=torch.float32), 0))
        error.backward()

        optimizer.step()

        running_loss += error.item()
        
        if i > 0 and i % 150 == 0:
            print("epoch {} | step {} | running loss {}".format(epoch_num, i, running_loss / 150))
            running_loss = 0.0
            
def train(model, loss_fn, optimizer):
    for epoch in range(20):
        train_one_epoch(model, loss_fn, optimizer, epoch)


In [35]:
model = SiamesePredictionNet(35)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

train(model, loss_fn, optimizer)

    

  "Please ensure they have the same size.".format(target.size(), input.size()))


epoch 0 | step 150 | running loss 0.6462632481257121
epoch 0 | step 300 | running loss 0.640191023349762
epoch 0 | step 450 | running loss 0.6367666629950205
epoch 0 | step 600 | running loss 0.6372081442674001
epoch 1 | step 150 | running loss 0.6407873471577962
epoch 1 | step 300 | running loss 0.6352630023161571
epoch 1 | step 450 | running loss 0.6369182980060577
epoch 1 | step 600 | running loss 0.6360160291194916
epoch 2 | step 150 | running loss 0.6413133923212687


KeyboardInterrupt: 