In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class FootballDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        x: Indexable container of features.
        y: Indexable container of targets; its length defines the dataset size.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __getitem__(self, i):
        # Return the (features, target) pair stored at position ``i``.
        features, target = self.x[i], self.y[i]
        return features, target

    def __len__(self):
        # The number of samples is taken from the targets.
        return len(self.y)


class FootballDataModule(pl.LightningDataModule):
    """Reads two parquet match tables and serves train/val/test dataloaders.

    The two files are concatenated, the ``team1_score`` column is used as the
    class label (capped at ``n_classes - 1``), and the features are min-max
    scaled with statistics fitted on the training split only.

    Args:
        national_filename: Path of the first parquet file passed to
            ``pd.read_parquet``.
        league_filename: Path of the second parquet file passed to
            ``pd.read_parquet``.
        feature_cols: Column names to use as features. When ``None``, every
            column except ``team1``, ``team2``, ``team1_score`` and
            ``team1_home`` is used.
        batch_size: Batch size used by all three dataloaders.
    """

    def __init__(self, national_filename, league_filename, feature_cols=None,
                 batch_size=64):
        super().__init__()
        self.national_filename = national_filename
        self.league_filename = league_filename
        self.feature_cols = feature_cols
        self.batch_size = batch_size
        # Labels are 0..5, where 5 collects every score of five or more.
        self.n_classes = 6
        # Populated by ``setup``; kept so new data can be scaled the same way.
        self.scaler = None

    def setup(self, stage=None):
        """Load the data, split it 81/9/10 into train/val/test and scale it."""
        df1 = pd.read_parquet(self.national_filename)
        df2 = pd.read_parquet(self.league_filename)
        df = pd.concat([df1, df2])
        if self.feature_cols is not None:
            x = df[self.feature_cols].values
        else:
            x = df.drop(columns=['team1', 'team2', 'team1_score', 'team1_home']).values

        # Cap the label without writing into the DataFrame's own buffer:
        # ``.values`` may alias the frame (and is read-only under pandas
        # Copy-on-Write), so build a new array instead of assigning in place.
        y = np.minimum(df['team1_score'].to_numpy(), self.n_classes - 1)

        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)
        x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)

        # Fit the scaler on the training split only to avoid leaking
        # validation/test statistics into training.
        self.scaler = MinMaxScaler()
        x_train = self.scaler.fit_transform(x_train)
        x_val = self.scaler.transform(x_val)
        x_test = self.scaler.transform(x_test)

        self.ds_train = self._make_dataset(x_train, y_train)
        self.ds_val = self._make_dataset(x_val, y_val)
        self.ds_test = self._make_dataset(x_test, y_test)

    @staticmethod
    def _make_dataset(x, y):
        # Wrap numpy arrays as float32 features and int64 class labels.
        return FootballDataset(
            torch.as_tensor(x, dtype=torch.float32),
            torch.as_tensor(y, dtype=torch.long),
        )

    def train_dataloader(self):
        # Shuffle so mini-batches differ between epochs.
        return DataLoader(self.ds_train, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.ds_val, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.ds_test, batch_size=self.batch_size)

In [9]:
class FootballNN(pl.LightningModule):
    """Fully connected classifier trained with cross-entropy and Adam.

    The network stacks five hidden blocks of widths 50, 100, 100, 50 and 10,
    each made of ``Linear`` -> optional ``BatchNorm1d`` -> activation ->
    optional ``Dropout(0.2)``, followed by a ``Linear`` layer that outputs
    ``n_classes`` logits.

    Args:
        n_input: Number of input features.
        n_classes: Number of output classes (logits).
        learning_rate: Learning rate passed to Adam.
        use_dropout: Whether to add ``Dropout(0.2)`` after each activation.
        use_batch_norm: Whether to add ``BatchNorm1d`` after each hidden
            linear layer.
        activation: ``'tanh'`` selects ``nn.Tanh``; any other value selects
            ``nn.ReLU``.
    """

    def __init__(self, n_input, n_classes, learning_rate=1e-3,
        use_dropout=False, use_batch_norm=True, activation='tanh'):
        super().__init__()
        self.save_hyperparameters()

        hidden_widths = (50, 100, 100, 50, 10)
        layers = []
        width_in = n_input
        for width_out in hidden_widths:
            layers.append(nn.Linear(width_in, width_out))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(width_out))
            layers.append(nn.Tanh() if activation == 'tanh' else nn.ReLU())
            if use_dropout:
                layers.append(nn.Dropout(0.2))
            width_in = width_out
        # Final projection from the last hidden width to the class logits.
        layers.append(nn.Linear(width_in, n_classes))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

    def _cross_entropy(self, batch):
        # Shared by the train/val/test steps: forward pass plus loss.
        features, targets = batch
        return F.cross_entropy(self(features), targets)

    def training_step(self, batch, batch_idx):
        loss = self._cross_entropy(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self._cross_entropy(batch)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        loss = self._cross_entropy(batch)
        self.log('test_loss', loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

In [10]:
# Base attributes; each one is expanded to _min/_mean/_max variants, two
# goalkeeping maxima are appended, and the whole set is repeated for both teams.
# NOTE(review): 'attacking_finishing' is listed twice, so its three aggregate
# columns appear twice per team in the feature list (70 names in total).
# Confirm whether a different attribute was intended before removing it;
# any change here alters the model's input width.
x_cols = [
    'overall',
    'potential',
    'skill_moves',
    'attacking_finishing',
    'skill_long_passing',
    'movement_sprint_speed',
    'movement_agility',
    'value_eur',
    'wage_eur',
    'attacking_finishing',
    'power_stamina',
]
x_cols = [f'{stat}{agg}' for stat in x_cols for agg in ('_min', '_mean', '_max')]
x_cols += ['goalkeeping_positioning_max', 'goalkeeping_reflexes_max']
x_cols = [f'{team}{col}' for team in ('team1_', 'team2_') for col in x_cols]

# Target column name.
y_cols = ['team1_score']

In [11]:
# Data module over both match files, restricted to the engineered feature list.
dl = FootballDataModule(
    national_filename='national_games.parquet',
    league_filename='league_games.parquet',
    feature_cols=x_cols,
)

In [15]:
# Derive the input width from the feature list instead of hard-coding 70, so
# the network cannot drift out of sync when x_cols is edited.
n_input = len(x_cols)
model = FootballNN(n_input, dl.n_classes, use_dropout=False, use_batch_norm=True, learning_rate=0.05)

In [16]:
# State the epoch budget explicitly instead of relying on Lightning's implicit
# default (1000 epochs when neither max_epochs nor max_steps is given).
# NOTE(review): the saved output of this cell reports
# `Trainer(overfit_batches=1)`, which does not match this call. Re-run the
# notebook from a fresh kernel, or restore that argument, so code and output agree.
trainer = pl.Trainer(max_epochs=1000)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(overfit_batches=1)` was configured so 1 batch will be used.


In [17]:
# Train using the data module's train/val dataloaders.
trainer.fit(model, datamodule=dl)

  rank_zero_warn(

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 25.0 K
-------------------------------------
25.0 K    Trainable params
0         Non-trainable params
25.0 K    Total params
0.100     Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 142.86it/s]

  rank_zero_warn(


                                                                            

  rank_zero_warn(
  rank_zero_warn(


Epoch 999: 100%|██████████| 2/2 [00:00<00:00, 42.51it/s, loss=5.56e-05, v_num=22] 

`Trainer.fit` stopped: `max_epochs=1000` reached.


Epoch 999: 100%|██████████| 2/2 [00:00<00:00, 37.30it/s, loss=5.56e-05, v_num=22]


In [None]:
# Evaluate on the held-out test split provided by the data module.
trainer.test(model, datamodule=dl)

  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 34/34 [00:00<00:00, 336.62it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           1.7324435710906982
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 1.7324435710906982}]