# Prep

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
% cd drive/MyDrive/sp/data
! ls

/content/drive/MyDrive/sp/data
FakeData_EPL.csv   KaggleDataset_withBO.txt  PL_scraped_ord.csv
KaggleDataset.csv  old_FakeData_EPL.csv


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.nn import Embedding,\
                     Module,\
                     Linear,\
                     Dropout,\
                     Tanh,\
                     ReLU,\
                     BatchNorm1d,\
                     LogSoftmax,\
                     NLLLoss,\
                     Conv1d,\
                     ModuleList
import torch.nn.functional as F

# Dataset

In [4]:
dataset = pd.read_csv('KaggleDataset_withBO.txt')
dataset.tail(3)

Unnamed: 0,match_id,country,league,season,week,date,home_team,away_team,home_goal,away_goal,result,home_lineup,away_lineup,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA
19524,24495,Spain,Spain LIGA BBVA,2015/2016,38,2016-05-15 00:00:00,Málaga CF,UD Las Palmas,4,1,win,Guillermo Ochoa - Miguel Torres - Raul Albento...,Raul Lizoain - David Garcia - Pedro Bigas - Pa...,1.8,3.75,4.5,1.83,3.7,4.0,1.85,3.45,4.0,1.8,3.6,4.33
19525,24496,Spain,Spain LIGA BBVA,2015/2016,38,2016-05-14 00:00:00,Atlético Madrid,RC Celta de Vigo,2,0,win,Jan Oblak - Juanfran - Stefan Savic - Diego Go...,Sergio Alvarez - Johny - Hugo Mallo - Sergi Go...,1.75,3.75,4.5,1.83,3.6,4.1,1.85,3.7,3.7,1.83,3.6,4.2
19526,24497,Spain,Spain LIGA BBVA,2015/2016,38,2016-05-15 00:00:00,Rayo Vallecano,Levante UD,3,1,win,Yoel Rodriguez - Quini - Antonio Amaya - Tito ...,Diego Marino - Ivan Lopez - David Navarro - Ca...,1.33,5.25,9.0,1.33,4.75,9.25,1.4,5.0,6.0,1.33,5.25,9.0


# Dataset Transform

## Label Setting

In [5]:
teams = np.unique(dataset[['away_team', 'home_team']].values)
team_lblenc = LabelEncoder()
team_lblenc.fit(teams)

players = np.unique(
    np.concatenate(
      (
        np.stack(dataset['home_lineup'].apply(lambda lineup: lineup.split(' - '))).reshape(-1),
        np.stack(dataset['away_lineup'].apply(lambda lineup: lineup.split(' - '))).reshape(-1)
      )    
    )
)
player_lblenc = LabelEncoder()
player_lblenc.fit(players)

results = pd.unique(dataset['result'])
result_lblenc = LabelEncoder()
result_lblenc.fit(results)

LabelEncoder()

## Labeling

In [6]:
home_team_labels = team_lblenc.transform(dataset['home_team'])
away_team_labels = team_lblenc.transform(dataset['away_team'])

home_player_labels = player_lblenc.transform(
     np.stack(dataset['home_lineup'].apply(lambda lineup: lineup.split(' - '))).reshape(-1)
).reshape(-1, 11)
away_player_labels = player_lblenc.transform(
     np.stack(dataset['away_lineup'].apply(lambda lineup: lineup.split(' - '))).reshape(-1)
).reshape(-1, 11)

result_labels = result_lblenc.transform(dataset['result'])



In [7]:
str_home_players = np.char.mod('%d', home_player_labels)
str_home_player_labels = np.apply_along_axis(lambda arr: ' - '.join(arr), axis=1, arr=str_home_players)

str_away_players = np.char.mod('%d', away_player_labels)
str_away_player_labels = np.apply_along_axis(lambda arr: ' - '.join(arr), axis=1, arr=str_away_players)

## Creating dataset

In [8]:
meta_dict = {
        'home_team_label': home_team_labels,
        'away_team_label': away_team_labels,
        'result_label': result_labels,
        'home_lineup_label': str_home_player_labels,
        'away_lineup_label': str_away_player_labels,
}

for column in dataset.columns:
  meta_dict.update({column: dataset[column].values})

transformed_dataset = pd.DataFrame(
    meta_dict
)

# Dataset Split

In [9]:
train_groups = []
dev_groups = []
test_groups = []
batch_size = 64
for league, data in transformed_dataset.groupby('league'):
  split_size = int(data.shape[0] * 0.63 // batch_size * batch_size)
  # split_size = int(data.shape[0] * 0.6)
  train_set, dev_test = train_test_split(data, shuffle=False, stratify=None, train_size=split_size)
  dev_set, test_set = train_test_split(dev_test, shuffle=False, stratify=None, train_size=0.5)

  print(league)
  print('Train Dev Test')
  print(train_set.shape[0], dev_set.shape[0], test_set.shape[0])
  print(f'{train_set.shape[0] / data.shape[0] * 100:.2f}%, {dev_set.shape[0] / data.shape[0] * 100:.2f}%, {test_set.shape[0] / data.shape[0] * 100:.2f}%')
  print('-' * 24)

  train_groups.append(train_set)
  dev_groups.append(dev_set)
  test_groups.append(test_set)

Belgium Jupiler League
Train Dev Test
704 238 239
59.61%, 20.15%, 20.24%
------------------------
England Premier League
Train Dev Test
1856 549 549
62.83%, 18.58%, 18.58%
------------------------
France Ligue 1
Train Dev Test
1792 531 532
62.77%, 18.60%, 18.63%
------------------------
Germany 1. Bundesliga
Train Dev Test
1472 448 448
62.16%, 18.92%, 18.92%
------------------------
Italy Serie A
Train Dev Test
1664 520 520
61.54%, 19.23%, 19.23%
------------------------
Netherlands Eredivisie
Train Dev Test
1216 401 402
60.23%, 19.86%, 19.91%
------------------------
Portugal Liga ZON Sagres
Train Dev Test
768 231 231
62.44%, 18.78%, 18.78%
------------------------
Scotland Premier League
Train Dev Test
960 284 285
62.79%, 18.57%, 18.64%
------------------------
Spain LIGA BBVA
Train Dev Test
1664 511 512
61.93%, 19.02%, 19.05%
------------------------


# Optimization and Evaluation Algorithms

## Training

In [10]:
def train(model, optimizer, home, away, y, loss_fn):
  model.train()
  optimizer.zero_grad()

  output = model(home, away) # forward prop
  loss = loss_fn(output, y) # loss
  loss.backward() # backprop
  optimizer.step() # parameter updating

  return loss.item()

## Evaluating

In [11]:
@torch.no_grad()
def evaluate(model, home, away, y):
  model.eval()

  output = model(home, away)
  prediction = output.argmax(-1)

  correct = (prediction == y).sum()

  return correct.item(), y.shape[0]

## Gradient Descent Fitting

In [12]:
def fit(model, train_set, dev_set, optimizer, loss_fn, num_epochs, every=100):
  assert train_set[0].shape[-1] == train_set[1].shape[-1]
  assert dev_set[0].shape[-1] == dev_set[1].shape[-1]
  
  train_home, train_away, train_y = train_set
  dev_home, dev_away, dev_y = dev_set

  print('Initial State')
  train_correct, train_all = evaluate(
      model, 
      train_home.flatten(0, -train_home.ndim + 1), 
      train_away.flatten(0, -train_away.ndim + 1), 
      train_y.reshape(-1)
  )
  dev_correct, dev_all = evaluate(model, dev_home, dev_away, dev_y)
  print(f'Train Acc%: {train_correct / train_all * 100:.4f}')
  print(f'Dev   Acc%: {dev_correct / dev_all * 100:.4f}')

  for epoch in range(1, num_epochs + 1):
    
    epoch_loss = 0
    for home_batch, away_batch, y_batch in zip(train_home, train_away, train_y):
      batch_loss = train(model, optimizer, home_batch, away_batch, y_batch, loss_fn)
      epoch_loss += batch_loss
    train_correct, train_all = evaluate(
      model, 
      train_home.flatten(0, -train_home.ndim + 1), 
      train_away.flatten(0, -train_away.ndim + 1), 
      train_y.reshape(-1)
    )
    dev_correct, dev_all = evaluate(model, dev_home, dev_away, dev_y)
    if epoch % every == 0:
      print('-' * 60)
      print(f'Epoch {epoch}')
      print(f'Avg Train Loss: {epoch_loss / train_home.shape[0]:.4f}')
      print(f'Train Acc%:     {train_correct / train_all * 100:.4f}')
      print(f'Dev   Acc%:     {dev_correct / dev_all * 100:.4f}')

## Main

In [13]:
def main(train_group, dev_group, test_group, league, model, optimizer, loss_fn, n_epochs, every):
  try:
    (home_train, away_train, y_train) = train_group
    (home_dev, away_dev, y_dev) = dev_group
    (home_test, away_test, y_test) = test_group
    print(f'Fitting on the {league} for {n_epochs} epochs')
    print('.' * 60)
    fit(
      model, 
      (home_train, away_train, y_train),
      (home_dev, away_dev, y_dev),
      optimizer,
      loss_fn,
      n_epochs,
      every
    )
  except KeyboardInterrupt:
      pass
  finally:
    test_correct, test_all = evaluate(model, home_test, away_test, y_test.reshape(-1))
    print(f'Test Acc%: {test_correct / test_all * 100:.4f}')
    print('=' * 60)


# Team Blade Chest Modeling

## TeamBladeChest Model 

In [56]:
class TeamBladeChest(Module):
  def __init__(self, num_teams, embedding_size, hidden_size, dropout=0.5):
    super(TeamBladeChest, self).__init__()
    self.num_teams = num_teams
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.dropout=dropout

    self.team_embedder = Embedding(self.num_teams, self.embedding_size)
    self.emb_bn = BatchNorm1d(self.embedding_size)

    self.chest_transform = Linear(self.embedding_size, self.hidden_size, bias=False)
    self.chest_bn = BatchNorm1d(self.hidden_size)

    self.blade_transform = Linear(self.embedding_size, self.hidden_size, bias=False)
    self.blade_bn = BatchNorm1d(self.hidden_size)

    self.regularizer = Dropout(p=self.dropout)
    self.activation = Tanh()

    self.result_transform = Linear(1, 3)
    self.classifier = LogSoftmax(dim=-1)

  def _encode_team(self, team):
    embedding = self.team_embedder(team)
    embedding = self.emb_bn(embedding)
    #dropout here?
    emnedding = self.regularizer(embedding)

    blade = self.blade_transform(embedding)
    blade = self.blade_bn(blade)
    blade = self.activation(blade)
    blade = self.regularizer(blade)

    chest = self.chest_transform(embedding)
    chest = self.chest_bn(chest)
    chest = self.activation(chest)
    chest = self.regularizer(chest)

    return blade, chest

  def _matchup(self, home_blade, home_chest, away_blade, away_chest):
    return (home_blade * away_chest).sum(-1) - (away_blade * home_chest).sum(-1)

  def forward(self, home, away):
    home_blade, home_chest = self._encode_team(home)
    away_blade, away_chest = self._encode_team(away)

    matchup_score = self._matchup(home_blade, home_chest, away_blade, away_chest).reshape(-1, 1)

    result = self.result_transform(matchup_score)
    result = self.classifier(result)
    result = self.regularizer(result)

    return self.classifier(result)

## Hyperparameters

In [57]:
home = torch.from_numpy(transformed_dataset.loc[:, ['home_team_label']].values.reshape(-1))
away = torch.from_numpy(transformed_dataset.loc[:, ['away_team_label']].values.reshape(-1))
y = torch.from_numpy(transformed_dataset.loc[:, ['result_label']].values.reshape(-1))

assert home.max() == away.max()
num_teams = home.max() + 1
embedding_size = 5
hidden_size = 15
batch_size = 64
learning_rate = 1e-3
n_epochs = 4000
every = 800

tbc_model = TeamBladeChest(
    num_teams=num_teams,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    dropout=0.5
)

optimizer = torch.optim.Adam(tbc_model.parameters(), lr=learning_rate)
criterion = NLLLoss()
tbc_model

TeamBladeChest(
  (team_embedder): Embedding(254, 5)
  (emb_bn): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (chest_transform): Linear(in_features=5, out_features=15, bias=False)
  (chest_bn): BatchNorm1d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (blade_transform): Linear(in_features=5, out_features=15, bias=False)
  (blade_bn): BatchNorm1d(15, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (regularizer): Dropout(p=0.5, inplace=False)
  (activation): Tanh()
  (result_transform): Linear(in_features=1, out_features=3, bias=True)
  (classifier): LogSoftmax(dim=-1)
)

In [58]:
class TeamBladeChest(Module):
  def __init__(self, num_teams, embedding_size, hidden_size, dropout=0.5):
    super(TeamBladeChest, self).__init__()
    self.num_teams = num_teams
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.dropout=dropout

    self.team_embedder = Embedding(self.num_teams, self.embedding_size)
    self.emb_bn = BatchNorm1d(self.embedding_size)

    self.chest_transform = Linear(self.embedding_size, self.hidden_size, bias=False)
    self.chest_bn = BatchNorm1d(self.hidden_size)

    self.blade_transform = Linear(self.embedding_size, self.hidden_size, bias=False)
    self.blade_bn = BatchNorm1d(self.hidden_size)

    self.regularizer = Dropout(p=self.dropout)
    self.activation = Tanh()

    self.result_transform = Linear(1, 3)
    self.classifier = LogSoftmax(dim=-1)

  def _encode_team(self, team):
    embedding = self.team_embedder(team)
    embedding = self.emb_bn(embedding)
    #dropout here?
    emnedding = self.regularizer(embedding)

    blade = self.blade_transform(embedding)
    blade = self.blade_bn(blade)
    blade = self.activation(blade)
    blade = self.regularizer(blade)

    chest = self.chest_transform(embedding)
    chest = self.chest_bn(chest)
    chest = self.activation(chest)
    chest = self.regularizer(chest)

    return blade, chest

  def _matchup(self, home_blade, home_chest, away_blade, away_chest):
    return (home_blade * away_chest).sum(-1) - (away_blade * home_chest).sum(-1)

  def forward(self, home, away):
    home_blade, home_chest = self._encode_team(home)
    away_blade, away_chest = self._encode_team(away)

    matchup_score = self._matchup(home_blade, home_chest, away_blade, away_chest).reshape(-1, 1)

    result = self.result_transform(matchup_score)
    result = self.classifier(result)
    result = self.regularizer(result)

    return self.classifier(result)

## Fitting

In [59]:
for train_set, dev_set, test_set in zip(train_groups, dev_groups, test_groups):
  # batch_size = train_set.shape[0]
  home_train = torch.from_numpy(train_set.loc[:, ['home_team_label']].values.reshape(-1))
  home_train = home_train.reshape(-1, batch_size)
  away_train = torch.from_numpy(train_set.loc[:, ['away_team_label']].values.reshape(-1))
  away_train = away_train.reshape(-1, batch_size)
  y_train = torch.from_numpy(train_set.loc[:, ['result_label']].values.reshape(-1))
  y_train = y_train.reshape(-1, batch_size)

  home_dev = torch.from_numpy(dev_set.loc[:, ['home_team_label']].values.reshape(-1))
  away_dev = torch.from_numpy(dev_set.loc[:, ['away_team_label']].values.reshape(-1))
  y_dev = torch.from_numpy(dev_set.loc[:, ['result_label']].values.reshape(-1))

  home_test = torch.from_numpy(test_set.loc[:, ['home_team_label']].values.reshape(-1))
  away_test = torch.from_numpy(test_set.loc[:, ['away_team_label']].values.reshape(-1))
  y_test = torch.from_numpy(test_set.loc[:, ['result_label']].values.reshape(-1))

  main(
      (home_train, away_train, y_train),
      (home_dev, away_dev, y_dev),
      (home_test, away_test, y_test),
      np.unique(train_set["league"].values).item(),
      tbc_model, 
      optimizer,
       criterion,
       n_epochs,
       every    
  )

Fitting on the Belgium Jupiler League for 4000 epochs
............................................................
Initial State
Train Acc%: 25.8523
Dev   Acc%: 27.3109
------------------------------------------------------------
Epoch 800
Train Loss: 14.9369
Train Acc%: 53.6932
Dev   Acc%: 49.5798
Test Acc%: 49.3724
Fitting on the England Premier League for 4000 epochs
............................................................
Initial State
Train Acc%: 45.1509
Dev   Acc%: 44.4444
Test Acc%: 48.2696
Fitting on the France Ligue 1 for 4000 epochs
............................................................
Initial State
Train Acc%: 41.9643
Dev   Acc%: 45.5744
Test Acc%: 45.3008
Fitting on the Germany 1. Bundesliga for 4000 epochs
............................................................
Initial State
Train Acc%: 42.3913
Dev   Acc%: 45.3125
Test Acc%: 48.2143
Fitting on the Italy Serie A for 4000 epochs
............................................................
Initial State
Train 

# Player Blade Chest Modeling

## PlayerOneHot Model

In [11]:
class PlayerOneHot(Module):
  def __init__(self, num_players):
    super(PlayerOneHot, self).__init__()
    self.num_players = num_players

  def forward(self, players):
    return F.one_hot(players, self.num_players).sum(-2).double()

## PlayerBladeChest Model

In [74]:
class PlayerBladeChest(Module):
  def __init__(self, num_players, hidden_size, dropout=0.5):
    super(PlayerBladeChest, self).__init__()
    self.num_players = num_players
    # self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.dropout=dropout

    # self.team_embedder = Embedding(self.num_teams, self.embedding_size)
    # self.emb_bn = BatchNorm1d(self.embedding_size)

    self.one_hot = PlayerOneHot(self.num_players)

    self.chest_transform = Linear(self.num_players, self.hidden_size, bias=False)
    self.chest_bn = BatchNorm1d(self.hidden_size)

    self.blade_transform = Linear(self.num_players, self.hidden_size, bias=False)
    self.blade_bn = BatchNorm1d(self.hidden_size)

    self.regularizer = Dropout(p=self.dropout)
    self.activation = Tanh()

    self.result_transform = Linear(1, 3)
    self.classifier = LogSoftmax(dim=-1)

  def _encode_team(self, team):
    # embedding = self.team_embedder(team)
    # embedding = self.emb_bn(embedding)
    # #dropout here?
    # emnedding = self.regularizer(embedding)

    embedding = self.one_hot(team)
    blade = self.blade_transform(embedding)
    blade = self.blade_bn(blade)
    blade = self.activation(blade)
    blade = self.regularizer(blade)

    chest = self.chest_transform(embedding)
    chest = self.chest_bn(chest)
    chest = self.activation(chest)
    chest = self.regularizer(chest)

    return blade, chest

  def _matchup(self, home_blade, home_chest, away_blade, away_chest):
    return (home_blade * away_chest).sum(-1) - (away_blade * home_chest).sum(-1)

  def forward(self, home, away):
    home_blade, home_chest = self._encode_team(home)
    away_blade, away_chest = self._encode_team(away)

    matchup_score = self._matchup(home_blade, home_chest, away_blade, away_chest).reshape(-1, 1)

    result = self.result_transform(matchup_score)
    result = self.classifier(result)
    result = self.regularizer(result)

    return self.classifier(result)

## Hyperparameters

In [113]:
home_players = torch.from_numpy(home_player_labels)
away_players = torch.from_numpy(away_player_labels)
assert home_players.max() == away_players.max()
num_players = home_players.max() + 1

# embedding_size = 5
hidden_size = 2
learning_rate = 1e-3
n_epochs = 4000
every = 800
batch_size = 64

pbc_model = PlayerBladeChest(
    num_players=num_players,
    # embedding_size=embedding_size,
    hidden_size=hidden_size,
    dropout=0.5
).double()

optimizer = torch.optim.Adam(pbc_model.parameters(), lr=learning_rate)
criterion = NLLLoss()
pbc_model

PlayerBladeChest(
  (one_hot): PlayerOneHot()
  (chest_transform): Linear(in_features=9513, out_features=2, bias=False)
  (chest_bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (blade_transform): Linear(in_features=9513, out_features=2, bias=False)
  (blade_bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (regularizer): Dropout(p=0.5, inplace=False)
  (activation): Tanh()
  (result_transform): Linear(in_features=1, out_features=3, bias=True)
  (classifier): LogSoftmax(dim=-1)
)

## Fitting

In [116]:
for train_set, dev_set, test_set in zip(train_groups, dev_groups, test_groups):
  # batch_size = train_set.shape[0]
  home_train = torch.from_numpy(
      np.stack(train_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  home_train = home_train.reshape(-1, batch_size, 11)
  away_train = torch.from_numpy(
      np.stack(train_set['away_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_train = away_train.reshape(-1, batch_size, 11)
  y_train = torch.from_numpy(train_set.loc[:, ['result_label']].values.reshape(-1))
  y_train = y_train.reshape(-1, batch_size)

  home_dev = torch.from_numpy(
      np.stack(dev_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_dev = torch.from_numpy(
      np.stack(dev_set['away_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  y_dev = torch.from_numpy(dev_set.loc[:, ['result_label']].values.reshape(-1))

  home_test = torch.from_numpy(
      np.stack(test_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_test = torch.from_numpy(
      np.stack(test_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  y_test = torch.from_numpy(test_set.loc[:, ['result_label']].values.reshape(-1))

  main(
      (home_train, away_train, y_train),
      (home_dev, away_dev, y_dev),
      (home_test, away_test, y_test),
      np.unique(train_set["league"].values).item(),
      pbc_model, 
      optimizer,
       criterion,
       n_epochs=15,
       every=5
  )

Fitting on the Belgium Jupiler League for 15 epochs
............................................................
Initial State
Train Acc%: 47.1591
Dev   Acc%: 42.8571
------------------------------------------------------------
Epoch 5
Avg Train Loss: 1.3416
Train Acc%:     60.2273
Dev   Acc%:     42.0168
------------------------------------------------------------
Epoch 10
Avg Train Loss: 1.2682
Train Acc%:     64.9148
Dev   Acc%:     44.1176
------------------------------------------------------------
Epoch 15
Avg Train Loss: 1.2478
Train Acc%:     67.7557
Dev   Acc%:     43.6975
Test Acc%: 48.5356
Fitting on the England Premier League for 15 epochs
............................................................
Initial State
Train Acc%: 62.6616
Dev   Acc%: 39.8907
------------------------------------------------------------
Epoch 5
Avg Train Loss: 1.3459
Train Acc%:     60.8297
Dev   Acc%:     42.4408
------------------------------------------------------------
Epoch 10
Avg Train Loss:

# DeepSet Blade Chest  Modeling

## DeepSet Blade Chest Model

In [14]:
class DeepSetBladeChest(Module):
  def __init__(self, num_players, embedding_size, player_hidden_size, team_hidden_sizes, blade_chest_size, dropout=0.5):
    super(DeepSetBladeChest, self).__init__()
    self.num_players = num_players
    self.embedding_size = embedding_size
    self.player_hidden_size = player_hidden_size
    self.team_hidden_sizes = team_hidden_sizes
    self.blade_chest_size = blade_chest_size
    self.dropout=dropout

    self.player_embedder = Embedding(self.num_players, self.embedding_size)
    self.emb_bn = BatchNorm1d(self.embedding_size)

    self.player_fc = Conv1d(self.embedding_size, self.player_hidden_size, 1)
    self.player_fc_bn = BatchNorm1d(self.player_hidden_size)
    self.team_fc_list = [self.player_hidden_size] + list(team_hidden_sizes)
    self.team_fc = ModuleList(
        [
          Linear(size, self.team_fc_list[index + 1])
          for index, size in enumerate(self.team_fc_list[:-1])
        ]
    )
    self.team_fc_bn = ModuleList(
        [
          BatchNorm1d(size)
          for size in self.team_fc_list[1:]
        ]
    )

    self.chest_transform = Linear(self.team_hidden_sizes[-1], self.blade_chest_size, bias=False)
    self.chest_bn = BatchNorm1d(self.blade_chest_size)

    self.blade_transform = Linear(self.team_hidden_sizes[-1], self.blade_chest_size, bias=False)
    self.blade_bn = BatchNorm1d(self.blade_chest_size)

    self.regularizer = Dropout(p=self.dropout)
    self.blade_chest_activation = Tanh()
    self.fc_activation = ReLU()

    self.result_transform = Linear(1, 3)
    self.classifier = LogSoftmax(dim=-1)

  def _encode_team(self, team):
    # input of shape (n, 11)
    embedding = self.player_embedder(team)
    embedding = self.emb_bn(embedding.moveaxis(-2, -1))
    # #dropout here?
    emnedding = self.regularizer(embedding)

    player_h = self.player_fc(embedding)
    player_h = self.player_fc_bn(player_h)
    player_h = self.fc_activation(player_h)
    player_h = self.regularizer(player_h)
    team_h = player_h.sum(-1)

    for fc, bn in zip(self.team_fc, self.team_fc_bn):
      team_h = fc(team_h)
      team_h = bn(team_h)
      team_h = self.fc_activation(team_h)
      team_h = self.regularizer(team_h)

    blade = self.blade_transform(team_h)
    blade = self.blade_bn(blade)
    blade = self.blade_chest_activation(blade)
    blade = self.regularizer(blade)

    chest = self.chest_transform(team_h)
    chest = self.chest_bn(chest)
    chest = self.blade_chest_activation(chest)
    chest = self.regularizer(chest)

    return blade, chest

  def _matchup(self, home_blade, home_chest, away_blade, away_chest):
    return (home_blade * away_chest).sum(-1) - (away_blade * home_chest).sum(-1)

  def forward(self, home, away):
    home_blade, home_chest = self._encode_team(home)
    away_blade, away_chest = self._encode_team(away)

    matchup_score = self._matchup(home_blade, home_chest, away_blade, away_chest).reshape(-1, 1)

    result = self.result_transform(matchup_score)
    result = self.classifier(result)
    result = self.regularizer(result)

    return self.classifier(result)

## Hyperparameters

In [21]:
home_players = torch.from_numpy(home_player_labels)
away_players = torch.from_numpy(away_player_labels)
assert home_players.max() == away_players.max()
num_players = home_players.max() + 1

embedding_size = 3
player_hidden_size = 12
team_hidden_size = [16, 18]
blade_chest_size = 14
learning_rate = 1e-3
dropout = 0.5
n_epochs = 400
every = 40
batch_size = 64

dsbc_model = DeepSetBladeChest(
    num_players, 
    embedding_size, 
    player_hidden_size, 
    team_hidden_size, 
    blade_chest_size,
    dropout=dropout
)
optimizer = torch.optim.Adam(dsbc_model.parameters(), lr=learning_rate)
criterion = NLLLoss()
# dsbc_model

## Fitting

In [22]:
#@title
for train_set, dev_set, test_set in zip(train_groups, dev_groups, test_groups):
  # batch_size = train_set.shape[0]
  home_train = torch.from_numpy(
      np.stack(train_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  home_train = home_train.reshape(-1, batch_size, 11)
  away_train = torch.from_numpy(
      np.stack(train_set['away_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_train = away_train.reshape(-1, batch_size, 11)
  y_train = torch.from_numpy(train_set.loc[:, ['result_label']].values.reshape(-1))
  y_train = y_train.reshape(-1, batch_size)

  home_dev = torch.from_numpy(
      np.stack(dev_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_dev = torch.from_numpy(
      np.stack(dev_set['away_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  y_dev = torch.from_numpy(dev_set.loc[:, ['result_label']].values.reshape(-1))

  home_test = torch.from_numpy(
      np.stack(test_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  away_test = torch.from_numpy(
      np.stack(test_set['home_lineup_label'].apply(lambda lineup: lineup.split(' - '))).astype(np.int16)
  ).long()
  y_test = torch.from_numpy(test_set.loc[:, ['result_label']].values.reshape(-1))

  main(
      (home_train, away_train, y_train),
      (home_dev, away_dev, y_dev),
      (home_test, away_test, y_test),
      np.unique(train_set["league"].values).item(),
      dsbc_model, 
      optimizer,
       criterion,
       n_epochs,
       every
  )

Fitting on the Belgium Jupiler League for 400 epochs
............................................................
Initial State
Train Acc%: 25.8523
Dev   Acc%: 24.3697
------------------------------------------------------------
Epoch 40
Avg Train Loss: 1.6370
Train Acc%:     25.8523
Dev   Acc%:     24.3697
------------------------------------------------------------
Epoch 80
Avg Train Loss: 1.4074
Train Acc%:     47.1591
Dev   Acc%:     42.8571
------------------------------------------------------------
Epoch 120
Avg Train Loss: 1.4163
Train Acc%:     47.1591
Dev   Acc%:     42.8571
------------------------------------------------------------
Epoch 160
Avg Train Loss: 1.3728
Train Acc%:     47.1591
Dev   Acc%:     42.8571
------------------------------------------------------------
Epoch 200
Avg Train Loss: 1.4273
Train Acc%:     47.1591
Dev   Acc%:     42.8571
------------------------------------------------------------
Epoch 240
Avg Train Loss: 1.5007
Train Acc%:     47.1591
Dev   