In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Switch matplotlib over to seaborn's default plot theme for any figures drawn later.
sns.set()

In [18]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Embedding, Flatten, Subtract
from tensorflow.keras.optimizers import Adam

# Display the TensorFlow version this notebook was executed with (useful when reproducing results).
tf.__version__

'2.1.0'

In [4]:
# Load the regular-season games table; the path is relative to the notebook's working directory.
games_season = pd.read_csv('./games_season.csv')

# Category embeddings

In [8]:
# Quick sanity check of the loaded data: (rows, columns) first, then the leading rows.
print(games_season.shape)
games_season.head()

(312178, 8)


Unnamed: 0,season,team_1,team_2,home,score_diff,score_1,score_2,won
0,1985,3745,6664,0,17,81,64,1
1,1985,126,7493,1,7,77,70,1
2,1985,288,3593,1,7,63,56,1
3,1985,1846,9881,1,16,70,54,1
4,1985,2675,10298,1,12,86,74,1


## Define team lookup

Shared layers let a model reuse the same weight matrix in more than one place. In this exercise, you will build a "team strength" layer that represents each team by a single number, and you will use that number for both teams in the model. Because the layer is shared, the model learns one number per team that works well both when the team appears as `team_1` and when it appears as `team_2` in the input data.

In [12]:
# Gather every team ID that occurs in either column, so that a team which
# only ever shows up as team_2 is still accounted for.
team_ids = pd.unique(games_season[['team_1', 'team_2']].to_numpy().ravel())

# count number of unique teams from df
n_teams = len(team_ids)

# create an embedding layer
# The raw team IDs are fed to the layer as row indices, so the table must have
# (largest ID + 1) rows. For contiguous 0-based IDs this equals n_teams; for
# any other ID scheme it prevents out-of-range lookups.
# output_dim=1 -> each team is represented by a single learned number.
team_lookup = Embedding(input_dim=int(team_ids.max()) + 1,
                        output_dim=1,
                        input_length=1,
                        name='Team_Strength')

## Define team model

In [14]:
# Team-strength sub-model: maps one team ID to that team's learned strength.

# Entry point: a single team ID per sample.
teamid_in = Input(shape=(1,))

# Pass the ID through the shared embedding, then drop the extra axis so the
# result is one value per sample.
strength_lookup = team_lookup(teamid_in)
strength_lookup_flat = Flatten()(strength_lookup)

# Wrap input -> flattened strength as a reusable model.
team_strength_model = Model(teamid_in,
                            strength_lookup_flat,
                            name='Team_Strength_Model')

# Shared layers

In [15]:
team_strength_model.summary()

Model: "Team_Strength_Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1)]               0         
_________________________________________________________________
Team_Strength (Embedding)    (None, 1, 1)              10888     
_________________________________________________________________
flatten (Flatten)            (None, 1)                 0         
Total params: 10,888
Trainable params: 10,888
Non-trainable params: 0
_________________________________________________________________


## Defining two inputs

In [16]:
# One named input per side of the match-up; each carries a single team ID.
team_in_1, team_in_2 = [
    Input(shape=(1,), name=layer_name)
    for layer_name in ('Team_1_In', 'Team_2_In')
]

## Lookup both inputs in the same model

In [17]:
# Run both inputs through the *same* team strength model so that the two
# sides of a game share one set of weights.
team_1_strength, team_2_strength = [
    team_strength_model(team_input)
    for team_input in (team_in_1, team_in_2)
]

# Merge layers

## Output layer using shared layer

In [19]:
# create a Subtract layer using the inputs from the prev cell
# The output is team 1's strength minus team 2's strength; the final model
# below is trained to match this against the score_diff column.
score_diff = Subtract()([team_1_strength, team_2_strength])

## Model using two inputs and one output

In [24]:
# Assemble the full network: two team-ID inputs, one strength-difference output.
model = Model(inputs=[team_in_1, team_in_2], outputs=score_diff)

# Train with Adam at a learning rate of 0.1, minimising mean absolute error.
model.compile(loss='mae', optimizer=Adam(learning_rate=0.1))

# Predict from your model

## Fit the model to the regular season training data

In [28]:
# Extract the two team-ID columns and the score-difference target from the
# dataframe as float32 NumPy arrays (order: team_1, team_2, score_diff).
input_1, input_2, target = (
    games_season[column].to_numpy(dtype='float32')
    for column in ('team_1', 'team_2', 'score_diff')
)

# Train for 5 epochs in batches of 2048 rows, with 10% of the rows set aside
# by Keras for validation.
model.fit(x=[input_1, input_2],
          y=target,
          batch_size=2048,
          epochs=5,
          validation_split=0.1,
          verbose=1)

Train on 280960 samples, validate on 31218 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x208ea1e0908>

In [30]:
# Report the model's loss (mean absolute error) over the regular-season arrays.
# NOTE: these are the same arrays that were passed to fit(), so this number is
# not a held-out estimate of performance.
print(model.evaluate(x=[input_1, input_2],
                     y=target,
                     verbose=0))

8.186932185037598
