This Python script will read in the binary_90_minutes_cleaned_data_for_lstm.csv file and run it through an LSTM model. The binary_90_minutes_cleaned_data_for_lstm.csv file contains a breakdown for each of the 90 minutes (t = 0 to t = 89) of a given game. Each minute of a given game is modeled as a binary sequence of 16 types of events. If a given event takes place at the given minute of a game, then a 1 is put in place; a 0 otherwise. 

Here, we will create an LSTM model that will take in the first 45 minutes (t = 0 to 44) of every game as X, and whether or not a goal is scored during each minute of the second half as Y. The file will construct the LSTM such that if we are given the first half sequence of events for every minute, we can predict whether a goal is made during each mintue of the second half. We, in effect, are creating a many-to-one LSTM model. The file is, therefore, appropriately named "binary_90_minutes_LSTM_isGoal.py".

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import Libraries and packages from Keras
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [0]:

from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Importing Training Set
filename = "drive/Team Drives/Deep Learning Project/armaan/binary_90_minutes_cleaned_data_for_lstm.csv"
dataset_train = pd.read_csv(filename)

cols = dataset_train.columns
print("len(cols):", len(cols))

for col in cols:
    if col == 'id_odsp':
        dataset_train.id_odsp = dataset_train.id_odsp.astype(str)
    else:
        dataset_train[col] = dataset_train[col].astype(int)

len(cols): 18


In [0]:
dataset_train

Unnamed: 0,id_odsp,time,Attempted_Shot,Corner_Kick,Foul,First_Yellow_Card,Second_Yellow_Card,Straight_Red_Card,Substitution,Free_Kick_Awarded,Off_Sides,Hand_Ball,Penalty_Awarded,Key_Pass,Failed_Through_Ball,Sending_Off,Own_Goal,is_goal
0,004f4ING/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,004f4ING/,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
2,004f4ING/,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,004f4ING/,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,004f4ING/,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,004f4ING/,5,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
6,004f4ING/,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,004f4ING/,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,004f4ING/,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,004f4ING/,9,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0


In [0]:
dataset = dataset.as_matrix() # Using multiple predictors.

X = []
Y = []
    
current_game = "" 

new_row_X = []  #To store the sequence of events for the first half, between t = 0 to 44, for a given game only.
new_row_Y = []  #To store the sequence of goals for the second half, between t = 45 to 89, for a given game only.


#Iterate through the dataset and partition the sequence of
# events for a minute as X (first half) or Y (second half).
for index, row in enumerate(dataset):
    if current_game == row[0]:
        if row[1] <= 44:
            row = np.delete(row, 0)     #drop the game id from the row
            row = np.delete(row, 0)     #drop the time from the row
            new_row_X.append(row)       #Add the sequence of events to the game's first half.
        else:
            new_row_Y.append(row[17])   #Store only whether a goal takes place or not.
    else:
        if index != 0:
            #If operating on a new game, write out the previous game's data to X and Y.
            X.append(new_row_X)         
            Y.append(new_row_Y)
        new_row_X = []
        new_row_Y = []
        current_game = row[0]
        if row[1] <= 45:
            row = np.delete(row, 0)
            row = np.delete(row, 0)
            new_row_X.append(row)
        else:
            new_row_Y.append(row[17]) #Store only whether a goal takes place or not.





In [0]:
X[0]

[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=object),
 array([1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype=o

In [0]:
X = np.array(X)

In [0]:
X.shape

(9073, 45, 16)

In [0]:
Y

[[[0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [1],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0]],
 [[0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [1],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [1],
  [0],
  [0]],
 [[0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0]],
 [[0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [0],
  [

In [0]:
Y = np.array(Y)
Y.shape

(9073, 45, 1)

In [0]:
# Initializing the LSTM
lstm_model = Sequential()


#Adding the first LSTM layer and some Dropout regularization.
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(45, 16)))
lstm_model.add(Dropout(0.2))

#Adding a second LSTM layer and some Dropout regularization.
lstm_model.add(LSTM(units=50, return_sequences=True))
lstm_model.add(Dropout(0.2))

#Adding a third LSTM layer and some Dropout regularization.
lstm_model.add(LSTM(units=50, return_sequences=True))
lstm_model.add(Dropout(0.2))

#Adding a fourth LSTM layer and some Dropout regularization.
lstm_model.add(LSTM(units=50, return_sequences = True))
lstm_model.add(Dropout(0.2))

#Adding the output layer.
lstm_model.add(Dense(units = 1, activation = 'sigmoid'))


In [0]:
# Compiling the LSTM
lstm_model.compile(optimizer='adam', loss="binary_crossentropy", metrics = ['acc'])  
lstm_model.fit(X, Y, epochs = 10, batch_size = 64, validation_split = 0.33)

Train on 6078 samples, validate on 2995 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f39386da208>

In [0]:
#Use the trained model to predict the second half sequence of goal results.
second_half_prediction = lstm_model.predict(X)

In [0]:
second_half_prediction

array([[[0.0571424 ],
        [0.0153549 ],
        [0.02705506],
        ...,
        [0.02821557],
        [0.02821425],
        [0.02821495]],

       [[0.0571424 ],
        [0.01515869],
        [0.02640888],
        ...,
        [0.02821441],
        [0.02821513],
        [0.02821473]],

       [[0.0571424 ],
        [0.01514875],
        [0.02616777],
        ...,
        [0.02821409],
        [0.02821406],
        [0.02821388]],

       ...,

       [[0.0571424 ],
        [0.01506718],
        [0.02571804],
        ...,
        [0.02821408],
        [0.02821523],
        [0.02821562]],

       [[0.0571424 ],
        [0.01506718],
        [0.02552016],
        ...,
        [0.0282154 ],
        [0.02821452],
        [0.02821436]],

       [[0.0571424 ],
        [0.01506718],
        [0.02561639],
        ...,
        [0.02821417],
        [0.02821444],
        [0.02821434]]], dtype=float32)

In [0]:
second_half_prediction.shape

(9073, 45, 1)

In [0]:
#Print out the predicted results for the first game.
second_half_prediction[0]

array([[0.3961647 ],
       [0.16002859],
       [0.0349737 ],
       [0.02309146],
       [0.02521963],
       [0.02647247],
       [0.0268942 ],
       [0.0270023 ],
       [0.02703548],
       [0.02704023],
       [0.02704301],
       [0.0270403 ],
       [0.02703747],
       [0.02703162],
       [0.02703545],
       [0.02703367],
       [0.02703424],
       [0.02703342],
       [0.02703455],
       [0.0270345 ],
       [0.02703007],
       [0.02703452],
       [0.02703508],
       [0.02703498],
       [0.027034  ],
       [0.0270351 ],
       [0.02703389],
       [0.02703504],
       [0.0270351 ],
       [0.02703506],
       [0.02703504],
       [0.02703407],
       [0.02703518],
       [0.02703506],
       [0.02703506],
       [0.0270333 ],
       [0.02703505],
       [0.0270353 ],
       [0.02703521],
       [0.02703405],
       [0.02703518],
       [0.0270305 ],
       [0.02703432],
       [0.0270312 ],
       [0.02703521]], dtype=float32)