This Python script will read in the binary_90_minutes_cleaned_data_for_lstm.csv file and run it through an LSTM model. 
The binary_90_minutes_cleaned_data_for_lstm.csv file contains a breakdown for each of the 90 minutes (t = 0 to t = 89) of a given game.
Each minute of a given game is encoded as a binary vector over 16 types of events: an entry is 1 if that event
took place during that minute of the game, and 0 otherwise.

Here, we will create an LSTM model that takes the first half of every game (t = 0 to 44) as X and the second half of
every game (t = 45 to 89) as Y. The LSTM is trained so that, given the minute-by-minute sequence of events in the first half
of a game, it predicts the minute-by-minute sequence of events in the second half of that game.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Import Libraries and packages from Keras
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

from google.colab import drive
drive.mount('/content/drive')

Using TensorFlow backend.


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Load the per-minute event table from the mounted Drive folder.
filename = "drive/Team Drives/Deep Learning Project/armaan/binary_90_minutes_cleaned_data_for_lstm.csv"
dataset = pd.read_csv(filename)

cols = dataset.columns

# Normalise column types: id_odsp is kept as a string, and every other
# column is cast to int.
for column_name in cols:
    target_type = str if column_name == 'id_odsp' else int
    dataset[column_name] = dataset[column_name].astype(target_type)

In [0]:
# Convert the DataFrame into a plain 2-D array of rows (using multiple
# predictors). `.values` is used instead of `.as_matrix()`, which was
# deprecated and later removed from pandas.
dataset = dataset.values

# Last minute (inclusive) that belongs to the first half: t = 0 to 44 -> X,
# everything after that -> Y.
FIRST_HALF_LAST_MINUTE = 44

X = []  # One entry per game: list of per-minute event vectors for the first half.
Y = []  # One entry per game: list of per-minute event vectors for the second half.

current_game = ""

new_row_X = []  # Sequence of events for the first half of the game being read.
new_row_Y = []  # Sequence of events for the second half of the game being read.

# Iterate through the dataset and partition the sequence of events for each
# minute into X (first half) or Y (second half). Rows belonging to the same
# game are expected to be contiguous; column 0 holds the game id and column 1
# holds the minute.
for index, row in enumerate(dataset):
    game_id = row[0]
    minute = row[1]
    events = row[2:]    # drop the game id and the time from the row

    if game_id != current_game:
        if index != 0:
            # Operating on a new game: write out the previous game's data.
            X.append(new_row_X)
            Y.append(new_row_Y)
        new_row_X = []
        new_row_Y = []
        current_game = game_id

    # The same half-time boundary is applied to every row, including the
    # first row of a game.
    if minute <= FIRST_HALF_LAST_MINUTE:
        new_row_X.append(events)
    else:
        new_row_Y.append(events)

# The loop only writes out a game when the *next* game starts, so the final
# game has to be written out explicitly (skipped when the dataset is empty).
if new_row_X or new_row_Y:
    X.append(new_row_X)
    Y.append(new_row_Y)

In [4]:
# Turn the per-game lists into arrays for Keras.
X = np.array(X)
Y = np.array(Y)

# Build the network: four stacked LSTM layers (50 units each), each followed
# by 20% dropout, and a dense sigmoid output layer with 16 units.
lstm_model = Sequential()

# First LSTM layer declares the input shape: 45 timesteps x 16 features.
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(45, 16)))
lstm_model.add(Dropout(0.2))

# Second, third and fourth LSTM layers are identical, so add them in a loop.
# return_sequences=True keeps one output per timestep all the way through.
for _ in range(3):
    lstm_model.add(LSTM(units=50, return_sequences=True))
    lstm_model.add(Dropout(0.2))

# Output layer: sigmoid activation over 16 units.
lstm_model.add(Dense(units=16, activation='sigmoid'))

# Compile and train, holding out 33% of the samples for validation.
lstm_model.compile(optimizer='adam', loss="binary_crossentropy", metrics=['acc'])
lstm_model.fit(X, Y, epochs=10, batch_size=64, validation_split=0.33)

# Run the trained model over X to obtain the predicted second halves.
second_half_prediction = lstm_model.predict(X)

# Show the predictions and persist them to disk.
print(second_half_prediction)

np.save('lstm_predicted_second_half', second_half_prediction)


Train on 6078 samples, validate on 2995 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[[3.4526697e-01 1.7167257e-01 3.7512442e-01 ... 8.8113798e-03
   8.3852699e-03 6.9993831e-02]
  [2.4467954e-01 9.7911514e-02 2.8465354e-01 ... 6.3457014e-04
   5.9264927e-04 2.4299059e-02]
  [2.4027777e-01 9.9880353e-02 2.7334648e-01 ... 4.7088973e-04
   4.5130227e-04 2.3809846e-02]
  ...
  [2.4947649e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868869e-02]
  [2.4947652e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868877e-02]
  [2.4947652e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868884e-02]]

 [[3.4526697e-01 1.7167257e-01 3.7512442e-01 ... 8.8113798e-03
   8.3852699e-03 6.9993831e-02]
  [2.4467923e-01 9.7877972e-02 2.8468999e-01 ... 6.3476944e-04
   5.9257244e-04 2.4289645e-02]
  [2.4027275e-01 9.9875756e-02 2.7335250e-01 ... 4.7099072e-04
   4.51285

In [5]:
# Run the already-trained model over X to get the predicted second halves.
second_half_prediction = lstm_model.predict(X)

# Display the predictions, then store them in 'lstm_predicted_second_half'
# (np.save appends the .npy extension when it is missing).
print(second_half_prediction)

np.save(file='lstm_predicted_second_half', arr=second_half_prediction)


[[[3.4526697e-01 1.7167257e-01 3.7512442e-01 ... 8.8113798e-03
   8.3852699e-03 6.9993831e-02]
  [2.4467954e-01 9.7911514e-02 2.8465354e-01 ... 6.3457014e-04
   5.9264927e-04 2.4299059e-02]
  [2.4027777e-01 9.9880353e-02 2.7334648e-01 ... 4.7088973e-04
   4.5130227e-04 2.3809846e-02]
  ...
  [2.4947649e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868869e-02]
  [2.4947652e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868877e-02]
  [2.4947652e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868884e-02]]

 [[3.4526697e-01 1.7167257e-01 3.7512442e-01 ... 8.8113798e-03
   8.3852699e-03 6.9993831e-02]
  [2.4467923e-01 9.7877972e-02 2.8468999e-01 ... 6.3476944e-04
   5.9257244e-04 2.4289645e-02]
  [2.4027275e-01 9.9875756e-02 2.7335250e-01 ... 4.7099072e-04
   4.5128592e-04 2.3805680e-02]
  ...
  [2.4947649e-01 1.0496087e-01 2.6482001e-01 ... 2.7641552e-04
   4.3969578e-04 3.0868869e-02]
  [2.4947652e-01 1.0496087e-01 2.648