In [3]:
import math
import os
from random import shuffle

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
from keras.layers import Dense
from keras.layers import LSTM, Flatten, Input, TimeDistributed
from keras.layers import BatchNormalization
from keras.models import Sequential, Model, load_model
from keras.optimizers import Adam
from keras.preprocessing import sequence
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [4]:
# Directory that holds the exported strike data: the X_*/Y_* .npy arrays and the
# keys_*.txt feature-name files read by the loading cells.
# Set the STRIKE_DATA_DIR environment variable to run the notebook against another
# location; when it is unset (or empty) the original hard-coded location is used.
path = os.path.normpath(
    os.environ.get('STRIKE_DATA_DIR')
    or '/Users/mariekorneliusson/documents/AiSports/Strike_Data_As_Np'
)

In [5]:
# First skier: sample array, label array and the feature names
# (one name per line of the keys file, trailing newline removed).
X_First = np.load(path + '/X_First_Skier.npy')
Y_First = np.load(path + '/Y_First_Skier.npy')
with open(path + "/keys_First_Skier.txt", "r") as f:
    feature_list_First = [line.strip('\n') for line in f]

In [6]:
# Second skier: sample array, label array and the feature names
# (one name per line of the keys file, trailing newline removed).
X_Second = np.load(path + '/X_Second_Skier.npy')
Y_Second = np.load(path + '/Y_Second_Skier.npy')
with open(path + "/keys_Second_Skier.txt", "r") as f:
    feature_list_Second = [line.strip('\n') for line in f]

In [7]:
# Third skier: sample array, label array and the feature names
# (one name per line of the keys file, trailing newline removed).
X_Third = np.load(path + '/X_Third_Skier.npy')
Y_Third = np.load(path + '/Y_Third_Skier.npy')
with open(path + "/keys_Third_Skier.txt", "r") as f:
    feature_list_Third = [line.strip('\n') for line in f]

In [8]:
# Map column index -> feature name for every entry of the second skier's key list
# except the last one.
# NOTE(review): the reason for leaving out the last key is not visible here -- confirm.
feature_Dict = dict(enumerate(feature_list_Second[:-1]))
print(len(feature_Dict))

17


In [9]:
# Show the shape of every loaded array: the three sample arrays first,
# then the three label arrays.
for loaded in (X_First, X_Second, X_Third, Y_First, Y_Second, Y_Third):
    print(loaded.shape)

(452, 140, 22)
(382, 140, 18)
(844, 140, 22)
(452, 4)
(382, 4)
(844, 4)


In [10]:
def split_to_test_train_data(X_List,Y_List,ratio,time_step,dimX,nbr_of_classes):
    """Build a balanced training set plus merged and per-data-set test sets.

    Every data set contributes its first ``h = int((1 - ratio) * min_samples)``
    samples to the training arrays, where ``min_samples`` is the sample count of
    the smallest array in ``X_List``.  All remaining samples of a data set become
    test data.  Samples are taken in stored order; nothing is shuffled here.

    Parameters
    ----------
    X_List : list of arrays indexed as ``[sample, time, feature]``; only the
        first ``dimX`` features of each array are used.
    Y_List : list of 2-D label arrays, one per entry of ``X_List``.
    ratio : fraction of the smallest data set that is kept out of training.
    time_step : length of the time axis of the allocated arrays.
    dimX : number of leading feature columns to keep.
    nbr_of_classes : number of label columns.

    Returns
    -------
    X, Y : training samples and labels, ``h`` rows per data set in list order.
    X_Test_List, Y_Test_List : lists whose element 0 is the merged test set of
        all data sets and whose element ``i + 1`` is the test set of data set ``i``.
    """
    n_sets = len(X_List)
    h = int((1 - ratio) * min(arr.shape[0] for arr in X_List))
    test_counts = [arr.shape[0] - h for arr in X_List]
    n_test_total = sum(test_counts)

    X = np.zeros((h * n_sets, time_step, dimX))
    Y = np.zeros((h * n_sets, nbr_of_classes))

    # Element 0 of each test list is the merged set; per-data-set sets are appended below.
    X_Test_List = [np.zeros((n_test_total, time_step, dimX))]
    Y_Test_List = [np.zeros((n_test_total, nbr_of_classes))]

    start = 0
    for k in range(n_sets):
        features = X_List[k][:, :, :dimX]
        labels = Y_List[k]
        stop = start + test_counts[k]

        # Leading h samples go to training ...
        X[h * k:h * (k + 1), :, :] = features[:h, :, :]
        Y[h * k:h * (k + 1), :] = labels[:h, :]

        # ... everything after them goes to the merged and the individual test set.
        held_out_x = features[h:, :, :]
        held_out_y = labels[h:, :]
        X_Test_List[0][start:stop, :, :] = held_out_x
        Y_Test_List[0][start:stop, :] = held_out_y
        X_Test_List.append(held_out_x)
        Y_Test_List.append(held_out_y)

        start = stop

    return X, Y, X_Test_List, Y_Test_List

In [11]:
# Settings for the train/test split.
dim_X = len(feature_Dict)   # number of leading feature columns kept from every skier
ratio = 0.2                 # fraction of the smallest skier's samples kept out of training
time_step = 140             # length of the time axis of the loaded arrays
nbr_of_classes = 4          # number of label columns

# One entry per skier, in the same order in both lists.
X_List = [X_First, X_Second, X_Third]
Y_List = [Y_First, Y_Second, Y_Third]

X, Y, X_Test_List, Y_Test_List = split_to_test_train_data(
    X_List, Y_List, ratio, time_step, dim_X, nbr_of_classes
)

In [12]:
def reshape_labels(Y,Y_Test_List,time_step):
    """Repeat per-sample labels along a new time axis.

    Each 2-D label array of shape ``(samples, classes)`` becomes a float array of
    shape ``(samples, time_step, classes)`` in which every position that holds
    exactly 1 in the input is set to 1 at all time steps; all other positions
    are 0.

    Parameters
    ----------
    Y : 2-D training label array.
    Y_Test_List : list of 2-D test label arrays.
    time_step : length of the time axis to create.

    Returns
    -------
    The expanded training labels and a list with the expanded test labels,
    in the same order as ``Y_Test_List``.
    """
    def _expand(labels):
        # Start from all zeros and switch on the marked classes for every time step.
        expanded = np.zeros((labels.shape[0], time_step, labels.shape[1]))
        hot = np.where(labels == 1)
        expanded[hot[0], :, hot[1]] = 1
        return expanded

    Y_New = _expand(Y)
    Y_Test_List_New = [_expand(test_labels) for test_labels in Y_Test_List]
    return Y_New, Y_Test_List_New

In [13]:
# Expand the labels to one label row per time step.
# Y and Y_Test_List are rebound to the expanded arrays, so the expansion is only
# done while Y is still 2-D: re-running this cell on already expanded labels
# would otherwise silently build wrongly shaped targets.
if Y.ndim == 2:
    Y, Y_Test_List = reshape_labels(Y, Y_Test_List, time_step)

# Element 0 of the test lists is the merged test set of all skiers.
X_Test = X_Test_List[0]
Y_Test = Y_Test_List[0]

for arr in (X, Y, X_Test, Y_Test):
    print(arr.shape)

(915, 140, 17)
(915, 140, 4)
(763, 140, 17)
(763, 140, 4)


In [12]:
def LSTM_One_Multiple_Output():
    """Build and compile an LSTM classifier that predicts a class at every time step.

    Layers, in order: batch normalisation over the feature axis, an LSTM with
    64 units that returns the whole sequence, and a softmax Dense layer with
    ``nbr_of_classes`` outputs.

    Reads the notebook-level ``time_step``, ``dim_X`` and ``nbr_of_classes``.
    The input width is ``dim_X - 1``; the fitting and prediction cells of this
    notebook pass ``X[:, :, 1:]``, i.e. everything but the first feature column.

    Returns
    -------
    The compiled Keras model (Adam with lr=0.0001, categorical cross-entropy
    loss, accuracy metric).
    """
    feature_shape = (time_step, dim_X - 1)

    sequence_input = Input(shape=feature_shape)
    normalised = BatchNormalization(
        axis=2,
        input_shape=feature_shape,
        momentum=0.99,
        epsilon=0.001,
        center=True,
        scale=True,
    )(sequence_input)
    hidden_sequence = LSTM(64, return_sequences=True)(normalised)
    preds = Dense(nbr_of_classes, activation='softmax')(hidden_sequence)

    model = Model(sequence_input, preds)
    model.compile(
        Adam(lr=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def LSTM_One_Multiple_Output_Timedistributed():
    """Build and compile the per-time-step LSTM classifier with a TimeDistributed head.

    Layers, in order: batch normalisation over the feature axis, an LSTM with
    64 units that returns the whole sequence, and a softmax Dense layer with
    ``nbr_of_classes`` outputs wrapped in ``TimeDistributed``.

    Reads the notebook-level ``time_step``, ``dim_X`` and ``nbr_of_classes``.
    The input width is ``dim_X - 1``; the fitting and prediction cells of this
    notebook pass ``X[:, :, 1:]``, i.e. everything but the first feature column.

    Returns
    -------
    The compiled Keras model (Adam with lr=0.0001, categorical cross-entropy
    loss, accuracy metric).
    """
    feature_shape = (time_step, dim_X - 1)

    sequence_input = Input(shape=feature_shape)
    normalised = BatchNormalization(
        axis=2,
        input_shape=feature_shape,
        momentum=0.99,
        epsilon=0.001,
        center=True,
        scale=True,
    )(sequence_input)
    hidden_sequence = LSTM(64, return_sequences=True)(normalised)
    per_step_head = TimeDistributed(Dense(nbr_of_classes, activation='softmax'))
    preds = per_step_head(hidden_sequence)

    model = Model(sequence_input, preds)
    model.compile(
        Adam(lr=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [47]:
# Train the per-time-step classifier.  The first feature column is sliced off the
# training and validation inputs, matching the model's input width of dim_X - 1.
# NOTE(review): validation_data is the merged test set (X_Test / Y_Test), so the
# checkpoint chosen on val_loss has been selected using the test data.
model = LSTM_One_Multiple_Output()

# Early stopping watches the 'acc' metric; the checkpoint watches 'val_loss'
# and only overwrites the file when that value improves.
eS = EarlyStopping(monitor='acc', min_delta=0.00001, patience=5)
mCp = ModelCheckpoint(
    'Model_1_2_3_Multiple_Output.h5',
    monitor='val_loss',
    save_best_only=True,
)

history = model.fit(
    X[:, :, 1:],
    Y,
    epochs=20,
    batch_size=1,
    validation_data=(X_Test[:, :, 1:], Y_Test),
    shuffle=True,
    callbacks=[eS, mCp],
)

Train on 915 samples, validate on 763 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
def majority_vote(votes, nbr_of_classes=4):
    """Return the class that occurs most often among the votes of one strike.

    Parameters
    ----------
    votes : sequence of int
        Predicted class index for each time step of the strike.
    nbr_of_classes : int, optional
        Number of classes; valid votes are ``0 .. nbr_of_classes - 1``.
        Defaults to 4, the value that was previously hard-coded.

    Returns
    -------
    The index of the most frequent class.  Ties go to the lowest class index,
    and an empty ``votes`` yields class 0.

    Raises
    ------
    IndexError
        If any vote lies outside ``0 .. nbr_of_classes - 1``.  Negative votes
        are rejected instead of being counted from the end of the class range.
    """
    votes = np.asarray(votes, dtype=np.intp).ravel()

    if votes.size and (votes.min() < 0 or votes.max() >= nbr_of_classes):
        raise IndexError(
            "votes must lie in the range 0..%d" % (nbr_of_classes - 1)
        )

    # minlength keeps one slot per class even when the highest classes got no vote.
    votes_each_class = np.bincount(votes, minlength=nbr_of_classes)

    return np.argmax(votes_each_class, axis=0)

In [16]:
def majority_vote_strikes(pred_y,test_y):
    """Reduce per-time-step predictions to one voted class per strike.

    For every strike the class with the highest score is picked at each time
    step, and ``majority_vote`` then selects the most frequent of those classes.

    Parameters
    ----------
    pred_y : array indexed as ``[sample, time, class]`` with predicted scores.
    test_y : array indexed the same way with the reference labels.

    Returns
    -------
    pred_y_new : float array ``(samples, time)``; each row is filled with the
        voted class of that strike.
    true_y : float array ``(samples, time)``; the arg-max class of ``test_y``
        at every time step.
    """
    nbr_of_samples, time_step = pred_y.shape[0], pred_y.shape[1]
    pred_y_new = np.zeros((nbr_of_samples, time_step))
    true_y = np.zeros((nbr_of_samples, time_step))

    for idx in range(nbr_of_samples):
        # Most likely class at every time step of this strike, then the vote.
        per_step_class = np.argmax(pred_y[idx, :, :], axis=1)
        pred_y_new[idx, :] = majority_vote(per_step_class)

        true_y[idx, :] = np.argmax(test_y[idx, :, :], axis=1)

    return pred_y_new, true_y

In [17]:
def majority_vote_strikes_max_prob(pred_y,test_y):
    """Pick one class per strike by the largest summed score over all time steps.

    Parameters
    ----------
    pred_y : array indexed as ``[sample, time, class]`` with predicted scores.
    test_y : array indexed the same way with the reference labels.

    Returns
    -------
    pred_y_new : float array ``(samples, time)``; each row is filled with the
        class whose scores, summed over the strike's time steps, are largest.
    true_y : float array ``(samples, time)``; the arg-max class of ``test_y``
        at every time step.
    """
    n_strikes, n_steps = pred_y.shape[0], pred_y.shape[1]
    pred_y_new = np.zeros((n_strikes, n_steps))
    true_y = np.zeros((n_strikes, n_steps))

    for idx in range(n_strikes):
        # Total score each class collected across the whole strike.
        class_totals = np.sum(pred_y[idx, :, :], axis=0)
        pred_y_new[idx, :] = np.argmax(class_totals, axis=0)

        true_y[idx, :] = np.argmax(test_y[idx, :, :], axis=1)

    return pred_y_new, true_y

In [19]:
# Evaluate a saved model on the merged test set with both voting schemes.
# NOTE(review): the training cell checkpoints to 'Model_1_2_3_Multiple_Output.h5',
# while this cell loads a different file -- confirm these are the intended weights.
model = load_model('LSTM_1_2_3_Majority_Vote_After.h5')
predY = model.predict(X_Test[:,:,1:])

# Scheme 1: class with the largest summed score over the strike.
pred_y, true_y = majority_vote_strikes_max_prob(predY,Y_Test)

print(pred_y.shape)
print(true_y.shape)
# The voted prediction is constant along the time axis, so only column 0 is compared.
acc = accuracy_score(true_y[:,0], pred_y[:,0])
print("Accuracy: %.2f" % acc)

# Scheme 2: most frequent per-time-step class within the strike.
pred_y_1, true_y = majority_vote_strikes(predY,Y_Test)

print(pred_y_1.shape)
print(true_y.shape)
acc = accuracy_score(true_y[:,0], pred_y_1[:,0])
print("Accuracy: %.2f" % acc)

(763, 140)
(763, 140)
Accuracy: 0.81
(763, 140)
(763, 140)
Accuracy: 0.81
