In [1]:
import pandas as pd
import numpy as np
import copy
import matplotlib.pyplot as plt
import h5py
import scipy

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
#Find path of each data files
eng_path = '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/ENG-League-2016-2020.csv'
esp_path = '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/ESP-League-2016-2020.csv'
fra_path = '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/FRA-League-2016-2020.csv'
ger_path = '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/GER-League-2016-2020.csv'
ita_path = '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/ITA-League-2016-2020.csv'
test_path =  '/Users/selbl/Desktop/PhD/Second Year/First Quarter/CS 230/Project/Data/ENG-League-2020-2021.csv'
#Read each dataframe
df_eng = pd.read_csv(eng_path)
df_esp = pd.read_csv(esp_path)
df_fra = pd.read_csv(fra_path)
df_ger = pd.read_csv(ger_path)
df_ita = pd.read_csv(ita_path)
#Combine
df = [df_eng, df_esp, df_fra,df_ger,df_ita]
df_train = pd.concat(df)
#Get test data
df_test = pd.read_csv(test_path)

In [3]:
#Prepare databases as arrays
X_Train = np.array(df_train[['Home_Elo','Away_Elo']]).T
X_Test = np.array(df_test[['Home_Elo','Away_Elo']]).T

In [48]:
#Pre-Process Y
Y_Train = np.array(df_train['Result'])
Y_Train.reshape(1,len(Y_Train))
Y_Train = np.where(Y_Train == 'W',1,np.where(Y_Train=='L',0,2))
#Same for test
Y_Test = np.array(df_test['Result'])
Y_Test.reshape(1,len(Y_Test))
Y_Test = np.where(Y_Test == 'W',1,np.where(Y_Test=='L',0,2))

In [5]:
#Define softmax
def softmax(z):
    
    # z--> linear part.
    
    # subtracting the max of z for numerical stability.
    exp = np.exp(z - np.max(z))
    
    # Calculating softmax for all examples.
    for i in range(len(z)):
        exp[i] /= np.sum(exp[i])
        
    return exp

In [6]:
def one_hot(y, c):
    
    # y--> label/ground truth.
    # c--> Number of classes.
    
    # A zero matrix of size (m, c)
    y_hot = np.zeros((len(y), c))
    
    # Putting 1 for column where the label is,
    # Using multidimensional indexing.
    y_hot[np.arange(len(y)), y] = 1
    
    return y_hot

In [53]:
#Define fit
def fit(X, y, lr, c, epochs):
    
    # X --> Input.
    # y --> true/target value.
    # lr --> Learning rate.
    # c --> Number of classes.
    # epochs --> Number of iterations.
    
        
    # m-> number of training examples
    # n-> number of features 
    m, n = X.shape
    
    #Normalize
    #This is because for some reason things blow up if not
    X = (X - np.mean(X))/(np.sqrt(np.std(X)))
    
    # Initializing weights and bias randomly.
    w = np.random.randn(n, c)*0.001
    b = np.random.randn(c)*0.001
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        
        # Calculating hypothesis/prediction.
        z = X@w + b
        y_hat = softmax(z)
        
        # One-hot encoding y.
        y_hot = one_hot(y, c)
        #y_hot = y
        
        # Calculating the gradient of loss w.r.t w and b.
        w_grad = (1/m)*np.dot(X.T, (y_hat - y_hot)) 
        b_grad = (1/m)*np.sum(y_hat - y_hot)
        
        # Updating the parameters.
        w = w - lr*w_grad
        b = b - lr*b_grad
        
        # Calculating loss and appending it in the list.
        loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))
        #loss = -np.mean(np.log(y_hat))
        #loss = -np.mean(np.multiply(y,y_hat))
        losses.append(loss)
        # Printing out the loss at every 100th iteration.
        if epoch%100==0:
            print('Epoch {epoch}==> Loss = {loss}'
                  .format(epoch=epoch, loss=loss))
    return w, b, losses

In [55]:
#Fit
w, b, l = fit(X_Train.T, Y_Train, lr=0.8, c=3, epochs=5000)

Epoch 0==> Loss = 1.0972498820242642
Epoch 100==> Loss = 10.264138501080259
Epoch 200==> Loss = 10.41449928249175
Epoch 300==> Loss = 9.839082247638407
Epoch 400==> Loss = 13.53013492220324
Epoch 500==> Loss = 11.38624804922244
Epoch 600==> Loss = 12.773764659494
Epoch 700==> Loss = 9.905831772108112
Epoch 800==> Loss = 13.05384616954213
Epoch 900==> Loss = 11.206010781829168
Epoch 1000==> Loss = 12.275497885059698
Epoch 1100==> Loss = 11.461513344560181
Epoch 1200==> Loss = 12.837790082030683
Epoch 1300==> Loss = 10.646485779729291
Epoch 1400==> Loss = 9.271240715280522
Epoch 1500==> Loss = 8.406077053121086
Epoch 1600==> Loss = 8.199393379121588
Epoch 1700==> Loss = 18.50045180737051
Epoch 1800==> Loss = 8.517673001714378
Epoch 1900==> Loss = 10.99733657254689
Epoch 2000==> Loss = 7.054280305253326
Epoch 2100==> Loss = 12.566720195386777
Epoch 2200==> Loss = 9.332390091409259
Epoch 2300==> Loss = 9.225657113147735
Epoch 2400==> Loss = 11.34329518738569
Epoch 2500==> Loss = 9.87942374

In [56]:
#Now, we set the criteria to evaluate model
def predict(X, w, b):
    
    # X --> Input.
    # w --> weights.
    # b --> bias.
    
    # Predicting
    z = X@w + b
    y_hat = softmax(z)
    
    # Returning the class with highest probability.
    return np.argmax(y_hat, axis=1)

In [57]:
def accuracy(y, y_hat):
    return np.sum(y==y_hat)/len(y)

In [58]:
#Measurements
# Accuracy for training set.
train_preds = predict(X_Train.T, w, b)
acc_train = accuracy(Y_Train, train_preds)
print('The accuracy for the train set is: ' + str(acc_train))
# Accuracy for test set.
# Flattening and normalizing.
test_preds = predict(X_Test.T, w, b)
acc_test = accuracy(Y_Test, test_preds)
print('The accuracy for the test set is: ' + str(acc_test))

The accuracy for the train set is: 0.2514862436056961
The accuracy for the test set is: 0.21578947368421053


  exp[i] /= np.sum(exp[i])
