In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
import seaborn as sn
%matplotlib inline

import mytrain_lib as ml

import importlib

torch.manual_seed(0)
import random
random.seed(0)

In [None]:
# Pick the compute backend: CUDA when torch reports it as available, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Base directories used by the notebook: training CSVs, saved figures, saved results.
path_train, path_graphs, path_results = (
    'F://TFG//datasets//data_train//',
    'F://TFG//graphs//plot_results//',
    'F://TFG//results//',
)

In [None]:
# Feature table (';'-separated CSV) and raw match records (JSON); both are indexed
# by their `wyId` column.
data = pd.read_csv(
    path_train + 'training_features_DF.csv',
    sep=';',
    index_col='wyId',
)
raw_Data = pd.read_json('F://TFG//datasets/raw_datasets//RAW_partidos.json')
raw_Data = raw_Data.set_index('wyId')

# Pre-split CSV files, currently not loaded here:
# X_train = pd.read_csv(path_train+'X_train.csv',sep=';',index_col='wyId')
# y_train = pd.read_csv(path_train+'y_train.csv',sep=';',index_col='wyId')
# X_test = pd.read_csv(path_train+'X_test.csv',sep=';',index_col='wyId')
# y_test = pd.read_csv(path_train+'y_test.csv',sep=';',index_col='wyId')

In [None]:
# Preview the first five rows of the feature table.
data.head(5)

In [None]:
# Build the train and test datasets through the project helper module (train first).
train_data, test_data = (
    ml.FootballMatchesDataset(file=split) for split in ('train', 'test')
)

# Number of samples in each split.
(len(train_data), len(test_data))

In [None]:
# Draw one shuffled mini-batch of four samples to inspect what the dataset yields:
# each batch unpacks into three items (features, labels, and a third item `m`).
dataloader = DataLoader(
    dataset=train_data,
    batch_size=4,
    shuffle=True,
    num_workers=0,
)
train_feat, train_lab, m = next(iter(dataloader))
(train_lab, m)

In [None]:
# Look up the rows of the feature table whose index labels are in `m`.
# NOTE(review): presumably `m` holds the wyId of each sample in the batch — confirm
# against FootballMatchesDataset.__getitem__.
data.loc[m]

In [None]:
# Standardise the training features in place (fit on the training data only).
# The datasets are re-created further down, so this scaling does not carry over
# to the experiments below.
scaler = preprocessing.StandardScaler()
scaler.fit(train_data.data)
train_data.data = scaler.transform(train_data.data)

# Neural Network Implementation

Define the class:

### I) Artificial Neural Network Approach to Football Score Prediction

Multilayer perceptron with one hidden layer, trained with backpropagation.
6 units input -> 5 hidden units -> 2 output units w/ sigmoid

Data Normalized [0,1]

In [None]:
# Re-create both datasets so this experiment does not reuse the `train_data` object
# whose `.data` was overwritten by the StandardScaler cell above.
train_data  = ml.FootballMatchesDataset(file = 'train')
test_data   = ml.FootballMatchesDataset(file = 'test')

In [None]:
# sklearn's Normalizer rescales every sample (row) independently to unit norm; it
# learns nothing from the data. The test split must go through the same transform,
# otherwise the model is trained on normalised rows and evaluated on raw ones.
normalizer = preprocessing.Normalizer()
train_data.data = normalizer.fit_transform(train_data.data)
test_data.data = normalizer.transform(test_data.data)

In [None]:
# Sanity check of the transformed training features: global mean/std, then max/min.
print(train_data.data.mean(), train_data.data.std())
print(train_data.data.max(),  train_data.data.min())

In [None]:
# Mini-batch loaders (20 samples per batch, reshuffled on every pass) for both splits.
dataloader_train = DataLoader(dataset=train_data, batch_size=20, shuffle=True)
dataloader_test = DataLoader(dataset=test_data, batch_size=20, shuffle=True)

# Peek at the first three samples of one training batch.
train_feat, train_lab, m = next(iter(dataloader_train))
(train_feat[:3], train_lab[:3], m[:3])

In [None]:
# Show the raw `label` field of three specific matches.
# NOTE(review): the index values are hard-coded — presumably copied from a batch
# displayed earlier; they will not follow `m` if the shuffle order changes.
raw_Data.loc[[2576042, 2500003, 2576265]].label

In [None]:
class NeuralNetwork(nn.Module):
    """Single-hidden-layer perceptron that outputs class probabilities.

    Pipeline: Linear -> ReLU -> BatchNorm1d -> Linear -> softmax.

    Args:
        input_feature: Number of input features per sample.
        ouput_classes: Number of output classes. (The spelling of the parameter
            name is kept so existing keyword callers keep working.)
        hidden_units: Width of the hidden layer. Defaults to 5, the value that
            used to be hard-coded.
    """

    def __init__(self, input_feature, ouput_classes, hidden_units=5):
        super().__init__()

        # Layers are created in the same order as before so that seeded
        # initialisation yields the same weights for the default width.
        self.h1 = nn.Linear(in_features=input_feature, out_features=hidden_units)
        self.bn = nn.BatchNorm1d(hidden_units)
        self.out = nn.Linear(hidden_units, ouput_classes)

    def forward(self, x):
        """Return softmax probabilities over dim 1 for a batch `x`.

        Assumes `x` is a 2-D batch of shape (batch, input_feature).
        """
        x = F.relu(self.h1(x))
        x = self.bn(x)
        # NOTE(review): nn.CrossEntropyLoss expects unnormalised logits and applies
        # log-softmax itself; feeding it these probabilities applies softmax twice.
        # Left unchanged here because callers may rely on probabilities — confirm
        # how ml.train_model consumes the output before switching to raw logits.
        return F.softmax(self.out(x), dim=1)

    def reset_weights(self):
        """Re-initialise the parameters of every layer (hidden, batch-norm, output)."""
        for layer in (self.h1, self.bn, self.out):
            layer.reset_parameters()

In [None]:
# Instantiate the network with 22 input features and 3 output classes.
model = NeuralNetwork(22,3)

In [None]:
# Show the layer structure, then the total number of elements across all parameters.
print(model)
print(f"The model has {sum(p.nelement() for p in model.parameters()):,} parameters.")

##### Loss Function: Cross-entropy Loss

We can provide per-class `weights`, set to the prior probability of each class $C$.

In [None]:
# Display the training targets.
train_data.labels   # in 1-hot encoding

In [None]:
# Column-wise mean of the one-hot labels, i.e. the empirical frequency of each class.
# NOTE(review): computed but not handed to the loss below — the criterion is unweighted.
weights_class = train_data.labels.numpy().mean(axis=0)

criterion = nn.CrossEntropyLoss()

##### Optimizer

In [None]:
# Plain SGD over the parameters of the `model` instance that exists at this point.
# TODO: try momentum (Nesterov) and weight decay.
learning_rate = 1e-1
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

##### Train Loop

In [None]:
# Train a freshly initialised model for a few epochs.
model = NeuralNetwork(22,3)
epochs = 5
learning_rate = 1e-1
# The optimizer has to be built from THIS model's parameters. The `optimizer`
# created in the earlier cell is bound to the previous model instance, so passing
# it to the training loop would leave the new model's weights untouched.
optimizer_lenet = torch.optim.SGD(model.parameters(), lr=learning_rate)
error,accuracy_train,accuracy_test,confusion_matrix = ml.train_model(model, criterion,
                                            optimizer_lenet, dataloader_train, dataloader_test, epochs)

# Persist the log collected by the helper module.
ml.save_logging()

In [None]:
# Accuracy (train/test) and error curves of the run above, one point per epoch.
plt.figure(figsize=(10,6))

# Label each curve so the legend has entries, and derive the x axis from `epochs`
# instead of a hard-coded range.
for p, name in zip([accuracy_train, accuracy_test, error],
                   ['accuracy (train)', 'accuracy (test)', 'error']):
    sn.lineplot(x=range(1, epochs + 1), y=p, label=name)

plt.title('Accuracy: MLP 5 hidden units, batch_size=20')
plt.xticks(np.arange(epochs)+1)
plt.legend()
plt.grid()
plt.xlabel('epochs')
plt.ylabel('accuracy / error')
plt.ylim([0,1.5])
plt.savefig(path_graphs + 'acc_mlp5_bn20_ej1.jpg', format='jpg', dpi=200)
plt.show()

##### Applying Cross Validation

In [None]:
# K-fold splitter for cross-validation; shuffling uses a fixed random_state so the
# fold assignment is reproducible.
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

In [None]:
# Re-import the helper module so edits to it take effect without restarting the kernel.
importlib.reload(ml)

In [None]:
# Start from an empty log in the helper module.
ml.log = {}

# Cross-validated training of a fresh network for 20 epochs.
# NOTE(review): `optimizer` was built from a different model instance than the
# NeuralNetwork created here — verify that train_wCrossValidation rebinds or
# rebuilds it for the model it trains.
error, accuracy_train, accuracy_test, confusion_matrix = ml.train_wCrossValidation(
    NeuralNetwork(22, 3),
    criterion,
    optimizer,
    train_data,
    kfold,
    epochs=20,
)

ml.save_logging()

In [None]:
# Display (without saving) the confusion matrix stored at index `f` of the
# cross-validation results.
f = 4
ml.dispConfusionMatrix(
    confusion_matrix[f],
    'Confusion matrix: Normaliz. MLP 1x5, bn=20',
    f'confmat_norm_mlp5_bn20_ej2_{f}',
    save=False,
)

In [None]:
# Error curves of the cross-validation run, one line per entry of `error`.
plt.figure(figsize=(10,6))

# Give every curve a label so plt.legend() has entries to show.
# NOTE(review): entries are assumed to be ordered by fold — confirm against
# ml.train_wCrossValidation.
for i, p in enumerate(error, start=1):
    plt.plot(p, label=f'fold {i}')

plt.title('Error Cross-Validation: MLP 5 hidden units, batch_size=20')
plt.xticks(np.arange(20))
plt.legend()
plt.grid()
plt.xlabel('epochs')
plt.ylabel('error')
plt.ylim([0.5,1.5])
# plt.savefig(path_graphs + 'error_cv5_mlp5_bn20_ej2.jpg', format='jpg', dpi=200)
plt.show()

#### Grid Search

In [None]:
# Re-create the training dataset, replacing the instance whose `.data` was
# normalised in place above (presumably so each scaler in the grid starts from
# untransformed features).
train_data = ml.FootballMatchesDataset(file = 'train')

In [None]:
# general hyperparameters

In [None]:
# --- Shared grid-search configuration ---------------------------------------

# Network handed to the grid search.
model = NeuralNetwork(22, 3)

# Feature scaling options; None is the first entry of the list.
scalers = [
    None,
    preprocessing.MinMaxScaler(),
    preprocessing.Normalizer(),
    preprocessing.StandardScaler(),
]

# Loss functions. `weights_class` is the column-wise mean of the labels; it is
# computed here but not passed to the loss.
weights_class = torch.tensor(train_data.labels.numpy().mean(axis=0))
criterions = [nn.CrossEntropyLoss()]

# Learning rates to try (note: rebinds `learning_rate` from a float to a list).
learning_rate = [0.5, 0.1, 1e-2]

# Cross-validation splitter.
folds = 5
kfold = model_selection.KFold(n_splits=folds, shuffle=True, random_state=0)

# Batch sizes to try.
bs = [10, 20, 50]

In [None]:
def plot_results(error,accuracy_train,accuracy_test,confusion_matrix,
                 param_grid=None,optimizer_name='SGD'):
    """Report the best grid-search configurations and show their confusion matrices.

    The metric arguments are sliced below as ``[config, fold, epoch]`` and
    ``confusion_matrix`` as ``[config, fold]``; ranking uses the test accuracy
    of the last epoch only.

    Args:
        error: Error values, indexed [config, fold, epoch].
        accuracy_train: Training accuracy, same layout as `error`.
        accuracy_test: Test accuracy, same layout as `error`.
        confusion_matrix: Confusion matrices, indexed [config, fold].
        param_grid: Optional sequence of hyperparameter value lists (one list per
            grid dimension) used to describe each configuration. When omitted,
            the notebook-level Adam grid (scalers, criterions, learning_rate,
            b1, b2, weight_decay, bs) is used if all of those names exist;
            otherwise configurations are described by their index only.
        optimizer_name: Tag used in the figure title and file name. Defaults to
            'SGD', the value that used to be hard-coded.

    Returns:
        None. Prints a summary and calls ml.dispConfusionMatrix with save=True
        for each selected configuration.
    """
    confusion_matrix = np.array(confusion_matrix)
    accuracy_test    = np.array(accuracy_test)
    accuracy_train   = np.array(accuracy_train)
    error            = np.array(error)

    acc_test_lastepoch = accuracy_test[:,:,-1]  # only interested in last epoch

    # best fold of each configuration / best configuration of each fold
    best_cv          = acc_test_lastepoch.argmax(axis=1)
    best_config_cv   = acc_test_lastepoch.argmax(axis=0)

    # best configurations are:
    print('config','\t', 'accuracy_test\t', '\taccuracy_train\t', '\terror')

    for c in best_config_cv:
        print(c,'\t', accuracy_test[c,best_cv[c],-1]
                    , accuracy_train[c,best_cv[c],-1]
                    , error[c,best_cv[c],-1])

    if param_grid is None:
        # Legacy behaviour: describe configurations with the notebook's Adam grid.
        # Those globals are only defined once the Adam cells have run, so fall
        # back to index-only descriptions instead of raising NameError.
        try:
            param_grid = (scalers,criterions,learning_rate,b1,b2,weight_decay,bs)
        except NameError:
            param_grid = None

    hyperparams = None
    if param_grid is not None:
        # NOTE(review): this meshgrid ordering is assumed to match the config
        # index produced by the grid search — confirm against ml.Grid_Search_*.
        hyperparams = (np.array(np.meshgrid(*param_grid))
                            .T.reshape((-1,len(param_grid))))

    temp = datetime.now().strftime("_%m%d_%H%M%S")

    for i,c in enumerate(best_config_cv):
        # Guard against a grid that is smaller than the number of configurations.
        if hyperparams is not None and c < len(hyperparams):
            description = hyperparams[c]
        else:
            description = f'config {c}'
        print(f'Config of {c} - Fold {best_cv[c]}: {description}')
        ml.dispConfusionMatrix(confusion_matrix[c,best_cv[c]],
                        f'Confusion Matrix: MLP 1x5 {optimizer_name} {description}',
                        f'confmat_mlp5_{optimizer_name}_t{temp}_id{i}', save=True)

##### Stochastic Gradient Descent

In [None]:
# SGD-specific search space.
momentum = [0.01, 0.1, 0.9, 0.99]
# NOTE(review): `nesterov` and `dampening` share one list and are not passed to the
# grid search below.
nesterov = dampening = [True, False]

# Experiments with stochastic gradient descent, 10 epochs per run.
importlib.reload(ml)
error, accuracy_train, accuracy_test, confusion_matrix = ml.Grid_Search_SGD(
    train_data, scalers, criterions, learning_rate, momentum, model, kfold,
    batch_size=bs, epochs=10,
)

In [None]:
# Summarise the 10-epoch SGD grid search.
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

In [None]:
# Experiments with stochastic gradient descent, 20 epochs per run (same grid as above).
error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_SGD(train_data,scalers,
                                            criterions,learning_rate,momentum,model,
                                            kfold,batch_size=bs,epochs=20)

In [None]:
# Summarise the 20-epoch SGD grid search.
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

##### Adam Optimizer

In [None]:
# Scratch expression: evaluates to (2, 2) and binds no name.
tuple((2,2))

In [None]:
# From here on `optimizer` refers to the Adam class itself, not to an instance.
optimizer = torch.optim.Adam

# Adam search space. b1/b2 are presumably candidate values for Adam's two beta
# coefficients — confirm against ml.Grid_Search_Adam.
r = np.random.rand
b1 = [0.01, 0.1, 0.9]
b2 = [0.01, 0.1, 0.9, 0.99]
weight_decay = [0, 1, 10]

In [None]:
# Adam grid search, 10 epochs per run. The helper module is reloaded first so its
# latest edits are used.
importlib.reload(ml)

error, accuracy_train, accuracy_test, confusion_matrix = ml.Grid_Search_Adam(
    train_data, scalers, criterions, learning_rate, b1, b2, model, kfold,
    batch_size=bs, weight_decay=weight_decay, epochs=10,
)

In [None]:
# Summarise the 10-epoch Adam grid search.
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

In [None]:
# 20 epochs
# Uses the same argument layout as the 10-epoch Adam search: b1 and b2 (not the
# SGD `momentum` list) and the weight-decay grid. Passing `momentum` here shifted
# every following positional argument by one.
error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_Adam(train_data,scalers,
                                            criterions,learning_rate,b1,b2,model,
                                            kfold,batch_size=bs,weight_decay=weight_decay,epochs=20)

In [None]:
# Summarise the 20-epoch Adam grid search.
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

In [None]:
# NEXT STEPS:

# 1. other optimizers and epoch counts
# 2. a different network architecture
    # a. reduce the number of parameters
    # b. add hidden layers
# 3. reduce dimensionality / select features