In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import numpy as np
import pandas as pd
from sklearn import preprocessing, metrics, model_selection
import matplotlib.pyplot as plt
%matplotlib inline

# Seed PyTorch's and Python's built-in random number generators with a fixed
# value so that repeated runs start from the same random state.
torch.manual_seed(0)
import random
random.seed(0)

In [None]:
# Choose the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
# Base folders for the input CSV files and for the saved figures.
# File names are appended to these by plain string concatenation
# (e.g. path_train+'X_'+file+'.csv'), so the trailing separator is required.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook on another machine.
path_train      = 'F://TFG//datasets//data_train//'
path_graphs     = 'F://TFG//graphs//plot_results//'

In [None]:
# Load the full ';'-separated feature table, using the 'wyId' column as index.
data = pd.read_csv(path_train+'training_features_DF.csv',sep=';',index_col='wyId')

# Disabled alternative: read the pre-split train/test CSVs straight into DataFrames.
# X_train = pd.read_csv(path_train+'X_train.csv',sep=';',index_col='wyId')
# y_train = pd.read_csv(path_train+'y_train.csv',sep=';',index_col='wyId')
# X_test = pd.read_csv(path_train+'X_test.csv',sep=';',index_col='wyId')
# y_test = pd.read_csv(path_train+'y_test.csv',sep=';',index_col='wyId')

In [None]:
class FootballMatchesDataset(Dataset):
    """Map-style dataset built from an ``X_<file>.csv`` / ``y_<file>.csv`` pair.

    Both files are read as ';'-separated CSVs. The first column of the feature
    file is dropped and all remaining columns become the feature vector. In the
    label file the first column is kept as ``matches`` (presumably the match
    id, 'wyId' - confirm against the CSVs) and the second column is the integer
    class id, which is one-hot encoded.

    Args:
        file: Split name inserted into the file names, e.g. ``'train'``.
        root: Folder that holds the CSV files. When omitted, the notebook-level
            ``path_train`` is used, which is the original behaviour.
        num_classes: Width of the one-hot label encoding (default 3).

    Attributes:
        data: float32 tensor of features, one row per sample.
        labels: float32 one-hot tensor, one row per sample.
        matches: tensor holding the first column of the label file.

    Raises:
        ValueError: If the feature and label files have different row counts.
    """
    def __init__(self, file, root=None, num_classes=3):
        import os  # local import keeps this class self-contained

        if root is None:
            root = path_train

        df      = pd.read_csv(os.path.join(root, 'X_'+file+'.csv'), sep=';')
        lab_df  = pd.read_csv(os.path.join(root, 'y_'+file+'.csv'), sep=';')
        if len(df) != len(lab_df):
            raise ValueError(
                "X_{0}.csv has {1} rows but y_{0}.csv has {2}".format(file, len(df), len(lab_df)))

        self.data       = torch.tensor(df.values[:,1:]).float()
        # F.one_hot only accepts integer index tensors; the explicit cast keeps
        # this working when pandas parsed the label column as float.
        class_ids       = torch.tensor(lab_df.values[:,1]).long()
        self.labels     = F.one_hot(class_ids, num_classes=num_classes).float()
        self.matches    = torch.tensor(lab_df.values[:,0])

    def __len__(self):
        """Number of samples (rows of ``data``)."""
        return len(self.data)

    def shape(self):
        """Shape of the feature matrix, as reported by ``data.shape``."""
        return self.data.shape

    def __getitem__(self,idx):
        """Return the ``(sample, label, match)`` triple stored at ``idx``."""
        sample  = self.data[idx]
        label   = self.labels[idx]
        match   = self.matches[idx]
        return sample, label, match



In [None]:
# Build one dataset per split from the matching X_/y_ CSV pair.
train_data, test_data = (
    FootballMatchesDataset(file=split) for split in ('train', 'test')
)

# Display the number of samples in each split.
(len(train_data), len(test_data))

In [None]:
# Quick sanity check: draw one shuffled mini-batch of 4 samples and display
# its labels (the third element of each batch is discarded here).
dataloader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=0)
train_feat, train_lab, _ = next(iter(dataloader))
train_lab

In [None]:
# Standardize the training features with statistics fitted on the training split.
# This overwrites train_data.data in place; the datasets are re-created from the
# CSV files further down, which discards this change.
# NOTE(review): scikit-learn returns a NumPy array here by default, so
# train_data.data stops being a torch tensor after this cell - confirm intended.
scaler  = preprocessing.StandardScaler()
train_data.data = scaler.fit_transform(train_data.data)

### Neural Network Implementation

Define the class:

#### I) Artificial Neural Network Approach to Football Score Prediction

Multilayer perceptron with one hidden layer, trained with backpropagation.
6 units input -> 5 hidden units -> 2 output units w/ sigmoid

Data Normalized [0,1]

In [None]:
# Re-read both splits from the CSV files so the features are back to their raw
# values (train_data.data was overwritten by the scaling cell above).
train_data  = FootballMatchesDataset(file = 'train')
test_data   = FootballMatchesDataset(file = 'test')

In [None]:
# Rescale the training features with scikit-learn's Normalizer, and apply the
# very same transform to the held-out split. Without the second line the
# network would be trained on normalized inputs but evaluated on raw ones.
# NOTE(review): Normalizer rescales each sample (row) to unit norm; it does not
# map each feature to [0, 1] - use MinMaxScaler if that was the intent.
normalizer = preprocessing.Normalizer()
train_data.data = normalizer.fit_transform(train_data.data)
test_data.data  = normalizer.transform(test_data.data)

In [None]:
# Sanity-check the training features after normalization: overall mean and
# standard deviation, then the largest and smallest value.
print(train_data.data.mean(), train_data.data.std())
print(train_data.data.max(),  train_data.data.min())

In [None]:
# Mini-batch loaders for both splits: 20 samples per batch, reshuffled on
# every pass over the data.
dataloader_train    = DataLoader(train_data, batch_size=20, shuffle=True)
dataloader_test     = DataLoader(test_data,  batch_size=20, shuffle=True)

# Pull one training batch and display the feature vector of its first sample.
train_feat, train_lab, _ = next(iter(dataloader_train))
train_feat[0]

In [None]:
class NeuralNetwork(nn.Module):
    """Perceptron with one hidden layer: Linear -> ReLU -> BatchNorm1d -> Linear.

    The hidden layer has 5 units. ``forward`` returns raw, unnormalized class
    scores (logits). No softmax is applied, because ``nn.CrossEntropyLoss``
    applies log-softmax internally and feeding it probabilities would apply
    softmax twice. The arg-max over the logits is the same as over the softmax
    output; call ``F.softmax(model(x), dim=1)`` explicitly when probabilities
    are needed.

    Args:
        input_feature: Number of input features per sample.
        ouput_classes: Number of output classes (size of the last layer).
    """
    def __init__(self, input_feature, ouput_classes):
        super().__init__()

        self.h1 = nn.Linear(in_features=input_feature,out_features=5)
        self.bn = nn.BatchNorm1d(5)
        self.out = nn.Linear(5,ouput_classes)

    def forward(self,x):
        """Map a ``(batch, input_feature)`` tensor to ``(batch, ouput_classes)`` logits."""
        x = F.relu(self.h1(x))
        x = self.bn(x)
        return self.out(x)

    def reset_weights(self):
        """Re-initialize the parameters of every layer (used between CV folds)."""
        self.h1.reset_parameters()
        self.bn.reset_parameters()
        self.out.reset_parameters()

In [None]:
# Instantiate the network with 22 input features and 3 output classes.
model = NeuralNetwork(22,3)

In [None]:
# Print the architecture, then the total number of elements across all
# tensors returned by model.parameters().
print(model)
print(f"The model has {sum([x.nelement() for x in model.parameters()]):,} parameters.")

##### Loss Function: Cross-entropy Loss

We can optionally provide per-class `weights` to the loss, for example derived from the prior probability of each class $C$.

In [None]:
train_data.labels   # one-hot encoded targets, one row per sample

In [None]:
# Empirical class frequencies: the column-wise mean of the one-hot labels.
# NOTE(review): weights_class is computed but not passed to the loss below,
# so the criterion is currently unweighted.
weights_class = np.mean(train_data.labels.numpy(),axis=0)

criterion = nn.CrossEntropyLoss()

##### Optimizer

In [None]:
# Plain stochastic gradient descent over all model parameters.
learning_rate = 1e-1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# TODO: try optimizing with (Nesterov) momentum and weight decay

##### Accuracy

In [None]:
def get_accuracy(pred,y):
    """Fraction of rows where the arg-max of ``pred`` equals the arg-max of ``y``.

    Args:
        pred: 2-D tensor of per-class scores, one row per sample.
        y: 2-D tensor of one-hot targets with the same number of rows.

    Returns:
        The mean of the element-wise match indicator, as computed by ``np.mean``.
    """
    # detach() + cpu() before numpy(): Tensor.numpy() rejects tensors that live
    # on a GPU or that are still attached to the autograd graph.
    pred_class = torch.argmax(pred,dim=1).detach().cpu().numpy()
    y_class    = torch.argmax(y,dim=1).detach().cpu().numpy()
    return np.mean((pred_class == y_class))
    

##### Train Loop

In [None]:
def train_model(model, criterion, optimizer, dataloader_train, dataloader_test, epochs):
    """Train ``model`` for ``epochs`` epochs and evaluate it after every epoch.

    Every batch produced by either loader must unpack as ``(x, y, _)``; the
    third element is ignored.

    Args:
        model: Network to optimize; switched between train and eval mode here.
        criterion: Loss callable invoked as ``criterion(model(x), y)``.
        optimizer: Optimizer that owns the parameters of ``model``.
        dataloader_train: Loader iterated once per epoch for the weight updates.
        dataloader_test: Loader iterated once per epoch, without gradients.
        epochs: Number of passes over ``dataloader_train``.

    Returns:
        ``(error, accuracy_train, accuracy_test)``: three lists holding one
        value per epoch - the sample-weighted mean training loss, the
        sample-weighted training accuracy, and the accuracy over the samples
        that ``dataloader_test`` actually yielded.

    Raises:
        ValueError: If ``dataloader_train`` yields no samples.
    """
    # Batches are moved to wherever the model's parameters live, so the same
    # loop serves CPU and GPU models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    accuracy_train  = []
    error           = []
    accuracy_test   = []

    for ep in range(epochs):
        # Training.
        model.train()
        loss_run, acc_run, n_seen = 0.0, 0.0, 0

        for it, batch in enumerate(dataloader_train):

            # Load a batch and split it into inputs and targets.
            x, y, _ = batch
            x, y = x.to(device), y.to(device)

            # Forward pass.
            logits = model(x)

            # Compute the loss (using 'criterion').
            loss = criterion(logits, y)

            # Backward pass.
            loss.backward()

            # Update the weights using the optimizer.
            optimizer.step()

            # Zero-out the accumulated gradients.
            optimizer.zero_grad()

            # Weight by batch size so that a shorter final batch does not skew
            # the epoch averages.
            curr_bs   = x.shape[0]
            loss_run += loss.item() * curr_bs
            acc_run  += get_accuracy(logits.detach().cpu(), y.cpu()) * curr_bs
            n_seen   += curr_bs

        if n_seen == 0:
            raise ValueError('dataloader_train yielded no samples')

        loss_epoch = loss_run / n_seen
        acc_epoch  = acc_run / n_seen
        accuracy_train.append(acc_epoch)
        error.append(loss_epoch)

        print('\rEp {}/{}, it {}/{}: loss train: {:.2f}, accuracy train: {:.2f}'.
                format(ep + 1, epochs, it + 1, len(dataloader_train), loss_epoch,
                        acc_epoch), end='')

        # Validation.
        model.eval()
        with torch.no_grad():
            acc_run, n_seen = 0.0, 0
            for batch in dataloader_test:
                # Get batch of data.
                x, y, _ = batch
                x, y = x.to(device), y.to(device)
                curr_bs  = x.shape[0]
                acc_run += get_accuracy(model(x).cpu(), y.cpu()) * curr_bs
                n_seen  += curr_bs
            # Divide by the samples actually evaluated. len(loader.dataset) is
            # the size of the whole dataset even when a sampler restricts the
            # loader to a subset (as cross-validation does), which would
            # understate the accuracy.
            acc_test = acc_run / n_seen if n_seen else float('nan')
            accuracy_test.append(acc_test)

            print(', accuracy test: {:.2f}'.format(acc_test))

    return error,accuracy_train,accuracy_test

In [None]:
# Train the model with a freshly created SGD optimizer. The optimizer built in
# this cell is the one handed to train_model, so the run is driven by the
# learning rate set right here.
epochs = 10
learning_rate = 1e-1
optimizer_lenet = torch.optim.SGD(model.parameters(), lr=learning_rate)
error,accuracy_train,accuracy_test = train_model(model, criterion, optimizer_lenet, dataloader_train, dataloader_test, epochs)

In [None]:
plt.figure(figsize=(10,6))

# Give every curve a label; plt.legend() has nothing to show otherwise.
for name, values in [('accuracy train', accuracy_train),
                     ('accuracy test', accuracy_test),
                     ('loss train', error)]:
    plt.plot(values, label=name)

plt.title('Accuracy: MLP 5 hidden units, batch_size=20')
plt.xticks(np.arange(epochs))
plt.legend()
plt.grid()
plt.xlabel('epochs')
plt.ylabel('accuracy')

plt.savefig(path_graphs + 'acc_mlp5_bn20.jpg', format='jpg', dpi=200)

##### Applying Cross Validation

In [None]:
# 5-fold splitter; shuffling with a fixed random_state keeps the fold
# assignment the same on every run.
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

In [None]:
def train_wCrossValidation(model,criterion,optimizer,train_data,batch_size,epochs=5,splitter=None):
    """Run k-fold cross-validation of ``model`` on ``train_data``.

    For every fold the model weights are reset, two loaders over ``train_data``
    are built (restricted to the fold's train and held-out indices through
    ``SubsetRandomSampler``), and ``train_model`` is run on them.

    Args:
        model: Network exposing ``reset_weights()``.
        criterion: Loss callable forwarded to ``train_model``.
        optimizer: Optimizer forwarded to ``train_model``.
        train_data: Dataset with a ``data`` attribute that the splitter indexes.
        batch_size: Mini-batch size of both per-fold loaders.
        epochs: Epochs trained per fold (default 5).
        splitter: Object with scikit-learn's ``split`` / ``get_n_splits``
            interface. Defaults to the notebook-level ``kfold``.

    Returns:
        ``(error, accuracy_train, accuracy_test)``: three lists with one entry
        per fold, each entry being the per-epoch list from ``train_model``.
    """
    if splitter is None:
        splitter = kfold
    # Take the fold count from the splitter itself so the progress message
    # cannot disagree with the split that is actually used.
    n_folds = splitter.get_n_splits(train_data.data)

    error           = []
    accuracy_train  = []
    accuracy_test   = []

    for fold,(train_idx,test_idx) in enumerate(splitter.split(train_data.data)):
        train_subsampler    = SubsetRandomSampler(train_idx)
        test_subsampler     = SubsetRandomSampler(test_idx)

        trainloader = DataLoader(
                            train_data,
                            batch_size=batch_size, sampler=train_subsampler)
        testloader  = DataLoader(
                            train_data,
                            batch_size=batch_size, sampler=test_subsampler)

        # Start every fold from freshly initialized weights.
        # NOTE(review): the optimizer object is reused across folds; any
        # internal state it keeps (e.g. momentum buffers) is not reset here.
        model.reset_weights()

        error_fold,acc_train_fold,acc_test_fold = train_model(
                model, criterion, optimizer, trainloader, testloader, epochs
            )

        error.append(error_fold)
        accuracy_train.append(acc_train_fold)
        accuracy_test.append(acc_test_fold)

        print('\rFold {}/{}: loss train: {:.2f}, accuracy train: {:.2f}, accuracy test: {:.2f}'.
                format(fold + 1, n_folds, np.mean(error_fold),
                        np.mean(acc_train_fold), np.mean(acc_test_fold)), end='')
        print('\n')

    return error, accuracy_train, accuracy_test
        

In [None]:
# Pass batch_size and epochs by keyword: the fifth positional parameter of
# train_wCrossValidation is batch_size, so a bare `epochs` in that slot would be
# taken as the batch size. batch_size=20 matches the figure titles.
error, accuracy_train, accuracy_test = train_wCrossValidation(
    model, criterion, optimizer, train_data, batch_size=20, epochs=epochs)

In [None]:
plt.figure(figsize=(10,6))

# One training-loss curve per fold, labelled so the legend is populated.
for fold_idx, fold_error in enumerate(error):
    plt.plot(fold_error, label='fold {}'.format(fold_idx + 1))

plt.title('Error Cross-Validation: MLP 5 hidden units, batch_size=20')
# The x axis counts epochs within a fold, so the ticks follow the curve length
# rather than the number of folds.
plt.xticks(np.arange(max((len(fold_error) for fold_error in error), default=0)))
plt.legend()
plt.grid()
plt.xlabel('epochs')
plt.ylabel('loss')
plt.ylim([0.5,1.5])

plt.savefig(path_graphs + 'error_cv5_mlp5_bn20.jpg', format='jpg', dpi=200)