In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn import preprocessing, model_selection, feature_selection
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sn
%matplotlib inline

import mytrain_lib as ml

import importlib

# Seed PyTorch's and Python's random number generators so repeated runs start from the same state.
torch.manual_seed(0)
import random
random.seed(0)

In [6]:
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [7]:
# Filesystem locations used throughout the notebook.
path_rawdata    = 'F://TFG//datasets/raw_datasets//'
path_train      = 'F://TFG//datasets//data_train//'
path_graphs     = 'F://TFG//graphs//'
path            = 'F:/TFG/datasets/nature-dataset/'
# Later cells read saved scores from `path_results` and pass it as `root=` to the
# grid searches, but it was never defined anywhere in the notebook.
# NOTE(review): this directory is a guess that follows the layout above — confirm the real location.
path_results    = 'F://TFG//results//'

In [8]:
# Feature table used for training: ';'-separated CSV indexed by the `wyId` column.
data = pd.read_csv(path_train+'training_features_DF.csv',sep=';',index_col='wyId')
# Raw match records, indexed by the same `wyId` key. The location is built from
# `path_rawdata` instead of repeating the directory as a second hard-coded literal.
raw_Data = pd.read_json(path_rawdata+'RAW_partidos.json').set_index('wyId')

In [None]:
# Dimensions of the feature table as (rows, columns).
data.shape

In [None]:
# Inspect the feature row whose `wyId` index equals 2499738.
data.loc[2499738]

In [None]:
# Build the train and test datasets with the project's helper module.
train_data  = ml.FootballMatchesDataset(file = 'train')
test_data   = ml.FootballMatchesDataset(file = 'test')

# Number of samples in each split.
len(train_data), len(test_data)

In [None]:
# Draw a single shuffled mini-batch of 4 samples to see what the dataset yields.
dataloader = DataLoader(train_data, batch_size=4, shuffle=True, num_workers=0)
# Every batch unpacks into three items; `m` is used in the next cell to index `data`
# (presumably the match ids — confirm against FootballMatchesDataset).
train_feat, train_lab, m = next(iter(dataloader))
train_lab, m

In [None]:
# Rows of the feature table selected by the `m` values of the sampled batch.
data.loc[m]

In [None]:
# Standardize the training features; the scaled result overwrites `train_data.data` in place.
scaler  = preprocessing.StandardScaler()
train_data.data = scaler.fit_transform(train_data.data)

# Neural Network Implementation

Define the class:

## I) Artificial Neural Network Approach to Football Score Prediction

Multilayer perceptron with one hidden layer, trained with backpropagation.
6 units input -> 5 hidden units -> 2 output units w/ sigmoid

Data Normalized [0,1]

In [None]:
# Re-create both datasets; this replaces the objects (and any scaling applied to them) from earlier cells.
train_data  = ml.FootballMatchesDataset(file = 'train')
test_data   = ml.FootballMatchesDataset(file = 'test')

In [None]:
# Normalizer rescales every sample (row) to unit norm; it works per sample, not per feature.
normalizer = preprocessing.Normalizer()
train_data.data = normalizer.fit_transform(train_data.data)
# Apply the same transformation to the test split: the test loader built below is fed
# to the same model, so both splits must be preprocessed identically.
test_data.data = normalizer.transform(test_data.data)

In [None]:
# Summary statistics of the transformed training features: mean/std, then max/min.
print(train_data.data.mean(), train_data.data.std())
print(train_data.data.max(),  train_data.data.min())

In [None]:
# Shuffled mini-batch loaders (128 samples per batch) for both splits.
dataloader_train    = DataLoader(train_data, batch_size=128, shuffle=True)
dataloader_test     = DataLoader(test_data,  batch_size=128, shuffle=True)

# Peek at the first three entries of one training batch.
train_feat, train_lab, m = next(iter(dataloader_train))
train_feat[:3],train_lab[:3] ,m[:3]

In [None]:
# `label` field of three hard-coded match ids in the raw data
# (presumably copied from a batch preview — they change whenever the shuffle changes).
raw_Data.loc[[2565618, 2501059, 2576040]].label

In [13]:
# Scratch check: range(1, 3) produces exactly the values 1 and 2.
for i in (1, 2):
    print(i)

1
2


In [19]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with a configurable stack of hidden layers.

    Data flow: ``input`` linear layer, then for every additional hidden width
    BatchNorm -> ReLU -> Linear, then BatchNorm -> ReLU -> ``out`` linear layer,
    and finally a softmax over the class dimension.

    Args:
        input_feature: Number of input features per sample.
        ouput_classes: Number of output classes (spelling kept so existing
            keyword callers keep working).
        hidden_neurons: Width of each hidden layer, in order. A single ``int``
            is accepted as shorthand for one hidden layer. Defaults to one
            hidden layer of 5 units.
    """

    def __init__(self, input_feature, ouput_classes, hidden_neurons=(5,)):
        super().__init__()

        # Accept a bare int and copy the sequence so the caller's list (or the
        # default) is never shared with or mutated through this instance.
        if isinstance(hidden_neurons, int):
            hidden_neurons = [hidden_neurons]
        hidden_neurons = list(hidden_neurons)

        self.input = nn.Linear(in_features=input_feature, out_features=hidden_neurons[0])
        self.hidden_layers, self.hidden_bn = nn.ModuleList([]), nn.ModuleList([])
        # Each extra hidden layer is preceded by a batch norm over the previous width.
        for prev_width, width in zip(hidden_neurons[:-1], hidden_neurons[1:]):
            self.hidden_bn.append(nn.BatchNorm1d(prev_width))
            self.hidden_layers.append(nn.Linear(in_features=prev_width, out_features=width))

        self.bn_out = nn.BatchNorm1d(hidden_neurons[-1])
        self.out = nn.Linear(hidden_neurons[-1], ouput_classes)

    def forward(self, x):
        """Return class probabilities of shape (batch, ouput_classes).

        The output is already softmax-normalized, so it suits losses that take
        probabilities; ``nn.CrossEntropyLoss`` applies its own log-softmax.
        """
        x = self.input(x)
        for bn, layer in zip(self.hidden_bn, self.hidden_layers):
            x = layer(F.relu(bn(x)))
        # The final batch norm is registered as `bn_out` (there is no `self.bn`).
        x = F.relu(self.bn_out(x))
        return F.softmax(self.out(x), dim=1)

    def reset_weights(self):
        """Re-initialize every layer in place, e.g. between cross-validation folds."""
        self.input.reset_parameters()
        for bn, layer in zip(self.hidden_bn, self.hidden_layers):
            bn.reset_parameters()
            layer.reset_parameters()
        self.bn_out.reset_parameters()
        self.out.reset_parameters()


In [23]:
# Build a network with three hidden layers (20, 10 and 5 units) and report its size.
model = NeuralNetwork(160,3,hidden_neurons=[20,10,5])
print(model)
n_params = sum(p.nelement() for p in model.parameters())
print(f"The model has {n_params:,} parameters.")

NeuralNetwork(
  (input): Linear(in_features=160, out_features=20, bias=True)
  (hidden_layers): ModuleList(
    (0): Linear(in_features=20, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=5, bias=True)
  )
  (hidden_bn): ModuleList(
    (0): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (bn_out): BatchNorm1d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (out): Linear(in_features=5, out_features=3, bias=True)
)
The model has 3,573 parameters.


##### Loss Function: Cross-entropy Loss

We can provide `weights`, set to the prior probability of each class $C$.

In [None]:
# Display the training labels.
train_data.labels   # in 1-hot encoding

In [None]:
# Loss used by the training cells below. The commented-out lines are alternatives
# that were tried: class-frequency weights and plain cross-entropy.
# weights_class = np.mean(train_data.labels.numpy(),axis=0)

criterion = nn.BCELoss()
# criterion = nn.CrossEntropyLoss()

In [None]:
# Worked example of nn.CrossEntropyLoss on a hand-made batch of three samples.
# The scores are named `logits` so the built-in input() is not shadowed.
logits = torch.tensor([[0.15,0.24,0.61],[0.18,0.59,0.23],[0.35,0.34,0.31]]).float()
target = torch.tensor([[0,1,0],[0,1,0],[1,0,0]]).float()
# `dim` is given explicitly; relying on softmax's implicit dimension is deprecated.
print(F.softmax(logits, dim=1),target)

# CrossEntropyLoss applies log-softmax internally, so it receives the raw scores.
output = nn.CrossEntropyLoss()(logits, target)
print(output.item())

##### Optimizer

In [None]:
# Plain SGD over the parameters of the `model` object that exists when this cell runs.
learning_rate = 1e-1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# TODO: try optimizing with (Nesterov) momentum and weight decay

##### Train Loop

In [None]:
# Train the model
importlib.reload(ml)  # pick up edits made to the helper module since it was imported

ml.log = {}  # start with an empty log on the helper module

model = NeuralNetwork(22,3)
epochs = 100
learning_rate = 1e-1
# The optimizer handed to the training loop must hold the parameters of the model
# created in this cell. The earlier `optimizer` was built from a previous model
# instance, so passing it would leave this network's weights untouched.
optimizer_lenet = torch.optim.SGD(model.parameters(), lr=learning_rate)
error,accuracy_train,accuracy_test,confusion_matrix = ml.train_model(model, criterion,
                                            optimizer_lenet, dataloader_train, dataloader_test, epochs)

ml.save_logging()

In [None]:
plt.figure(figsize=(10,6))

# One labelled curve per metric. The x axis is derived from each series, so it always
# matches the number of recorded epochs (a fixed range(1, 6) only fits 5-epoch runs),
# and the labels give plt.legend() something to show.
curves = {'accuracy train': accuracy_train, 'accuracy test': accuracy_test, 'error': error}
for curve_name, values in curves.items():
    sn.lineplot(x=np.arange(1, len(values) + 1), y=values, label=curve_name)

plt.title('Accuracy: MLP 5 hidden units, batch_size=20')
plt.xticks(np.arange(epochs)+1)
plt.legend()
plt.grid()
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.ylim([0,1.5])
plt.savefig(path_graphs + 'acc_mlp5_bn20_ej1.jpg', format='jpg', dpi=200)
plt.show()

##### Applying Cross Validation

In [None]:
# 5-fold cross-validation splitter; shuffling is seeded so the folds are reproducible.
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

In [None]:
# Re-import the helper module so edits made to its source take effect without restarting the kernel.
importlib.reload(ml)

In [None]:
ml.log = {}  # start with an empty log on the helper module

model = NeuralNetwork(22,3)
# Build the optimizer from the very model that is trained (the same pairing the
# variance-threshold experiment uses). Previously a second, anonymous network was
# passed together with an optimizer holding another model's parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-1)

error, accuracy_train, accuracy_test, confusion_matrix = (ml.train_wCrossValidation(
                                        model,criterion, optimizer,
                                        train_data, kfold, epochs=100))

# Timestamp suffix that keeps the saved log of this run distinct.
temp = datetime.now().strftime("_%m%d_%H%M%S")
ml.save_logging(temp, title='debug_crossentropy')

In [None]:
# Show (without saving) the confusion matrix stored at index 4 of the cross-validation results.
f = 4
ml.dispConfusionMatrix(
    confusion_matrix[f],
    'Confusion matrix: Normaliz. MLP 1x3, bn=20',
    'confmat_norm_mlp5_bn20_ej2_' + str(f),
    save=False,
)

In [None]:
plt.figure(figsize=(10,6))

# One line per entry of `error`, labelled with its position in the sequence.
for f, p in enumerate(error):
    plt.plot(p, label=f'{f}')

plt.title('Error Cross-Validation: MLP 5 hidden units, batch_size=20')
plt.xticks(np.arange(20))
plt.legend(title='Folder')
# plt.grid()
plt.xlabel('epochs')
plt.ylabel('error')
# Pad the y range by 0.3 on both sides of the observed extremes.
y_low = np.min(error) - 0.3
y_high = np.max(error) + 0.3
plt.ylim([y_low, y_high])
# plt.savefig(path_graphs + 'error_cv5_mlp5_bn20_ej2.jpg', format='jpg', dpi=200)
plt.show()

#### Grid Search

In [16]:
import argparse


def _float_list(text):
    """Parse one comma-separated group of floats, e.g. '0.01,0.1' -> [0.01, 0.1]."""
    return [float(token) for token in text.split(',')]


# Command-line interface prototype for the grid-search script.
parser = argparse.ArgumentParser()
parser.add_argument('dataset', type=str)
parser.add_argument('drop', type=str, nargs='*')
# Exactly one optimizer may be selected.
group_opt = parser.add_mutually_exclusive_group()
group_opt.add_argument("-sgd", action="store_true")
group_opt.add_argument("-adam", action="store_true")
# Each value is one comma-separated group, so the parsed result keeps the nested
# shape of the default. `type=list` would split every argument into characters.
parser.add_argument("-betas", nargs='+', type=_float_list, default=[[.01, .1, .5],[.001, .01]])
# At most one dimensionality-reduction strategy may be selected.
group_dimred = parser.add_mutually_exclusive_group()
group_dimred.add_argument("-anova", nargs='+', type=int)
group_dimred.add_argument("-pca", nargs='+', type=int)
group_dimred.add_argument("-feat", nargs='+', type=int)
parser.add_argument('-units', type=int, nargs='+')
parser.add_argument('-scaler', type=str, choices=['minmax','norm','std','maxabs'], nargs='+' )
parser.add_argument('-lr' ,type=float, nargs='+', default=[.0001,.001,.01,.1,.5,1,10])
# Parse a sample command line instead of sys.argv so the cell can run inside the notebook.
args = parser.parse_args('wyscout -sgd -anova 5 10 -units 2 5 10 -scaler minmax std'.split())
print(args)

Namespace(adam=False, anova=[5, 10], betas=[[0.01, 0.1, 0.5], [0.001, 0.01]], dataset='wyscout', drop=[], feat=None, lr=[0.0001, 0.001, 0.01, 0.1, 0.5, 1, 10], pca=None, scaler=['minmax', 'std'], sgd=True, units=[2, 5, 10])


In [None]:
# Fresh copy of the training split for the grid-search experiments.
train_data = ml.FootballMatchesDataset(file = 'train')

In [None]:
# general hyperparameters

In [None]:
# Search space shared by the grid searches below.

# model
model       = NeuralNetwork(22,3)

# scaling/normalization (None is one of the candidates)
scalers = [None,preprocessing.MinMaxScaler(), preprocessing.Normalizer()]

# loss function
# Column-wise mean of the label matrix; with one-hot labels this is the frequency of each class.
weights_class = torch.tensor(np.mean(train_data.labels.numpy(),axis=0))
criterions = [nn.CrossEntropyLoss(), nn.CrossEntropyLoss(weight=weights_class)]

# optimizer
learning_rate = [0.5,0.1,1e-2]

# cross-validation
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

# batch-size
bs = [32,64,128]

In [None]:
def plot_results(error,accuracy_train,accuracy_test,confusion_matrix, hyperparams=None):
    """Report the best grid-search configurations and draw their confusion matrices.

    The three score inputs are indexed as [configuration, fold, epoch] and
    ``confusion_matrix`` as [configuration, fold, ...]. Ranking uses the test
    accuracy of the last epoch only.

    Args:
        error: Error values per configuration, fold and epoch.
        accuracy_train: Training accuracy, same layout as ``error``.
        accuracy_test: Test accuracy, same layout as ``error``.
        confusion_matrix: One confusion matrix per configuration and fold.
        hyperparams: Optional sequence giving a description of every
            configuration, indexed like the first axis of the scores. When
            omitted, configurations are identified by their index.
    """
    confusion_matrix = np.array(confusion_matrix)
    accuracy_test    = np.array(accuracy_test)
    accuracy_train   = np.array(accuracy_train)
    error            = np.array(error)

    acc_test_lastepoch = accuracy_test[:,:,-1]  # only interested in last epoch

    # best_cv[c]: fold in which configuration c reached its highest test accuracy.
    best_cv          = acc_test_lastepoch.argmax(axis=1)
    # Configurations that are the best one in at least one fold.
    best_config_cv   = np.unique(acc_test_lastepoch.argmax(axis=0))

    # best configurations are:
    print('config','\t', 'accuracy_test\t', '\taccuracy_train\t', '\terror')

    for c in best_config_cv:
        print(c,'\t', accuracy_test[c,best_cv[c],-1]
                    , accuracy_train[c,best_cv[c],-1]
                    , error[c,best_cv[c],-1])

    # Timestamp that keeps the saved figures of this call apart from earlier ones.
    temp = datetime.now().strftime("_%m%d_%H%M%S")

    for i,c in enumerate(best_config_cv):
        # Fall back to the configuration index when no description table was given.
        description = hyperparams[c] if hyperparams is not None else f'config {c}'
        print(f'Config of {c} - Fold {best_cv[c]}: {description}')
        ml.dispConfusionMatrix(confusion_matrix[c,best_cv[c]],
                        f'Confusion Matrix: MLP 1x5 SGD {description}',
                        f'confmat_mlp5_SGD_t{temp}_id{i}', save=True)

        

##### Stochastic Gradient Descent

In [None]:
# Momentum values to try. `nesterov` and `dampening` are defined here but are not passed to the call below.
momentum = [0.01,0.9]
nesterov = dampening = [True, False]

# Experiments with stochastic gradient descent (this run uses epochs=5).
importlib.reload(ml)
error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_SGD(train_data,scalers,
                                            criterions,learning_rate,momentum,model,
                                            kfold,batch_size=bs,epochs=5)

In [None]:
# Summarize the best configurations of the grid search above (no `hyperparams` table is passed in this call).
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

In [None]:
# Experiments with stochastic gradient descent and 20 epochs.
error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_SGD(train_data,scalers,
                                            criterions,learning_rate,momentum,model,
                                            kfold,batch_size=bs,epochs=20)

In [None]:
# Summarize the best configurations of the 20-epoch SGD search (no `hyperparams` table is passed in this call).
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

##### Adam Optimizer

In [None]:
# NOTE: from here on `optimizer` is the Adam class itself, not an optimizer instance.
optimizer   = torch.optim.Adam

# params Adam
r = np.random.rand  # alias only; not used in this cell
# Candidate values for Adam's two beta coefficients and for weight decay.
b1, b2          = [0.01,0.1,0.9], [0.01,0.1,0.9,0.99]
weight_decay    = [0,1,10]

In [None]:
# Adam grid search, 10 epochs
importlib.reload(ml)

error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_Adam(train_data,scalers,
                                            criterions,learning_rate,b1,b2,model,
                                            kfold,batch_size=bs,weight_decay=weight_decay,epochs=10)

In [None]:
# Summarize the best configurations of the 10-epoch Adam search (no `hyperparams` table is passed in this call).
plot_results(error,accuracy_train,accuracy_test,confusion_matrix)

In [None]:
# Adam grid search, 20 epochs
# Same argument list as the 10-epoch Adam search: the helper takes the two beta grids
# (b1, b2) before the model, plus the weight-decay grid. Passing the SGD `momentum`
# list here shifted every following positional argument by one.
error,accuracy_train,accuracy_test,confusion_matrix = ml.Grid_Search_Adam(train_data,scalers,
                                            criterions,learning_rate,b1,b2,model,
                                            kfold,batch_size=bs,weight_decay=weight_decay,epochs=20)

In [None]:
# Table with one row per hyper-parameter combination and 7 columns, one per searched setting.
# NOTE(review): the row order produced by meshgrid(...).T.reshape must match the order in which
# ml.Grid_Search_Adam enumerates its configurations — confirm against the helper module.
hyperparams = (np.array(np.meshgrid(scalers,criterions,learning_rate,b1,b2
                        ,weight_decay,bs)).T.reshape((-1,7)))

plot_results(error,accuracy_train,accuracy_test,confusion_matrix,hyperparams)

### Dimensionality Reduction

In [None]:
# Dimensionality Reduction: PCA

train_data = ml.FootballMatchesDataset('train')
print(train_data.data.shape)

# Project the features onto the first 10 principal components.
pca = PCA(n_components=10,random_state=0)
# Cast to float32 exactly as the PCA loop further down does: the projection may come
# back as float64, which does not match the float32 weights of the network layers.
train_data.data = torch.tensor(pca.fit_transform(train_data.data)).float()
print(train_data.data.shape)

In [None]:
# Search space for the PCA experiments (smaller than the one used for the full feature set).

# scaling/normalization
scalers = [None,preprocessing.MinMaxScaler(), preprocessing.Normalizer()]

# loss function
# Column-wise mean of the label matrix, used as per-class weights of the second criterion.
weights_class = torch.tensor(np.mean(train_data.labels.numpy(),axis=0))
criterions = [nn.CrossEntropyLoss(), nn.CrossEntropyLoss(weight=weights_class)]

# optimizer
learning_rate = [0.1,0.5]

# cross-validation
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

# batch-size
bs = [10,20]

In [None]:
# params SGD
momentum = [0.1,0.9]
nesterov = dampening = [True]

# params Adam: candidate beta coefficients and weight-decay values
b1, b2          = [0.01,0.1,0.9], [0.01,0.1,0.9,0.99]
weight_decay    = [0,1,10]

In [None]:
# TODO: choose n_components according to how much variance is retained

# Repeat the Adam grid search for several PCA dimensionalities.
for pca_n in [3,5,10,15]:
    # Reload the data so every projection starts from the original features.
    train_data = ml.FootballMatchesDataset('train')

    pca = PCA(n_components=pca_n,random_state=0)
    train_data.data = torch.tensor(pca.fit_transform(train_data.data)).float()

    # The input layer width equals the number of retained components.
    model       = NeuralNetwork(pca_n,3)

    # # SGD - 20 epochs (disabled)
    # _,_,_,_ = ml.Grid_Search_SGD(train_data,scalers,
    #                                         criterions,learning_rate,momentum,model,
    #                                         kfold,batch_size=bs,epochs=20)
    # Adam - 10 epochs
    _,_,_,_ = ml.Grid_Search_Adam(train_data,scalers,
                                            criterions,learning_rate,b1,b2,model,
                                            kfold,batch_size=bs,weight_decay=weight_decay,epochs=10)
    # Adam - 20 epochs
    _,_,_,_ = ml.Grid_Search_Adam(train_data,scalers,
                                            criterions,learning_rate,b1,b2,model,
                                            kfold,batch_size=bs,weight_decay=weight_decay,epochs=20)


In [None]:
# NEXT STEPS:

# 1. other optimizers and epoch counts
# 2. reduce dimensionality / select features
# 3. a different network architecture
    # a. reduce the number of parameters
    # b. add hidden layers

In [None]:
# Load the saved error array of one earlier run from the results directory (`path_results`).
mlp15_sgd_ep20_pca3_error = np.load(path_results+'mlp15_sgd_ep20_pca3//error__07_30_15_03_52.npy')

In [None]:
# Shape of the loaded error array (indexed below with three axes, the last one sliced at -1).
mlp15_sgd_ep20_pca3_error.shape

In [None]:
# Keep only the last entry along the final axis.
# NOTE: despite its name, this variable holds *error* values (it is sliced from the loaded error array), not accuracy.
acc_test_lastepoch = mlp15_sgd_ep20_pca3_error[:,:,-1]

In [None]:
# Index along the first axis with the lowest final error, for every position of the second axis.
np.argmin(acc_test_lastepoch,axis=0)

In [None]:
# Lower is better here, so argmin replaces the argmax used for accuracies.
# best_cv[c]: position along axis 1 where row c has its lowest final error.
best_cv          = acc_test_lastepoch.argmin(axis=1)
# Rows that have the lowest final error in at least one column.
best_config_cv   = np.unique(acc_test_lastepoch.argmin(axis=0))

# best configurations are:
print('config','\t', '\terror')

for c in best_config_cv:
    print(c,'\t', acc_test_lastepoch[c,best_cv[c]])

In [None]:
# Reload the helper module, then plot and save the error curves of the selected configurations.
importlib.reload(ml)

ml.plotError(mlp15_sgd_ep20_pca3_error,best_config_cv,best_cv,'MLP 1x5 SGD PCA3',filename='mlp15_sgd_ep20_pca3_error',save=True)

### Selected features

#### Variance Threshold

Primero entrenaremos varios modelos con diferentes características seleccionadas con la técnica de `Variance Threshold`.

In [None]:
# Reload both splits and keep a reference to the untouched training features.
train_data  = ml.FootballMatchesDataset(file = 'train')
test_data   = ml.FootballMatchesDataset(file = 'test')

old_data = train_data.data

# Variance thresholds to evaluate.
thresholds = [0.1,0.15,0.3,0.4]

# Divide every feature by its mean so the variances of differently scaled features become comparable.
# NOTE(review): a feature whose mean is 0 would produce inf/nan here — confirm none exists.
X_mean = torch.mean(train_data.data,dim=0).numpy()
X_norm = train_data.data / X_mean

len(X_norm), len(test_data)


In [None]:
# Preview which features survive a variance threshold of 0.15.
# The selector is named `variance_filter` so the built-in filter() is not shadowed.
variance_filter = feature_selection.VarianceThreshold(0.15)
variance_filter.fit(X_norm)
mask_new_feat = variance_filter.get_support()
data.columns[mask_new_feat]

In [None]:
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

# One timestamp for the whole sweep so all saved artefacts share a suffix.
temp = datetime.now().strftime("_%m_%d_%H_%M_%S")

for t in thresholds:
    ml.log = {}

    # `variance_filter` instead of `filter`, so the built-in filter() stays usable.
    # Only the support mask is needed; the transformed matrix was never used, and
    # assigning it to `_` clobbered IPython's last-output variable.
    variance_filter = feature_selection.VarianceThreshold(t)
    variance_filter.fit(X_norm)
    mask_new_feat = variance_filter.get_support()
    # Plain int: it is used both as a layer size and inside file names.
    n_selected = int(np.sum(mask_new_feat))
    # Select the surviving columns from the untouched features, not from X_norm.
    train_data.data = old_data[:,mask_new_feat]
    print(f'\nCon threshold: {n_selected}')

    # train
    model = NeuralNetwork(n_selected,3)

    train_data.data = (preprocessing.MinMaxScaler()
                        .fit_transform(train_data.data).astype(np.float32))

    # The optimizer is built from the model trained in this iteration.
    opt = torch.optim.Adam(model.parameters(),lr=0.1,betas=(0.01,0.99),weight_decay=0)

    er, ac_tr, ac_te, cm = ml.train_wCrossValidation(model,nn.BCELoss(), opt, train_data,
                                    kfold, epochs=100,bat_size=32)

    ml.save_logging(temp,f'_thres_{t}')
    ml.save_score(er,ac_tr,ac_te,cm,[],temp=temp,title=f'_{n_selected}')


In [None]:
# which features to select?
# NOTE: `mask_new_feat` is whatever the most recently executed cell left behind
# (after the threshold sweep, that is the mask of the last threshold tried).
print(data.columns[mask_new_feat])
print(f'\n{np.sum(mask_new_feat)} selected features')

In [None]:
# Bar chart of the variance of every mean-normalized feature.
plt.figure(figsize=(10,5))
plt.clf()
# Green for features kept by the current mask, red for discarded ones.
color = [('#6CF570' if m else '#FA3728') for m in mask_new_feat]
plt.bar(data.columns, torch.var(X_norm,dim=0), width=0.75, color=color)
# NOTE(review): the title states threshold=0.15, but the colours follow whatever `mask_new_feat` currently holds.
plt.title("Normalized dividing by the mean w/ threshold=0.15")
plt.suptitle("Normalized feature variances",fontsize=15)
plt.xlabel("Feature name")
plt.ylabel("Variance")
plt.xticks(rotation=90)

plt.savefig(path_graphs + 'divmean_feature_variances.jpg', format='jpg', dpi=200, bbox_inches='tight')
plt.show()


#### ANOVA

In [None]:
# Keep the 10 features with the highest ANOVA F-score. The score function is
# f_classif; the variable keeps its historical `chi2` name.
kbest_chi2 = feature_selection.SelectKBest(score_func=feature_selection.f_classif,k=10)
# f_classif expects one class index per sample, while the dataset stores the
# labels one-hot encoded (one column per class), so take the arg-max of each row.
class_index = train_data.labels.argmax(dim=1)
kbest_chi2.fit(X_norm, class_index)
X_kbest = kbest_chi2.transform(X_norm)
mask_new_feat = kbest_chi2.get_support()
data.columns[mask_new_feat]

#### SELECTED FEATURES

In [None]:
# Hand-picked feature subset: positions (out of 20) of the columns to keep.
selected_positions = {2, 3, 4, 5, 6, 7, 12, 13, 16, 17, 18, 19}
mask_selected_features = [position in selected_positions for position in range(20)]

# The last two columns of `data` are left out before the mask is applied.
data.columns[:-2][mask_selected_features]

## II) Model 2

### Different MLP Architectures

#### Multilayer Perceptron with one hidden layer.

We will train models with different number of units in its hidden layer.

In [None]:
# Search space for the architecture experiments.

# scaling/normalization
scalers = [None,preprocessing.MinMaxScaler(), preprocessing.Normalizer()]

# loss function
# weights_class = torch.tensor(np.mean(train_data.labels.numpy(),axis=0))
criterions = [nn.BCELoss()]

# optimizer
learning_rate = [1,1e-1,1e-2]

# cross-validation
folds = 5
kfold = model_selection.KFold(n_splits=folds,shuffle=True,random_state=0)

# batch-size
bs = [32,64,128]

##################
# SGD hyperparams

# NOTE(review): earlier cells give `momentum` as floats (e.g. [0.01, 0.9]); booleans here
# would be read as 1 and 0 if used numerically — confirm this is intended.
momentum = [True,False]

# Adam hyperparams
r = np.random.rand  # alias only; not used in this cell
b1, b2          = [0.01,0.9], [0.1,0.9,0.99]
weight_decay    = [0,1]

In [None]:
importlib.reload(ml)
# One timestamp for the whole sweep so all result folders share a suffix.
temp = datetime.now().strftime("_%m_%d_%H_%M_%S")
# Hidden-layer widths to compare.
units_array = [3,10]

train_data  = ml.FootballMatchesDataset(file = 'train')
old_data = train_data.data
X_mean = torch.mean(train_data.data,dim=0).numpy()
X_norm = train_data.data / X_mean

for units in units_array:

    # train
    # NeuralNetwork indexes `hidden_neurons` as a sequence of layer widths, so a
    # single hidden layer is passed as a one-element list rather than a bare int.
    model = NeuralNetwork(train_data.data.shape[1],3,hidden_neurons=[units])

    _,_,_,_ = ml.Grid_Search_SGD(train_data,scalers,criterions,learning_rate,momentum,model,
                                  kfold,batch_size=bs,epochs=100,
                                  root=path_results+f'sgd_mlp_{units}_{temp}//')

    _,_,_,_ = ml.Grid_Search_Adam(train_data,scalers,criterions ,learning_rate,b1,b2,model,
                                  kfold,batch_size=bs,weight_decay=weight_decay,epochs=100,
                                  root=path_results+f'adam_mlp{units}_{temp}//')

In [6]:
import numpy as np
# Scratch check: a list of 2-tuples converts into a 2x2 array.
aux = [(.5,.5),(.99,.5)]
np.array(aux, copy=True)

array([[0.5 , 0.5 ],
       [0.99, 0.5 ]])