In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision

In [None]:
import itertools
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

In [None]:
# Load the input table from a CSV file located relative to the notebook.
data_df = pd.read_csv(filepath_or_buffer='../data/sixth_df_input.csv')

In [None]:
# Store `Rank` with an integer dtype, then preview the first rows of the frame.
data_df['Rank'] = data_df['Rank'].astype(int)
data_df.head()

In [None]:
# Integer encodings for the categorical columns. `Series.map` with a dict
# turns any value that is missing from the dict into NaN.
dict_map_sex = dict(men=1, women=0)
data_df['sex'] = data_df['sex'].map(dict_map_sex)

# The `*_2` variants share the code of their base format.
dict_map_format = {
    'sprint': 0,
    'pursuit': 1,
    'individual': 2,
    'mass_start': 3,
    'sprint_2': 0,
    'pursuit_2': 1,
}
data_df['format'] = data_df['format'].map(dict_map_format)

def rank_transform(x):
    """Bucket a finishing rank into one of three class labels.

    Returns 0 for rank 1, 1 for ranks 2 and 3, and 2 for any rank greater
    than 3. Every other input (e.g. 0, negative values, NaN, or a
    non-integer between 1 and 3) yields None.
    """
    if x > 3:
        return 2
    if x in (2, 3):
        return 1
    if x == 1:
        return 0
    # No bucket matches: keep the implicit "no label" result explicit.
    return None

# Replace each rank by its class label (0, 1 or 2) element by element.
data_df['Rank'] = data_df['Rank'].map(rank_transform)

In [None]:
# Columns kept for modelling. `Rank` (the label) must remain the first column
# once `season` is dropped, because MyDataset reads the label from column 0
# and the features from the remaining columns.
selected_columns = [
    'season', 'Rank', 'distance', 'Behind', 'format', 'position', 'km_to_go',
    'standing_remaining', 'prone_remaining', 'probability_standing',
    'probability_prone', 'back_from_median', 'gradient_back',
    'all_mistakes_prone', 'all_mistakes_standing', 'gradient_standing',
    'gradient_prone', 'gradient_pos', 'behind_above', 'behind_below',
    'standing_above', 'standing_below', 'prone_above', 'prone_below',
]
final_df = data_df[selected_columns]

# Rows of the 2021-2022 season form the test set; everything else is training data.
is_test_season = final_df['season'] == '2021-2022'

# `drop` returns a new frame here instead of mutating a filtered slice in
# place, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
final_df_train = final_df[~is_test_season].drop(columns=['season'])
final_df_test = final_df[is_test_season].drop(columns=['season'])

In [None]:
class MyDataset(Dataset):
  """Tensor-backed dataset built from a DataFrame.

  Column 0 of ``df`` is the label and all remaining columns are features.

  Args:
    df: DataFrame whose first column holds the label.
    label_dtype: dtype of the label tensor. Defaults to ``torch.long``
      because ``nn.CrossEntropyLoss`` expects int64 class indices; pass
      ``torch.float32`` for regression-style targets.

  Raises:
    ValueError: if an integer ``label_dtype`` is requested but the label
      column contains missing values (they cannot be represented as ints).
  """

  def __init__(self, df, label_dtype=torch.long):
    label_column = df.iloc[:, 0]
    # Missing labels would be silently converted to garbage integers.
    if not label_dtype.is_floating_point and label_column.isna().any():
      raise ValueError('label column contains missing values')

    features = df.iloc[:, 1:].values
    labels = label_column.values

    # Attribute names are kept for compatibility even though the same class
    # also wraps the test split.
    self.x_train = torch.tensor(features, dtype=torch.float32)
    self.y_train = torch.tensor(labels, dtype=label_dtype)

  def __len__(self):
    """Number of samples (rows of the source frame)."""
    return len(self.y_train)

  def __getitem__(self, idx):
    """Return the ``(features, label)`` pair stored at ``idx``."""
    return self.x_train[idx], self.y_train[idx]

In [None]:
# Wrap both splits in MyDataset and show the first sample of each.
train_df, test_df = (MyDataset(frame) for frame in (final_df_train, final_df_test))
print(train_df[0], '\n', test_df[0])

In [None]:
# Training batches are shuffled; the test loader yields the whole test set
# as one unshuffled batch.
train_loader = DataLoader(dataset=train_df, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset=test_df, batch_size=len(test_df), shuffle=False)

# Sanity check: print the shapes and contents of the first training batch only.
for i, batch in enumerate(train_loader):
  data, labels = batch
  print(data.shape, labels.shape)
  print(data, labels)
  break


In [None]:
# One 'balanced' weight per distinct class label found in the training ranks.
weights = sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(final_df_train['Rank']),
    y=final_df_train['Rank'],
)

In [None]:
# Class weights as a float32 tensor, the form accepted by the loss constructor.
weights_torch = torch.as_tensor(weights, dtype=torch.float32)

# Class-weighted cross-entropy loss.
loss_fn = nn.CrossEntropyLoss(weight=weights_torch)

In [None]:
# Hyper-parameter search space. Each list is one axis of the grid.
optimizers = ['Adam', 'SGD', 'RMSprop']

# Layer widths, input first and output last.
architectures = [
    [22, 50, 50, 3],
    [22, 100, 3],
    [22, 10, 10, 10, 10, 10, 3],
]

normalized_input = [True, False]
batch_sizes = [100, 200, 500]
weight_decay = [0.001, 0.0001, 0.00001]

# Activation modules are instantiated once and reused by every model built from them.
activation_functions = [activation_cls() for activation_cls in (nn.ReLU, nn.Sigmoid)]

epochs = 30

# Whether to insert batch normalisation after the hidden linear layers.
batches = [True, False]



In [None]:




class Net(nn.Module):
    """Fully connected feed-forward network described by a list of layer widths.

    For ``architecture = [n0, n1, ..., nk]`` the network stacks
    ``Linear(n0, n1), ..., Linear(n(k-1), nk)``. Every linear layer except the
    last one is followed by an optional ``BatchNorm1d`` and then by
    ``activation``; the last layer's output is returned unchanged.

    Args:
        architecture: sequence of layer widths, input size first, output size last.
        activation: activation module placed after each hidden layer. The same
            instance is reused at every position.
        batch: if true, insert ``BatchNorm1d`` between each hidden linear layer
            and its activation. Defaults to ``False`` so the class can be
            built from just an architecture and an activation.
    """

    def __init__(self, architecture, activation, batch=False):
        # nn.Module must be initialised before attributes are attached to it.
        super().__init__()

        # Layer names are part of the state_dict keys, so they are kept stable.
        self.ord_dict = OrderedDict()
        last_index = len(architecture) - 1
        for i in range(1, len(architecture)):
            self.ord_dict[f'layer_{i}'] = nn.Linear(architecture[i - 1], architecture[i])

            # Hidden layers only: the output layer gets no norm or activation.
            if i < last_index:
                if batch:
                    self.ord_dict[f'Batch_{i}'] = nn.BatchNorm1d(architecture[i])
                self.ord_dict[f'activation_{i}'] = activation

        # Feed-forward stack assembled from the named layers above.
        self.feed_forward = nn.Sequential(self.ord_dict)

    def forward(self, x):
        """Run ``x`` through the feed-forward stack and return its output."""
        return self.feed_forward(x)

In [None]:
# Every combination of the hyper-parameter lists, one dict per configuration,
# stored in a 1-D object array. `itertools.product` varies the last axis
# fastest, which is the same order as nesting the loops by hand, and the
# array length follows from the grid itself instead of a hand-kept product.
all_models = np.array(
    [
        {
            'optimizer': optimizer,
            'architecture': architecture,
            'normalized': normalized,
            'batch_size': batch_size,
            'weight_decay': weight,
            'activation': activation,
            'batch': batch,
        }
        for optimizer, architecture, normalized, batch_size, weight, activation, batch
        in itertools.product(
            optimizers,
            architectures,
            normalized_input,
            batch_sizes,
            weight_decay,
            activation_functions,
            batches,
        )
    ],
    dtype=object,
)
                        

In [None]:
#train_df = MyDataset(final_df_train)
# Standardise the feature columns (everything except the label in column 0).
columns_list = list(final_df_train.columns)
feature_columns = columns_list[1:]

# The statistics come from the training split only and are reused for the
# test split, so both splits are on the same scale and no test information
# leaks into the preprocessing.
train_mean = final_df_train[feature_columns].mean()
# A constant column has std 0; dividing by 1 instead keeps it at 0 rather than NaN.
train_std = final_df_train[feature_columns].std().replace(0, 1)

final_df_train_2 = final_df_train.copy()
final_df_train_2[feature_columns] = (final_df_train[feature_columns] - train_mean) / train_std

final_df_test_2 = final_df_test.copy()
final_df_test_2[feature_columns] = (final_df_test[feature_columns] - train_mean) / train_std


In [None]:
# Dataset wrappers around the standardised train and test frames.
train_df_2, test_df_2 = map(MyDataset, (final_df_train_2, final_df_test_2))

In [None]:
# Shuffled 5-fold splitter with a fixed seed, so the folds are the same on every run.
n_folds = 5
kf = KFold(
    n_splits=n_folds,
    shuffle=True,
    random_state=123,
)

In [None]:
### TRAINING LOOP
# K-fold cross-validated grid search. For every hyper-parameter combination a
# fresh network is trained on each fold, and the mean training / validation
# loss of every epoch is appended to
# `train_loss_log[config][f'fold_{k}']` / `val_loss_log[config][f'fold_{k}']`.
#
# NOTE(review): `activations`, `regularizations`, `l1_weights`, `l2_weights`,
# `learning_rates`, `architecures` and `RegressionDataset` are not defined in
# the visible part of this notebook — confirm they exist before this cell runs.
# NOTE(review): `train_df` is assigned a MyDataset instance earlier in this
# notebook, which has no `.iloc`; this cell appears to expect a DataFrame —
# confirm which object is intended.
num_epochs = 50
fold_losses = []  # not written to in this cell
train_loss_log = {}
val_loss_log = {}

# Kept under its own name so the notebook-level `loss_fn` is not overwritten.
l1_loss = nn.L1Loss()


def _build_optimizer(name, parameters, lr):
    """Create a new optimizer for ``parameters`` from its textual name.

    Raises:
        ValueError: if ``name`` is not a supported optimizer, so that an
            optimizer bound to a previous network is never silently reused.
    """
    if name == 'Adam':
        return optim.Adam(parameters, lr=lr)
    if name == 'RMSprop':
        return optim.RMSprop(parameters, lr=lr)
    if name == 'SGD':
        return optim.SGD(parameters, lr=lr)
    if name == 'SGD_with_momentum':
        return optim.SGD(parameters, lr=lr, momentum=0.9)
    raise ValueError(f'Unknown optimizer name: {name!r}')


def _regularization_penalty(model, reg, l1, l2):
    """Return the weight penalty selected by ``reg`` for the current weights.

    'L1' adds ``l1 * sum(|w|)`` and 'L2' adds ``l2 * sum(w ** 2)`` over all
    non-bias parameters. A label containing both 'L1' and 'L2' (e.g. 'L1L2')
    applies both penalties — NOTE(review): confirm this matches the labels
    used in `regularizations`. Any other label yields no penalty.
    """
    weight_tensors = [p for name, p in model.named_parameters() if 'bias' not in name]
    reg_name = str(reg)
    penalty = 0.0
    if 'L1' in reg_name:
        penalty = penalty + l1 * sum(p.abs().sum() for p in weight_tensors)
    if 'L2' in reg_name:
        penalty = penalty + l2 * sum((p ** 2).sum() for p in weight_tensors)
    return penalty


def _to_tensor_pair(sample):
    """Wrap the first two entries of ``sample`` in one-element float tensors."""
    return torch.Tensor([sample[0]]).float(), torch.Tensor([sample[1]]).float()


# Same iteration order as nesting the loops by hand: the last axis varies fastest.
hyperparameter_grid = itertools.product(
    list(activations.keys()),
    regularizations,
    l1_weights,
    l2_weights,
    learning_rates,
    optimizers,
    list(architecures.keys()),
)

for activation_, reg, l1, l2, lr, optimizer_, architecture_ in hyperparameter_grid:
    config_key = (activation_, reg, l1, l2, lr, optimizer_, architecture_)
    print(config_key)
    train_loss_log[config_key] = {}
    val_loss_log[config_key] = {}

    # KFold only needs the number of samples, so index positions are split
    # instead of converting the whole dataset to an array.
    sample_positions = np.arange(len(train_df))
    for fold_index, (train_idx, val_idx) in enumerate(kf.split(sample_positions), start=1):
        fold_name = f'fold_{fold_index}'

        # A new network and a new optimizer per fold. Net requires the `batch`
        # flag; batch normalisation is left off here.
        net = Net(architecures[architecture_], activations[activation_], batch=False)
        optimizer = _build_optimizer(optimizer_, net.parameters(), lr)

        train_loss_log[config_key][fold_name] = []
        val_loss_log[config_key][fold_name] = []

        train_sample = RegressionDataset(train_df.iloc[train_idx], transform=_to_tensor_pair)
        validation_sample = RegressionDataset(train_df.iloc[val_idx], transform=_to_tensor_pair)

        train_data = DataLoader(train_sample, batch_size=4, shuffle=False)
        # The whole validation fold is evaluated as a single batch.
        validation_data = DataLoader(validation_sample, batch_size=len(validation_sample), shuffle=False)

        for epoch_num in range(num_epochs):

            ### TRAIN
            train_loss = []
            net.train()  # Training mode (e.g. enable dropout, batchnorm updates,...)
            for sample_batched in train_data:
                x_batch = sample_batched[0]
                label_batch = sample_batched[1]

                # Forward pass
                out = net(x_batch)

                # Data loss plus the selected weight penalty
                loss = l1_loss(out, label_batch) + _regularization_penalty(net, reg, l1, l2)

                # Backpropagation
                optimizer.zero_grad()
                loss.backward()

                # Update the weights
                optimizer.step()

                # Save train loss for this batch, detached from the computational graph
                train_loss.append(loss.detach().cpu().numpy())

            # Save average train loss
            train_loss = np.mean(train_loss)
            train_loss_log[config_key][fold_name].append(train_loss)

            ### VALIDATION
            val_loss = []
            net.eval()  # Evaluation mode (e.g. disable dropout, batchnorm,...)
            with torch.no_grad():  # No gradient tracking: the weights are not updated here
                for sample_batched in validation_data:
                    x_batch = sample_batched[0]
                    label_batch = sample_batched[1]

                    # Forward pass
                    out = net(x_batch)

                    # The penalty is recomputed from the current weights rather
                    # than reused from the last training batch.
                    loss = l1_loss(out, label_batch) + _regularization_penalty(net, reg, l1, l2)

                    # Save val loss for this batch
                    val_loss.append(loss.detach().cpu().numpy())

            # Save average validation loss
            val_loss = np.mean(val_loss)
            val_loss_log[config_key][fold_name].append(val_loss)

                            