In [1]:
import numpy as np

import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt

In [2]:
class Multiclass1(nn.Module):
    def __init__(self, input_size, output_size, dropout_rate):
        super().__init__()
        
        # inicialmente o ReLU para tudo
        self.act1 = nn.ELU()
        self.act2 = nn.Tanh()
        
        self.outputact = nn.Sigmoid()
        
        self.hidden1 = nn.Linear(input_size, 64)
        self.hidden2 = nn.Linear(64, 256)
        self.hidden3 = nn.Linear(256, 256)
        
        self.output = nn.Linear(256, output_size)
        
        self.dropout = nn.Dropout(dropout_rate)  # camada droupout é um tipo de regularização da rede neural
        
    def forward(self, x):
        x = self.hidden1(x)
        x = self.act2(x)
        x = self.dropout(x)  # Aplicar Dropout após a primeira camada oculta

        x = self.hidden2(x)
        x = self.act2(x)
        x = self.dropout(x)  

        x = self.hidden3(x)
        x = self.act1(x)
        x = self.dropout(x)
        
        x = self.output(x)
        x = self.outputact(x)
        return x


In [3]:
# preciso fazer o pré-processamento dos dados, OneHot e Normalização
loc = "./Premier-League/"

dataset = pd.read_csv(loc + "premier_league2001-2021.csv")

dataset.info() # posso considerar a data ou não, acredito que vou manter apenas as season

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7980 entries, 0 to 7979
Data columns (total 42 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           7980 non-null   object 
 1   HomeTeam       7980 non-null   object 
 2   AwayTeam       7980 non-null   object 
 3   FTHG           7980 non-null   int64  
 4   FTAG           7980 non-null   int64  
 5   FTR            7980 non-null   object 
 6   HTGS           7980 non-null   int64  
 7   ATGS           7980 non-null   int64  
 8   HTGC           7980 non-null   int64  
 9   ATGC           7980 non-null   int64  
 10  HTP            7980 non-null   float64
 11  ATP            7980 non-null   float64
 12  HM1            7980 non-null   object 
 13  HM2            7980 non-null   object 
 14  HM3            7980 non-null   object 
 15  HM4            7980 non-null   object 
 16  HM5            7980 non-null   object 
 17  AM1            7980 non-null   object 
 18  AM2     

In [4]:
# Uma possibilidade a contornar isso pode ser unir os times em times "Bons", "Ruins" e "Medianos"
# Poderia ser com base nas taxas de vitórias, ou se eles aparecem muito nos campeonatos ou até mesmo pela media dos
# LP's

datasetParsed = dataset.drop(dataset.loc[dataset["MW"] == 1].index)
datasetParsed = datasetParsed.drop(["HomeTeam", "AwayTeam", "Date"], axis = 1)

In [5]:
x_train = datasetParsed[["DiffLP", "DiffPts", "DiffFormPts", 
                         "HTWinStreak3", "ATWinStreak3","HTLossStreak3", "ATLossStreak3",
                         "HTGS", "ATGS", "HTGC", "ATGC", "HTFormPts", "ATFormPts",
                         "HTGD", "ATGD", "HomeTeamLP", "AwayTeamLP"]]

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)

y_train = np.array(datasetParsed["FTR"]).reshape(-1, 1)

encoder = OneHotEncoder()
print(np. unique(y_train, return_counts=True))
y_train = encoder.fit_transform(y_train).toarray()

input_size = x_train.shape[1]
output_size = y_train.shape[1]

x_train = torch.tensor(x_train).float()
y_train = torch.from_numpy(y_train).float()

print(input_size, output_size)
print(y_train)

(array(['A', 'D', 'H'], dtype=object), array([2241, 1946, 3583]))
17 3
tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        ...,
        [0., 0., 1.],
        [0., 0., 1.],
        [1., 0., 0.]])


In [6]:
from sklearn.model_selection import train_test_split

# Dividir o conjunto de treinamento em treinamento e validação
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.4)

# Exibir o shape dos conjuntos de treinamento e validação
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("x_valid shape:", x_valid.shape)
print("y_valid shape:", y_valid.shape)

x_train shape: torch.Size([4662, 17])
y_train shape: torch.Size([4662, 3])
x_valid shape: torch.Size([3108, 17])
y_valid shape: torch.Size([3108, 3])


In [None]:
n_epochs = 300

momentums = [0.7]
learning_rates = [0.1] #0.01
weight_decay = 0.0005 

best_acc = - np.nan
models = []
final_loss = []

CROSS_losses_train = []
CROSS_losses_val = []

ACC_losses_train = []
ACC_losses_val = []

parameters = []
batch_size = 64

for momentum in momentums:
    for lr in learning_rates:
            parameters.append({"learning_rate": lr,
                               "momentum": momentum, 
                               "weight_decay": weight_decay})
                
            batches_per_epoch = len(x_train) // batch_size
            model = Multiclass1(input_size, output_size, 0.3)
                
            current_CROSS_train = []
            current_CROSS_val = []

            current_ACC_train = []
            current_ACC_val = []
                
            loss_fn = nn.CrossEntropyLoss()
            optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay = weight_decay)
            for epoch in range(n_epochs):
                epoch_loss = []
                epoch_acc = []
                model.train()
                for i in range(batches_per_epoch):
                    start = i * batch_size
                    X_batch = x_train[start:start+batch_size]
                    y_batch = y_train[start:start+batch_size]

                            # forward 
                    y_pred = model(X_batch)
                    loss = loss_fn(y_pred, y_batch)
                            # backward 
                    optimizer.zero_grad()
                    loss.backward()
                            # update weights
                    optimizer.step()
                    acc = (torch.argmax(y_pred, 1) == torch.argmax(y_batch, 1)).float().mean()
                    epoch_loss.append(float(loss))
                    epoch_acc.append(float(acc))
                    # set model in evaluation mode and run through the test set
                    
                model.eval()
                y_pred = model(x_valid)
                print(y_pred.mean(axis = 0))
                print(y_valid.mean(axis = 0))
                ce = loss_fn(y_pred, y_valid)
                acc = (torch.argmax(y_pred, 1) == torch.argmax(y_valid, 1)).float().mean()

                ce = float(ce)
                acc = float(acc)
                print(f"Epoch {epoch} train: Cross-entropy={sum(epoch_loss)/batches_per_epoch}, Accuracy={sum(epoch_acc)/batches_per_epoch}")
                    
                current_CROSS_train.append(np.mean(epoch_loss))
                current_ACC_train.append(np.mean(epoch_acc))
                    
                current_CROSS_val.append(ce)
                current_ACC_val.append(acc)
                    
                if acc > best_acc:
                    best_acc = acc
                    best_weights = copy.deepcopy(model.state_dict())
                print(f"Epoch {epoch} validation: Cross-entropy={ce}, Accuracy={acc}")
                    
                CROSS_losses_train.append(current_CROSS_train)
                CROSS_losses_val.append(current_CROSS_val)

                ACC_losses_train.append(current_ACC_train)
                ACC_losses_val.append(current_ACC_val)
                final_loss.append(ce)

tensor([0.4576, 0.4132, 0.6190], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 0 train: Cross-entropy=1.0634023050467174, Accuracy=0.4934895833333333
Epoch 0 validation: Cross-entropy=1.0341356992721558, Accuracy=0.522522509098053
tensor([0.4112, 0.3480, 0.6618], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 1 train: Cross-entropy=1.019120589726501, Accuracy=0.5234375
Epoch 1 validation: Cross-entropy=1.0143070220947266, Accuracy=0.5228442549705505
tensor([0.3822, 0.3069, 0.6861], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 2 train: Cross-entropy=1.008160874247551, Accuracy=0.5286458333333334
Epoch 2 validation: Cross-entropy=1.0097640752792358, Accuracy=0.523809552192688
tensor([0.3724, 0.2799, 0.6936], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 3 train: Cross-entropy=1.0023747988873057, Accuracy=0.5308159722222222
Epoch 3 validation: Cross-entropy=1.0082751512527466, Accuracy=0.523809552192688
tensor([0.3613

tensor([0.3084, 0.1886, 0.6927], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 33 train: Cross-entropy=0.9937616826759444, Accuracy=0.5353732638888888
Epoch 33 validation: Cross-entropy=1.0048956871032715, Accuracy=0.5199485421180725
tensor([0.3078, 0.1875, 0.6918], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 34 train: Cross-entropy=0.9931532988945643, Accuracy=0.5392795138888888
Epoch 34 validation: Cross-entropy=1.0048305988311768, Accuracy=0.5209137797355652
tensor([0.3057, 0.1885, 0.6919], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 35 train: Cross-entropy=0.9940887126657698, Accuracy=0.5334201388888888
Epoch 35 validation: Cross-entropy=1.0047796964645386, Accuracy=0.5209137797355652
tensor([0.3064, 0.1868, 0.6895], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 36 train: Cross-entropy=0.9931190659602483, Accuracy=0.5379774305555556
Epoch 36 validation: Cross-entropy=1.0046523809432983, Accuracy=0.521235525

tensor([0.2932, 0.1708, 0.6763], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 66 train: Cross-entropy=0.9915500490201844, Accuracy=0.5388454861111112
Epoch 66 validation: Cross-entropy=1.0041553974151611, Accuracy=0.5218790173530579
tensor([0.2941, 0.1662, 0.6751], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 67 train: Cross-entropy=0.9927617948916223, Accuracy=0.5362413194444444
Epoch 67 validation: Cross-entropy=1.0040732622146606, Accuracy=0.5228442549705505
tensor([0.2892, 0.1690, 0.6783], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 68 train: Cross-entropy=0.9909571607907613, Accuracy=0.5381944444444444
Epoch 68 validation: Cross-entropy=1.0042201280593872, Accuracy=0.5222007632255554
tensor([0.2888, 0.1688, 0.6784], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 69 train: Cross-entropy=0.9928594860765669, Accuracy=0.5373263888888888
Epoch 69 validation: Cross-entropy=1.0041719675064087, Accuracy=0.522200763

tensor([0.2794, 0.1619, 0.6686], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 99 train: Cross-entropy=0.9909336798720889, Accuracy=0.5340711805555556
Epoch 99 validation: Cross-entropy=1.0037683248519897, Accuracy=0.5218790173530579
tensor([0.2803, 0.1625, 0.6674], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 100 train: Cross-entropy=0.9911014603243934, Accuracy=0.5368923611111112
Epoch 100 validation: Cross-entropy=1.0038695335388184, Accuracy=0.5212355256080627
tensor([0.2792, 0.1633, 0.6679], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 101 train: Cross-entropy=0.9901279426283307, Accuracy=0.5373263888888888
Epoch 101 validation: Cross-entropy=1.0039187669754028, Accuracy=0.5205920338630676
tensor([0.2793, 0.1619, 0.6673], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 102 train: Cross-entropy=0.9911418855190277, Accuracy=0.5373263888888888
Epoch 102 validation: Cross-entropy=1.0038752555847168, Accuracy=0.520

tensor([0.2731, 0.1494, 0.6601], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 132 train: Cross-entropy=0.9900340321991179, Accuracy=0.5381944444444444
Epoch 132 validation: Cross-entropy=1.003922462463379, Accuracy=0.5189832448959351
tensor([0.2735, 0.1492, 0.6595], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 133 train: Cross-entropy=0.991044807765219, Accuracy=0.5381944444444444
Epoch 133 validation: Cross-entropy=1.0036753416061401, Accuracy=0.5212355256080627
tensor([0.2738, 0.1500, 0.6588], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 134 train: Cross-entropy=0.9899584344691701, Accuracy=0.5399305555555556
Epoch 134 validation: Cross-entropy=1.003740906715393, Accuracy=0.5205920338630676
tensor([0.2732, 0.1497, 0.6594], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 135 train: Cross-entropy=0.9901833393507533, Accuracy=0.5401475694444444
Epoch 135 validation: Cross-entropy=1.0037295818328857, Accuracy=0.5202

tensor([0.2676, 0.1466, 0.6534], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 165 train: Cross-entropy=0.9900541595286794, Accuracy=0.5434027777777778
Epoch 165 validation: Cross-entropy=1.00351881980896, Accuracy=0.5222007632255554
tensor([0.2720, 0.1457, 0.6491], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 166 train: Cross-entropy=0.9905927098459668, Accuracy=0.5401475694444444
Epoch 166 validation: Cross-entropy=1.0035516023635864, Accuracy=0.5209137797355652
tensor([0.2683, 0.1444, 0.6510], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 167 train: Cross-entropy=0.991179668241077, Accuracy=0.5381944444444444
Epoch 167 validation: Cross-entropy=1.00347900390625, Accuracy=0.5209137797355652
tensor([0.2677, 0.1428, 0.6523], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 168 train: Cross-entropy=0.9906341127223439, Accuracy=0.5373263888888888
Epoch 168 validation: Cross-entropy=1.0034724473953247, Accuracy=0.520913

tensor([0.2601, 0.1396, 0.6448], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 198 train: Cross-entropy=0.9903875928786066, Accuracy=0.5381944444444444
Epoch 198 validation: Cross-entropy=1.0034371614456177, Accuracy=0.5212355256080627
tensor([0.2597, 0.1375, 0.6446], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 199 train: Cross-entropy=0.991533488035202, Accuracy=0.5368923611111112
Epoch 199 validation: Cross-entropy=1.0032951831817627, Accuracy=0.5218790173530579
tensor([0.2585, 0.1376, 0.6451], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 200 train: Cross-entropy=0.9904211163520813, Accuracy=0.5399305555555556
Epoch 200 validation: Cross-entropy=1.003363847732544, Accuracy=0.5212355256080627
tensor([0.2583, 0.1359, 0.6455], grad_fn=<MeanBackward1>)
tensor([0.2876, 0.2597, 0.4527])
Epoch 201 train: Cross-entropy=0.9894149767027961, Accuracy=0.5414496527777778
Epoch 201 validation: Cross-entropy=1.0033332109451294, Accuracy=0.520

##### 

In [None]:
# Lutei bastante mas não consegui resolver o problema do Draw
# Não gostei de reduzir a dificuldade do problema, acho q o problema real está nas features
# Elas não trazem tanta informação quanto parece
# Vi um artigo desse mesmo problema que ele utiliza features criadas completamente diferentes, mas infelizmente
# Não tem código, o resultado dele era bem melhor
# Vamo ter que conseguir algo com as criação de outras features, talvez Kmeans e PCA