In [1]:
import shap

ModuleNotFoundError: No module named 'shap'

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FootballPredictor(nn.Module):
    def __init__(self, num_teams, embedding_dim, input_dim, hidden_dim, output_dim):
        super(FootballPredictor, self).__init__()
        self.team_embeddings = nn.Embedding(num_teams, embedding_dim)
        self.lstm = nn.LSTM(input_dim + 2 * embedding_dim, hidden_dim, batch_first=True)
        self.fc_goals = nn.Linear(hidden_dim, 1)  # Liczba goli
        self.fc_corners = nn.Linear(hidden_dim, 1)  # Rzuty rożne
        self.fc_cards = nn.Linear(hidden_dim, 2)  # Kartki
        self.fc_result = nn.Linear(hidden_dim, 3)  # Wynik meczu (wygrana, remis, przegrana)

    def forward(self, x, home_teams, away_teams, h2h_features):
        # Embeddings dla drużyn
        home_embeddings = self.team_embeddings(home_teams)
        away_embeddings = self.team_embeddings(away_teams)
        
        # Łączenie danych wejściowych z embeddingami
        x = torch.cat((x, home_embeddings, away_embeddings), dim=-1)
        
        # LSTM
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # Używamy tylko ostatniego outputu
        
        # Wyjścia
        goals = self.fc_goals(lstm_out)
        corners = self.fc_corners(lstm_out)
        cards = self.fc_cards(lstm_out)
        result = F.softmax(self.fc_result(lstm_out), dim=1)
        
        return goals, corners, cards, result

# Użycie modelu
num_teams = 1000  # Liczba drużyn
embedding_dim = 20
input_dim = 10  # Liczba cech na mecz, bez embedingów
hidden_dim = 128
output_dim = 4

model = FootballPredictor(num_teams, embedding_dim, input_dim, hidden_dim, output_dim)

In [6]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()  # Można dostosować w zależności od potrzeb

import torch

def train_model(model, data_loader, optimizer, criterion, epochs=10):
    model.train()  # Przełączenie modelu w tryb treningu
    for epoch in range(epochs):
        total_loss = 0
        for batch in data_loader:
            x, home_teams, away_teams, h2h_features, goals, corners, cards, results = batch
            optimizer.zero_grad()  # Zerowanie gradientów
            predicted_goals, predicted_corners, predicted_cards, predicted_results = model(x, home_teams, away_teams, h2h_features)
            
            # Obliczanie straty jako sumę strat poszczególnych wyników
            loss_goals = criterion(predicted_goals, goals)
            loss_corners = criterion(predicted_corners, corners)
            loss_cards = criterion(predicted_cards, cards)
            loss_results = criterion(predicted_results, results)
            loss = loss_goals + loss_corners + loss_cards + loss_results
            
            loss.backward()  # Propagacja wsteczna
            optimizer.step()  # Aktualizacja parametrów modelu
            total_loss += loss.item()
        
        average_loss = total_loss / len(data_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {average_loss:.4f}")

In [5]:
def evaluate_model(model, data_loader, criterion):
    model.eval()  # Przełączenie modelu w tryb ewaluacji
    total_loss = 0
    with torch.no_grad():  # Wyłączenie obliczeń gradientów
        for batch in data_loader:
            x, home_teams, away_teams, h2h_features, goals, corners, cards, results = batch
            predicted_goals, predicted_corners, predicted_cards, predicted_results = model(x, home_teams, away_teams, h2h_features)
            
            # Obliczanie straty jako sumę strat poszczególnych wyników
            loss_goals = criterion(predicted_goals, goals)
            loss_corners = criterion(predicted_corners, corners)
            loss_cards = criterion(predicted_cards, cards)
            loss_results = criterion(predicted_results, results)
            loss = loss_goals + loss_corners + loss_cards + loss_results
            
            total_loss += loss.item()
    
    average_loss = total_loss / len(data_loader)
    return average_loss

In [59]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
data = pd.read_csv('./data/clean_data.csv')

In [12]:
data['result'] = np.where(data['home_team_goal_count'] == data['away_team_goal_count'], 0, np.where(data['home_team_goal_count'] > data['away_team_goal_count'], 1, 2))

In [60]:
data.head()

Unnamed: 0.1,Unnamed: 0,timestamp,date_GMT,status,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),...,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name
0,306,1282329000,Aug 20 2010 - 6:30pm,complete,,Bayern München,Wolfsburg,Thorsten Kinhöfer,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Allianz Arena (München)
1,307,1282397400,Aug 21 2010 - 1:30pm,complete,,Hoffenheim,Werder Bremen,Günter Perl,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PreZero Arena (Sinsheim)
2,308,1282397400,Aug 21 2010 - 1:30pm,complete,,Borussia M'gladbach,Nürnberg,Peter Gagelmann,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stadion im BORUSSIA-PARK (Mönchengladbach)
3,309,1282397400,Aug 21 2010 - 1:30pm,complete,,Köln,Kaiserslautern,Felix Brych,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,RheinEnergieStadion (Köln)
4,310,1282397400,Aug 21 2010 - 1:30pm,complete,,Freiburg,St. Pauli,Jochen Drees,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Schwarzwald-Stadion (Freiburg im Breisgau)


In [13]:
X = data[['home_ppg', 'away_ppg', 'total_goal_count', 'total_goals_at_half_time', 'home_team_goal_count_half_time', 'away_team_goal_count_half_time', 'home_team_corner_count', 'away_team_corner_count',
          'home_team_yellow_cards', 'home_team_red_cards', 'away_team_yellow_cards', 'away_team_red_cards', 'home_team_shots', 'away_team_shots', 'home_team_shots_on_target', 'away_team_shots_on_target',
          'home_team_shots_off_target', 'away_team_shots_off_target', 'home_team_fouls', 'away_team_fouls', 'home_team_possession', 'away_team_possession', 'team_a_xg', 'team_b_xg', 'average_goals_per_match_pre_match',
          'btts_percentage_pre_match', 'odds_ft_home_team_win', 'odds_ft_draw', 'odds_ft_away_team_win']]
y = data['result']

In [58]:
def extract_h2h_features(df, team1, team2):
    # Wybór meczów h2h z przeszłości między team1 i team2
    h2h_games = df[(df['home_team_name'] == team1) & (df['away_team_name'] == team2) |
                   (df['home_team_name'] == team2) & (df['away_team_name'] == team1)]
    h2h_features = h2h_games[['home_team_goal_count', 'away_team_goal_count', 'home_team_corner_count', 'away_team_corner_count', 'home_team_yellow_cards', 'away_team_yellow_cards']].mean().values
    return h2h_features

# Stworzenie nowej cechy dla każdego meczu
data['h2h_features'] = data.apply(lambda x: extract_h2h_features(data, x['home_team_name'], x['away_team_name']), axis=1)

KeyboardInterrupt: 

In [61]:
data[(data['home_team_name'] == 'Hoffenheim') & (data['away_team_name'] == 'Werder Bremen') |
                   (data['home_team_name'] == 'Werder Bremen') & (data['away_team_name'] == 'Hoffenheim')]

Unnamed: 0.1,Unnamed: 0,timestamp,date_GMT,status,attendance,home_team_name,away_team_name,referee,Game Week,Pre-Match PPG (Home),...,odds_ft_home_team_win,odds_ft_draw,odds_ft_away_team_win,odds_ft_over15,odds_ft_over25,odds_ft_over35,odds_ft_over45,odds_btts_yes,odds_btts_no,stadium_name
1,307,1282397400,Aug 21 2010 - 1:30pm,complete,,Hoffenheim,Werder Bremen,Günter Perl,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PreZero Arena (Sinsheim)
155,461,1295101800,Jan 15 2011 - 2:30pm,complete,,Werder Bremen,Hoffenheim,Felix Zwayer,18.0,1.63,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,wohninvest WESERSTADION (Bremen)
337,644,1314451800,Aug 27 2011 - 1:30pm,complete,28750.0,Hoffenheim,Werder Bremen,,4.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PreZero Arena (Sinsheim)
488,795,1328970600,Feb 11 2012 - 2:30pm,complete,39176.0,Werder Bremen,Hoffenheim,Michael Weiner,21.0,2.44,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,wohninvest WESERSTADION (Bremen)
744,1051,1354458600,Dec 02 2012 - 2:30pm,complete,23500.0,Hoffenheim,Werder Bremen,Knut Kircher,15.0,1.14,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,PreZero Arena (Sinsheim)
893,1200,1367674200,May 04 2013 - 1:30pm,complete,,Werder Bremen,Hoffenheim,Robert Hartmann,32.0,1.2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,wohninvest WESERSTADION (Bremen)
1037,1344,1385821800,Nov 30 2013 - 2:30pm,complete,25608.0,Hoffenheim,Werder Bremen,,14.0,0.86,...,1.82,3.95,4.6,0.0,0.0,0.0,0.0,0.0,0.0,WIRSOL Rhein-Neckar-Arena
1189,1496,1397914200,Apr 19 2014 - 1:30pm,complete,40100.0,Werder Bremen,Hoffenheim,,31.0,1.2,...,3.08,3.72,2.34,0.0,0.0,0.0,0.0,0.0,0.0,Weserstadion
1235,1542,1409405400,Aug 30 2014 - 1:30pm,complete,,Werder Bremen,Hoffenheim,Robert Hartmann,2.0,0.0,...,3.13,3.73,2.31,0.0,0.0,0.0,0.0,0.0,0.0,wohninvest WESERSTADION (Bremen)
1390,1697,1423076400,Feb 04 2015 - 7:00pm,complete,23631.0,Hoffenheim,Werder Bremen,,19.0,1.89,...,1.72,4.28,4.84,0.0,0.0,0.0,0.0,0.0,0.0,WIRSOL Rhein-Neckar-Arena


In [40]:
X.values

array([[2.41, 1.  , 3.  , ..., 0.  , 0.  , 0.  ],
       [1.53, 1.  , 5.  , ..., 0.  , 0.  , 0.  ],
       [1.06, 1.06, 2.  , ..., 0.  , 0.  , 0.  ],
       ...,
       [1.17, 0.94, 4.  , ..., 1.91, 3.5 , 3.2 ],
       [1.22, 1.61, 3.  , ..., 2.3 , 3.5 , 2.5 ],
       [2.39, 1.56, 6.  , ..., 1.4 , 4.25, 6.5 ]])

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

In [19]:
train_loader = DataLoader(X_train, batch_size=32, shuffle=True)

In [21]:
dir(train_loader)

['_DataLoader__initialized',
 '_DataLoader__multiprocessing_context',
 '_IterableDataset_len_called',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_auto_collation',
 '_dataset_kind',
 '_get_iterator',
 '_index_sampler',
 '_is_protocol',
 '_iterator',
 'batch_sampler',
 'batch_size',
 'check_worker_number_rationality',
 'collate_fn',
 'dataset',
 'drop_last',
 'generator',
 'multiprocessing_context',
 'num_workers',
 'persistent_workers',
 'pin_memory',
 'pin_memory_device',
 'prefetch_factor',
 'sampler',
 'timeout',
 'worker_init_

In [36]:
train_loader.__getitem__

AttributeError: 'DataLoader' object has no attribute '__getitem__'

In [38]:
from torch.utils.data import Dataset, DataLoader
import torch

class FootballDataset(Dataset):
    def __init__(self, features, labels):
        """
        features: Lista tensorów, gdzie każdy tensor zawiera dane wejściowe dla jednego meczu.
        labels: Lista tensorów, gdzie każdy tensor zawiera etykiety wyników dla jednego meczu.
        """
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [37]:
class FootballDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features  # To powinny być tensory lub listy tensorów
        self.labels = labels      # Analogicznie, tensory lub listy tensorów

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        x = self.features[idx]
        home_teams = x['home_teams']
        away_teams = x['away_teams']
        h2h_features = x['h2h_features']
        goals = self.labels[idx]['goals']
        corners = self.labels[idx]['corners']
        cards = self.labels[idx]['cards']
        results = self.labels[idx]['results']
        return x, home_teams, away_teams, h2h_features, goals, corners, cards, results

NameError: name 'Dataset' is not defined

In [26]:
model.train()

FootballPredictor(
  (team_embeddings): Embedding(1000, 20)
  (lstm): LSTM(50, 128, batch_first=True)
  (fc_goals): Linear(in_features=128, out_features=1, bias=True)
  (fc_corners): Linear(in_features=128, out_features=1, bias=True)
  (fc_cards): Linear(in_features=128, out_features=2, bias=True)
  (fc_result): Linear(in_features=128, out_features=3, bias=True)
)

In [30]:
for batch in train_loader:
    print(batch)

KeyError: 42204

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()  # Można dostosować w zależności od potrzeb

import torch

def train_model(model, data_loader, optimizer, criterion, epochs=10):
    model.train()  # Przełączenie modelu w tryb treningu
    for epoch in range(epochs):
        total_loss = 0
        for batch in data_loader:
            x, home_teams, away_teams, h2h_features, goals, corners, cards, results = batch
            optimizer.zero_grad()  # Zerowanie gradientów
            predicted_goals, predicted_corners, predicted_cards, predicted_results = model(x, home_teams, away_teams, h2h_features)
            
            # Obliczanie straty jako sumę strat poszczególnych wyników
            loss_goals = criterion(predicted_goals, goals)
            loss_corners = criterion(predicted_corners, corners)
            loss_cards = criterion(predicted_cards, cards)
            loss_results = criterion(predicted_results, results)
            loss = loss_goals + loss_corners + loss_cards + loss_results
            
            loss.backward()  # Propagacja wsteczna
            optimizer.step()  # Aktualizacja parametrów modelu
            total_loss += loss.item()
        
        average_loss = total_loss / len(data_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {average_loss:.4f}")

In [18]:
from torch.utils.data import DataLoader

# Załóżmy, że `train_dataset` i `test_dataset` są instancjami odpowiedniej klasy Dataset w PyTorch
train_loader = DataLoader(X_train, batch_size=32, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Parametry
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss()

# Proces trenowania
train_model(model, train_loader, optimizer, criterion, epochs=10)

# # Proces ewaluacji
# test_loss = evaluate_model(model, test_loader, criterion)
# print(f"Test Loss: {test_loss:.4f}")

KeyError: 3262

In [41]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Przekształcenie danych z pandas DataFrame do tensorów PyTorch
X_numpy = X.values
y_numpy = y.values

# Skalowanie cech
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_numpy)

# Podział na zbiór treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_numpy, test_size=0.2, random_state=42)

# Konwersja danych numpy na tensory PyTorch
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Używamy torch.long dla etykiet klasyfikacji
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [42]:
class FootballDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

# Tworzenie instancji Dataset
train_dataset = FootballDataset(X_train_tensor, y_train_tensor)
test_dataset = FootballDataset(X_test_tensor, y_test_tensor)

# Tworzenie DataLoaderów
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [45]:
dir(train_loader)

['_DataLoader__initialized',
 '_DataLoader__multiprocessing_context',
 '_IterableDataset_len_called',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_auto_collation',
 '_dataset_kind',
 '_get_iterator',
 '_index_sampler',
 '_is_protocol',
 '_iterator',
 'batch_sampler',
 'batch_size',
 'check_worker_number_rationality',
 'collate_fn',
 'dataset',
 'drop_last',
 'generator',
 'multiprocessing_context',
 'num_workers',
 'persistent_workers',
 'pin_memory',
 'pin_memory_device',
 'prefetch_factor',
 'sampler',
 'timeout',
 'worker_init_

In [48]:
train_loader.dataset

<__main__.FootballDataset at 0x1abeb90b7d0>

In [49]:
import torch.nn as nn
import torch.optim as optim

class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 100)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, output_dim)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Ustawienie parametrów modelu
input_dim = X_train_tensor.shape[1]
output_dim = len(torch.unique(y_train_tensor))  # Liczba unikalnych etykiet

model = SimpleNN(input_dim, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Funkcja treningowa
def train_model(model, data_loader, optimizer, criterion, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for features, labels in data_loader:
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss/len(data_loader)}')

# Funkcja ewaluacji
def evaluate_model(model, data_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for features, labels in data_loader:
            outputs = model(features)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(data_loader)

# Trenowanie i ewaluacja
train_model(model, train_loader, optimizer, criterion, epochs=10)
test_loss = evaluate_model(model, test_loader, criterion)
print(f"Test Loss: {test_loss:.4f}")

Epoch 1, Loss: 0.6789405154668284
Epoch 2, Loss: 0.5303399778074689
Epoch 3, Loss: 0.4457440562123885
Epoch 4, Loss: 0.42238177640683167
Epoch 5, Loss: 0.4140063473336731
Epoch 6, Loss: 0.40637438982522406
Epoch 7, Loss: 0.4027114865100483
Epoch 8, Loss: 0.39814505781851656
Epoch 9, Loss: 0.39590630669622645
Epoch 10, Loss: 0.3903192943321064
Test Loss: 0.4137


In [51]:
class SoccerMatchPredictor(nn.Module):
    def __init__(self, num_teams, embedding_dim, hidden_dim, num_outputs):
        super(SoccerMatchPredictor, self).__init__()
        self.team_embeddings = nn.Embedding(num_teams, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.h2h_layer = nn.Linear(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, num_outputs)

    def forward(self, team_ids, features, hidden=None):
        team_embeds = self.team_embeddings(team_ids)
        combined_features = torch.cat((team_embeds, features), dim=-1)
        lstm_out, hidden = self.lstm(combined_features, hidden)
        h2h_features = self.h2h_layer(lstm_out[:, -1, :])
        outputs = self.output_layer(h2h_features)
        return outputs

num_teams = 1000
model = SoccerMatchPredictor(num_teams, 20, 128, 4)  # 4 outputs: goals, corners, cards, match result

In [52]:
def train(model, train_loader, optimizer, criterion, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for team_ids, features, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(team_ids, features)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

def test(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for team_ids, features, targets in test_loader:
            outputs = model(team_ids, features)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    print(f"Test Loss: {total_loss / len(test_loader)}")

In [53]:
from torch.utils.data import Dataset, DataLoader
import torch

class FootballMatchesDataset(Dataset):
    def __init__(self, team_ids, features, labels):
        """
        team_ids: Tensor zawierający identyfikatory drużyn.
        features: Tensor zawierający cechy meczów.
        labels: Tensor zawierający etykiety wyników meczów.
        """
        self.team_ids = team_ids
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.team_ids[idx], self.features[idx], self.labels[idx]

# Załóżmy, że X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor są już przygotowane
train_dataset = FootballMatchesDataset(X_train_tensor, y_train_tensor)
test_dataset = FootballMatchesDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [55]:
model = SoccerMatchPredictor(num_teams, embedding_dim=20, hidden_dim=128, num_outputs=4)  # 4 wyjścia mogą być np. gole, rogi, kartki, wynik meczu

In [56]:
import torch.optim as optim

# Definicja optymalizatora i funkcji straty
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()  # lub inna funkcja straty w zależności od zadania

# Funkcja trenująca
def train_model(model, train_loader, optimizer, criterion, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for data in train_loader:
            features, labels = data
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}/{epochs}, Training Loss: {total_loss / len(train_loader)}')

# Funkcja testująca
def evaluate_model(model, test_loader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data in test_loader:
            features, labels = data
            outputs = model(features)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
    print(f'Test Loss: {total_loss / len(test_loader)}')


In [57]:
train_model(model, train_loader, optimizer, criterion, epochs=10)
evaluate_model(model, test_loader, criterion)

TypeError: SoccerMatchPredictor.forward() missing 1 required positional argument: 'features'