In [15]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt

from dataloader import SequentialSoccerDataset

from rnn import SimpleRNNModel 


In [49]:
import pandas as pd

# Example: events for one match
events = pd.read_pickle("../data/raw/epl_2015.pkl")[0]["events"]  # first match

print(events.columns)


Index(['game_id', 'period', 'minute', 'second', 'expanded_minute', 'type',
       'outcome_type', 'team_id', 'team', 'player_id', 'player', 'x', 'y',
       'end_x', 'end_y', 'goal_mouth_y', 'goal_mouth_z', 'blocked_x',
       'blocked_y', 'qualifiers', 'is_touch', 'is_shot', 'is_goal',
       'card_type', 'related_event_id', 'related_player_id'],
      dtype='object')


In [45]:
def build_time_sequence(df, team, time_interval=5):
    """
    Convert DataFrame of events into a sequence of feature vectors for one team.
    """
    max_minute = df["minute"].max()
    sequence = []
    
    for start_min in range(0, max_minute, time_interval):
        end_min = start_min + time_interval
        interval_events = df[(df["minute"] >= start_min) & 
                             (df["minute"] < end_min) & 
                             (df["team"] == team)]
        
        # Example features: number of passes, shots, goals
        num_passes = len(interval_events[(interval_events["type"] == "Pass") & 
                                         (interval_events["outcome_type"] == "Successful")])
        num_shots = len(interval_events[interval_events["type"] == "Shot"])
        num_goals = len(interval_events[interval_events["type"] == "Goal"])
        
        # You can add more features as needed
        feature_vector = [num_passes, num_shots, num_goals]
        sequence.append(feature_vector)
    
    return pd.DataFrame(sequence)  # [T, F]


In [50]:
matches = pd.read_pickle("../data/raw/epl_2015.pkl")
print(matches.columns)



AttributeError: 'list' object has no attribute 'columns'

In [52]:
print(matches[0].keys())

dict_keys(['stage_id', 'game_id', 'status', 'start_time', 'home_team_id', 'home_team', 'home_yellow_cards', 'home_red_cards', 'away_team_id', 'away_team', 'away_yellow_cards', 'away_red_cards', 'has_incidents_summary', 'has_preview', 'score_changed_at', 'elapsed', 'last_scorer', 'is_top_match', 'home_team_country_code', 'away_team_country_code', 'comment_count', 'is_lineup_confirmed', 'is_stream_available', 'match_is_opta', 'home_team_country_name', 'away_team_country_name', 'date', 'home_score', 'away_score', 'incidents', 'bets', 'aggregate_winner_field', 'winner_field', 'period', 'extra_result_field', 'home_extratime_score', 'away_extratime_score', 'home_penalty_score', 'away_penalty_score', 'started_at_utc', 'first_half_ended_at_utc', 'second_half_started_at_utc', 'stage', 'events'])


In [59]:
print(matches[0]['winner_field'])
print(matches[0]['home_score'])
print(matches[0]['away_score'])



1.0
0
1


In [34]:
from torch.utils.data import random_split

n = len(dataset)
train_size = int(0.8 * n)
val_size = int(0.1 * n)
test_size = n - train_size - val_size

train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])


In [None]:
from torch.utils.data import DataLoader
import torch.nn as nn

def collate_fn(batch):
    sequences = []
    targets = []  # placeholder: match outcome
    for graphs in batch:
        seq = build_team_sequence(list(graphs.values())[0])  # select one team
        sequences.append(seq)
        targets.append(torch.tensor(0.0))  # replace with real label
    lengths = torch.tensor([s.size(0) for s in sequences])
    sequences_padded = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    targets = torch.stack(targets)
    return sequences_padded, targets, lengths

train_loader = DataLoader(train_set, batch_size=8, shuffle=True, collate_fn=collate_fn)

<torch.utils.data.dataset.Subset object at 0x000002B1FFA1BCB0>


In [36]:
import torch.nn as nn

class SimpleRNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: [batch, seq_len, input_size]
        out, _ = self.rnn(x)        # out: [batch, seq_len, hidden_size]
        out = out[:, -1, :]         # last time step
        out = self.fc(out)          # [batch, output_size]
        return out


In [40]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SimpleRNNModel(input_size=1, hidden_size=32, num_layers=1, output_size=1).to(device)
criterion = nn.MSELoss()  # regression; use BCEWithLogitsLoss() for classification
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(5):
    model.train()
    total_loss = 0
    for (x, y, lengths) in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        preds = model(x)
        loss = criterion(preds.squeeze(), y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * x.size(0)
    print(f"Epoch {epoch+1} - Loss: {total_loss/len(train_set):.4f}")


UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL numpy.ndarray was not an allowed global by default. Please use `torch.serialization.add_safe_globals([numpy.ndarray])` or the `torch.serialization.safe_globals([numpy.ndarray])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.