In [1]:
import sys
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import pandas as pd
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
%matplotlib inline

In [2]:
%load_ext autoreload
%autoreload 2
from data import ApiFetcher
from model import TeamEmbeddings

In [3]:
api = ApiFetcher(2015, 2025)
df = api.get_dataframe(numeric=False, ids=True)
num_teams = len(df['home_team_id'].unique())
model = TeamEmbeddings(num_teams=num_teams)

print(f"Model structure:\n{sum(p.numel() for p in model.parameters() if p.requires_grad)} parameters")

Model structure:
953 parameters


In [4]:
# Przygotuj dane
feature_cols = ['home_fga', 'away_fga', 'home_fg_pct', 'away_fg_pct', 
               'home_fg3a', 'away_fg3a', 'home_fg3_pct', 'away_fg3_pct',
               'home_oreb', 'away_oreb', 'home_dreb', 'away_dreb',
               'home_ast', 'away_ast', 'home_stl', 'away_stl',
               'home_blk', 'away_blk', 'home_tov', 'away_tov',
               'home_pf', 'away_pf']

X_features = torch.tensor(df[feature_cols].values, dtype=torch.float32)
home_team_ids = torch.tensor(df['home_team_id'].values, dtype=torch.long)
away_team_ids = torch.tensor(df['away_team_id'].values, dtype=torch.long)
targets = torch.tensor(df['home_pts'].values, dtype=torch.float32)

In [5]:
print(f"✅ Data ready:")
print(f"  Features: {X_features.shape}")
print(f"  Targets: {targets.shape}")
print(f"  Model params: 889")
print(f"  Data/params ratio: {len(targets)/889:.1f}")

✅ Data ready:
  Features: torch.Size([11973, 22])
  Targets: torch.Size([11973])
  Model params: 889
  Data/params ratio: 13.5


In [6]:
# Test forward pass
model.eval()
with torch.no_grad():
    test_pred = model(home_team_ids[:5], away_team_ids[:5], X_features[:5])
    print(f"Sample predictions: {test_pred.squeeze()}")
    print(f"Actual targets:     {targets[:5]}")
    print("✅ Model is working!")

Sample predictions: tensor([-0.2058, -0.4275,  0.0507, -0.1570, -0.2136])
Actual targets:     tensor([ 94.,  97., 111.,  97., 112.])
✅ Model is working!


In [7]:
# Dodaj przed treningiem
print("Weight ranges before training:")
for name, param in model.named_parameters():
    print(f"{name}: min={param.min():.3f}, max={param.max():.3f}")

Weight ranges before training:
embedding.weight: min=-0.337, max=0.262
feature_net.0.weight: min=-0.212, max=0.212
feature_net.0.bias: min=-0.182, max=0.179
feature_net.2.weight: min=-0.250, max=0.244
feature_net.2.bias: min=-0.218, max=0.226
final_net.0.weight: min=-0.203, max=0.204
final_net.0.bias: min=-0.201, max=0.177
final_net.2.weight: min=-0.335, max=0.338
final_net.2.bias: min=0.012, max=0.012


In [8]:
# Checking whether the model can overfit a small sample
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

# Take only 10 samples
small_sample = 10
X_small = X_features[:small_sample]
home_small = home_team_ids[:small_sample]
away_small = away_team_ids[:small_sample]
targets_small = targets[:small_sample]

print("Test overfittingu do małej próbki:")
print(f"Targets: {targets_small}")

# Train only on the small sample
for epoch in range(500):
    optimizer.zero_grad()
    predictions = model(home_small, away_small, X_small).squeeze()
    loss = criterion(predictions, targets_small)
    loss.backward()
    
    # Dodaj gradient clipping dla bezpieczeństwa
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    
    optimizer.step()
    
    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Predictions: {predictions[:3].detach()}")

print(f"\nFinal predictions: {predictions.detach()}")
print(f"Actual targets:    {targets_small}")
print(f"Final loss: {loss.item():.4f}")

Test overfittingu do małej próbki:
Targets: tensor([ 94.,  97., 111.,  97., 112.,  95.,  76., 104., 111., 112.])
Epoch 0, Loss: 10344.0635, Predictions: tensor([-0.2058, -0.4275,  0.0507])
Epoch 50, Loss: 83.5516, Predictions: tensor([101.5138,  99.1199, 113.0829])
Epoch 100, Loss: 10.9588, Predictions: tensor([ 90.8592,  93.8344, 106.5650])
Epoch 150, Loss: 12.1014, Predictions: tensor([ 90.5365,  93.4781, 107.1021])
Epoch 200, Loss: 2.7293, Predictions: tensor([ 95.6251,  98.6275, 112.8380])
Epoch 250, Loss: 7.4149, Predictions: tensor([ 96.6915,  99.7030, 114.0056])
Epoch 300, Loss: 14.0922, Predictions: tensor([ 90.2982,  93.2819, 106.8518])
Epoch 350, Loss: 3.1749, Predictions: tensor([ 92.2222,  95.2124, 109.0492])
Epoch 400, Loss: 5.2856, Predictions: tensor([ 91.8159,  94.8009, 108.4493])
Epoch 450, Loss: 8.4236, Predictions: tensor([ 96.8748,  99.8772, 114.1884])

Final predictions: tensor([ 97.9631, 100.9722, 115.4038, 100.9861, 116.0654,  98.9586,  79.5253,
        107.7582,

In [9]:
# Split data into training and validation sets
X_train, X_val, home_train, home_val, away_train, away_val, y_train, y_val = train_test_split(
    X_features, home_team_ids, away_team_ids, targets, 
    test_size=0.2, random_state=42
)

print(f"Train size: {len(X_train)}")
print(f"Validation size: {len(X_val)}")

# Create model for full training
model_full = TeamEmbeddings(num_teams=num_teams)
optimizer = torch.optim.Adam(model_full.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

# Training loop
model_full.train()
train_losses = []
val_losses = []

for epoch in range(100):
    # Training
    optimizer.zero_grad()
    train_pred = model_full(home_train, away_train, X_train).squeeze()
    train_loss = criterion(train_pred, y_train)
    train_loss.backward()
    torch.nn.utils.clip_grad_norm_(model_full.parameters(), max_norm=1.0)
    optimizer.step()
    
    # Validation
    model_full.eval()
    with torch.no_grad():
        val_pred = model_full(home_val, away_val, X_val).squeeze()
        val_loss = criterion(val_pred, y_val)
    model_full.train()
    
    train_losses.append(train_loss.item())
    val_losses.append(val_loss.item())
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Train Loss: {train_loss.item():.2f}, Val Loss: {val_loss.item():.2f}")

print("Training complete!")

Train size: 9578
Validation size: 2395
Epoch 0, Train Loss: 12722.87, Val Loss: 12588.77
Epoch 20, Train Loss: 11854.89, Val Loss: 11734.86
Epoch 40, Train Loss: 10500.94, Val Loss: 10343.90
Epoch 60, Train Loss: 8202.57, Val Loss: 8003.70
Epoch 80, Train Loss: 4802.92, Val Loss: 4574.79
Training complete!
