In [None]:
import sys
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import pandas as pd
# Append the absolute path of ../src (resolved against the current working
# directory) to the import search path so modules located there can be imported.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to the project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
from data import ApiFetcher
from utils import distribution_calculating, check_distribution
from model import TeamEmbeddings

In [None]:
# Load the game table and build the embedding model.
# NOTE(review): (2015, 2025) is presumably the season/year range to fetch —
# confirm against ApiFetcher.
api = ApiFetcher(2015, 2025)
df = api.df_with_id()

# The model is sized by the number of distinct ids in the home-team column.
# NOTE(review): this assumes every team id also shows up as a home team —
# confirm against the id scheme produced by df_with_id().
home_team_column = df['home_team_id']
num_teams = len(home_team_column.unique())
model = TeamEmbeddings(num_teams=num_teams)

# Report the number of trainable parameters (those with requires_grad set).
trainable_param_count = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
print(f"Model structure:\n{trainable_param_count} parameters")

In [None]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# Prepare the data.
# Feature columns come in (home, away) pairs, one pair per statistic; the
# comprehension below yields them in the order
# home_fga, away_fga, home_fg_pct, away_fg_pct, ..., home_pf, away_pf.
stat_names = ['fga', 'fg_pct', 'fg3a', 'fg3_pct', 'oreb', 'dreb',
              'ast', 'stl', 'blk', 'tov', 'pf']
feature_cols = [
    f'{side}_{stat}'
    for stat in stat_names
    for side in ('home', 'away')
]

# Numeric features as a float32 tensor (one row per row of df).
feature_matrix = df[feature_cols].values
X_features = torch.tensor(feature_matrix, dtype=torch.float32)

# Team ids as int64 tensors, passed to the model as its first two arguments.
home_team_ids, away_team_ids = (
    torch.tensor(df[id_col].values, dtype=torch.long)
    for id_col in ('home_team_id', 'away_team_id')
)

# Regression target: the 'home_pts' column as float32.
targets = torch.tensor(df['home_pts'].values, dtype=torch.float32)

In [None]:
# Summarise the dataset size against the model capacity.
# The trainable-parameter count is read from the model rather than being
# hard-coded, so the report stays correct if the architecture changes.
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("✅ Data ready:")
print(f"  Features: {X_features.shape}")
print(f"  Targets: {targets.shape}")
print(f"  Model params: {n_params}")
# Number of target values per trainable parameter.
print(f"  Data/params ratio: {len(targets)/n_params:.1f}")

In [None]:
# Smoke-test the forward pass on the first five rows, in eval mode and
# without tracking gradients.
first_five = slice(5)

model.eval()
with torch.no_grad():
    test_pred = model(
        home_team_ids[first_five],
        away_team_ids[first_five],
        X_features[first_five],
    )

# Show the predictions next to the true targets for a quick visual comparison.
print(f"Sample predictions: {test_pred.squeeze()}")
print(f"Actual targets:     {targets[first_five]}")
print("✅ Model is working!")

In [None]:
# Run before training: print the smallest and largest value of every
# parameter tensor as a baseline for the initial weights.
print("Weight ranges before training:")
for name, param in model.named_parameters():
    lowest = param.min().item()
    highest = param.max().item()
    print(f"{name}: min={lowest:.3f}, max={highest:.3f}")

In [None]:
# Sanity check: can the model overfit (memorise) a tiny sample?
# The loss should fall sharply if the model and training step are wired
# correctly.
model.train()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Keep only the first `small_sample` rows of every input tensor.
small_sample = 10
X_small, home_small, away_small, targets_small = (
    tensor[:small_sample]
    for tensor in (X_features, home_team_ids, away_team_ids, targets)
)

print("Test overfittingu do małej próbki:")
print(f"Targets: {targets_small}")

# 500 optimisation steps on the same small batch.
for epoch in range(500):
    optimizer.zero_grad()
    predictions = model(home_small, away_small, X_small).squeeze()
    loss = criterion(predictions, targets_small)
    loss.backward()

    # Clip the gradient norm as a safety measure.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    optimizer.step()

    # Progress report every 50 epochs (loss plus the first three predictions).
    if not epoch % 50:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Predictions: {predictions.detach()[:3]}")

# `predictions` and `loss` are the values from the last loop iteration,
# i.e. computed just before the final optimizer step.
print(f"\nFinal predictions: {predictions.detach()}")
print(f"Actual targets:    {targets_small}")
print(f"Final loss: {loss.item():.4f}")

In [None]:
# Train a fresh model on an 80/20 train/validation split of the full data.
# A single seed drives both the split and torch's global RNG, so the run is
# repeatable after Restart Kernel -> Run All (previously only the split was
# seeded and the model's initial weights differed on every run).
SEED = 42

# Split data into training and validation sets
X_train, X_val, home_train, home_val, away_train, away_val, y_train, y_val = train_test_split(
    X_features, home_team_ids, away_team_ids, targets,
    test_size=0.2, random_state=SEED
)

print(f"Train size: {len(X_train)}")
print(f"Validation size: {len(X_val)}")

# Create model for full training.
# Seed torch before construction so torch-based weight initialisation is
# repeatable.
torch.manual_seed(SEED)
model_full = TeamEmbeddings(num_teams=num_teams)
optimizer = torch.optim.Adam(model_full.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

# Training loop: each epoch is one optimizer step on the whole training set,
# followed by a gradient-free evaluation on the validation set.
model_full.train()
train_losses = []
val_losses = []

for epoch in range(100):
    # Training
    optimizer.zero_grad()
    train_pred = model_full(home_train, away_train, X_train).squeeze()
    train_loss = criterion(train_pred, y_train)
    train_loss.backward()
    # Cap the global gradient norm at 1.0 before applying the update.
    torch.nn.utils.clip_grad_norm_(model_full.parameters(), max_norm=1.0)
    optimizer.step()

    # Validation: switch to eval mode for the forward pass, then restore
    # train mode for the next epoch.
    model_full.eval()
    with torch.no_grad():
        val_pred = model_full(home_val, away_val, X_val).squeeze()
        val_loss = criterion(val_pred, y_val)
    model_full.train()

    # Keep the per-epoch loss history as plain Python floats.
    train_losses.append(train_loss.item())
    val_losses.append(val_loss.item())

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Train Loss: {train_loss.item():.2f}, Val Loss: {val_loss.item():.2f}")

print("Training complete!")