In [1]:
# 1. Load standard libraries FIRST
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import sys
import importlib
import inspect
import os

# 2. Verify standard libraries are healthy
print(f"Pandas version: {pd.__version__}")

# 3. NOW setup path and local imports
repo_root = "/files/pixlball"
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

import src.data as data
import src.model as model
import src.train as train
import src.config as config
import src.dataset as dataset
import src.evaluate as evaluate

from src.config import DEVICE 


# 4. Force a clean reload of your specific logic
importlib.reload(data)
importlib.reload(train)

# 5. THE SMOKE TEST
print("Signature check:", inspect.signature(data.prepare_nn_dataset))

Pandas version: 2.0.3
Signature check: (events_df, nn_layers_df, target_cols=['nn_target'], id_col='id', context_cols=False, keep_context_ids=False)


In [2]:
data_events = pd.read_parquet(os.path.join(repo_root, "data", "events_data.parquet"), engine="fastparquet")
data_360 = pd.read_parquet(os.path.join(repo_root, "data", "sb360_data.parquet"), engine="fastparquet")

In [3]:
admin_events = [
        'Starting XI', 'Half Start', 'Half End', 'Player On', 'Player Off',
        'Substitution', 'Tactical Shift', 'Referee Ball-Drop', 'Injury Stoppage',
        'Bad Behaviour', 'Shield', 'Goal Keeper'
    ]

cleaned_df = data.drop_events(data_events, rows_to_drop=admin_events)

cleaned_df = cleaned_df.sort_values(['match_id', 'timestamp'])

2462 events.


In [4]:
# -----------------------------
# Example usage
# -----------------------------
columns_to_drop = ['clearance_body_part',
                   'clearance_head',
                   'clearance_left_foot',
                   'clearance_other',
                   'clearance_right_foot',
                   'shot_technique',
                   'substitution_replacement_id',
                   'substitution_replacement',
                   'substitution_outcome',
                   'shot_saved_off_target',
                   'pass_miscommunication',
                   'goalkeeper_shot_saved_off_target',
                   'goalkeeper_punched_out',
                   'shot_first_time',
                   'shot_first_time',
                   'shot_body_part',
                   'related_events',
                   'pass_shot_assist', 
                   'pass_straight', 
                   'pass_switch', 
                   'pass_technique', 
                   'pass_through_ball',
                   'goalkeeper_body_part',
                   'goalkeeper_end_location', 
                   'goalkeeper_outcome', 
                   'goalkeeper_position', 
                   'goalkeeper_technique', 
                   'goalkeeper_type', 
                   'goalkeeper_penalty_saved_to_post', 
                   'goalkeeper_shot_saved_to_post', 
                   'goalkeeper_lost_out', 
                   'goalkeeper_Clear', 
                   'goalkeeper_In Play Safe',
                   'shot_key_pass_id',
                   'shot_one_on_one',
                   'shot_end_location',
                   'shot_type',
                   'pass_angle',
                   'pass_body_part',
                   'pass_type',
                   'pass_length',
                   'pass_outswinging',
                   'pass_inswinging',
                   'pass_cross', 
                   'pass_cut_back', 
                   'pass_deflected', 
                   'pass_goal_assist', 
                   'pass_recipient', 
                   'pass_recipient_id', 
                   'pass_assisted_shot_id', 
                   'pass_no_touch', 
                   'pass_end_location', 
                   'pass_aerial_won',
                   'pass_height',
                   'substitution_outcome_id',
                   'tactics',
                   'block_deflection',
                   'dribble_no_touch',
                   'shot_open_goal', 
                   'shot_saved_to_post',
                   'shot_redirect', 
                   'shot_follows_dribble',
                   'period',
                   'injury_stoppage_in_chanin',
                   'block_save_block',
                   'ball recovery_offensive',


                   ]
cleaned_df = data.drop_columns(cleaned_df, columns_to_drop)

# add lookahead outcome
df_with_targets = data.assign_lookahead_outcomes(cleaned_df, lookahead=6)

df_with_targets = data.add_ball_trajectory_features(df_with_targets)


counts of each outcome nn_target
Keep Possession    70920
Lose Possession    27465
Shot                4764
Name: count, dtype: int64


# Prepare 360 Data

In [5]:
df_with_targets

Unnamed: 0,50_50,ball_receipt_outcome,ball_recovery_recovery_failure,carry_end_location,clearance_aerial_won,counterpress,dribble_nutmeg,dribble_outcome,duel_outcome,duel_type,...,shot_deflected,bad_behaviour_card,block_offensive,foul_committed_offensive,foul_committed_penalty,foul_won_penalty,ball_recovery_offensive,nn_target,goal_flag,ball_trajectory_vector
0,,,,,,,,,,,...,,,,,,,,Lose Possession,0,"[61.0, 40.1, 61.0, 40.1, 61.0, 40.1, 61.0, 40.1]"
1,,,,,,,,,,,...,,,,,,,,Lose Possession,0,"[46.6, 41.5, 61.0, 40.1, 46.6, 41.5, 46.6, 41.5]"
2,,,,"[46.4, 41.6]",,,,,,,...,,,,,,,,Lose Possession,0,"[46.6, 41.5, 46.6, 41.5, 61.0, 40.1, 46.6, 41.5]"
3,,,,,,,,,,,...,,,,,,,,Lose Possession,0,"[72.9, 40.9, 46.6, 41.5, 46.6, 41.5, 61.0, 40.1]"
4,,,,,,,,,,,...,,,,,,,,Lose Possession,0,"[46.4, 41.6, 72.9, 40.9, 46.6, 41.5, 46.6, 41.5]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103144,,,,,,,,,,,...,,,,,,,,Shot,1,"[108.1, 40.1, 107.7, 40.1, 108.1, 40.1, 108.1,..."
103145,,,,,,,,,,,...,,,,,,,,Shot,0,"[107.8, 40.1, 108.1, 40.1, 107.7, 40.1, 108.1,..."
103146,,,,,,,,,,,...,,,,,,,,Shot,0,"[107.9, 40.1, 107.8, 40.1, 108.1, 40.1, 107.7,..."
103147,,,,,,,,,,,...,,,,,,,,Shot,0,"[107.9, 40.1, 107.9, 40.1, 107.8, 40.1, 108.1,..."


In [6]:
df_360 = data.assign_grid_cells(data_360)
nn_final = data.aggregate_nn_layers_vectorized(df_360)

# Finalize NN Df

In [7]:
import src.data as data

def prepare_nn_dataset_fixed(
        events_df,
        nn_layers_df,
        target_cols=['nn_target'],
        id_col='id',
        context_cols=False,
        temporal_context=True, 
        keep_context_ids=False
    ):
    # Base columns to keep
    cols_to_keep = [id_col] + target_cols

    # 1. Add Temporal Context (Ball Trajectory Vector)
    if temporal_context and 'ball_trajectory_vector' in events_df.columns:
        cols_to_keep.append('ball_trajectory_vector')

    # 2. If keeping match & possession IDs
    if keep_context_ids:
        for col in ['match_id', 'possession']:
            if col in events_df.columns:
                cols_to_keep.append(col)
        
    # 3. If keeping static context cols
    if context_cols:
        static_features = ['under_pressure', 'counterpress', 'dribble_nutmeg']
        for feat in static_features:
            if feat in events_df.columns:
                cols_to_keep.append(feat)

    events_trimmed = events_df[cols_to_keep].drop_duplicates(subset=id_col)
    return nn_layers_df.merge(events_trimmed, on=id_col, how='inner')

# Overwrite the function in the loaded 'data' module
data.prepare_nn_dataset = prepare_nn_dataset_fixed

print("Function successfully patched! You can now run the next cell.")

Function successfully patched! You can now run the next cell.


In [8]:
nn_dataset = data.prepare_nn_dataset(df_with_targets, nn_final, target_cols=['nn_target', 'goal_flag'], context_cols = True, keep_context_ids = True, temporal_context = True ) # adjust cols depending on model

# Neural Network final Data Prep

In [9]:
context_cols = [
    'under_pressure', 
    'counterpress', 
    'dribble_nutmeg'
]

# Impute NaN values with 0.0 (float)
# This assumes NaN means the event was NOT under pressure, NOT a counterpress, etc.
nn_dataset[context_cols] = nn_dataset[context_cols].fillna(0.0)


target_map = {"Keep Possession": 0, "Lose Possession": 1, "Shot": 2}

# Apply mapping
nn_dataset['nn_target_int'] = nn_dataset['nn_target'].map(target_map)



# Add context Columns
context_df = pd.DataFrame(nn_dataset['ball_trajectory_vector'].tolist(), index=nn_dataset.index)


# Check
print(nn_dataset.head())


                                     id  \
0  8b621ae4-ea81-415c-af41-9669db9bdd93   
1  4706efbe-767c-45aa-9351-09528a77d135   
2  084b9a88-4efa-4947-b94d-b89face472be   
3  27fa7d4d-d637-4487-98e2-5c078ad600c7   
4  764d437f-f799-4489-a38f-69fbb219a6fa   

                                          ball_layer  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
1  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
2  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
3  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
4  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   

                                     teammates_layer  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
1  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
2  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
3  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   
4  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0....   

                                     opponents_layer        nn_targ

In [10]:
voxels_list = data.generate_temporal_voxels(nn_dataset, lookback=3)

In [12]:
from sklearn.model_selection import train_test_split

# 1. Generate the 4D Voxels (Channels, Time, Height, Width)
# Lookback 3 = 4 frames total (t, t-1, t-2, t-3)
voxels_list = data.generate_temporal_voxels(nn_dataset, lookback=3)

# 2. Add as a column
nn_dataset['temporal_voxel'] = voxels_list

# 3. Create your Train/Test Split
# We stratify on the event target to keep class balance
train_df, test_df = train_test_split(
    nn_dataset, 
    test_size=0.2, 
    random_state=42, 
    stratify=nn_dataset['nn_target_int']
)

In [13]:
from torch.utils.data import Dataset

class VoxelPitchDataset(Dataset):
    def __init__(self, df, voxel_col='temporal_voxel'):
        self.df = df
        # Extract the voxels into a single numpy array for speed
        # voxels are already (3, 4, 12, 8) from your function
        self.voxels = np.stack(df[voxel_col].values).astype(np.float32)
        
        self.event_targets = torch.tensor(df['nn_target_int'].values, dtype=torch.long)
        self.goal_targets = torch.tensor(df['goal_flag'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.voxels[idx]), 
            self.event_targets[idx], 
            self.goal_targets[idx]
        )

In [14]:
# Create the datasets
import src.dataset as dataset

train_dataset_3d = VoxelPitchDataset(train_df)
test_dataset_3d = VoxelPitchDataset(test_df)

# SMOKE TEST: Check the shape of the first item
voxel, event, goal = train_dataset_3d[0]
print(f"Voxel Shape: {voxel.shape}") # MUST be [3, 4, 12, 8]

Voxel Shape: torch.Size([3, 4, 12, 8])


In [15]:
def train_3d_model(
    dataset, 
    event_class_weights, 
    goal_pos_weight, 
    epochs=15, 
    batch_size=32, 
    lr=0.001
):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    model = Tiny3DCNN_MultiTask().to(DEVICE)
    
    # Define Losses
    criterion_event = nn.CrossEntropyLoss(weight=event_class_weights.to(DEVICE))
    criterion_goal = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([goal_pos_weight]).to(DEVICE))
    
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for voxels, event_targets, goal_targets in dataloader:
            voxels = voxels.to(DEVICE)
            event_targets = event_targets.to(DEVICE)
            goal_targets = goal_targets.to(DEVICE)
            
            optimizer.zero_grad()
            
            # Forward pass (3D CNN expects 4D input: C, T, H, W)
            event_logits, goal_prob = model(voxels)
            
            # Calculate Losses
            loss_event = criterion_event(event_logits, event_targets)
            # Use raw logits for BCEWithLogits if your head doesn't have Sigmoid, 
            # OR adjust if your model already has Sigmoid (as defined in previous step)
            loss_goal = nn.functional.binary_cross_entropy(goal_prob, goal_targets)
            
            loss = loss_event + (2.0 * loss_goal) # Weight goal loss higher
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataloader):.4f}")
        
    return model

# The Goal Multi Task CNN

In [16]:
from collections import Counter

# ------------------------------------
# 1. Define input columns & targets
# ------------------------------------
# This assumes nn_dataset is already loaded and processed in previous cells.
layer_columns = ["ball_layer", "teammates_layer", "opponents_layer"]

# Ensure labels are in the correct format
event_targets = nn_dataset['nn_target_int'].values   # 0=keep, 1=lose, 2=shot (int)
# CRITICAL: Goal flags must be float for BCEWithLogitsLoss
goal_flags = nn_dataset['goal_flag'].values.astype(np.float32) 

# ------------------------------------
# 3. Compute class weights and positive weight
# ------------------------------------

# A. Event Weights (Multi-Class) - For CrossEntropyLoss
event_counts = Counter(event_targets)
total_events = len(event_targets)

# Using inverse frequency: total / count
class_weights_event = torch.tensor(
    [total_events / event_counts.get(c, 1) for c in range(len(event_counts))],
    dtype=torch.float32
).to(DEVICE)

# B. Goal Positive Weight (Binary) - For BCEWithLogitsLoss
goal_counts = Counter(goal_flags)

STABLE_GOAL_POS_WEIGHT = 3.0
goal_pos_weight = torch.tensor(STABLE_GOAL_POS_WEIGHT, dtype=torch.float32).to(config.DEVICE)

print(f"Goal Positive Weight (0/1 ratio): {goal_pos_weight.item():.2f}")


Goal Positive Weight (0/1 ratio): 3.00


In [18]:
class Tiny3DCNN_MultiTask(nn.Module):
    def __init__(self, num_event_classes=3):
        super(Tiny3DCNN_MultiTask, self).__init__()
        
        # Input shape: (Batch, 3, 4, 12, 8) -> (Channels, Time, Height, Width)
        
        # 1. Spatio-Temporal Feature Extractor
        self.features = nn.Sequential(
            # Block 1: Capture short-term motion
            nn.Conv3d(3, 16, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(),
            # We pool spatially but keep Time dimension at 4
            nn.MaxPool3d(kernel_size=(1, 2, 2)), 
            
            # Block 2: Higher level interactions
            nn.Conv3d(16, 32, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(),
            # Now we pool Time from 4 down to 2, and H/W further
            nn.MaxPool3d(kernel_size=(2, 2, 2))
        )
        
        # Flattening Calculation:
        # After Pool 1: (16, 4, 6, 4)
        # After Pool 2: (32, 2, 3, 2) 
        # Total: 32 * 2 * 3 * 2 = 384
        self.flatten_size = 384
        
        # 2. Shared Dense Layer
        self.fc_shared = nn.Sequential(
            nn.Linear(self.flatten_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3)
        )
        
        # 3. Multi-Task Heads
        self.event_head = nn.Linear(128, num_event_classes)
        
        self.goal_head = nn.Sequential(
            nn.Linear(128, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # x shape: (B, 3, 4, 12, 8)
        x = self.features(x)
        x = x.view(x.size(0), -1) # Flatten to (B, 384)
        x = self.fc_shared(x)
        
        event_logits = self.event_head(x)
        goal_prob = self.goal_head(x)
        
        return event_logits, goal_prob.squeeze(-1)

In [19]:
# Run the training
# We use the 3-item training loop we just built
from torch.utils.data import DataLoader

model_3d = train_3d_model(
    dataset=train_dataset_3d,
    event_class_weights=class_weights_event, # Ensure these are pre-calculated tensors
    goal_pos_weight=goal_pos_weight,
    epochs=15,
    batch_size=32 
)

Epoch 1/15, Loss: 1.1867
Epoch 2/15, Loss: 1.1696
Epoch 3/15, Loss: 1.1599
Epoch 4/15, Loss: 1.1290
Epoch 5/15, Loss: 1.0657
Epoch 6/15, Loss: 0.9971
Epoch 7/15, Loss: 0.9218
Epoch 8/15, Loss: 0.8653
Epoch 9/15, Loss: 0.8134
Epoch 10/15, Loss: 0.7678
Epoch 11/15, Loss: 0.7383
Epoch 12/15, Loss: 0.7060
Epoch 13/15, Loss: 0.6891
Epoch 14/15, Loss: 0.6657
Epoch 15/15, Loss: 0.6529


In [23]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, balanced_accuracy_score, roc_auc_score

def get_3d_predictions(model, dataset, device):
    model.eval()
    dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
    
    all_event_preds = []
    all_event_targets = []
    all_goal_probs = []
    all_goal_targets = []
    
    with torch.no_grad():
        for voxels, event_targets, goal_targets in dataloader:
            voxels = voxels.to(device)
            
            # Forward pass
            event_logits, goal_probs = model(voxels)
            
            # For multi-class (Events), take the argmax
            event_preds = torch.argmax(event_logits, dim=1)
            
            all_event_preds.extend(event_preds.cpu().numpy())
            all_event_targets.extend(event_targets.numpy())
            all_goal_probs.extend(goal_probs.cpu().numpy())
            all_goal_targets.extend(goal_targets.numpy())
            
    return np.array(all_event_targets), np.array(all_event_preds), \
           np.array(all_goal_targets), np.array(all_goal_probs)

# Get predictions
y_ev_true, y_ev_pred, y_goal_true, y_goal_probs = get_3d_predictions(model_3d, test_dataset_3d, DEVICE)

In [24]:
print("--- 3D CNN EVENT CONFUSION MATRIX ---")
ev_cm = confusion_matrix(y_ev_true, y_ev_pred)
print(ev_cm)

print("\n--- CLASSIFICATION REPORT ---")
target_names = ['Keep Possession', 'Lose Possession', 'Shot']
print(classification_report(y_ev_true, y_ev_pred, target_names=target_names))

print(f"Balanced Accuracy: {balanced_accuracy_score(y_ev_true, y_ev_pred):.4f}")

--- 3D CNN EVENT CONFUSION MATRIX ---
[[9000 2096 1392]
 [3262  890  505]
 [ 395   93  397]]

--- CLASSIFICATION REPORT ---
                 precision    recall  f1-score   support

Keep Possession       0.71      0.72      0.72     12488
Lose Possession       0.29      0.19      0.23      4657
           Shot       0.17      0.45      0.25       885

       accuracy                           0.57     18030
      macro avg       0.39      0.45      0.40     18030
   weighted avg       0.58      0.57      0.57     18030

Balanced Accuracy: 0.4535


In [25]:
from sklearn.metrics import roc_auc_score

# Assuming you ran the get_3d_predictions function provided earlier:
# y_goal_true: Actual 0/1 goal flags
# y_goal_probs: Model's predicted probability (0.0 to 1.0)

auc_score = roc_auc_score(y_goal_true, y_goal_probs)
print(f"3D CNN Goal AUC-ROC: {auc_score:.4f}")

3D CNN Goal AUC-ROC: 0.8227


In [26]:
# For events, we need the raw probabilities (logits passed through Softmax)
# Ensure your get_3d_predictions function returns event_probs instead of just preds
event_auc_ovr = roc_auc_score(y_ev_true, event_probs, multi_class='ovr')
print(f"3D CNN Event Multi-Class AUC (OvR): {event_auc_ovr:.4f}")

NameError: name 'event_probs' is not defined

# Preparing the Context CNN

In [22]:
import numpy as np
from sklearn.model_selection import train_test_split

layer_columns = ["ball_layer", "teammates_layer", "opponents_layer"]
VALIDATION_SIZE = 0.20
RANDOM_SEED = 42

# --- 1. Define ALL inputs and targets ---
# Input 1: The Grid Layers (X_features)
X_features = nn_dataset[layer_columns].reset_index(drop=True)

# Input 2: The Contextual 1D Features (X_context)
# CRITICAL: Ensure this DataFrame is aligned with X_features
X_context = context_df.reset_index(drop=True)

# Targets
event_targets = nn_dataset['nn_target_int'].values
goal_flags = nn_dataset['goal_flag'].values.astype(np.float32)



# CRITICAL: Assign the 8 returned arrays/DataFrames to 8 descriptive variables
(
    X_feat_train,      # 1. Grid Layers (Train)
    X_feat_val,        # 2. Grid Layers (Validation)
    X_ctx_train,       # 3. Context Features (Train)
    X_ctx_val,         # 4. Context Features (Validation)
    y_event_train,     # 5. Event Targets (Train)
    y_event_val,       # 6. Event Targets (Validation)
    y_goal_train,      # 7. Goal Targets (Train)
    y_goal_val         # 8. Goal Targets (Validation)
) = train_test_split(
    X_features,        # Input 1
    X_context,         # Input 2 (NEW)
    event_targets,     # Input 3
    goal_flags,        # Input 4
    test_size=VALIDATION_SIZE, 
    random_state=RANDOM_SEED,
    stratify=event_targets # Stratify only on the multi-class target
)

# --- 3. Instantiate the two Contextual Dataset objects ---

# Training Dataset (uses four 'train' splits)
train_dataset_context = dataset.ContextBallVectorPitchDatasetMultiTask(
    nn_layers_df=X_feat_train,          # Grid Layers (Train)
    event_targets=y_event_train,        # Event Targets (Train)
    goal_flags=y_goal_train,            # Goal Targets (Train)
    contextual_features_df=X_ctx_train  # Context Features (Train)
)

# Validation Dataset (uses four 'val' splits)
validation_dataset_context = dataset.ContextBallVectorPitchDatasetMultiTask(
    nn_layers_df=X_feat_val,            # Grid Layers (Validation)
    event_targets=y_event_val,          # Event Targets (Validation)
    goal_flags=y_goal_val,              # Goal Targets (Validation)
    contextual_features_df=X_ctx_val    # Context Features (Validation)
)

print(f"Total training samples: {len(train_dataset_context)}")
print(f"Total validation samples: {len(validation_dataset_context)}")

Total training samples: 72117
Total validation samples: 18030


In [21]:
# Assuming event_class_weights and goal_pos_weight are defined from previous cells
NUM_CONTEXT_FEATURES = 8 

print("Starting training for Contextual CNN Baseline...")

# Modified the Function in Loss to take correct loss function -> needs to be changed for baseline model again

context_baseline_model = train.train_model_context_threat(
    dataset=train_dataset_context, 
    event_class_weights=class_weights_event, # Use your calculated weights
    goal_pos_weight=goal_pos_weight,         # Use your calculated pos_weight
    num_context_features=NUM_CONTEXT_FEATURES
)

print("\nContextual CNN Training complete.")

Context CNN Epoch 1:   1%|          | 20/2254 [00:00<00:11, 198.46it/s, ev_loss=2.9990, loss=3.4483, sh_loss=0.2995]

Starting training for Contextual CNN Baseline...


Context CNN Epoch 1:  51%|█████▏    | 1160/2254 [00:05<00:05, 212.38it/s, ev_loss=1.5839, loss=1.5839, sh_loss=0.0000]


KeyboardInterrupt: 

In [None]:
# Assuming evaluate_model_context is imported and available

print("\nEvaluating Contextual CNN Model...")

metrics = evaluate.evaluate_model_context_threat(
    model=context_baseline_model, 
    dataset=validation_dataset_context # Evaluate on the contextual dataset
)



In [None]:
import numpy as np
# Assuming metrics contains the result from evaluate_model_context_threat

event_probs = metrics['event_probs']

print("P(Keep) | P(Lose) | P(Shot)")
print("-------------------------------")
print(event_probs[:5])

# You can look at the average predicted probability for the Shot class across all events:
avg_p_shot = np.mean(event_probs[:, 2])
print(f"\nAverage Predicted P(Shot) across all events: {avg_p_shot:.4f}")

In [None]:
import numpy as np
import pandas as pd
# Assuming metrics contains the result from evaluate_model_context_threat

print("--- Goal Prediction Probabilities (xG) Analysis ---")

goal_probs = metrics['goal_probs']
goal_labels = metrics['goal_labels'] # Actual outcome (0=No Goal, 1=Goal)

print(f"Number of Shots Evaluated: {len(goal_probs)}")

# 1. Total xG vs. Actual Goals
total_predicted_xg = np.sum(goal_probs)
total_true_goals = np.sum(goal_labels)
avg_xg_per_shot = np.mean(goal_probs)

print(f"\nTotal Predicted xG: {total_predicted_xg:.2f}")
print(f"Total True Goals Scored: {total_true_goals:.2f}")
print(f"Average Predicted xG per Shot: {avg_xg_per_shot:.4f}")

# 2. Calibration Check (Optional but helpful)
# Compare the average predicted xG for shots that were goals vs. shots that were misses.

# Create a DataFrame for easy slicing
xg_df = pd.DataFrame({'xg': goal_probs, 'goal': goal_labels})

avg_xg_goal = xg_df[xg_df['goal'] == 1]['xg'].mean()
avg_xg_miss = xg_df[xg_df['goal'] == 0]['xg'].mean()

print("\n-- Calibration Check --")
print(f"Average xG for True Goals (should be high): {avg_xg_goal:.4f}")
print(f"Average xG for Missed Shots (should be low): {avg_xg_miss:.4f}")