# Load Training Data

In [None]:
import os
from random import shuffle

import numpy as np
import pandas as pd
import torch

from models.simple_lstm import SimpleLSTM
from pipeline.create_window_tensor import CreateWindowTensor
from data.feature_engineering import FeatureEngineering
from data.preprocessing import DataProcessing, PreProcessing

# Root directory of the race dataset. The default is the original developer's local
# checkout; set the HTS_DATA_DIR environment variable to run the notebook elsewhere.
data_dir = os.environ.get('HTS_DATA_DIR', '/Users/sebastian.angermund/Projects/data/hts')
TRAIN_DATA_DIR = os.path.join(data_dir, 'train')  # files consumed by the training loop
TEST_DATA_DIR = os.path.join(data_dir, 'test')  # files consumed by the test cell

In [2]:
class EarlyStopping:
    """Track a monitored score and signal when it has stopped improving.

    Call the instance once per validation round with the latest score. It
    returns True once `patience` consecutive rounds have passed without an
    improvement larger than `min_delta`. The stop flag latches: after it has
    been set, every later call keeps returning True.
    """

    _VALID_MODES = ('min', 'max')

    def __init__(self, patience=3, min_delta=1e-4, mode='min', verbose=True):
        """Args:
        patience (int): How many validations to wait before stopping if no improvement.
        min_delta (float): Minimum change to qualify as improvement.
        mode (str): 'min' for minimizing loss, 'max' for maximizing score (e.g., accuracy).
        verbose (bool): Whether to print the counter after each non-improving validation.

        Raises:
        ValueError: If mode is neither 'min' nor 'max'.

        """
        # Reject typos such as 'minimum' up front; previously any value other
        # than 'min' was silently treated as 'max'.
        if mode not in self._VALID_MODES:
            raise ValueError(f"mode must be one of {self._VALID_MODES}, got {mode!r}")

        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.verbose = verbose
        self.best_score = None  # best score seen so far; None until the first call
        self.counter = 0  # consecutive validations without improvement
        self.early_stop = False

    def __call__(self, current_score):
        """Record `current_score` and return True if training should stop."""
        # The first score only establishes the baseline.
        if self.best_score is None:
            self.best_score = current_score
            return False

        if self.mode == 'min':
            improvement = current_score < self.best_score - self.min_delta
        else:
            improvement = current_score > self.best_score + self.min_delta

        if improvement:
            self.best_score = current_score
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop


In [None]:
import os
from random import shuffle

import numpy as np
import pandas as pd
import torch

from models.simple_lstm import SimpleLSTM
from pipeline.create_window_tensor import CreateWindowTensor
from data.feature_engineering import FeatureEngineering
from data.preprocessing import DataProcessing, PreProcessing

# Root directory of the race dataset. The default is the original developer's local
# checkout; set the HTS_DATA_DIR environment variable to run the notebook elsewhere.
data_dir = os.environ.get('HTS_DATA_DIR', '/Users/sebastian.angermund/Projects/data/hts')
TRAIN_DATA_DIR = os.path.join(data_dir, 'train')  # files consumed by the training loop
TEST_DATA_DIR = os.path.join(data_dir, 'test')  # files consumed by the test cell


class EarlyStopping:
    """Track a monitored score and signal when it has stopped improving.

    Call the instance once per validation round with the latest score. It
    returns True once `patience` consecutive rounds have passed without an
    improvement larger than `min_delta`. The stop flag latches: after it has
    been set, every later call keeps returning True.
    """

    _VALID_MODES = ('min', 'max')

    def __init__(self, patience=3, min_delta=1e-4, mode='min', verbose=True):
        """Args:
        patience (int): How many validations to wait before stopping if no improvement.
        min_delta (float): Minimum change to qualify as improvement.
        mode (str): 'min' for minimizing loss, 'max' for maximizing score (e.g., accuracy).
        verbose (bool): Whether to print the counter after each non-improving validation.

        Raises:
        ValueError: If mode is neither 'min' nor 'max'.

        """
        # Reject typos such as 'minimum' up front; previously any value other
        # than 'min' was silently treated as 'max'.
        if mode not in self._VALID_MODES:
            raise ValueError(f"mode must be one of {self._VALID_MODES}, got {mode!r}")

        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.verbose = verbose
        self.best_score = None  # best score seen so far; None until the first call
        self.counter = 0  # consecutive validations without improvement
        self.early_stop = False

    def __call__(self, current_score):
        """Record `current_score` and return True if training should stop."""
        # The first score only establishes the baseline.
        if self.best_score is None:
            self.best_score = current_score
            return False

        if self.mode == 'min':
            improvement = current_score < self.best_score - self.min_delta
        else:
            improvement = current_score > self.best_score + self.min_delta

        if improvement:
            self.best_score = current_score
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop


def _report_skipped_race(pkl_file, status):
    """Print the standard three-line notice for a race file that is being skipped."""
    print(f'#### BROKEN DF: {pkl_file}')
    print(f'#### STATUS: {status}')
    print('#### SKIPPING')


def combine_races_to_tensor(pkl_files,
                            training_features,
                            target,
                            limit_contestants,
                            window_timesteps,
                            randomize):
    """Turn a collection of pickled race DataFrames into one stacked (X, y) pair.

    Each file is run through PreProcessing -> DataProcessing ->
    FeatureEngineering -> CreateWindowTensor. Files that fail a stage are
    reported on stdout and skipped; the windows of the remaining files are
    concatenated along the first axis.

    Args:
        pkl_files: Iterable of paths to pickled race DataFrames.
        training_features: Feature column names forwarded to the pipeline stages.
        target: Name of the target column.
        limit_contestants: Contestant limit forwarded to FeatureEngineering and
            used as `n_horses` for the windowing step.
        window_timesteps: Window length; races with fewer feature rows are skipped.
        randomize: If True, shuffle the samples with a fixed seed (42) so the
            order is reproducible.

    Returns:
        Tuple (X, y) of concatenated arrays as produced by
        `CreateWindowTensor.create_sliding_windows2`.

    Raises:
        ValueError: If no file yields any windows (empty input or every race skipped).
    """
    X_list = []
    y_list = []
    n_files = 0
    for pkl_file in pkl_files:
        n_files += 1
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        df_race = pd.read_pickle(pkl_file)

        setup = PreProcessing(df=df_race, target=target)

        if not setup.valid:
            _report_skipped_race(
                pkl_file,
                f'n_horses: {setup.n_horses}, winner_index: {setup.winner_index}',
            )
            continue

        data_prep = DataProcessing(df=setup.df,
                                   winner_index=setup.winner_index,
                                   training_features=training_features)

        # The second return value of process_data() is not needed here.
        df_data_scaled, _ = data_prep.process_data()

        feature_prep = FeatureEngineering(df=df_data_scaled,
                                          training_features=training_features,
                                          target=target,
                                          limit_contestants=limit_contestants,
                                          target_int_mapping=setup.target_int_mapping,
                                          target_mapping=setup.target_mapping,
                                          n_horses=setup.n_horses,
                                          winner_index=setup.winner_index)
        try:
            df_feature = feature_prep.prepare_features()
        except ValueError as e:
            _report_skipped_race(pkl_file, f'{e}')
            continue

        # A race shorter than one window cannot produce a single sample.
        if len(df_feature) < window_timesteps:
            _report_skipped_race(pkl_file, 'Not enough data points')
            continue

        windower = CreateWindowTensor(df=df_feature,
                                      target=target,
                                      n_horses=limit_contestants,
                                      window_timesteps=window_timesteps)

        X, y, _ = windower.create_sliding_windows2()

        X_list.append(X)
        y_list.append(y)

    # np.concatenate([]) fails with an unhelpful message; say what actually went wrong.
    if not X_list:
        raise ValueError(
            f'No valid races could be converted to tensors ({n_files} file(s) given, all skipped).'
        )

    X = np.concatenate(X_list)
    y = np.concatenate(y_list)

    if randomize:
        # A private RandomState(42) yields the same permutation as seeding the
        # global generator with 42, without clobbering global RNG state.
        rng = np.random.RandomState(42)
        indices = np.arange(X.shape[0])
        rng.shuffle(indices)
        X = X[indices]
        y = y[indices]

    return X, y


def get_batch(train_files, training_features, target, contestants, window_timesteps, randomize_training_data, val_files=None):
    """Build training (and optionally validation) arrays from lists of race files.

    Args:
        train_files: Paths of the race files used for training.
        training_features: Feature column names forwarded to `combine_races_to_tensor`.
        target: Name of the target column.
        contestants: Contestant limit forwarded as `limit_contestants`.
        window_timesteps: Window length forwarded to `combine_races_to_tensor`.
        randomize_training_data: Whether to shuffle the samples. Note that the
            same flag is applied to the validation set as well.
        val_files: Optional paths of validation race files. None or an empty
            list means no validation data.

    Returns:
        Tuple (X_train, y_train, X_val, y_val). When no validation files are
        given, X_val and y_val are empty lists.
    """
    # `val_files` defaults to None rather than [] to avoid a shared mutable default.
    X_train, y_train = combine_races_to_tensor(
        train_files,
        training_features,
        target,
        contestants,
        window_timesteps,
        randomize_training_data
    )
    if val_files:
        X_val, y_val = combine_races_to_tensor(
            val_files,
            training_features,
            target,
            contestants,
            window_timesteps,
            randomize_training_data
        )
    else:
        X_val, y_val = [], []
    return X_train, y_train, X_val, y_val


def _build_loader(X, y, batch_size, shuffle):
    """Wrap features and one-hot labels in a DataLoader of (float32 X, int64 class index)."""
    dataset = torch.utils.data.TensorDataset(
        torch.tensor(X, dtype=torch.float32),
        # The loss is fed class indices, so collapse the one-hot rows with argmax.
        torch.tensor(np.argmax(y, axis=1), dtype=torch.long),
    )
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def _evaluate_loss(model, loss_fn, loader, device):
    """Return the sample-weighted mean loss over `loader`; leaves the model in train mode."""
    model.eval()
    total = 0.0
    with torch.no_grad():
        for val_X, val_y in loader:
            val_X, val_y = val_X.to(device), val_y.to(device)
            total += loss_fn(model(val_X), val_y).item() * val_X.size(0)
    model.train()
    return total / len(loader.dataset)


def fit_model(model, optimizer, loss_fn, X_train, y_train, X_val, y_val, epochs=5, batch_size=64, device='cuda',
              early_stopping=None):
    """Train `model` for up to `epochs` epochs and record per-epoch losses.

    Args:
        model: torch module; must already live on `device`.
        optimizer: Optimizer bound to the model's parameters.
        loss_fn: Loss taking (logits, class indices), e.g. CrossEntropyLoss.
        X_train, y_train: Training features and one-hot labels (2-D, argmax over axis 1).
        X_val, y_val: Validation features and one-hot labels. May be empty, in
            which case validation is skipped and NaN is recorded as `val_loss`.
        epochs: Maximum number of passes over the training data.
        batch_size: Mini-batch size for both loaders.
        device: Device each mini-batch is moved to.
        early_stopping: Optional callable taking the epoch's validation loss and
            returning True when training should stop. Only consulted when
            validation data is present.

    Returns:
        dict with keys 'loss' and 'val_loss', each a list with one entry per
        completed epoch.

    Raises:
        ValueError: If the training set is empty.
    """
    if len(X_train) == 0:
        raise ValueError('X_train is empty; nothing to fit.')

    dataloader = _build_loader(X_train, y_train, batch_size, shuffle=True)

    # An empty validation set is a legitimate input; building a loader from it
    # would fail in argmax, so skip validation in that case.
    has_validation = len(X_val) > 0
    val_dataloader = _build_loader(X_val, y_val, batch_size, shuffle=False) if has_validation else None

    history = {'loss': [], 'val_loss': []}

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch_X, batch_y in dataloader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = loss_fn(outputs, batch_y)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch mean is exact even when the last batch is short.
            running_loss += loss.item() * batch_X.size(0)

        epoch_loss = running_loss / len(dataloader.dataset)
        history['loss'].append(epoch_loss)

        if has_validation:
            val_loss = _evaluate_loss(model, loss_fn, val_dataloader, device)
        else:
            val_loss = float('nan')
        history['val_loss'].append(val_loss)

        print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}")

        if has_validation and early_stopping is not None and early_stopping(val_loss):
            break

    return history


def partition_race_files(path_to_files, partition_size):
    """Randomly split the files in a directory into partitions of roughly equal size.

    Args:
        path_to_files: Directory whose entries (as listed by os.listdir) are partitioned.
        partition_size: Target number of files per partition; must be >= 1.

    Returns:
        dict mapping 'partition_<i>' to a list of full file paths. There are
        `len(files) // partition_size` partitions (at least one when the
        directory is non-empty); files are dealt round-robin, so a partition
        may hold more than `partition_size` files. An empty directory yields {}.

    Raises:
        ValueError: If partition_size is smaller than 1.
    """
    if partition_size < 1:
        raise ValueError(f'partition_size must be >= 1, got {partition_size}')

    files = os.listdir(path_to_files)
    if not files:
        return {}

    shuffle(files)
    # With fewer files than partition_size the floor division gives 0, which
    # used to raise ZeroDivisionError below; fall back to a single partition.
    n_partitions = max(1, len(files) // partition_size)
    partitions = {f'partition_{i}': [] for i in range(n_partitions)}
    for i, file in enumerate(files):
        partitions[f'partition_{i % n_partitions}'].append(os.path.join(path_to_files, file))
    return partitions


# Setup and run training loop:
# The training files are split into partitions; each partition is loaded, windowed
# and fitted in turn so only one partition's tensors are built per iteration.


# Number of race files per partition. This is NOT the DataLoader mini-batch size,
# which is passed to fit_model separately below.
batch_size = 32

batches = partition_race_files(TRAIN_DATA_DIR, batch_size)

# Data / model configuration (also read by the test cell further down).
limit_contestants = 6
target = 'finishOrder'
added_features = ['leader', 'distance_to_leader']
training_features = ['distance_to_finish', 'v_odds']
window_timesteps = 120

model = SimpleLSTM(
    training_features=training_features,
    added_features=added_features,
    num_contestants=limit_contestants,
)

# Use the MPS backend when available, otherwise fall back to CPU.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f'Using device: {device}')
model.to(device)

# Model, optimizer and loss are created once, outside the loop, so training
# continues from the previous partition's weights and optimizer state.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
loss_fn = torch.nn.CrossEntropyLoss()

for batch, files in batches.items():
    print(f'Loading {batch}')
    # Hold out the first 20% of this partition's files for validation. The split
    # is by file, so every window of a given file lands on one side only.
    validation_part = int(len(files) * 0.2)
    train_files = files[validation_part:]
    val_files = files[:validation_part]
    X_train, y_train, X_val, y_val = get_batch(
        train_files=train_files,
        training_features=training_features,
        target=target,
        contestants=limit_contestants,
        window_timesteps=window_timesteps,
        randomize_training_data=False,
        val_files=val_files
    )
    # `epochs` is left at fit_model's default. `history` is overwritten on every
    # iteration, so only the last partition's losses remain after the loop.
    history = fit_model(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        X_train=X_train,
        y_train=y_train,
        X_val=X_val,
        y_val=y_val,
        batch_size=64,
        device=device
    )


Using device: mps
Loading partition_0
X_train shape: (23689, 120, 24), y_train shape: (23689, 6)
X_val shape: (5334, 120, 24), y_val shape: (5334, 6)
Fitting model...
Epoch [1/5], Train Loss: 1.7786, Val Loss: 1.9162
Epoch [2/5], Train Loss: 1.7758, Val Loss: 1.9411
Epoch [3/5], Train Loss: 1.7759, Val Loss: 1.9364
Epoch [4/5], Train Loss: 1.7757, Val Loss: 1.9282
Epoch [5/5], Train Loss: 1.7755, Val Loss: 1.9394
Loading partition_1
X_train shape: (23728, 120, 24), y_train shape: (23728, 6)
X_val shape: (5295, 120, 24), y_val shape: (5295, 6)
Fitting model...
Epoch [1/5], Train Loss: 1.6886, Val Loss: 1.5987
Epoch [2/5], Train Loss: 1.6659, Val Loss: 1.5960
Epoch [3/5], Train Loss: 1.6651, Val Loss: 1.5933
Epoch [4/5], Train Loss: 1.6642, Val Loss: 1.5990
Epoch [5/5], Train Loss: 1.6643, Val Loss: 1.5814
Loading partition_2
X_train shape: (23451, 120, 24), y_train shape: (23451, 6)
X_val shape: (5382, 120, 24), y_val shape: (5382, 6)
Fitting model...
Epoch [1/5], Train Loss: 1.6982, Va

# Test

In [4]:
import json
import os

import torch
from sklearn.calibration import calibration_curve
from sklearn.metrics import auc, average_precision_score, brier_score_loss, log_loss, precision_recall_curve, roc_curve


def get_testing_data(window_timesteps, training_features, target, limit_contestants, n_test_files=25):
    """Load up to `n_test_files` races from TEST_DATA_DIR as one (X_test, y_test) pair.

    Args:
        window_timesteps: Window length forwarded to `combine_races_to_tensor`.
        training_features: Feature column names forwarded to `combine_races_to_tensor`.
        target: Name of the target column.
        limit_contestants: Contestant limit forwarded to `combine_races_to_tensor`.
        n_test_files: Maximum number of files to load (limit for sandboxing).

    Returns:
        Tuple (X_test, y_test) as returned by `combine_races_to_tensor`, unshuffled.
    """
    # os.listdir returns entries in arbitrary order; sort so the same subset of
    # files is evaluated on every run and on every machine.
    file_names = sorted(os.listdir(TEST_DATA_DIR))
    pkl_files = [os.path.join(TEST_DATA_DIR, name) for name in file_names[:n_test_files]]
    X_test, y_test = combine_races_to_tensor(
        pkl_files=pkl_files,
        training_features=training_features,
        target=target,
        limit_contestants=limit_contestants,
        window_timesteps=window_timesteps,
        randomize=False
    )
    return X_test, y_test

def run_model(model, X_test, y_test, device, batch_size=32):
    """Evaluate `model` on a test set and return accuracy, mean loss and probabilities.

    Args:
        model: torch module; must already live on `device`.
        X_test: Test features (array-like convertible to a float32 tensor).
        y_test: One-hot labels, 2-D; the class index is taken with argmax over axis 1.
        device: Device each mini-batch is moved to.
        batch_size: Evaluation mini-batch size.

    Returns:
        Tuple (accuracy, avg_loss, y_pred_all) where accuracy is the fraction
        of samples whose argmax prediction equals the label, avg_loss is the
        sample-weighted mean cross-entropy, and y_pred_all is a NumPy array of
        softmax probabilities, one row per sample, in input order.

    Raises:
        ValueError: If the test set is empty.
    """
    if len(X_test) == 0:
        raise ValueError('X_test is empty; nothing to evaluate.')

    model.eval()

    # Keep the full dataset on the CPU and move one batch at a time to the
    # device, so the whole test set never has to fit in device memory.
    test_dataset = torch.utils.data.TensorDataset(
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(np.argmax(y_test, axis=1), dtype=torch.long),  # one-hot -> class indices
    )
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    total_loss = 0.0
    correct = 0
    total = 0
    y_preds = []

    loss_fn = torch.nn.CrossEntropyLoss()

    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            loss = loss_fn(outputs, batch_y)
            # Weight by batch size so a short final batch does not skew the mean.
            total_loss += loss.item() * batch_X.size(0)

            preds = torch.softmax(outputs, dim=1)  # predicted class probabilities
            y_preds.append(preds.cpu().numpy())

            predicted_classes = torch.argmax(preds, dim=1)
            correct += (predicted_classes == batch_y).sum().item()
            total += batch_X.size(0)

    avg_loss = total_loss / total
    accuracy = correct / total
    y_pred_all = np.concatenate(y_preds, axis=0)

    return accuracy, avg_loss, y_pred_all

def evaluate_predictions(y_true, y_pred, mode='micro'):
    """Compute evaluation metrics for predicted class probabilities.

    Args:
        y_true: One-hot ground-truth labels, same shape as `y_pred`.
        y_pred: Predicted probabilities, one row per sample.
        mode: 'micro' flattens both arrays and scores every (sample, class)
            entry as an independent binary prediction; 'macro' compares the
            argmax of each row.

    Returns:
        dict of metrics. For 'micro': 'average_precision', 'brier_score',
        'log_loss', plus 'calibration_curve', 'roc_curve' (including 'auc')
        and 'pr_curve' as dicts of lists. For 'macro': 'winner_accuracy'.

    Raises:
        ValueError: If mode is neither 'micro' nor 'macro'.
    """
    # Fail loudly on a typo instead of silently returning an empty dict.
    if mode not in ('micro', 'macro'):
        raise ValueError(f"mode must be 'micro' or 'macro', got {mode!r}")

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    results = {}

    if mode == 'micro':
        # Flatten for metrics that treat each horse separately
        y_true_flat = y_true.flatten()
        y_pred_flat = y_pred.flatten()

        results['average_precision'] = average_precision_score(y_true_flat, y_pred_flat, average='micro')
        results['brier_score'] = brier_score_loss(y_true_flat, y_pred_flat)
        results['log_loss'] = log_loss(y_true_flat, y_pred_flat)

        # Curves (converted to plain lists so the result is JSON-serializable)
        prob_true, prob_pred = calibration_curve(y_true_flat, y_pred_flat, n_bins=10)
        fpr, tpr, _ = roc_curve(y_true_flat, y_pred_flat)
        precision, recall, _ = precision_recall_curve(y_true_flat, y_pred_flat)

        results['calibration_curve'] = {'prob_true': prob_true.tolist(), 'prob_pred': prob_pred.tolist()}
        results['roc_curve'] = {'fpr': fpr.tolist(), 'tpr': tpr.tolist(), 'auc': auc(fpr, tpr)}
        results['pr_curve'] = {'precision': precision.tolist(), 'recall': recall.tolist()}

    else:
        # Winner prediction by argmax; y_true is expected to be one-hot here.
        y_pred_indices = np.argmax(y_pred, axis=1)
        y_true_indices = np.argmax(y_true, axis=1)

        winner_accuracy = np.mean(y_pred_indices == y_true_indices)
        results['winner_accuracy'] = winner_accuracy

    return results

# Main testing block
# Reuses the globals defined by the training cell (window_timesteps,
# training_features, target, limit_contestants, model, device), so that cell
# must have been run first.
X_test, y_test = get_testing_data(
    window_timesteps=window_timesteps,
    training_features=training_features,
    target=target,
    limit_contestants=limit_contestants
)

# y_pred holds the per-sample softmax probabilities returned by run_model.
accuracy, loss, y_pred = run_model(model, X_test, y_test, device)

# Compute both micro (probabilities) and macro (winner picking) evaluations
micro_metrics = evaluate_predictions(y_test, y_pred, mode='micro')
macro_metrics = evaluate_predictions(y_test, y_pred, mode='macro')

print("\n--- Scalar Metrics ---")
print(json.dumps({
    'model_accuracy': accuracy,
    'model_loss': loss,
    # micro_metrics is computed above but deliberately not printed here; to
    # include it, unpack it into this dict:
    # **micro_metrics
}, indent=4))

# NOTE: 'winner_accuracy' and 'model_accuracy' are both argmax(prediction) vs
# argmax(label) over the same samples, so the two numbers are expected to match.
print("\n--- Winner Prediction Metrics ---")
print(json.dumps(macro_metrics, indent=4))



--- Scalar Metrics ---
{
    "model_accuracy": 0.23603021477975974,
    "model_loss": 1.7950656537277117
}

--- Winner Prediction Metrics ---
{
    "winner_accuracy": 0.23603021477975974
}
