In [1]:
!git clone https://github.com/juliancanaless/fantasy-rl-draft.git

Cloning into 'fantasy-rl-draft'...
remote: Enumerating objects: 175, done.[K
remote: Counting objects: 100% (175/175), done.[K
remote: Compressing objects: 100% (128/128), done.[K
remote: Total 175 (delta 83), reused 133 (delta 41), pack-reused 0 (from 0)[K
Receiving objects: 100% (175/175), 8.90 MiB | 9.98 MiB/s, done.
Resolving deltas: 100% (83/83), done.


In [3]:
%cd fantasy-rl-draft
!pip install stable-baselines3 sb3-contrib

/content/fantasy-rl-draft
Collecting stable-baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting sb3-contrib
  Downloading sb3_contrib-2.6.0-py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<

In [7]:
import sys
import os

# Location of the cloned repository; adjust if the checkout lives elsewhere.
REPO_ROOT = '/content/fantasy-rl-draft'

# Put the repository root on the import path so the `src` package resolves.
# Guarded so that re-running the cell does not stack duplicate entries.
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)

from src.multiYearTraining import train_multi_year_model

In [9]:
import warnings

# Hide every DeprecationWarning, plus any warning whose message matches the
# `Kernel._parent_header` pattern, to keep the cell outputs readable.
warnings.simplefilter('ignore', DeprecationWarning)
warnings.filterwarnings('ignore', message='.*Kernel._parent_header.*')


In [10]:
# Print the NVIDIA driver/GPU status of this runtime (sanity check that a GPU is attached).
!nvidia-smi

Sat Jun 28 00:19:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   35C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [14]:
# tests/eval_generalization.py - Evaluate the model on the 2021-2023 training seasons and the unseen 2024 season

import numpy as np
import pandas as pd
import tqdm
from pathlib import Path
from sb3_contrib import MaskablePPO
from sb3_contrib.common.wrappers import ActionMasker
from src.fantasyDraftEnv import FantasyDraftEnv

def evaluate_on_year(model, board_data, year_name, num_episodes=300):
    """Evaluate the model against the environment's baseline on one draft board.

    Every episode builds a fresh 12-team, 16-round ``FantasyDraftEnv`` with the
    agent placed in a uniformly random draft slot (1-12), lets the policy pick
    stochastically (``deterministic=False``) under the environment's action
    mask until the episode ends, and then scores both the agent's lineup and
    the environment's baseline.

    Args:
        model: Object exposing ``predict(obs, deterministic=..., action_masks=...)``
            (a loaded ``MaskablePPO`` in this notebook).
        board_data: Draft board handed to ``FantasyDraftEnv`` as ``board_df``.
        year_name: Label used in the progress message and stored under ``"year"``.
        num_episodes: Number of drafts to simulate. With ``0`` the summary
            statistics are NaN because they are means over empty lists.

    Returns:
        dict with the per-episode lists ``agent_scores``, ``baseline_scores``
        and ``positions`` (draft slots), plus the summary values ``agent_mean``,
        ``baseline_mean``, ``improvement`` (difference of means),
        ``improvement_pct`` (relative to the baseline mean) and ``win_rate``
        (fraction of episodes where the agent strictly beat the baseline).
    """

    def run_episode():
        """Play one draft and return ``(agent_score, baseline_score, slot)``."""
        # randint's upper bound is exclusive, so slots are drawn from 1..12.
        slot = np.random.randint(1, 13)

        env = FantasyDraftEnv(
            board_df=board_data,
            num_teams=12,
            my_slot=slot,
            rounds=16,
            roster_slots={"QB": 1, "RB": 2, "WR": 3, "TE": 1, "K": 1, "DST": 1, "FLEX": 1},
            bench_spots=6,
        )
        wrapped_env = ActionMasker(env, lambda e: e.get_action_mask())

        obs, info = wrapped_env.reset()
        done = False

        while not done:
            action, _ = model.predict(
                obs,
                deterministic=False,
                action_masks=info["action_mask"]
            )
            obs, _, terminated, truncated, info = wrapped_env.step(action)
            # An episode is over when it terminates OR is truncated; looking at
            # only the terminated flag would keep stepping a truncated episode.
            done = bool(terminated or truncated)

        # Scoring helpers live on the underlying env, not on the wrapper.
        draft_env = wrapped_env.unwrapped
        agent_score = draft_env._lineup_points(draft_env.board, draft_env.my_picks)
        baseline_score = draft_env._baseline_points()

        return agent_score, baseline_score, slot

    print(f"Evaluating {year_name} ({num_episodes} episodes)...")

    agent_scores = []
    baseline_scores = []
    positions = []

    for _ in tqdm.tqdm(range(num_episodes)):
        agent_score, baseline_score, position = run_episode()
        agent_scores.append(agent_score)
        baseline_scores.append(baseline_score)
        positions.append(position)

    # Compute each mean once and derive the other statistics from them.
    agent_mean = np.mean(agent_scores)
    baseline_mean = np.mean(baseline_scores)

    return {
        "year": year_name,
        "agent_scores": agent_scores,
        "baseline_scores": baseline_scores,
        "positions": positions,
        "agent_mean": agent_mean,
        "baseline_mean": baseline_mean,
        "improvement": agent_mean - baseline_mean,
        "improvement_pct": (agent_mean / baseline_mean - 1) * 100,
        "win_rate": np.mean([a > b for a, b in zip(agent_scores, baseline_scores)])
    }

def full_generalization_test(
    model_path="models/ppo_multi_year",
    data_dir="data/processed",
    train_years=(2021, 2022, 2023),
    test_year=2024,
):
    """Evaluate the saved model on every available season and compare train vs. test.

    Loads the ``MaskablePPO`` checkpoint, runs ``evaluate_on_year`` for each
    season that has a ``training_data_<year>.csv`` file in ``data_dir``, prints
    a per-year summary, and finally compares the test year's improvement over
    the baseline with the average improvement across the training years.

    Args:
        model_path: Checkpoint path without the ``.zip`` suffix.
        data_dir: Directory containing the ``training_data_<year>.csv`` files.
        train_years: Seasons labelled as training data in the report.
        test_year: Held-out season used for the generalization verdict.

    Returns:
        dict mapping each evaluated year to the result dict returned by
        ``evaluate_on_year``. Years whose data file is missing are absent.

    Raises:
        FileNotFoundError: If ``<model_path>.zip`` does not exist.
    """
    model_file = Path(str(model_path) + ".zip")
    if not model_file.exists():
        # Report the file that was actually checked, including the suffix.
        raise FileNotFoundError(f"Model not found: {model_file}")

    model = MaskablePPO.load(model_path)

    data_dir = Path(data_dir)
    train_years = list(train_years)
    results = {}

    # Training years first, then the test year; dict.fromkeys drops duplicates
    # while preserving order.
    years_to_evaluate = list(dict.fromkeys([*train_years, test_year]))

    for year in years_to_evaluate:
        file_path = data_dir / f"training_data_{year}.csv"
        if not file_path.exists():
            # Make the gap visible instead of silently dropping the year.
            print(f"Skipping {year}: {file_path} not found")
            continue

        board_data = pd.read_csv(file_path)
        year_results = evaluate_on_year(model, board_data, year)
        results[year] = year_results

        print(f"\n{year} Results:")
        print(f"  Agent: {year_results['agent_mean']:.1f} pts")
        print(f"  Baseline: {year_results['baseline_mean']:.1f} pts")
        print(f"  Improvement: {year_results['improvement']:+.1f} pts ({year_results['improvement_pct']:+.1f}%)")
        print(f"  Win Rate: {year_results['win_rate']:.1%}")

        if year in train_years:
            print("  (Training data)")
        else:
            print("  (Test data - GENERALIZATION)")

    print("\nGENERALIZATION ANALYSIS")
    print("=" * 50)

    if test_year not in results:
        print(f"No results for test year {test_year}; skipping analysis.")
        return results

    train_results = [results[y] for y in train_years if y in results]
    if not train_results:
        print("No training-year results available for comparison.")
        return results

    avg_train_improvement = np.mean([r['improvement'] for r in train_results])
    test_improvement = results[test_year]['improvement']

    print(f"Training years avg improvement: {avg_train_improvement:+.1f} pts")
    print(f"Test year ({test_year}) improvement: {test_improvement:+.1f} pts")
    print(f"Generalization gap: {test_improvement - avg_train_improvement:+.1f} pts")

    # The verdict compares the test-year gain with fractions of the average
    # training gain; a non-positive test gain is always reported as a failure.
    if test_improvement > 0:
        if test_improvement > avg_train_improvement * 0.8:
            print("EXCELLENT: Strong generalization!")
        elif test_improvement > avg_train_improvement * 0.5:
            print("GOOD: Decent generalization")
        else:
            print("WEAK: Poor generalization")
    else:
        print("FAILED: No generalization (worse than baseline)")

    return results

if __name__ == "__main__":
    # Run the full evaluation, then persist the per-year summary statistics.
    results = full_generalization_test()

    if results:
        import json

        results_dir = Path("results")
        results_dir.mkdir(exist_ok=True)

        # Only the scalar summaries are saved; the per-episode lists are omitted.
        summary_keys = (
            "agent_mean",
            "baseline_mean",
            "improvement",
            "improvement_pct",
            "win_rate",
        )

        # Coerce to builtin float: numpy scalars other than float64 (e.g. a
        # float32 mean) are rejected by json.dump as non-serializable.
        json_results = {
            str(year): {key: float(result[key]) for key in summary_keys}
            for year, result in results.items()
        }

        with open(results_dir / "generalization_results.json", "w") as f:
            json.dump(json_results, f, indent=2)

Evaluating 2021 (300 episodes)...


100%|██████████| 300/300 [02:01<00:00,  2.47it/s]



2021 Results:
  Agent: 2418.1 pts
  Baseline: 1916.5 pts
  Improvement: +501.5 pts (+26.2%)
  Win Rate: 100.0%
  (Training data)
Evaluating 2022 (300 episodes)...


100%|██████████| 300/300 [02:00<00:00,  2.49it/s]



2022 Results:
  Agent: 2368.0 pts
  Baseline: 1930.7 pts
  Improvement: +437.3 pts (+22.6%)
  Win Rate: 100.0%
  (Training data)
Evaluating 2023 (300 episodes)...


100%|██████████| 300/300 [02:01<00:00,  2.46it/s]



2023 Results:
  Agent: 2113.6 pts
  Baseline: 1944.0 pts
  Improvement: +169.6 pts (+8.7%)
  Win Rate: 93.0%
  (Training data)
Evaluating 2024 (300 episodes)...


100%|██████████| 300/300 [02:02<00:00,  2.45it/s]


2024 Results:
  Agent: 1643.8 pts
  Baseline: 1974.7 pts
  Improvement: -330.9 pts (-16.8%)
  Win Rate: 0.0%
  (Test data - GENERALIZATION)

GENERALIZATION ANALYSIS
Training years avg improvement: +369.5 pts
Test year (2024) improvement: -330.9 pts
Generalization gap: -700.3 pts
FAILED: No generalization (worse than baseline)



