## Setup Dataset

In [None]:
from src.data.setup import download_and_extract

# Fetch the dataset archive and unpack it so the training cells can load it.
# NOTE(review): the behaviour lives in src.data.setup.download_and_extract;
# the per-argument notes below are read off the parameter names — confirm there.
download_and_extract(
    url="https://drive.google.com/uc?id=1STHDs5uR-bx-6beC36qtGaW_UqC1fY7s",  # Google Drive link to the archive
    zip_path="data.zip",  # presumably where the downloaded archive is written
    extract_dir="data",  # presumably the directory the archive is unpacked into
    verbose=True,
    remove_zip=True  # presumably deletes the archive after extraction — TODO confirm
)

## Offline-Train All Classifiers

In [7]:
from src.models.decision_tree.train import main as train_decision_tree
from src.models.hoeffding.train import main as train_hoeffding_tree
from src.models.weighted_forest.train import main as train_weighted_forest

# Run each model's offline training entry point in turn. According to the
# recorded output, every call loads the data, trains, evaluates, and then saves
# a model pickle plus a metadata pickle under models/dt, models/ht and models/wf.
# Those saved paths are the ones the online-training and evaluation cells load.
train_decision_tree()
train_hoeffding_tree()
train_weighted_forest()

Running offline training for Decision Tree
Loading data...
Training Decision Tree...
Evaluating model...
  Training Accuracy: 0.9404
  Test Accuracy: 0.8928
Saving model...
  Model saved to: models/dt/decision_tree_pong.pkl
  Metadata saved to: models/dt/decision_tree_metadata.pkl
Training Finished for Decision Tree

Running offline training for Hoeffding Tree
Loading data...
Training Hoeffding Tree...
  Progressive validation accuracy: 0.6255
Evaluating model...
  Training Accuracy: 0.6255
  Test Accuracy: 0.6582
Saving model...
  Model saved to: models/ht/hoeffding_tree_pong.pkl
  Metadata saved to: models/ht/hoeffding_tree_metadata.pkl
Training Finished for Hoeffding Tree

Running offline training for Weighted Forest
Loading data...
Preparing data...
Training Weighted Forest...
Evaluating model...
  Training Accuracy: 0.3850
  Test Accuracy: 0.3883
  Test Balanced Accuracy: 0.3883
Saving model...
  Model saved to: models/wf/weighted_forest_pong.pkl
  Metadata saved to: models/wf/wei

## Launch Interactive Game

Left player controls the paddle using Q (up) and A (down).

Choose the enemy using the mode variable. Choices include:
- "human": play against another player
- "pc": pc player aiming to reach ball_y
- "dt": offline-only trained decision tree
- "ht": offline-only pre-trained hoeffding tree
- "wf": offline-only pre-trained weighted forest

In [None]:
from src.main import main

# Opponent selection for the interactive game; the trailing comment lists the
# accepted mode strings. Change the value and re-run the cell to switch opponent.
mode = "wf" # "human", "pc", "dt", "ht", "wf"
main(mode)

## Online Training

In [1]:
from src.training.train_online import train_decision_online
from src.training.train_online import train_hoeffding_online
from src.training.train_online import train_weighted_forest_online

# Only the Weighted Forest online run is active; the Decision Tree and
# Hoeffding Tree calls are kept commented out so they can be re-enabled by hand.
# NOTE(review): later cells read models/ht/hoeffding_online_metrics.csv and
# models/ht/hoeffding_tree_online.pkl, which are presumably produced by
# train_hoeffding_online — a fresh "Restart & Run All" likely needs that call
# enabled as well. Confirm before relying on a clean re-run.

# train_decision_online(
#     pretrained_model_path="models/dt/decision_tree_pong.pkl",
#     num_episodes=20,
#     max_score_per_episode=5,
#     save_interval=5
# )

# train_hoeffding_online(
#     pretrained_model_path="models/ht/hoeffding_tree_pong.pkl",
#     num_episodes=20,
#     max_score_per_episode=5,
#     save_interval=5
# )

# Continue training the offline Weighted Forest; unlike the two calls above,
# this one is also given the metadata pickle saved by offline training.
train_weighted_forest_online(
    pretrained_model_path="models/wf/weighted_forest_pong.pkl",
    metadata_path="models/wf/weighted_forest_metadata.pkl",
    num_episodes=20,
    max_score_per_episode=5,
    save_interval=5
)

Running online training for Weighted Forest
Loading pretrained model from models/wf/weighted_forest_pong.pkl...
Setting up online trainer...
Running 20 training episodes...
[Frame 100] Metrics: {'total_updates': 100, 'total_reward': 38.441599999999845, 'avg_reward': 0.3844159999999984, 'num_cells': 36, 'accuracy': 0.67}
[Frame 200] Metrics: {'total_updates': 200, 'total_reward': 37.64159999999962, 'avg_reward': 0.1882079999999981, 'num_cells': 36, 'accuracy': 0.745}
[Frame 300] Metrics: {'total_updates': 300, 'total_reward': 61.4826333772681, 'avg_reward': 0.20494211125756034, 'num_cells': 36, 'accuracy': 0.7466666666666667}
[Frame 400] Metrics: {'total_updates': 400, 'total_reward': 134.89193926362404, 'avg_reward': 0.3372298481590601, 'num_cells': 36, 'accuracy': 0.81}
[Frame 500] Metrics: {'total_updates': 500, 'total_reward': 139.91949686983588, 'avg_reward': 0.27983899373967175, 'num_cells': 36, 'accuracy': 0.848}
[Frame 600] Metrics: {'total_updates': 600, 'total_reward': 151.035

## Compare Pretrained vs Online Models

In [2]:
from src.evaluation import compare_pretrained_vs_online

# Compare each pre-trained model against its online-trained counterpart.
# max_score=5 matches the per-episode / per-game score limit used elsewhere in
# this notebook (presumably the score at which a game ends — confirm in src.evaluation).
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt, so
# `results` was not produced by the saved run.
results = compare_pretrained_vs_online(max_score=5)

Comparing pre-trained vs online-trained models

Hoeffding Tree:


KeyboardInterrupt: 

## Visualize Online Training Metrics

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Online-training metric logs; each has an 'episode' column used as the x axis
# (presumably one row per episode — confirm against the writer in src.training).
ht_metrics = pd.read_csv("models/ht/hoeffding_online_metrics.csv")
wf_metrics = pd.read_csv("models/wf/weighted_forest_online_metrics.csv")

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Grid position -> (metrics frame, y column, panel title, y-axis label).
# Top row is the Hoeffding Tree, bottom row the Weighted Forest.
panel_specs = {
    (0, 0): (ht_metrics, 'survival_seconds', 'Hoeffding Tree - Survival Time', 'Survival (seconds)'),
    (0, 1): (ht_metrics, 'progressive_accuracy', 'Hoeffding Tree - Progressive Accuracy', 'Accuracy'),
    (1, 0): (wf_metrics, 'survival_seconds', 'Weighted Forest - Survival Time', 'Survival (seconds)'),
    (1, 1): (wf_metrics, 'num_cells', 'Weighted Forest - Active Cells', 'Number of Cells'),
}

# Every panel is the same line plot against the episode number; only the
# source frame, plotted column and labels differ.
for (grid_row, grid_col), (frame, y_column, title, y_label) in panel_specs.items():
    panel = axes[grid_row, grid_col]
    panel.plot(frame['episode'], frame[y_column], marker='o')
    panel.set_title(title)
    panel.set_xlabel('Episode')
    panel.set_ylabel(y_label)

plt.tight_layout()
plt.show()

## Statistical Analysis (10x5 CV)

In [None]:
import time
import numpy as np
from src.statistical_tests import run_rskf, print_results
from src.data.loader import load_training_data
from src.data.preparation import min_max_scale, convert_str_to_int, undersample
from sklearn.tree import DecisionTreeClassifier
from river.tree import HoeffdingTreeClassifier
from src.models.weighted_forest.clf import WeightedForest, euclidean_distance
from sklearn.metrics import accuracy_score

# Load the training set, then turn it into the arrays used for cross-validation:
# scaled features, integer-encoded labels, and finally an undersampled subset.
X, y = load_training_data(random_state=42)

X_np, _, _ = min_max_scale(X.to_numpy())
y_np, class_mapping = convert_str_to_int(y.to_numpy())
X_np, y_np = undersample(X_np, y_np, random_seed=42)

# Each model is identified by its name; key and value are the same string, so
# train_eval_fn can dispatch on whichever one run_rskf hands it.
models = {
    name: name
    for name in ('DecisionTree', 'HoeffdingTree', 'WeightedForest')
}

# One independent list of per-fold prediction timings per model,
# appended to by train_eval_fn.
inference_times = dict((name, []) for name in models)

def train_eval_fn(model_name, X_train, y_train, X_test, y_test):
    """Train one classifier on a fold and return its accuracy on the held-out part.

    Only the prediction step is timed. As a side effect, the elapsed seconds are
    appended to the notebook-level ``inference_times[model_name]`` list.

    Args:
        model_name: 'DecisionTree', 'HoeffdingTree' or 'WeightedForest'.
        X_train: Training features for this fold; must expose ``.shape`` and
            iterate row by row (a 2-D NumPy array in this notebook).
        y_train: Training labels for this fold.
        X_test: Held-out features for this fold.
        y_test: Held-out labels for this fold.

    Returns:
        ``accuracy_score(y_test, preds)`` for the fitted model.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # time.perf_counter() is used instead of time.time(): it is monotonic and has
    # the highest available resolution, which matters for sub-millisecond
    # predictions such as the decision tree's.
    if model_name == 'DecisionTree':
        clf = DecisionTreeClassifier(max_depth=20, min_samples_split=10, random_state=42)
        clf.fit(X_train, y_train)
        start_time = time.perf_counter()
        preds = clf.predict(X_test)
        end_time = time.perf_counter()

    elif model_name == 'HoeffdingTree':
        clf = HoeffdingTreeClassifier()
        # The tree is fed one sample at a time, each row converted to a
        # {feature_index: value} dict.
        for x, y_label in zip(X_train, y_train):
            clf.learn_one(dict(enumerate(x)), y_label)
        start_time = time.perf_counter()
        preds = [clf.predict_one(dict(enumerate(x))) for x in X_test]
        end_time = time.perf_counter()

    elif model_name == 'WeightedForest':
        clf = WeightedForest(
            X_train.shape[1], len(np.unique(y_train)), euclidean_distance,
            accuracy_goal=0.65, random_seed=42
        )
        clf.fit(X_train, y_train, epochs=3)
        start_time = time.perf_counter()
        preds = clf.predict(X_test)
        end_time = time.perf_counter()

        # Cast predictions to int before scoring against the integer labels.
        preds = preds.astype(int)

    else:
        # Fail loudly instead of tripping over unbound timing variables below.
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected 'DecisionTree', "
            "'HoeffdingTree' or 'WeightedForest'"
        )

    inference_times[model_name].append(end_time - start_time)

    return accuracy_score(y_test, preds)

# Cross-validate every model with 10 repeats of 5 splits (presumably repeated
# stratified k-fold, going by the helper's name) and print the accuracy report.
output = run_rskf(
    train_eval_fn,
    models,
    X_np,
    y_np,
    n_repeats=10,
    n_splits=5,
    random_state=42,
)
print_results(output)

# Prediction-time table built from the timings train_eval_fn collected per fold.
print("\nInference Time Results (Seconds per Fold)")
print(f"{'Model':<20} | {'Mean Time':<12} | {'Std Dev':<10}")
print("-" * 46)

for model_name in models:
    times = inference_times[model_name]
    mean_time, std_time = np.mean(times), np.std(times)
    print(f"{model_name:<20} | {mean_time:.6f}s   | ± {std_time:.6f}s")

### Accuracy Results ###
Model Performance (mean ± std):
  DecisionTree: 0.8812 +/- 0.0043
  HoeffdingTree: 0.6308 +/- 0.0108
  WeightedForest: 0.3587 +/- 0.0227

Friedman Test:
  Statistic: 100.0000
  P-value: 0.0000

Post-hoc Pairwise Comparisons (Wilcoxon + Hommel):
  DecisionTree_vs_HoeffdingTree: p=0.0000 *
  DecisionTree_vs_WeightedForest: p=0.0000 *
  HoeffdingTree_vs_WeightedForest: p=0.0000 *

### Inference Time Results (Seconds per Fold) ###
Model                | Mean Time    | Std Dev   
----------------------------------------------
DecisionTree         | 0.000922s   | ± 0.000056s
HoeffdingTree        | 0.099276s   | ± 0.006321s
WeightedForest       | 0.131358s   | ± 0.014056s


## Game Simulation Evaluation (10 Games x 5 Models)

In [None]:
import numpy as np
import pandas as pd
from src.models.model_loader import PongAIPlayer
from src.evaluation import evaluate_model
from src.statistical_tests import friedman_test, wilcoxon_posthoc

NUM_GAMES = 10  # games played per model
MAX_SCORE = 5  # passed to evaluate_model; presumably the score that ends a game — confirm

# Display name -> (model pickle path, metadata pickle path).
# The *_online entries reuse the metadata file of their pre-trained counterpart.
MODEL_CONFIGS = {
    'DT_pretrained': ('models/dt/decision_tree_pong.pkl', 'models/dt/decision_tree_metadata.pkl'),
    'HT_pretrained': ('models/ht/hoeffding_tree_pong.pkl', 'models/ht/hoeffding_tree_metadata.pkl'),
    'HT_online': ('models/ht/hoeffding_tree_online.pkl', 'models/ht/hoeffding_tree_metadata.pkl'),
    'WF_pretrained': ('models/wf/weighted_forest_pong.pkl', 'models/wf/weighted_forest_metadata.pkl'),
    'WF_online': ('models/wf/weighted_forest_online.pkl', 'models/wf/weighted_forest_metadata.pkl'),
}

# One dict per (model, game); converted to a DataFrame once the loop finishes.
all_results = []

for model_name, (model_path, metadata_path) in MODEL_CONFIGS.items():
    print(f"\nEvaluating {model_name}...")
    
    try:
        ai = PongAIPlayer(model_path, metadata_path)
    except Exception as e:
        # Best-effort: a model that cannot be loaded is reported and skipped, so
        # it contributes no rows to all_results.
        print(f"  Error loading model: {e}")
        continue
    
    for game_num in range(1, NUM_GAMES + 1):
        # end="\r" returns the cursor to the line start so each progress
        # message overwrites the previous one.
        print(f"  Game {game_num}/{NUM_GAMES}", end="\r")
        
        result = evaluate_model(ai, model_name, MAX_SCORE)
        
        # AI score minus PC score: negative means the AI lost the game.
        goal_diff = result.final_ai_score - result.final_pc_score
        
        all_results.append({
            'model': model_name,
            'game': game_num,
            'survival_time': result.survival_time_seconds,
            'returns': result.total_hits,
            'goal_diff': goal_diff,
            'ai_score': result.final_ai_score,
            'pc_score': result.final_pc_score
        })
    
    print(f"  Completed {NUM_GAMES} games for {model_name}")

# Persist the raw per-game rows; `df` is also what the statistics cell reads.
df = pd.DataFrame(all_results)
csv_path = 'models/game_simulation_results.csv'
df.to_csv(csv_path, index=False)
print(f"\nResults saved to {csv_path}")



Evaluating DT_pretrained...
  Completed 10 games for DT_pretrained

Evaluating HT_pretrained...
  Completed 10 games for HT_pretrained

Evaluating HT_online...
  Completed 10 games for HT_online

Evaluating WF_pretrained...
  Completed 10 games for WF_pretrained

Evaluating WF_online...
  Completed 10 games for WF_online

Results saved to models/game_simulation_results.csv

RAW GAME RESULTS
        model  game  survival_time  returns  goal_diff  ai_score  pc_score
DT_pretrained     1      39.200000        7         -5         0         5
DT_pretrained     2      37.933333        6         -5         0         5
DT_pretrained     3      22.233333        2         -5         0         5
DT_pretrained     4      34.183333        5         -5         0         5
DT_pretrained     5      41.916667        8         -5         0         5
DT_pretrained     6      34.683333        5         -5         0         5
DT_pretrained     7      41.233333        7         -5         0         5
DT_pr

In [None]:
# Per-model summary table: mean and standard deviation of each game metric.
# The heading no longer carries the stray trailing "###" left over from an
# earlier decorated format.
print("\nSummary Statistics (mean ± std)\n")
print(f"{'Model':<15} | {'Survival Time (s)':<20} | {'Returns':<18} | {'Goal Diff':<15}")
print("-" * 75)

# Columns become a two-level index: (metric, 'mean' | 'std').
# pandas' groupby std is the sample standard deviation (ddof=1).
summary_stats = df.groupby('model').agg({
    'survival_time': ['mean', 'std'],
    'returns': ['mean', 'std'],
    'goal_diff': ['mean', 'std']
}).round(2)

# Iterate MODEL_CONFIGS rather than the groupby index so rows print in
# configuration order; models with no rows in df are skipped.
for model in MODEL_CONFIGS.keys():
    if model not in summary_stats.index:
        continue

    row = summary_stats.loc[model]  # one lookup per model instead of six
    st_mean, st_std = row[('survival_time', 'mean')], row[('survival_time', 'std')]
    ret_mean, ret_std = row[('returns', 'mean')], row[('returns', 'std')]
    gd_mean, gd_std = row[('goal_diff', 'mean')], row[('goal_diff', 'std')]
    print(f"{model:<15} | {st_mean:>6.2f} ± {st_std:<10.2f} | {ret_mean:>5.2f} ± {ret_std:<8.2f} | {gd_mean:>5.2f} ± {gd_std:<5.2f}")

# Prepare data for statistical tests
# Each metric is tested separately across models, using the per-game rows in df.
# NOTE(review): friedman_test / wilcoxon_posthoc are project helpers. If they wrap
# the usual paired Friedman and Wilcoxon signed-rank tests, game i of every model
# is treated as one matched block — confirm that pairing games by index is
# intended, since the games appear to be played independently per model.
metrics = ['survival_time', 'returns', 'goal_diff']
metric_labels = {'survival_time': 'Survival Time', 'returns': 'Returns', 'goal_diff': 'Goal Difference'}

for metric in metrics:
    print(f"\n{metric_labels[metric]} Statistical Tests\n")
    
    # Create results dict for Friedman test
    # model name -> list of per-game values. Only models with exactly NUM_GAMES
    # rows are kept, so models that were skipped or incomplete drop out.
    results_dict = {}
    for model in MODEL_CONFIGS.keys():
        model_data = df[df['model'] == model][metric].values
        if len(model_data) == NUM_GAMES:
            results_dict[model] = model_data.tolist()
    
    if len(results_dict) < 3:
        print("Not enough models for Friedman test (need at least 3)")
        continue
    
    # Friedman test
    try:
        friedman_stat, friedman_p = friedman_test(results_dict)
        print(f"Friedman Test: statistic={friedman_stat:.4f}, p-value={friedman_p:.6f}")
        
        # Post-hoc if significant
        # Pairwise comparisons are only run when the omnibus test rejects at 0.05.
        if friedman_p < 0.05:
            print("\nPost-hoc Pairwise Comparisons (Wilcoxon + Hommel):")
            posthoc = wilcoxon_posthoc(results_dict)
            for pair, result in posthoc.items():
                sig = "*" if result['significant'] else ""
                print(f"  {pair}: p={result['p_corrected']:.4f} {sig}")
        else:
            print("  No significant differences found (p >= 0.05)")
    except Exception as e:
        # Best-effort: report the failure for this metric and move on to the next.
        print(f"Error in statistical test: {e}")



STATISTICAL ANALYSIS

### Summary Statistics (mean ± std) ###

Model           | Survival Time (s)    | Returns            | Goal Diff      
---------------------------------------------------------------------------
DT_pretrained   |  32.63 ± 8.22       |  4.80 ± 2.30     | -5.00 ± 0.00 
HT_pretrained   |  27.84 ± 4.48       |  4.00 ± 1.33     | -4.90 ± 0.32 
HT_online       |  32.26 ± 3.29       |  5.00 ± 0.94     | -5.00 ± 0.00 
WF_pretrained   |  24.12 ± 4.22       |  2.40 ± 1.07     | -4.90 ± 0.32 
WF_online       |  25.51 ± 7.19       |  2.90 ± 1.91     | -5.00 ± 0.00 

### Survival Time - Statistical Tests ###

Friedman Test: statistic=11.8400, p-value=0.018582

Post-hoc Pairwise Comparisons (Wilcoxon + Hommel):
  DT_pretrained_vs_HT_pretrained: p=0.1507 
  DT_pretrained_vs_HT_online: p=0.8457 
  DT_pretrained_vs_WF_pretrained: p=0.0928 
  DT_pretrained_vs_WF_online: p=0.1400 
  HT_pretrained_vs_HT_online: p=0.1367 
  HT_pretrained_vs_WF_pretrained: p=0.1289 
  HT_pretrained_vs