In [16]:
import sys
import os
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel
import warnings

# Put the current working directory (as an absolute path) at the front of
# sys.path so the package imports further down are resolved against it first.
# NOTE(review): this inserts the cwd, whereas PROJECT_ROOT below is the
# *parent* of the cwd — confirm which directory actually holds the
# Helper/ and Training/ packages.
sys.path.insert(0, os.path.abspath('.'))

# Install a blanket "ignore" filter for every warning category.
# NOTE(review): this would also hide any warnings raised while the t-tests
# below run — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Imports
from Helper.evaluation_helpers import get_predictions
from Training.Helper.PyTorchModular import HORIZONS

# Directory layout, derived from wherever the notebook is being run:
# the project root is taken to be the parent of the current working directory.
PROJECT_ROOT = Path.cwd().resolve().parents[0]
# Per-horizon prediction folders are looked up under this directory.
PRED_BASE = PROJECT_ROOT.joinpath('Predictions')
# Output folder for the t-test CSVs; created up front (with any missing
# parents) so later writes cannot fail on a missing directory.
SAVE_DIR = PROJECT_ROOT.joinpath('Evaluation', 't_test_results')
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Significance level used to select model pairs for the ranked output.
# NOTE(review): no multiple-comparison correction is applied across the
# pairwise tests of a horizon — confirm that this is intended.
ALPHA = 0.05

# For every forecast horizon: load all model predictions, run a paired t-test
# between every pair of models, and write
#   * Horizon{h}_full.csv         - symmetric matrix of p-values (rounded to 6 dp)
#   * Horizon{h}_significant.csv  - pairs with p < ALPHA, ranked by p-value
# NOTE(review): the test compares the raw prediction columns with each other
# (the 'ground_truth' column is dropped), not the forecast errors.
print("Running paired t-tests on model predictions (significant results will be ranked by p-value):")
for h in HORIZONS:
    preds_dir = PRED_BASE / f"Horizon{h}"
    print(f"\nLooking in: {preds_dir} — Exists? {preds_dir.exists()}")

    if not preds_dir.exists():
        print(f"Horizon {h}: prediction folder not found — skipping.")
        continue

    preds_df = get_predictions(preds_dir)

    # Drop ground truth if present; every remaining column is treated as a model
    preds = preds_df.drop(columns=['ground_truth'], errors='ignore')
    models = preds.columns.tolist()

    print(f"Horizon {h} — {len(models)} models: {models}")

    if len(models) < 2:
        print(f"Horizon {h}: not enough models to compare — skipping.")
        continue

    # Full p-value matrix; the diagonal (model vs itself) stays NaN
    pvals = pd.DataFrame(np.nan, index=models, columns=models)
    sig_pairs = []

    # The two-sided paired t-test gives the same p-value for (i, j) and (j, i),
    # so each unordered pair is tested once and mirrored into the matrix.
    # Recording the pair once also keeps the ranked list free of "A vs B" /
    # "B vs A" duplicates, which would otherwise occupy two ranks each.
    for pos, i in enumerate(models):
        for j in models[pos + 1:]:
            try:
                _, p = ttest_rel(preds[i], preds[j])
            except Exception as e:
                # Best-effort: a failing pair is reported and left as NaN
                print(f"Skipping {i} vs {j} due to error: {e}")
                continue

            pvals.loc[i, j] = p
            pvals.loc[j, i] = p

            # NaN p-values are never counted as significant
            if pd.notna(p) and p < ALPHA:
                sig_pairs.append((i, j, p))

    # Save full matrix
    full_path = SAVE_DIR / f"Horizon{h}_full.csv"
    pvals.round(6).to_csv(full_path)
    print(f"Saved full t-test results: {full_path}")

    # Create DataFrame, sort by p-value, add a 1-based Rank column
    sig_df = pd.DataFrame(sig_pairs, columns=["Model_1", "Model_2", "p_value"])
    sig_df = sig_df.sort_values(by="p_value").reset_index(drop=True)
    sig_df.index += 1  # Start rank from 1
    sig_df.insert(0, "Rank", sig_df.index)

    sig_path = SAVE_DIR / f"Horizon{h}_significant.csv"
    sig_df.to_csv(sig_path, index=False)
    print(f"Saved ranked significant pairs (p < {ALPHA}): {sig_path}")


Running paired t-tests on model predictions (significant results will be ranked by p-value):

Looking in: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Predictions/Horizon1 — Exists? True
Horizon 1 — 20 models: ['ARDL', 'NBEATSx', 'SARIMAX1990', 'ARIMAX1990', 'RFX', 'TCN', 'SARIMA1990', 'MARS', 'NHITS', 'Tide', 'ARIMA1990', 'VARcointegration', 'Naive', 'LSTM', 'VARccf', 'GRU1990', 'MLR', 'TFT', 'XGB1990', 'RNN1990']
Saved full t-test results: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon1_full.csv
Saved ranked significant pairs (p < 0.05): /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon1_significant.csv

Looking in: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Predictions/Horizon3 — Exists? True
Horizon 3 — 16 models: ['NBEATSx', 'ARDL', 'TCN', 'RFX', 'NHITS', 'MARS', 'Tide', 'VARccf', 'LSTM', 'Naive', 'VARcoi

In [21]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
from pathlib import Path

# Locations and horizons used by the plotting code in this cell.
# The project root is taken to be the parent of the current working directory;
# the significant-pair CSVs are read from, and the figures written to, SAVE_DIR.
PROJECT_ROOT = Path.cwd().resolve().parents[0]
SAVE_DIR = PROJECT_ROOT.joinpath('Evaluation', 't_test_results')
# Forecast horizons for which a Horizon{h}_significant.csv is looked up.
HORIZONS = [1, 3, 6, 12]

# Plot Top 20 Ranked p-values
def plot_top_pvalues(sig_df, horizon_name):
    """Save a horizontal bar chart of the 20 leading significant model pairs.

    Parameters
    ----------
    sig_df : pandas.DataFrame
        Table with ``Model_1``, ``Model_2`` and ``p_value`` columns. The first
        20 distinct pairs in row order are plotted, so the rows are expected
        to be ordered by ascending p-value already.
    horizon_name : str
        Label used in the figure title and in the output file name.

    Side effects
    ------------
    Writes ``SAVE_DIR / f"{horizon_name}_top20_barplot.png"`` and prints the
    path that was written.
    """
    # Same floor as plot_network: a p-value that underflowed to 0.0 would
    # otherwise become an infinite bar width under -log10.
    min_p = 1e-300

    # "A vs B" and "B vs A" describe the same comparison. Keep only the first
    # occurrence of each unordered pair so mirrored rows cannot use up two of
    # the 20 slots.
    seen_pairs = set()
    first_occurrence = []
    for model_a, model_b in zip(sig_df['Model_1'], sig_df['Model_2']):
        pair = frozenset((model_a, model_b))
        first_occurrence.append(pair not in seen_pairs)
        seen_pairs.add(pair)
    top20 = sig_df.loc[np.array(first_occurrence, dtype=bool)].head(20)

    labels = [
        f"{model_a} vs {model_b}"
        for model_a, model_b in zip(top20['Model_1'], top20['Model_2'])
    ]
    # Negative log so that very small p-values show up as long bars
    scores = -np.log10(top20['p_value'].clip(lower=min_p))

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.barh(labels, scores)
    ax.set_xlabel("-log10(p-value)")
    ax.set_title(f"Top 20 Significant Model Differences — {horizon_name} (Ranked by p-value)")
    ax.invert_yaxis()  # first (smallest p-value) row at the top
    ax.grid(axis='x')
    fig.tight_layout()

    save_path = SAVE_DIR / f"{horizon_name}_top20_barplot.png"
    fig.savefig(save_path)
    plt.close(fig)  # release the figure; this function is called once per horizon
    print(f"Saved Top 20 p-value bar chart: {save_path}")

# Plot Network Graph of Significant Results
def plot_network(sig_df, horizon_name):
    """Draw the significant model pairs as an undirected graph and save it.

    Each row of ``sig_df`` (columns ``Model_1``, ``Model_2``, ``p_value``)
    becomes an edge between the two models, weighted by ``-log10(p_value)``;
    edge colour encodes that weight. The figure is written to
    ``SAVE_DIR / f"{horizon_name}_network_graph.png"``.

    Returns early, after printing a message, when there is nothing to draw
    or when the layout computation raises.
    """
    # Floor for p-values so that -log10 stays finite even for p == 0
    p_floor = 1e-300

    graph = nx.Graph()
    for model_a, model_b, p_raw in zip(sig_df['Model_1'], sig_df['Model_2'], sig_df['p_value']):
        graph.add_edge(model_a, model_b, weight=-np.log10(max(p_raw, p_floor)))

    if graph.number_of_nodes() == 0:
        print(f"{horizon_name}: No edges to draw.")
        return

    # Fixed seed keeps the node placement reproducible between runs
    try:
        layout = nx.spring_layout(graph, seed=42)
    except Exception as e:
        print(f"Failed to compute layout for {horizon_name}: {e}")
        return

    # One weight per edge, in the same order the edges are iterated for drawing
    edge_weights = [w for _, _, w in graph.edges(data='weight')]

    draw_options = {
        'with_labels': True,
        'node_color': 'lightblue',
        'edge_color': edge_weights,
        'edge_cmap': plt.cm.viridis,
        'width': 2,
        'font_size': 10,
    }

    plt.figure(figsize=(12, 10))
    nx.draw_networkx(graph, layout, **draw_options)
    plt.title(f"Significance Network Graph — {horizon_name}")
    plt.tight_layout()

    save_path = SAVE_DIR / f"{horizon_name}_network_graph.png"
    plt.savefig(save_path)
    plt.close()
    print(f"Saved significance network graph: {save_path}")

# For each horizon, load its ranked significant-pairs CSV (if one was written)
# and produce both figures; horizons without a file, or with an empty one,
# are reported and skipped.
for h in HORIZONS:
    sig_path = SAVE_DIR / f"Horizon{h}_significant.csv"

    if sig_path.exists():
        sig_df = pd.read_csv(sig_path)

        if sig_df.empty:
            print(f"Significant file for Horizon{h} is empty — skipping plots.")
        else:
            plot_top_pvalues(sig_df, f"Horizon{h}")
            plot_network(sig_df, f"Horizon{h}")
    else:
        print(f"No significant results for Horizon{h} — skipping plots.")


Saved Top 20 p-value bar chart: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon1_top20_barplot.png
Saved significance network graph: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon1_network_graph.png
Saved Top 20 p-value bar chart: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon3_top20_barplot.png
Saved significance network graph: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon3_network_graph.png
Saved Top 20 p-value bar chart: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon6_top20_barplot.png
Saved significance network graph: /Users/natalieleung/Desktop/COMP5530M-Group-Project-Inflation-Forecasting/Evaluation/t_test_results/Horizon6_network_graph.png
Saved Top 20 p-value bar chart: /Users/n