In [1]:
import pandas as pd
# import seaborn as sns
# import matplotlib.pyplot as plt

In [2]:
# Short-horizon result tables: one CSV per dataset variant (70N, 700N, 7000N).
# The files are read in the order they are listed.
resultsShort70, resultsShort700, resultsShort7000 = map(
    pd.read_csv,
    ("ActivityShort70N.csv", "ActivityShort700N.csv", "ActivityShort7000N.csv"),
)

# Long-horizon result tables for the same three variants.
resultsLong70, resultsLong700, resultsLong7000 = map(
    pd.read_csv,
    ("ActivityLong70N.csv", "ActivityLong700N.csv", "ActivityLong7000N.csv"),
)

In [3]:
# Derive Naive-relative performance metrics for a single results table
def calculate_performance_metrics(results_df, dataset_name):
    """
    Score every row of ``results_df`` against the row whose model is 'Naive'.

    Columns added to the returned frame:
      * ``mae_improvement_pct``: ((Naive_MAE - Model_MAE) / Naive_MAE) × 100
      * ``mse_improvement_pct``: ((Naive_MSE - Model_MSE) / Naive_MSE) × 100
      * ``prediction_score``:    1 − L(model)/L(naive), with L the MSE loss
      * ``dataset``:             ``dataset_name`` repeated on every row

    Parameters
    ----------
    results_df : DataFrame
        Must provide the columns 'model', 'mae' and 'mse'.
    dataset_name : str
        Label stored in the 'dataset' column and used in the warning text.

    Returns
    -------
    DataFrame or None
        A new frame (the input is left unmodified) with the four extra
        columns, or ``None`` after printing a warning when no row has
        model == 'Naive'. If several such rows exist, the first one is
        taken as the baseline.
    """
    baseline_rows = results_df.loc[results_df['model'] == 'Naive']
    if baseline_rows.empty:
        print(f"Warning: No Naive model found in {dataset_name}")
        return None

    # Scalar reference errors taken from the first Naive row
    base_mae = baseline_rows['mae'].iloc[0]
    base_mse = baseline_rows['mse'].iloc[0]

    # assign() works on a copy, so the caller's frame is not touched
    return results_df.assign(
        mae_improvement_pct=((base_mae - results_df['mae']) / base_mae) * 100,
        mse_improvement_pct=((base_mse - results_df['mse']) / base_mse) * 100,
        prediction_score=1 - (results_df['mse'] / base_mse),
        dataset=dataset_name,
    )

print("Calculating performance metrics for all datasets...", "=" * 50, sep="\n")

# SHORT forecasting (Context=16, Prediction=8): one metrics table per dataset,
# evaluated in the order listed
short_metrics_70, short_metrics_700, short_metrics_7000 = (
    calculate_performance_metrics(results, label)
    for results, label in (
        (resultsShort70, "Short_70N"),
        (resultsShort700, "Short_700N"),
        (resultsShort7000, "Short_7000N"),
    )
)

# LONG forecasting (Context=48, Prediction=16)
long_metrics_70, long_metrics_700, long_metrics_7000 = (
    calculate_performance_metrics(results, label)
    for results, label in (
        (resultsLong70, "Long_70N"),
        (resultsLong700, "Long_700N"),
        (resultsLong7000, "Long_7000N"),
    )
)

# Completion notice followed by a reminder of the formulas behind each column
print(
    "Metrics calculated successfully!",
    "\nFormulas used:",
    "1. MAE Improvement %: ((Naive_MAE - Model_MAE) / Naive_MAE) × 100",
    "2. MSE Improvement %: ((Naive_MSE - Model_MSE) / Naive_MSE) × 100",
    "3. Prediction Score: 1 − L(model)/L(naive) where L = MSE loss",
    sep="\n",
)

Calculating performance metrics for all datasets...
Metrics calculated successfully!

Formulas used:
1. MAE Improvement %: ((Naive_MAE - Model_MAE) / Naive_MAE) × 100
2. MSE Improvement %: ((Naive_MSE - Model_MSE) / Naive_MSE) × 100
3. Prediction Score: 1 − L(model)/L(naive) where L = MSE loss


In [4]:
# Combine all metrics into comprehensive tables.
# pandas is already imported as `pd` in the notebook's first cell, so it is
# not re-imported here.

# Separate short and long for better organization
short_metrics = pd.concat([short_metrics_70, short_metrics_700, short_metrics_7000], ignore_index=True)
long_metrics = pd.concat([long_metrics_70, long_metrics_700, long_metrics_7000], ignore_index=True)

# All six datasets stacked into one frame with a fresh 0..n-1 index.
# NOTE: pd.concat silently drops None entries, so a dataset for which
# calculate_performance_metrics returned None (no Naive row) is simply absent.
all_metrics = pd.concat([
    short_metrics_70, short_metrics_700, short_metrics_7000,
    long_metrics_70, long_metrics_700, long_metrics_7000
], ignore_index=True)

print("PERFORMANCE METRICS SUMMARY")
print("=" * 80)
print("Context=16, Prediction=8 (SHORT) vs Context=48, Prediction=16 (LONG)")
print("All metrics calculated relative to Naive baseline within each dataset")
print("=" * 80)

# Build, print and return a display-ready metrics table
def create_performance_table(metrics_df, title):
    """
    Print a formatted table of the Naive-relative metrics and return it.

    Rows whose model is 'Naive' are left out, since the baseline compared
    with itself is always 0% improvement with a score of 0.

    Parameters
    ----------
    metrics_df : DataFrame
        Needs the columns 'model', 'dataset', 'mae_improvement_pct',
        'mse_improvement_pct' and 'prediction_score'.
    title : str
        Heading printed above the table.

    Returns
    -------
    DataFrame
        The displayed table with columns 'Model', 'Dataset', 'MAE Improv %',
        'MSE Improv %' (both rounded to 2 decimals) and 'Pred Score'
        (rounded to 4 decimals). Row labels of ``metrics_df`` are kept.
    """
    source_columns = ['model', 'dataset', 'mae_improvement_pct', 'mse_improvement_pct', 'prediction_score']
    table_df = metrics_df.loc[metrics_df['model'] != 'Naive', source_columns].copy()
    table_df.columns = ['Model', 'Dataset', 'MAE Improv %', 'MSE Improv %', 'Pred Score']

    # Number of decimals kept per numeric column
    decimals = {'MAE Improv %': 2, 'MSE Improv %': 2, 'Pred Score': 4}
    for column, places in decimals.items():
        table_df[column] = table_df[column].round(places)

    # Heading, a 70-character rule, then the table without its index
    print(f"\n{title}", "-" * 70, table_df.to_string(index=False), sep="\n")

    return table_df

# One printed table per forecasting horizon
short_table = create_performance_table(
    metrics_df=short_metrics,
    title="SHORT Forecasting Performance (Context=16, Prediction=8)",
)
long_table = create_performance_table(
    metrics_df=long_metrics,
    title="LONG Forecasting Performance (Context=48, Prediction=16)",
)

# Raw Naive errors per dataset: the reference values behind every percentage above
print("\n\nNAIVE BASELINES (Reference Values)", "-" * 50, sep="\n")
naive_baselines = all_metrics.loc[all_metrics['model'] == 'Naive', ['dataset', 'mae', 'mse']].copy()
naive_baselines.columns = ['Dataset', 'Naive MAE', 'Naive MSE']
naive_baselines = naive_baselines.round({'Naive MAE': 4, 'Naive MSE': 4})
print(naive_baselines.to_string(index=False))

PERFORMANCE METRICS SUMMARY
Context=16, Prediction=8 (SHORT) vs Context=48, Prediction=16 (LONG)
All metrics calculated relative to Naive baseline within each dataset

SHORT Forecasting Performance (Context=16, Prediction=8)
----------------------------------------------------------------------
      Model     Dataset  MAE Improv %  MSE Improv %  Pred Score
       Mean   Short_70N        -24.72        -56.85     -0.5685
    TSMixer   Short_70N          8.82          4.22      0.0422
       POCO   Short_70N         13.37          6.84      0.0684
     Linear   Short_70N          4.23         -5.76     -0.0576
    DLinear   Short_70N          9.24          3.16      0.0316
   Informer   Short_70N        -13.78        -54.56     -0.5456
Transformer   Short_70N         -4.42        -21.72     -0.2172
       Mean  Short_700N         10.41         12.56      0.1256
    TSMixer  Short_700N         18.79         28.38      0.2838
       POCO  Short_700N         21.74         32.09      0.3209


In [5]:
# Section banner for the best-performer and pattern analysis that follows
print("\nADVANCED ANALYSIS", "=" * 60, sep="\n")

def analyze_best_performers(metrics_df, forecasting_type):
    """
    Print, for every dataset, the model with the highest value of each metric.

    Rows whose model is 'Naive' are excluded before ranking. Datasets are
    reported in order of first appearance in ``metrics_df``.

    Parameters
    ----------
    metrics_df : DataFrame
        Needs the columns 'model', 'dataset', 'mae_improvement_pct',
        'mse_improvement_pct' and 'prediction_score'.
    forecasting_type : str
        Label used in the section heading (e.g. "SHORT").

    Returns
    -------
    None
        Output goes to stdout only.
    """
    candidates = metrics_df[metrics_df['model'] != 'Naive'].copy()

    print(f"\n{forecasting_type} Forecasting - Best Performers:")
    print("-" * 45)

    # (line label, ranked column, number format, text after the number)
    report_layout = (
        ("Best MAE Improv", 'mae_improvement_pct', "+6.2f", "%"),
        ("Best MSE Improv", 'mse_improvement_pct', "+6.2f", "%"),
        ("Best Pred Score", 'prediction_score', "7.4f", ""),
    )

    for dataset in candidates['dataset'].unique():
        subset = candidates[candidates['dataset'] == dataset]

        # Resolve all three winners before printing anything for this dataset
        winners = [subset.loc[subset[column].idxmax()] for _, column, _, _ in report_layout]

        print(f"\n{dataset}:")
        for (label, column, number_format, suffix), winner in zip(report_layout, winners):
            print(f"  {label}: {winner['model']:12} ({winner[column]:{number_format}}{suffix})")

# Report the per-dataset winners for each forecasting horizon
analyze_best_performers(metrics_df=short_metrics, forecasting_type="SHORT")
analyze_best_performers(metrics_df=long_metrics, forecasting_type="LONG")

# Banner for the aggregate statistics section
print("\n\nOVERALL PERFORMANCE STATISTICS", "=" * 50, sep="\n")

def print_summary_stats(metrics_df, name):
    """
    Print the mean, standard deviation and maximum of each metric.

    Rows whose model is 'Naive' are excluded; the statistics are taken over
    all remaining rows of ``metrics_df`` via ``Series.describe()``.

    Parameters
    ----------
    metrics_df : DataFrame
        Needs the columns 'model', 'mae_improvement_pct',
        'mse_improvement_pct' and 'prediction_score'.
    name : str
        Label used in the heading (e.g. "SHORT").

    Returns
    -------
    None
        Output goes to stdout only.
    """
    models_only = metrics_df[metrics_df['model'] != 'Naive']

    # (padded line label, summarised column, number format)
    layout = (
        ("MAE Improvement %  ", 'mae_improvement_pct', "6.2f"),
        ("MSE Improvement %  ", 'mse_improvement_pct', "6.2f"),
        ("Prediction Score   ", 'prediction_score', "6.4f"),
    )

    # Compute every summary up front so nothing is printed if a column is missing
    summaries = [(label, models_only[column].describe(), fmt) for label, column, fmt in layout]

    print(f"\n{name} Forecasting Summary:")
    for label, stats, fmt in summaries:
        print(f"{label}- Mean: {stats['mean']:{fmt}}, Std: {stats['std']:{fmt}}, Max: {stats['max']:{fmt}}")

# Aggregate statistics for each forecasting horizon
print_summary_stats(metrics_df=short_metrics, name="SHORT")
print_summary_stats(metrics_df=long_metrics, name="LONG")

# Banner for the per-model consistency section
print("\n\nMODEL CONSISTENCY ACROSS DATASETS", "=" * 50, sep="\n")

def analyze_consistency(metrics_df, name):
    """
    Print each model's mean ± standard deviation of every metric across datasets.

    Rows whose model is 'Naive' are excluded. The remaining rows are grouped
    by 'model', and the mean and standard deviation (pandas' default for
    ``std``) of each metric are taken over that model's rows.

    Parameters
    ----------
    metrics_df : DataFrame
        Needs the columns 'model', 'mae_improvement_pct',
        'mse_improvement_pct' and 'prediction_score'.
    name : str
        Label used in the heading (e.g. "SHORT").

    Returns
    -------
    None
        Output goes to stdout only.
    """
    non_naive = metrics_df[metrics_df['model'] != 'Naive']

    # Per-model mean and std of each metric across datasets.
    # The aggregate is intentionally NOT pre-rounded: the format specs below
    # do the only rounding. Rounding to 3 decimals first would make the
    # 4-decimal Pred Score column always end in 0 and double-round the others.
    model_consistency = non_naive.groupby('model').agg({
        'mae_improvement_pct': ['mean', 'std'],
        'mse_improvement_pct': ['mean', 'std'],
        'prediction_score': ['mean', 'std']
    })

    print(f"\n{name} - Model Consistency (Mean ± Std across datasets):")
    print("Model        MAE Improv %     MSE Improv %     Pred Score")
    print("-" * 60)

    for model in model_consistency.index:
        # Columns form a two-level index: (metric, statistic)
        mae_mean = model_consistency.loc[model, ('mae_improvement_pct', 'mean')]
        mae_std = model_consistency.loc[model, ('mae_improvement_pct', 'std')]
        mse_mean = model_consistency.loc[model, ('mse_improvement_pct', 'mean')]
        mse_std = model_consistency.loc[model, ('mse_improvement_pct', 'std')]
        pred_mean = model_consistency.loc[model, ('prediction_score', 'mean')]
        pred_std = model_consistency.loc[model, ('prediction_score', 'std')]

        print(f"{model:12} {mae_mean:6.2f}±{mae_std:5.2f}    {mse_mean:6.2f}±{mse_std:5.2f}    {pred_mean:6.4f}±{pred_std:6.4f}")

# Per-model mean ± std report for each forecasting horizon
analyze_consistency(metrics_df=short_metrics, name="SHORT")
analyze_consistency(metrics_df=long_metrics, name="LONG")


ADVANCED ANALYSIS

SHORT Forecasting - Best Performers:
---------------------------------------------

Short_70N:
  Best MAE Improv: POCO         (+13.37%)
  Best MSE Improv: POCO         ( +6.84%)
  Best Pred Score: POCO         ( 0.0684)

Short_700N:
  Best MAE Improv: POCO         (+21.74%)
  Best MSE Improv: POCO         (+32.09%)
  Best Pred Score: POCO         ( 0.3209)

Short_7000N:
  Best MAE Improv: POCO         (+25.65%)
  Best MSE Improv: POCO         (+41.86%)
  Best Pred Score: POCO         ( 0.4186)

LONG Forecasting - Best Performers:
---------------------------------------------

Long_70N:
  Best MAE Improv: POCO         (+14.91%)
  Best MSE Improv: POCO         (+11.35%)
  Best Pred Score: POCO         ( 0.1135)

Long_700N:
  Best MAE Improv: POCO         (+23.64%)
  Best MSE Improv: POCO         (+32.75%)
  Best Pred Score: POCO         ( 0.3275)

Long_7000N:
  Best MAE Improv: POCO         (+26.60%)
  Best MSE Improv: POCO         (+43.13%)
  Best Pred Score: POCO  