# Leave One Country Out vs Single country

### Imports

In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import gc
import warnings
from collections import defaultdict

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Input
from tensorflow.keras.layers import Conv1D, Flatten, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model

warnings.filterwarnings("ignore")

In [2]:
### Configuration
# Variables the models are trained to forecast.
TARGET_VARIABLES = ['co2', 'gdp', 'primary_energy_consumption']

# Default window geometry used by the forecasters below.
N_STEPS_IN = 5
N_STEPS_OUT = 3

# Number of trailing samples per country held out as the test set.
TEST_SAMPLES = 9

# Presumably the number of `<name>_lag<k>` columns per variable -- confirm
# against the data export.
MAX_LAGS = 4

# Countries included in the experiments.
G20_COUNTRIES = [
    'United States',
    'China',
    'Japan',
    'Germany',
    'United Kingdom',
    'France',
    'Italy',
    'Canada',
    'Brazil',
    'Russia',
    'India',
    'Australia',
    'Mexico',
    'Indonesia',
    'Turkey',
    'Saudi Arabia',
    'South Africa',
    'Argentina',
    'South Korea',
]

# Explanatory variables used as model inputs.
FEATURES = [
    'fossil_fuel_consumption',
    'energy_per_capita',
    'electricity_generation',
    'population',
    'nuclear_consumption',
    'renewables_consumption',
]

# Grouping of the countries above into two subsets.
DEVELOPED_COUNTRIES = [
    'United States',
    'Japan',
    'Germany',
    'United Kingdom',
    'France',
    'Italy',
    'Canada',
    'Australia',
    'South Korea',
]

DEVELOPING_COUNTRIES = [
    'China',
    'Brazil',
    'Russia',
    'India',
    'Mexico',
    'Indonesia',
    'Turkey',
    'Saudi Arabia',
    'South Africa',
    'Argentina',
]

### Utility Functions

In [3]:
def clear_memory():
    """Reset the Keras backend session, then run a garbage-collection pass."""
    keras_backend = tf.keras.backend
    keras_backend.clear_session()
    gc.collect()

In [4]:
def rmse(y_pred, y_actual):
    """Return the root mean squared error between predictions and actuals.

    Note the argument order: predictions first, actuals second.
    """
    squared_error = mean_squared_error(y_actual, y_pred)
    return np.sqrt(squared_error)

In [5]:
def mase(y_actual, y_pred, period=1):
    """Return the forecast MAE scaled by the MAE of a naive lagged forecast.

    The naive forecast predicts each value of ``y_actual`` with the value
    ``period`` steps earlier in ``y_actual`` itself; a non-positive ``period``
    is treated as a lag of one step.

    Returns:
        ``np.nan`` when ``y_actual`` is too short to form the naive forecast,
        ``0`` when both errors are zero, ``np.inf`` when only the naive error
        is zero, otherwise the ratio ``mae_forecast / mae_naive``.
    """
    forecast_error = mean_absolute_error(y_actual, y_pred)

    # Lag used for the naive benchmark.
    shift = period if period > 0 else 1
    lagged = y_actual[:-shift]
    if len(lagged) == 0:
        return np.nan

    naive_error = mean_absolute_error(y_actual[shift:], lagged)
    if naive_error != 0:
        return forecast_error / naive_error

    # The naive forecast is perfect: the ratio is undefined unless ours is too.
    if forecast_error == 0:
        return 0
    return np.inf

In [6]:
def pct_change_features(data, columns=None):
    """Add percentage-change and backup columns for each requested variable.

    For every name in ``columns`` that exists in ``data``:

    * ``<name>_original`` receives a copy of the raw values, so forecasts can
      later be converted back to the original scale;
    * if ``<name>_lag1`` also exists, ``<name>_pct`` receives
      ``(value - lag1) / lag1``. Rows whose lag is exactly zero get ``0.0``
      instead of a division by zero; rows whose lag is NaN yield NaN.

    Args:
        data: DataFrame holding the raw columns and their ``_lag1`` columns.
        columns: Names to process. Defaults to ``FEATURES + TARGET_VARIABLES``.

    Returns:
        A copy of ``data`` with the extra columns; the input is not modified.
    """
    if columns is None:
        columns = FEATURES + TARGET_VARIABLES

    data = data.copy()

    for feature in columns:
        if feature not in data.columns:
            continue

        # Keep the untouched values for later denormalisation.
        data[f'{feature}_original'] = data[feature].values.copy()

        lag_col = f'{feature}_lag1'
        if lag_col not in data.columns:
            continue

        previous = data[lag_col].values
        current = data[feature].values

        # Only divide where the previous value is non-zero.
        has_base = previous != 0

        # Force a float buffer: zeros_like() would otherwise inherit an integer
        # dtype from integer columns and truncate every fractional change.
        pct_change = np.zeros_like(current, dtype=float)
        pct_change[has_base] = (current[has_base] - previous[has_base]) / previous[has_base]

        data[f'{feature}_pct'] = pct_change

    return data

In [7]:
def denormalise_pct_change(pct_preds, last_actual_value):
    """
    Convert pct_change predictions back to the original scale by compounding:
    value_t = value_{t-1} * (1 + pct_change_t), starting from last_actual_value.
    Returns an array with one reconstructed value per prediction.
    """
    level = last_actual_value
    rebuilt = []

    for change in pct_preds:
        # Each step builds on the previously reconstructed level.
        level = level * (1 + change)
        rebuilt.append(level)

    return np.array(rebuilt)

In [8]:
def prepare_features(data, features, target, use_lags=True, max_lags=None):
    """
    Select the model input columns and derive pct changes between lag columns.

    For every name in ``features`` (plus ``target`` if it is not already
    listed) the raw column is used when present, otherwise ``<name>_pct``.
    With ``use_lags`` enabled, each pair of consecutive lag columns
    ``<name>_lag<k>`` / ``<name>_lag<k+1>`` (k = 1 .. max_lags - 1) produces a
    new column ``<name>_lag<k>_pct = (lag_k - lag_{k+1}) / lag_{k+1}``, with
    ``0.0`` where the older lag is exactly zero.

    NOTE(review): because the raw column is checked first, ``<name>_pct`` is
    only selected when the raw column is absent -- confirm this is intended.

    Args:
        data: DataFrame for one series. The ``_lag<k>_pct`` columns are written
            into this object.
        features: Iterable of feature names; it is not modified.
        target: Name of the target variable.
        use_lags: Whether to derive and include the lag pct columns.
        max_lags: Highest lag index to consider. Defaults to ``MAX_LAGS``.

    Returns:
        ``(data, feature_cols, target_col)`` where ``target_col`` is
        ``<target>_pct`` when that column exists and ``target`` otherwise.
    """
    if max_lags is None:
        max_lags = MAX_LAGS

    feature_cols = []

    all_features = list(features)
    if target not in all_features:
        all_features.append(target)

    for feature in all_features:
        if feature in data.columns:
            feature_cols.append(feature)
        elif f'{feature}_pct' in data.columns:
            feature_cols.append(f'{feature}_pct')

        if not use_lags:
            continue

        # The highest lag has no older neighbour, so it yields no pct column.
        for lag in range(1, max_lags):
            lag_col = f'{feature}_lag{lag}'
            next_lag = f'{feature}_lag{lag + 1}'
            if lag_col not in data.columns or next_lag not in data.columns:
                continue

            lag_values = data[lag_col].values
            next_lag_values = data[next_lag].values

            non_zero_mask = next_lag_values != 0
            # Float buffer on purpose: zeros_like() alone would inherit an
            # integer dtype and truncate fractional changes to zero.
            pct_lag = np.zeros_like(lag_values, dtype=float)
            pct_lag[non_zero_mask] = (
                (lag_values[non_zero_mask] - next_lag_values[non_zero_mask])
                / next_lag_values[non_zero_mask]
            )

            pct_lag_col = f'{feature}_lag{lag}_pct'
            data[pct_lag_col] = pct_lag
            feature_cols.append(pct_lag_col)

    target_col = f'{target}_pct' if f'{target}_pct' in data.columns else target

    return data, feature_cols, target_col

### Data Load

In [9]:
def load_data(save_dir='data_export'):
    """Load the pickled lag DataFrame from ``save_dir`` and print its shape.

    The file read is ``<save_dir>/lag_df_1965.pkl``. Unpickling executes
    arbitrary code, so only point this at files you trust.
    """
    frame = pd.read_pickle(os.path.join(save_dir, 'lag_df_1965.pkl'))
    print(f"Data Shape: {frame.shape}")
    return frame

### Model builds

In [10]:
def build_lstm(input_shape, output_shape, hidden=32):
    """Build and compile a single-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the LSTM layer.
        output_shape: Number of units in the final Dense layer.
        hidden: Number of LSTM units.

    Returns:
        A compiled ``Sequential`` model (Adam, lr 0.01, MSE loss, MAE metric).
    """
    recurrent = LSTM(
        hidden,
        activation='relu',
        input_shape=input_shape,
        kernel_regularizer=l2(0.01),
    )
    head = Dense(output_shape)

    model = Sequential()
    model.add(recurrent)
    model.add(head)

    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse', metrics=['mae'])
    return model

In [11]:
def build_bilstm(input_shape, output_shape, hidden=16):
    """Build and compile a bidirectional LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the Bidirectional wrapper.
        output_shape: Number of units in the final Dense layer.
        hidden: Number of units in the wrapped LSTM.

    Returns:
        A compiled ``Sequential`` model (Adam, lr 0.01, MSE loss, MAE metric).
    """
    inner = LSTM(
        hidden,
        activation='relu',
        kernel_regularizer=l2(0.001),
        recurrent_regularizer=l2(0.01),
    )
    recurrent = Bidirectional(inner, input_shape=input_shape)
    head = Dense(output_shape)

    model = Sequential()
    model.add(recurrent)
    model.add(head)

    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse', metrics=['mae'])
    return model

In [12]:
def build_edlstm(input_shape, output_shape, hidden=16):
    """Build and compile an encoder-decoder LSTM.

    The encoder LSTM output is repeated ``output_shape`` times and decoded by a
    second LSTM that returns sequences; a time-distributed ``Dense(1)`` emits
    one value per decoded step.

    Args:
        input_shape: Shape of one input sample, passed to the encoder LSTM.
        output_shape: Number of decoded steps.
        hidden: Number of units in both LSTM layers.

    Returns:
        A compiled ``Sequential`` model (Adam, lr 0.01, MSE loss, MAE metric).
    """
    encoder = LSTM(
        hidden,
        activation='relu',
        input_shape=input_shape,
        kernel_regularizer=l2(0.01),
    )
    bridge = RepeatVector(output_shape)
    decoder = LSTM(
        hidden,
        activation='relu',
        return_sequences=True,
        kernel_regularizer=l2(0.01),
    )
    head = TimeDistributed(Dense(1))

    model = Sequential()
    for layer in (encoder, bridge, decoder, head):
        model.add(layer)

    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse', metrics=['mae'])
    return model

In [13]:
def build_cnn(input_shape, output_shape, filters=32, hidden=16):
    """Build and compile a 1-D convolutional regressor.

    Args:
        input_shape: Shape of one input sample, passed to the Conv1D layer.
        output_shape: Number of units in the final Dense layer.
        filters: Number of convolution filters.
        hidden: Number of units in the intermediate Dense layer.

    Returns:
        A compiled ``Sequential`` model (Adam, lr 0.01, MSE loss, MAE metric).
    """
    convolution = Conv1D(
        filters=filters,
        kernel_size=3,
        activation='relu',
        input_shape=input_shape,
        padding='same',
        kernel_regularizer=l2(0.01),
    )
    flatten = Flatten()
    dense = Dense(hidden, activation='relu', kernel_regularizer=l2(0.01))
    head = Dense(output_shape)

    model = Sequential()
    for layer in (convolution, flatten, dense, head):
        model.add(layer)

    model.compile(optimizer=Adam(learning_rate=0.01), loss='mse', metrics=['mae'])
    return model

In [None]:
def build_multioutput_model(input_shape, n_features, output_steps, hidden=16):
    """Build and compile an LSTM with two heads: target and future features.

    A shared LSTM feeds two Dense heads:

    * ``y_output``: ``output_steps`` values for the target;
    * ``x_output``: ``output_steps * n_features`` values reshaped to
      ``(output_steps, n_features)``.

    The per-output loss, weight and metric dictionaries are keyed by the names
    of the layers that produce the model outputs. The reshape layer is
    therefore the one named ``x_output``; previously it was named
    ``x_reshape`` while the dictionaries referred to ``x_output``, a key that
    matched no model output.

    Args:
        input_shape: Shape of one input sample.
        n_features: Number of feature values predicted per output step.
        output_steps: Number of steps predicted by each head.
        hidden: Number of LSTM units.

    Returns:
        A compiled functional ``Model`` with outputs ``[y_output, x_output]``.
    """
    inputs = Input(shape=input_shape)

    # Shared encoder.
    lstm_out = LSTM(hidden, activation='relu', return_sequences=False)(inputs)

    # Head for the target.
    y_output = Dense(output_steps, name='y_output')(lstm_out)

    # Head for the features: flat projection, then reshape to (steps, features).
    x_output_flat = Dense(output_steps * n_features, name='x_output_flat')(lstm_out)
    x_output = tf.keras.layers.Reshape((output_steps, n_features), name='x_output')(x_output_flat)

    model = Model(inputs=inputs, outputs=[y_output, x_output])

    model.compile(
        optimizer=Adam(learning_rate=0.01),
        loss={'y_output': 'mse', 'x_output': 'mse'},
        loss_weights={'y_output': 1.0, 'x_output': 0.5},
        metrics={'y_output': 'mae', 'x_output': 'mae'}
    )
    return model

### LOCO
Leave One Country Out

In [15]:
class LOCOForecaster:
    """Train one sequence model and evaluate it with block-wise forecasts.

    The forecaster standardises features and target with its own
    ``StandardScaler`` instances, slices the training arrays into
    ``input_steps`` -> ``output_steps`` windows, fits the network returned by
    ``model_func`` and then forecasts a held-out series in blocks of
    ``output_steps`` values. ``non_recursive_forecast`` fills each input window
    with real rows; ``recursive_forecast`` fills it with rows built from the
    model's own earlier predictions.

    Both forecast methods return a dict with the keys ``predictions_pct``,
    ``actuals_pct``, ``iterations`` (arrays) and ``metrics`` (DataFrame), plus
    ``predictions_actual`` / ``actuals_actual`` when original-scale values
    could be reconstructed. They return ``None`` if the model is not trained.
    """

    def __init__(self, model_func, input_steps=N_STEPS_IN, output_steps=N_STEPS_OUT):
        """
        Args:
            model_func: Callable ``(input_shape, output_steps) -> compiled model``.
            input_steps: Number of rows in each input window.
            output_steps: Number of values predicted per window.
        """
        self.model_func = model_func
        self.input_steps = input_steps
        self.output_steps = output_steps
        self.model = None
        self.X_scaler = StandardScaler()
        self.y_scaler = StandardScaler()
        self.is_trained = False
        self.n_features = None

    def create_sequences(self, X, y):
        """Cut sliding (input window, output window) pairs out of ``X`` and ``y``.

        Window ``i`` pairs ``X[i : i + input_steps]`` with the
        ``output_steps`` values of ``y`` that immediately follow it.

        Returns:
            ``(X_seq, y_seq)``; two empty arrays when ``X`` is shorter than
            ``input_steps + output_steps``.
        """
        span = self.input_steps + self.output_steps
        if len(X) < span:
            return np.array([]), np.array([])

        n_sequences = len(X) - span + 1
        X_seq = np.array([X[i:i + self.input_steps] for i in range(n_sequences)])
        y_seq = np.array([y[i + self.input_steps:i + span] for i in range(n_sequences)])

        # A 1-D X yields 2-D windows; add the trailing feature axis.
        if len(X_seq) > 0 and X_seq.ndim == 2:
            X_seq = X_seq.reshape(len(X_seq), self.input_steps, -1)

        if len(y_seq) > 0 and y_seq.ndim == 1:
            y_seq = y_seq.reshape(len(y_seq), self.output_steps)

        return X_seq, y_seq

    def train_model(self, X_train_pct, y_train_pct):
        """Fit the scalers and the model on the given training arrays.

        Windows are cut from the arrays exactly as given, so if the caller
        stacks several series end to end, windows spanning the joins are
        included in training. If no window can be formed, a message is printed
        and the forecaster stays untrained.

        Args:
            X_train_pct: 2-D feature array (rows x features).
            y_train_pct: 1-D target array aligned with ``X_train_pct``.
        """
        self.n_features = X_train_pct.shape[1]

        X_train_scaled = self.X_scaler.fit_transform(X_train_pct)
        y_train_scaled = self.y_scaler.fit_transform(y_train_pct.reshape(-1, 1)).ravel()

        X_train_seq, y_train_seq = self.create_sequences(X_train_scaled, y_train_scaled)

        if len(X_train_seq) == 0:
            print("Not enough data to create sequences")
            return

        print(f"Training sequences: {len(X_train_seq)}")

        input_shape = (self.input_steps, self.n_features)
        self.model = self.model_func(input_shape, self.output_steps)

        # No validation split is used, so stopping is driven by training loss.
        callbacks = [EarlyStopping(monitor='loss', patience=20, restore_best_weights=True)]

        batch_size = min(16, len(X_train_seq))

        self.model.fit(
            X_train_seq, y_train_seq,
            epochs=100,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=0
        )

        self.is_trained = True

    def calculate_metrics(self, predictions, actuals, interation_num):
        """Return ``{'iteration', 'rmse', 'mase'}`` for one forecast block.

        Both metrics are NaN when either input is empty.
        """
        if len(predictions) == 0 or len(actuals) == 0:
            return {'iteration': interation_num, 'rmse': np.nan, 'mase': np.nan}

        rmse_val = rmse(predictions, actuals)
        mase_val = mase(actuals, predictions)
        return {'iteration': interation_num, 'rmse': rmse_val, 'mase': mase_val}

    def _n_blocks(self, n_test):
        """Number of ``output_steps``-sized blocks needed to cover ``n_test`` values."""
        return -(-n_test // self.output_steps)

    def _select_window(self, X_train_scaled, X_future_scaled, start_idx, train_label, future_label):
        """Return the ``input_steps`` rows preceding future position ``start_idx``.

        Rows are taken from the end of the training rows and, once
        ``start_idx`` is positive, from the first ``start_idx`` future rows.
        Also returns a short description of the window for logging.
        """
        n_from_train = self.input_steps - start_idx

        if start_idx == 0:
            window = X_train_scaled[-self.input_steps:]
            description = f"last {self.input_steps} {train_label} values"
        elif n_from_train > 0:
            window = np.vstack([X_train_scaled[-n_from_train:], X_future_scaled[:start_idx]])
            description = f"last {n_from_train} {train_label} + {future_label}[0:{start_idx}]"
        else:
            window = X_future_scaled[start_idx - self.input_steps:start_idx]
            description = f"{future_label}[{start_idx - self.input_steps}:{start_idx}]"

        return window, description

    def _predict_block(self, X_window):
        """Predict one block; return it in scaled units and in pct-change units."""
        X_input = X_window.reshape(1, self.input_steps, -1)

        # flatten() also collapses the 3-D output of the encoder-decoder model.
        y_pred_scaled = self.model.predict(X_input, verbose=0).flatten()
        y_pred_pct = self.y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
        return y_pred_scaled, y_pred_pct

    @staticmethod
    def _new_log():
        """Return empty accumulators for one forecast run."""
        return {
            'predictions_pct': [],
            'actuals_pct': [],
            'predictions_actual': [],
            'actuals_actual': [],
            'iterations': [],
            'metrics': [],
        }

    def _record_block(self, log, iteration, y_pred_pct, y_actual_pct, last_known_value, y_actual_original):
        """Score one block and append its values to ``log``.

        Metrics are computed on the original scale when ``y_actual_original``
        is given and matches the reconstructed predictions in length;
        otherwise they are computed on the pct-change values.
        """
        pred_pct_cut = y_pred_pct[:len(y_actual_pct)]
        metrics = None

        if y_actual_original is not None:
            y_pred_actual = denormalise_pct_change(pred_pct_cut, last_known_value)
            if len(y_actual_original) == len(y_pred_actual):
                log['predictions_actual'].extend(y_pred_actual)
                log['actuals_actual'].extend(y_actual_original)
                metrics = self.calculate_metrics(y_pred_actual, y_actual_original, iteration)

        if metrics is None:
            metrics = self.calculate_metrics(pred_pct_cut, y_actual_pct, iteration)

        log['predictions_pct'].extend(y_pred_pct)
        log['actuals_pct'].extend(y_actual_pct)
        log['iterations'].extend([iteration] * len(y_pred_pct))
        log['metrics'].append(metrics)
        print(f"    RMSE: {metrics['rmse']:.4f}, MASE: {metrics['mase']:.4f}")

    @staticmethod
    def _package(log):
        """Convert the accumulators of one run into the public result dict."""
        results = {
            'predictions_pct': np.array(log['predictions_pct']),
            'actuals_pct': np.array(log['actuals_pct']),
            'iterations': np.array(log['iterations']),
            'metrics': pd.DataFrame(log['metrics'])
        }

        if log['predictions_actual']:
            results['predictions_actual'] = np.array(log['predictions_actual'])
            results['actuals_actual'] = np.array(log['actuals_actual'])

        return results

    def non_recursive_forecast(self, X_train_pct, y_train_pct, X_test_pct, y_test_pct, y_train_original=None, y_test_original=None):
        """Forecast the test series block by block using real rows as input.

        Block ``k`` predicts test positions ``k * output_steps`` onwards from
        the ``input_steps`` real rows that precede them (training rows first,
        then test rows). Original-scale values are rebuilt from the last real
        value before each block.

        Args:
            X_train_pct: Feature rows preceding the test period.
            y_train_pct: Unused; kept for a signature parallel to training.
            X_test_pct: Feature rows of the test period.
            y_test_pct: Target pct changes of the test period.
            y_train_original: Optional original-scale target for the train rows.
            y_test_original: Optional original-scale target for the test rows.

        Returns:
            The result dict described on the class, or ``None`` if untrained.
        """
        if not self.is_trained:
            print("Model not trained")
            return None

        X_train_scaled = self.X_scaler.transform(X_train_pct)
        X_test_scaled = self.X_scaler.transform(X_test_pct)
        have_original = y_test_original is not None and y_train_original is not None

        log = self._new_log()

        for iteration in range(self._n_blocks(len(y_test_pct))):
            start_idx = iteration * self.output_steps
            X_window, description = self._select_window(
                X_train_scaled, X_test_scaled, start_idx, 'train', 'test'
            )
            if len(X_window) != self.input_steps:
                print(f"    ERROR: Not enough rows for window (have {len(X_window)}, need {self.input_steps})")
                break
            print(f"    Iter {iteration}: Using {description}")

            _, y_pred_pct = self._predict_block(X_window)

            end_idx = min(start_idx + self.output_steps, len(y_test_pct))
            y_actual_pct = y_test_pct[start_idx:end_idx]

            last_known_value = None
            y_actual_original = None
            if have_original:
                # Anchor on the last real value observed before this block.
                if iteration == 0:
                    last_known_value = y_train_original[-1]
                else:
                    last_known_value = y_test_original[start_idx - 1]
                y_actual_original = y_test_original[start_idx:end_idx]

            self._record_block(log, iteration, y_pred_pct, y_actual_pct, last_known_value, y_actual_original)

        return self._package(log)

    def recursive_forecast(self, X_train, y_train, X_test, y_test, y_train_original=None, y_test_original=None):
        """Forecast the test series block by block using predicted rows as input.

        After each block, one synthetic input row per predicted step is built:
        slot 0 holds the scaled prediction and the remaining slots are copied
        from the scaled test row at the same position (or the last test row
        once the test rows run out). Later windows draw on these rows instead
        of real test rows. Original-scale values are rebuilt by compounding
        from the last reconstructed prediction.

        NOTE(review): slot 0 receives a value scaled with ``y_scaler`` while
        column 0 of ``X`` was scaled with ``X_scaler``, and the other slots
        reuse real test features -- verify that column 0 of ``X`` is meant to
        be the target and that reusing test features is intended.

        Args:
            X_train: Feature rows preceding the test period.
            y_train: Unused; kept for a signature parallel to training.
            X_test: Feature rows of the test period.
            y_test: Target pct changes of the test period.
            y_train_original: Optional original-scale target for the train rows.
            y_test_original: Optional original-scale target for the test rows.

        Returns:
            The result dict described on the class, or ``None`` if untrained.
        """
        if not self.is_trained:
            print("Model not trained")
            return None

        X_train_scaled = self.X_scaler.transform(X_train)
        X_test_scaled = self.X_scaler.transform(X_test)
        have_original = y_test_original is not None and y_train_original is not None

        log = self._new_log()
        synthetic_rows = []

        for iteration in range(self._n_blocks(len(y_test))):
            start_idx = iteration * self.output_steps
            X_window, description = self._select_window(
                X_train_scaled, np.array(synthetic_rows), start_idx, 'training', 'pred'
            )
            if len(X_window) != self.input_steps:
                print(f"    ERROR: Not enough rows for window (have {len(X_window)}, need {self.input_steps})")
                break
            print(f"    Iter {iteration}: Using {description}")

            y_pred_scaled, y_pred_pct = self._predict_block(X_window)

            # Build one synthetic input row per predicted step.
            for step in range(self.output_steps):
                row = np.zeros(self.n_features)
                row[0] = float(y_pred_scaled[step])
                source_idx = min(start_idx + step, len(X_test_scaled) - 1)
                row[1:] = X_test_scaled[source_idx, 1:]
                synthetic_rows.append(row)

            end_idx = min(start_idx + self.output_steps, len(y_test))
            y_actual_pct = y_test[start_idx:end_idx]

            last_known_value = None
            y_actual_original = None
            if have_original:
                # Compound from the latest reconstructed prediction, falling
                # back to the last training value when none exists yet.
                if log['predictions_actual']:
                    last_known_value = log['predictions_actual'][-1]
                else:
                    last_known_value = y_train_original[-1]
                y_actual_original = y_test_original[start_idx:end_idx]

            self._record_block(log, iteration, y_pred_pct, y_actual_pct, last_known_value, y_actual_original)

        return self._package(log)

In [16]:
class LOCOComparison:
    """Leave-one-country-out experiment: train on all other countries, test on one."""

    def __init__(self, countries, features, target):
        """
        Args:
            countries: All country names taking part in the experiment.
            features: Feature names passed to ``prepare_features``.
            target: Name of the target variable.
        """
        self.countries = countries
        self.features = features
        self.target = target

    def _country_arrays(self, data, country):
        """Return ``(X, y, y_original)`` for one country, incomplete rows dropped.

        Rows are ordered by ``year``. ``y_original`` is ``None`` when the
        ``<target>_original`` column is missing.
        """
        frame = data[data['country'] == country].sort_values('year')
        frame, feature_cols, target_col = prepare_features(frame, self.features, self.target, use_lags=True)

        original_target_col = f'{self.target}_original'
        has_original = original_target_col in frame.columns

        valid_cols = feature_cols + [target_col]
        if has_original:
            valid_cols.append(original_target_col)

        valid = frame[valid_cols].dropna()

        X = valid[feature_cols].values
        y = valid[target_col].values
        y_original = valid[original_target_col].values if has_original else None
        return X, y, y_original

    def run_comparison(self, data, test_country, model_func=build_lstm):
        """Train on every country except ``test_country`` and forecast its last samples.

        The rows of all training countries are stacked end to end and passed
        to ``LOCOForecaster.train_model``. The last ``TEST_SAMPLES`` valid rows
        of the test country form the test set; its earlier rows only supply
        the input window for the first forecast.

        NOTE(review): because the training rows are stacked before windowing,
        training windows can span two countries -- confirm this is acceptable.

        Args:
            data: DataFrame with ``country``, ``year``, feature, target and lag columns.
            test_country: Country held out of training.
            model_func: Model builder passed to ``LOCOForecaster``.

        Returns:
            Dict with the keys ``non_recursive`` and/or ``recursive`` (whichever
            forecasts succeeded), or ``None`` when data is insufficient.
        """
        print(f"Leave-One-Country-Out Comparison for: {test_country}")

        data = pct_change_features(data)

        # Training data: every valid row of every other country.
        X_train_list = []
        y_train_list = []
        for country in self.countries:
            if country == test_country:
                continue
            X, y, _ = self._country_arrays(data, country)
            if len(X) > 0:
                X_train_list.append(X)
                y_train_list.append(y)

        if not X_train_list:
            print("No training data available")
            return None

        X_train_all_pct = np.vstack(X_train_list)
        y_train_all_pct = np.hstack(y_train_list)

        print(f"Total training samples from other countries: {len(X_train_all_pct)}")

        # Held-out country.
        X_test_country_pct, y_test_country_pct, y_test_country_original = self._country_arrays(data, test_country)

        if len(X_test_country_pct) == 0:
            print(f"No valid data for test country {test_country}")
            return None

        n_test = TEST_SAMPLES
        n_train = len(X_test_country_pct) - n_test

        # The first forecast window needs N_STEPS_IN rows before the test set.
        if n_train < N_STEPS_IN:
            print(f"Not enough training data for {test_country}")
            return None

        X_train_from_test_pct = X_test_country_pct[:n_train]
        y_train_from_test_pct = y_test_country_pct[:n_train]
        X_test_pct = X_test_country_pct[n_train:n_train + n_test]
        y_test_pct = y_test_country_pct[n_train:n_train + n_test]

        if y_test_country_original is not None:
            y_train_from_test_original = y_test_country_original[:n_train]
            y_test_original = y_test_country_original[n_train:n_train + n_test]
        else:
            y_train_from_test_original = None
            y_test_original = None

        print(f"Test country train samples: {len(X_train_from_test_pct)}")
        print(f"Test country test samples: {len(X_test_pct)}")

        forecaster = LOCOForecaster(model_func)

        # The model only ever sees the other countries during training.
        forecaster.train_model(X_train_all_pct, y_train_all_pct)

        results = {}

        non_recursive_results = forecaster.non_recursive_forecast(
            X_train_from_test_pct, y_train_from_test_pct,
            X_test_pct, y_test_pct,
            y_train_from_test_original, y_test_original
        )
        if non_recursive_results is not None:
            results['non_recursive'] = non_recursive_results

        recursive_results = forecaster.recursive_forecast(
            X_train_from_test_pct, y_train_from_test_pct,
            X_test_pct, y_test_pct,
            y_train_from_test_original, y_test_original
        )
        if recursive_results is not None:
            results['recursive'] = recursive_results

        self.print_summary(results, test_country)

        return results

    def print_summary(self, results, country):
        """Print overall RMSE/MASE per strategy, preferring original-scale values."""
        print(f"\nSummary for {country}")

        for strategy in ['non_recursive', 'recursive']:
            if strategy not in results:
                continue
            res = results[strategy]

            if 'predictions_actual' in res and len(res['predictions_actual']) > 0:
                preds = res['predictions_actual']
                acts = res['actuals_actual']
                data_type = "(actual values)"
            else:
                preds = res['predictions_pct']
                acts = res['actuals_pct']
                data_type = "(% change)"

            if len(preds) > 0:
                overall_rmse = rmse(preds, acts)
                overall_mase = mase(acts, preds)

                strategy_name = "Non-Recursive" if strategy == 'non_recursive' else "Recursive"
                print(f"\n{strategy_name} {data_type}:")
                print(f"  Overall RMSE: {overall_rmse:.4f}")
                print(f"  Overall MASE: {overall_mase:.4f}")
                print(f"  Predictions made: {len(preds)}")

### Single Country

In [17]:
class SingleCountryModel:
    """Baseline experiment: train and test a model on one country's own history."""

    def __init__(self, features, target):
        """
        Args:
            features: Feature names passed to ``prepare_features``.
            target: Name of the target variable.
        """
        self.features = features
        self.target = target

    def run_single_country(self, data, country, model_func=build_lstm):
        """Fit a model on ``country`` alone and forecast its last ``TEST_SAMPLES`` rows.

        Returns:
            Dict with the keys ``non_recursive`` and/or ``recursive`` (whichever
            forecasts succeeded), or ``None`` when data is insufficient.
        """
        print(f"\nSingle Country Model for {country}")

        enriched = pct_change_features(data)
        rows = enriched[enriched['country'] == country].sort_values('year')
        rows, feature_cols, target_col = prepare_features(rows, self.features, self.target, use_lags=True)

        original_target_col = f'{self.target}_original'
        has_original = original_target_col in rows.columns

        wanted = [*feature_cols, target_col]
        if has_original:
            wanted.append(original_target_col)

        valid_data = rows[wanted].dropna()
        if len(valid_data) == 0:
            print(f"No valid data for {country}")
            return None

        # Chronological split: the last TEST_SAMPLES rows are held out.
        n_train = len(valid_data) - TEST_SAMPLES
        if n_train < N_STEPS_IN + N_STEPS_OUT:
            print(f"Not enough training data for {country}")
            return None

        X_pct = valid_data[feature_cols].values
        y_pct = valid_data[target_col].values

        X_train_pct, X_test_pct = X_pct[:n_train], X_pct[n_train:]
        y_train_pct, y_test_pct = y_pct[:n_train], y_pct[n_train:]

        if has_original:
            y_original = valid_data[original_target_col].values
            y_train_original, y_test_original = y_original[:n_train], y_original[n_train:]
        else:
            y_train_original = None
            y_test_original = None

        print(f"Training samples: {len(X_train_pct)}")
        print(f"Test samples: {len(X_test_pct)}")

        forecaster = LOCOForecaster(model_func)
        forecaster.train_model(X_train_pct, y_train_pct)

        forecast_args = (
            X_train_pct, y_train_pct,
            X_test_pct, y_test_pct,
            y_train_original, y_test_original,
        )

        results = {}

        outcome = forecaster.non_recursive_forecast(*forecast_args)
        if outcome is not None:
            results['non_recursive'] = outcome

        outcome = forecaster.recursive_forecast(*forecast_args)
        if outcome is not None:
            results['recursive'] = outcome

        self.print_summary(results, country)

        return results

    def print_summary(self, results, country):
        """Print overall RMSE/MASE per strategy, preferring original-scale values."""
        print(f"\nSingle Country Summary for {country}")

        display_names = (('non_recursive', "Non-Recursive"), ('recursive', "Recursive"))

        for strategy, strategy_name in display_names:
            if strategy not in results:
                continue
            res = results[strategy]

            use_actual = 'predictions_actual' in res and len(res['predictions_actual']) > 0
            if use_actual:
                preds, acts = res['predictions_actual'], res['actuals_actual']
                data_type = "(actual values)"
            else:
                preds, acts = res['predictions_pct'], res['actuals_pct']
                data_type = "(% change)"

            if len(preds) == 0:
                continue

            overall_rmse = rmse(preds, acts)
            overall_mase = mase(acts, preds)

            print(f"\n{strategy_name} {data_type}:")
            print(f"  Overall RMSE: {overall_rmse:.4f}")
            print(f"  Overall MASE: {overall_mase:.4f}")
            print(f"  Predictions made: {len(preds)}")

### Visualisation

In [18]:
def plot_model_comparison(results, country, target, save_dir='data_export', use_actual_values=True):
    """Draw and save a 2x4 grid comparing LOCO and single-country forecasts.

    Each row is one model type (``'loco'`` then ``'single'``). Each strategy
    (``'non_recursive'`` then ``'recursive'``) occupies two adjacent columns:
    predictions vs. actuals on the left, residual bars with an inset of
    per-iteration RMSE on the right. Model types or strategies missing from
    ``results`` leave their axes untouched.

    Args:
        results: Mapping ``model_type -> strategy -> result dict``. A result
            dict provides ``predictions_pct``/``actuals_pct``, optionally
            ``predictions_actual``/``actuals_actual``, and a ``metrics``
            DataFrame (columns ``iteration`` and ``rmse`` are read when it is
            not empty).
        country: Country name, used in the title and the file name.
        target: Target variable name, used in labels and the file name.
        save_dir: Base directory; the PNG goes to ``<save_dir>/04_plots``.
        use_actual_values: When True, plot actual-scale values if available;
            otherwise plot the percentage-change series.

    Returns:
        The matplotlib figure.
    """
    plot_dir = os.path.join(save_dir, '04_plots')
    os.makedirs(plot_dir, exist_ok=True)

    fig, axes = plt.subplots(2, 4, figsize=(20, 10))
    fig.suptitle(f'{country} - {target}: LOCO vs Single Country Model Comparison', fontsize=16, y=1)

    model_rows = (
        ('loco', 'Leave-One-Country-Out'),
        ('single', 'Single Country'),
    )
    strategy_cols = (
        ('non_recursive', 'Non-Recursive'),
        ('recursive', 'Recursive'),
    )

    for row, (model_type, model_label) in enumerate(model_rows):
        if model_type not in results:
            continue

        for col, (strategy, strategy_name) in enumerate(strategy_cols):
            if strategy not in results[model_type]:
                continue

            outcome = results[model_type][strategy]

            # Pick the series to plot: actual scale if requested and present.
            show_actual = (
                use_actual_values
                and 'predictions_actual' in outcome
                and len(outcome['predictions_actual']) > 0
            )
            if show_actual:
                predictions = outcome['predictions_actual']
                actuals = outcome['actuals_actual']
                value_label = f'{target} (Actual Values)'
            else:
                predictions = outcome['predictions_pct']
                actuals = outcome['actuals_pct']
                value_label = f'{target} (% Change)'

            # Left panel of the pair: predicted vs. actual series.
            series_ax = axes[row, col * 2]
            series_ax.plot(actuals, label='Actual', alpha=0.8, linewidth=2, color='blue')
            series_ax.plot(predictions, label='Predicted', alpha=0.8, linewidth=2, color='orange')
            series_ax.set_title(f'{model_label} - {strategy_name}')
            series_ax.set_xlabel('Time Step')
            series_ax.set_ylabel(value_label)
            series_ax.legend(loc='upper left')

            # Faint separators after the 3rd and 6th points
            # (presumably forecast-window boundaries -- TODO confirm).
            n_points = len(predictions)
            if n_points >= 3:
                series_ax.axvline(x=2.5, color='gray', linestyle=':', alpha=0.3)
            if n_points >= 6:
                series_ax.axvline(x=5.5, color='gray', linestyle=':', alpha=0.3)

            # Right panel of the pair: signed prediction errors.
            errors = predictions - actuals
            error_ax = axes[row, col * 2 + 1]
            bar_colors = ['red' if err > 0 else 'blue' for err in errors]
            error_ax.bar(range(len(errors)), errors, alpha=0.7, color=bar_colors)
            error_ax.axhline(y=0, color='black', linestyle='--', alpha=0.5)
            error_ax.set_title(f'Residuals - {strategy_name}')
            error_ax.set_xlabel('Time Step')
            error_ax.set_ylabel('Prediction Error')

            # Inset in the residual panel: RMSE per iteration.
            per_iteration = outcome['metrics']
            if not per_iteration.empty:
                inset_ax = error_ax.inset_axes([0.6, 0.6, 0.35, 0.35])
                inset_ax.bar(per_iteration['iteration'], per_iteration['rmse'], color='coral', alpha=0.7)
                inset_ax.set_xlabel('Iter', fontsize=8)
                inset_ax.set_ylabel('RMSE', fontsize=8)
                inset_ax.tick_params(labelsize=6)

            # Overall metrics as a text box on the series panel.
            if len(predictions) > 0:
                overall_rmse = rmse(predictions, actuals)
                overall_mase = mase(actuals, predictions)
                series_ax.text(
                    0.02, 0.98,
                    f'RMSE: {overall_rmse:.4f}\nMASE: {overall_mase:.4f}',
                    transform=series_ax.transAxes, va='top', ha='left',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7),
                    fontsize=9,
                )

    plt.tight_layout()

    # Save plot
    plt.savefig(
        os.path.join(plot_dir, f'{country}_{target}_model_comparison.png'),
        dpi=150,
        bbox_inches='tight',
    )

    return fig

In [19]:
def plot_comparison_summary(summary_df, target, save_dir='data_export'):
    """Plot and save a two-panel LOCO-vs-single RMSE summary across countries.

    Left panel: grouped RMSE bars per country, one bar for each
    (model type, strategy) combination. Right panel: percentage RMSE
    improvement of LOCO over the single-country model for each
    country/strategy pair where both models have a row.

    Args:
        summary_df: DataFrame with at least the columns ``country``,
            ``strategy``, ``model_type`` and ``rmse``.
        target: Target variable name; used in the title and the file name.
        save_dir: Base directory; the PNG is written to
            ``<save_dir>/04_plots/<target>_comparison_summary.png``.

    Returns:
        The matplotlib figure.
    """
    plot_dir = os.path.join(save_dir, '04_plots')
    os.makedirs(plot_dir, exist_ok=True)

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle(f'Model Comparison Summary - Target: {target}', fontsize=14)

    countries = summary_df['country'].unique()
    strategies = ['non_recursive', 'recursive']

    def first_rmse(country, strategy, model_type):
        """Return the first RMSE for the combination, or None if absent."""
        matches = summary_df[(summary_df['country'] == country) &
                             (summary_df['strategy'] == strategy) &
                             (summary_df['model_type'] == model_type)]['rmse'].values
        return matches[0] if len(matches) > 0 else None

    # Plot 1: RMSE comparison by country and strategy
    ax1 = axes[0]

    x = np.arange(len(countries))
    width = 0.3            # horizontal room reserved for one strategy's pair
    bar_width = width / 2  # each strategy shows a LOCO bar and a Single bar

    for i, strategy in enumerate(strategies):
        loco_rmse = []
        single_rmse = []

        for country in countries:
            loco_val = first_rmse(country, strategy, 'loco')
            single_val = first_rmse(country, strategy, 'single')

            # Missing combinations are drawn as zero-height bars.
            loco_rmse.append(0 if loco_val is None else loco_val)
            single_rmse.append(0 if single_val is None else single_val)

        # Centre of this strategy's pair relative to the country tick.
        offset = width * (i - 0.5)

        # Bars are centred on their x position, so the two bars of a pair
        # must sit half a bar width either side of the pair centre. Shifting
        # by a full bar width (the previous behaviour) put "Single
        # non-recursive" and "LOCO recursive" on the same spot.
        ax1.bar(x + offset - bar_width / 2, loco_rmse, bar_width,
                label=f'LOCO {strategy.replace("_", "-")}',
                alpha=0.8, color='blue' if i == 0 else 'darkblue')
        ax1.bar(x + offset + bar_width / 2, single_rmse, bar_width,
                label=f'Single {strategy.replace("_", "-")}',
                alpha=0.8, color='orange' if i == 0 else 'darkorange')

    ax1.set_xlabel('Country')
    ax1.set_ylabel('RMSE')
    ax1.set_title('RMSE Comparison by Country and Strategy')
    ax1.set_xticks(x)
    ax1.set_xticklabels(countries, rotation=45, ha='right')
    ax1.legend(loc='upper left', fontsize=9)

    # Plot 2: Percentage improvement of LOCO over Single Country
    ax2 = axes[1]

    improvement_data = []

    for country in countries:
        for strategy in strategies:
            loco_val = first_rmse(country, strategy, 'loco')
            single_val = first_rmse(country, strategy, 'single')

            if loco_val is None or single_val is None:
                continue
            # A relative improvement is undefined against a zero baseline.
            if single_val == 0:
                continue

            # Percentage improvement (positive means LOCO is better)
            improvement = ((single_val - loco_val) / single_val) * 100

            improvement_data.append({
                'country': country,
                'strategy': strategy.replace('_', '-'),
                'improvement': improvement
            })

    if improvement_data:
        improvement_df = pd.DataFrame(improvement_data)

        # Grouped bar chart
        improvement_pivot = improvement_df.pivot(
            index='country',
            columns='strategy',
            values='improvement'
        )

        improvement_pivot.plot(kind='bar', ax=ax2, width=0.7, alpha=0.8,
                               color=['green', 'darkgreen'])

        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax2.set_xlabel('Country')
        ax2.set_ylabel('Improvement (%)')
        ax2.set_title('LOCO Performance vs Single Country Model\n(Positive = LOCO Better)')
        ax2.legend(title='Strategy', loc='upper right')
        ax2.set_xticklabels(ax2.get_xticklabels(), rotation=45, ha='right')

        for container in ax2.containers:
            ax2.bar_label(container, fmt='%.1f%%', padding=3, fontsize=8)

    plt.tight_layout()

    # Save plot
    plot_path = os.path.join(plot_dir, f'{target}_comparison_summary.png')
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')

    return fig

### Save Comparison

In [20]:
def run_developed_comparison(countries_to_test=None, target='co2', model_func=build_lstm, save_dir='data_export/04_results'):
    """Compare LOCO and single-country models over the developed-country group.

    For every test country, runs the leave-one-country-out model (trained over
    ``DEVELOPED_COUNTRIES``) and the single-country model, collects overall
    RMSE/MASE per model type and strategy, prints a per-country comparison and
    writes the summary to ``<save_dir>/comparison_summary_<target>.csv``.

    Args:
        countries_to_test: Countries to evaluate; defaults to
            ``DEVELOPED_COUNTRIES`` when None.
        target: Name of the target variable.
        model_func: Model builder passed through to both runners.
        save_dir: Directory for the summary CSV (created if missing).

    Returns:
        Tuple ``(all_results, summary_df)``: ``all_results`` maps country to
        its ``{'loco': ..., 'single': ...}`` results; ``summary_df`` is None
        when no predictions were produced.
    """
    os.makedirs(save_dir, exist_ok=True)

    print("\nLOCO vs SINGLE DEVELOPED COUNTRY MODELS")
    print(f"Target: {target}")
    print(f"Model: {model_func.__name__}")
    print(f"Test size: {TEST_SAMPLES} samples")

    data = load_data()

    if countries_to_test is None:
        countries_to_test = DEVELOPED_COUNTRIES

    loco_runner = LOCOComparison(DEVELOPED_COUNTRIES, FEATURES, target)
    single_runner = SingleCountryModel(FEATURES, target)

    model_kinds = ('loco', 'single')
    strategy_kinds = ('non_recursive', 'recursive')

    all_results = {}
    summary_rows = []

    for test_country in countries_to_test:
        print(f"TESTING COUNTRY: {test_country}")

        per_country = {}

        # LOCO model
        print("\nLEAVE-ONE-COUNTRY-OUT MODEL")
        loco_out = loco_runner.run_comparison(data, test_country, model_func)
        if loco_out is not None:
            per_country['loco'] = loco_out

        # Single Country model
        print("\nSINGLE COUNTRY MODEL")
        single_out = single_runner.run_single_country(data, test_country, model_func)
        if single_out is not None:
            per_country['single'] = single_out

        all_results[test_country] = per_country

        # Collect one summary row per (model type, strategy) with predictions.
        for model_type in model_kinds:
            if model_type not in per_country:
                continue
            for strategy in strategy_kinds:
                if strategy not in per_country[model_type]:
                    continue
                outcome = per_country[model_type][strategy]

                # Prefer actual-scale values, fall back to % change.
                if 'predictions_actual' in outcome and len(outcome['predictions_actual']) > 0:
                    forecast = outcome['predictions_actual']
                    truth = outcome['actuals_actual']
                    data_type = 'actual'
                else:
                    forecast = outcome['predictions_pct']
                    truth = outcome['actuals_pct']
                    data_type = 'pct'

                if len(forecast) == 0:
                    continue

                summary_rows.append({
                    'country': test_country,
                    'model_type': model_type,
                    'strategy': strategy,
                    'data_type': data_type,
                    'rmse': rmse(forecast, truth),
                    'mase': mase(truth, forecast),
                    'n_predictions': len(forecast)
                })

    # Final comparison
    summary_df = None
    if summary_rows:
        summary_df = pd.DataFrame(summary_rows)

        for country in countries_to_test:
            country_rows = summary_df[summary_df['country'] == country]
            if country_rows.empty:
                continue
            print(f"\n{country}:")

            # Compare LOCO vs Single for each strategy
            for strategy in strategy_kinds:
                by_strategy = country_rows[country_rows['strategy'] == strategy]
                loco_match = by_strategy[by_strategy['model_type'] == 'loco']
                single_match = by_strategy[by_strategy['model_type'] == 'single']

                if loco_match.empty or single_match.empty:
                    continue

                loco_rmse = loco_match.iloc[0]['rmse']
                single_rmse = single_match.iloc[0]['rmse']

                print(f"\n  {strategy.upper()}:")
                print(f"    LOCO RMSE:    {loco_rmse:.4f}")
                print(f"    Single RMSE: {single_rmse:.4f}")

                # Both percentages are relative to the single-country RMSE.
                if loco_rmse < single_rmse:
                    loco_better = ((single_rmse - loco_rmse) / single_rmse) * 100
                    print(f"    LOCO is {loco_better:.1f}% better")
                else:
                    single_better = ((loco_rmse - single_rmse) / single_rmse) * 100
                    print(f"    Single Country is {single_better:.1f}% better")

        # Save summary
        summary_df.to_csv(
            os.path.join(save_dir, f'comparison_summary_{target}.csv'),
            index=False,
        )

    clear_memory()

    return all_results, summary_df

In [21]:
def run_developing_comparison(countries_to_test=None, target='co2', model_func=build_lstm, save_dir='data_export/04_results'):
    """Compare LOCO and single-country models over the developing-country group.

    For every test country, runs the leave-one-country-out model (trained over
    ``DEVELOPING_COUNTRIES``) and the single-country model, collects overall
    RMSE/MASE per model type and strategy, prints a per-country comparison and
    writes the summary to ``<save_dir>/comparison_summary_<target>.csv``.

    Args:
        countries_to_test: Countries to evaluate; defaults to
            ``DEVELOPING_COUNTRIES`` when None.
        target: Name of the target variable.
        model_func: Model builder passed through to both runners.
        save_dir: Directory for the summary CSV (created if missing).

    Returns:
        Tuple ``(all_results, summary_df)``: ``all_results`` maps country to
        its ``{'loco': ..., 'single': ...}`` results; ``summary_df`` is None
        when no predictions were produced.
    """
    os.makedirs(save_dir, exist_ok=True)

    print("\nLOCO vs SINGLE DEVELOPING COUNTRY MODELS")
    print(f"Target: {target}")
    print(f"Model: {model_func.__name__}")
    print(f"Test size: {TEST_SAMPLES} samples")

    data = load_data()

    if countries_to_test is None:
        countries_to_test = DEVELOPING_COUNTRIES

    loco_runner = LOCOComparison(DEVELOPING_COUNTRIES, FEATURES, target)
    single_runner = SingleCountryModel(FEATURES, target)

    model_kinds = ('loco', 'single')
    strategy_kinds = ('non_recursive', 'recursive')

    all_results = {}
    summary_rows = []

    for test_country in countries_to_test:
        print(f"TESTING COUNTRY: {test_country}")

        per_country = {}

        # LOCO model
        print("\nLEAVE-ONE-COUNTRY-OUT MODEL")
        loco_out = loco_runner.run_comparison(data, test_country, model_func)
        if loco_out is not None:
            per_country['loco'] = loco_out

        # Single Country model
        print("\nSINGLE COUNTRY MODEL")
        single_out = single_runner.run_single_country(data, test_country, model_func)
        if single_out is not None:
            per_country['single'] = single_out

        all_results[test_country] = per_country

        # Collect one summary row per (model type, strategy) with predictions.
        for model_type in model_kinds:
            if model_type not in per_country:
                continue
            for strategy in strategy_kinds:
                if strategy not in per_country[model_type]:
                    continue
                outcome = per_country[model_type][strategy]

                # Prefer actual-scale values, fall back to % change.
                if 'predictions_actual' in outcome and len(outcome['predictions_actual']) > 0:
                    forecast = outcome['predictions_actual']
                    truth = outcome['actuals_actual']
                    data_type = 'actual'
                else:
                    forecast = outcome['predictions_pct']
                    truth = outcome['actuals_pct']
                    data_type = 'pct'

                if len(forecast) == 0:
                    continue

                summary_rows.append({
                    'country': test_country,
                    'model_type': model_type,
                    'strategy': strategy,
                    'data_type': data_type,
                    'rmse': rmse(forecast, truth),
                    'mase': mase(truth, forecast),
                    'n_predictions': len(forecast)
                })

    # Final comparison
    summary_df = None
    if summary_rows:
        summary_df = pd.DataFrame(summary_rows)

        for country in countries_to_test:
            country_rows = summary_df[summary_df['country'] == country]
            if country_rows.empty:
                continue
            print(f"\n{country}:")

            # Compare LOCO vs Single for each strategy
            for strategy in strategy_kinds:
                by_strategy = country_rows[country_rows['strategy'] == strategy]
                loco_match = by_strategy[by_strategy['model_type'] == 'loco']
                single_match = by_strategy[by_strategy['model_type'] == 'single']

                if loco_match.empty or single_match.empty:
                    continue

                loco_rmse = loco_match.iloc[0]['rmse']
                single_rmse = single_match.iloc[0]['rmse']

                print(f"\n  {strategy.upper()}:")
                print(f"    LOCO RMSE:    {loco_rmse:.4f}")
                print(f"    Single RMSE: {single_rmse:.4f}")

                # Both percentages are relative to the single-country RMSE.
                if loco_rmse < single_rmse:
                    loco_better = ((single_rmse - loco_rmse) / single_rmse) * 100
                    print(f"    LOCO is {loco_better:.1f}% better")
                else:
                    single_better = ((loco_rmse - single_rmse) / single_rmse) * 100
                    print(f"    Single Country is {single_better:.1f}% better")

        # Save summary
        summary_df.to_csv(
            os.path.join(save_dir, f'comparison_summary_{target}.csv'),
            index=False,
        )

    clear_memory()

    return all_results, summary_df

### Running

In [22]:
def co2_developed_run():
    """Run, plot and print the CO2 comparison for developed countries.

    Uses ``build_lstm`` via ``run_developed_comparison``, draws one comparison
    figure per country plus a summary figure, then prints the RMSE of both
    model types and the relative improvement of LOCO over the single model.

    Returns:
        The ``(results, summary)`` pair from ``run_developed_comparison``.
    """
    target_name = 'co2'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developed'

    results, summary = run_developed_comparison(
        countries_to_test=DEVELOPED_COUNTRIES,
        target=target_name,
        model_func=build_lstm,
        save_dir='data_export/04_results/developed'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPED_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPED_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [23]:
def co2_developing_run():
    """Run, plot and print the CO2 comparison for developing countries.

    Uses ``build_lstm`` via ``run_developing_comparison``, draws one
    comparison figure per country plus a summary figure, then prints the RMSE
    of both model types and the relative improvement of LOCO over the single
    model.

    Returns:
        The ``(results, summary)`` pair from ``run_developing_comparison``.
    """
    target_name = 'co2'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developing'

    results, summary = run_developing_comparison(
        countries_to_test=DEVELOPING_COUNTRIES,
        target=target_name,
        model_func=build_lstm,
        save_dir='data_export/04_results/developing'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPING_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPING_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [24]:
def gdp_developed_run():
    """Run, plot and print the GDP comparison for developed countries.

    Uses ``build_cnn`` via ``run_developed_comparison``, draws one comparison
    figure per country plus a summary figure, then prints the RMSE of both
    model types and the relative improvement of LOCO over the single model.

    Returns:
        The ``(results, summary)`` pair from ``run_developed_comparison``.
    """
    target_name = 'gdp'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developed'

    results, summary = run_developed_comparison(
        countries_to_test=DEVELOPED_COUNTRIES,
        target=target_name,
        model_func=build_cnn,
        save_dir='data_export/04_results/developed'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPED_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPED_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [25]:
def gdp_developing_run():
    """Run, plot and print the GDP comparison for developing countries.

    Uses ``build_cnn`` via ``run_developing_comparison``, draws one comparison
    figure per country plus a summary figure, then prints the RMSE of both
    model types and the relative improvement of LOCO over the single model.

    Returns:
        The ``(results, summary)`` pair from ``run_developing_comparison``.
    """
    target_name = 'gdp'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developing'

    results, summary = run_developing_comparison(
        countries_to_test=DEVELOPING_COUNTRIES,
        target=target_name,
        model_func=build_cnn,
        save_dir='data_export/04_results/developing'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPING_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPING_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [26]:
def energy_developed_run():
    """Run, plot and print the primary-energy comparison for developed countries.

    Uses ``build_bilstm`` via ``run_developed_comparison``, draws one
    comparison figure per country plus a summary figure, then prints the RMSE
    of both model types and the relative improvement of LOCO over the single
    model.

    Returns:
        The ``(results, summary)`` pair from ``run_developed_comparison``.
    """
    target_name = 'primary_energy_consumption'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developed'

    results, summary = run_developed_comparison(
        countries_to_test=DEVELOPED_COUNTRIES,
        target=target_name,
        model_func=build_bilstm,
        save_dir='data_export/04_results/developed'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPED_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPED_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [27]:
def energy_developing_run():
    """Run, plot and print the primary-energy comparison for developing countries.

    Uses ``build_bilstm`` via ``run_developing_comparison``, draws one
    comparison figure per country plus a summary figure, then prints the RMSE
    of both model types and the relative improvement of LOCO over the single
    model.

    Returns:
        The ``(results, summary)`` pair from ``run_developing_comparison``.
    """
    target_name = 'primary_energy_consumption'
    # NOTE(review): the plot helpers append '04_plots' to save_dir themselves,
    # so files end up under '<plot_dir>/04_plots' -- confirm this is intended.
    plot_dir = 'data_export/04_plots/developing'

    results, summary = run_developing_comparison(
        countries_to_test=DEVELOPING_COUNTRIES,
        target=target_name,
        model_func=build_bilstm,
        save_dir='data_export/04_results/developing'
    )

    # One comparison figure per country that produced results.
    for country in DEVELOPING_COUNTRIES:
        if country not in results:
            continue
        print(f"\nPlot comparison for {country}")
        plot_model_comparison(
            results[country],
            country,
            target=target_name,
            save_dir=plot_dir,
            use_actual_values=True
        )
        plt.show()

    if summary is None:
        return results, summary

    plot_comparison_summary(
        summary,
        target=target_name,
        save_dir=plot_dir
    )
    plt.show()

    print("NUMERICAL SUMMARY")

    for country in DEVELOPING_COUNTRIES:
        country_rows = summary[summary['country'] == country]
        if country_rows.empty:
            continue
        print(f"\n{country}:")

        for strategy in ('non_recursive', 'recursive'):
            rows = country_rows[country_rows['strategy'] == strategy]
            loco_hits = rows[rows['model_type'] == 'loco']['rmse'].values
            single_hits = rows[rows['model_type'] == 'single']['rmse'].values

            # Report only when both model types have a value to compare.
            if len(loco_hits) == 0 or len(single_hits) == 0:
                continue

            loco_val = loco_hits[0]
            single_val = single_hits[0]
            improvement = ((single_val - loco_val) / single_val) * 100

            print(f"  {strategy}:")
            print(f"    LOCO RMSE:    {loco_val:.4f}")
            print(f"    Single RMSE: {single_val:.4f}")
            print(f"    Improvement: {improvement:.2f}%")

    return results, summary

In [28]:
def create_final_summary(save_dir='data_export/04_results/final'):
    """Run every target/model/country-group experiment and build one summary table.

    For each combination of target ('co2', 'gdp',
    'primary_energy_consumption'), model builder (LSTM, Bi-LSTM, ED-LSTM, CNN)
    and country group (developed, developing) the matching comparison runner
    is called, its per-country summary is tagged with the experiment
    identifiers, and the comparison plots are written below
    'data_export/04_plots/final/<group>'.

    The collected summaries are then grouped by target, model, country group,
    strategy and model type, and the RMSE/MASE statistics of each group are
    written to '<save_dir>/final_summary_table.csv'.

    Args:
        save_dir: Directory for the per-group results and the final CSV;
            created if it does not exist.

    Returns:
        The aggregated summary DataFrame, or None when no experiment yielded
        a summary or none of the collected rows has a valid RMSE.
    """
    os.makedirs(save_dir, exist_ok=True)

    # All plots share one root directory; create it once instead of per run.
    plot_dir = 'data_export/04_plots/final'
    os.makedirs(plot_dir, exist_ok=True)

    # Experiment grid: iterated target-major, then model, then country group.
    targets = ('co2', 'gdp', 'primary_energy_consumption')
    models = (
        ('LSTM', build_lstm),
        ('Bi-LSTM', build_bilstm),
        ('ED-LSTM', build_edlstm),
        ('CNN', build_cnn),
    )
    country_groups = (
        ('developed', DEVELOPED_COUNTRIES, run_developed_comparison),
        ('developing', DEVELOPING_COUNTRIES, run_developing_comparison),
    )

    all_sum = []

    for target in targets:
        for model_name, model_func in models:
            for group_name, countries, run_comparison in country_groups:
                print(f"\nRunning {target} - {model_name} - {group_name}")

                results, summary = run_comparison(
                    countries_to_test=countries,
                    target=target,
                    model_func=model_func,
                    save_dir=f'{save_dir}/{group_name}'
                )

                has_summary = summary is not None and not summary.empty

                # Tag a copy so the runner's own frame is left untouched.
                if has_summary:
                    summary = summary.copy()
                    summary['target'] = target
                    summary['model'] = model_name
                    summary['country_group'] = group_name
                    all_sum.append(summary)

                if results is not None:
                    group_plot_dir = f'{plot_dir}/{group_name}'

                    for country in countries:
                        if country in results:
                            fig = plot_model_comparison(
                                results[country],
                                country,
                                target=target,
                                save_dir=group_plot_dir,
                                use_actual_values=True
                            )
                            # Close right away so figures do not pile up
                            # across the whole experiment grid.
                            plt.close(fig)

                    if has_summary:
                        fig_summary = plot_comparison_summary(
                            summary,
                            target=target,
                            save_dir=group_plot_dir
                        )
                        plt.close(fig_summary)

                clear_memory()

    # Combine all summaries
    if not all_sum:
        print("No summary data collected")
        return None

    combined_summary = pd.concat(all_sum, ignore_index=True)

    # One output row per (target, model, country group, strategy, model type).
    grouping_cols = ['target', 'model', 'country_group', 'strategy', 'model_type']
    final_summary_rows = []

    for group_key, group_data in combined_summary.groupby(grouping_cols):
        target, model, country_group, strategy, model_type = group_key

        rmse_values = group_data['rmse'].dropna()
        mase_values = group_data['mase'].dropna()

        # A group without a single valid RMSE has nothing to aggregate.
        if rmse_values.empty:
            continue

        data_type = "Single Country" if model_type == 'single' else "All Country (LOCO)"

        # Countries holding the worst and best RMSE within this group.
        max_rmse_country = group_data.loc[rmse_values.idxmax(), 'country']
        min_rmse_country = group_data.loc[rmse_values.idxmin(), 'country']

        final_summary_rows.append({
            'Data_Type': data_type,
            'Method': 'Autoregressive - 4 lags',
            'Strategy': strategy.replace('_', '-').title(),
            'Model': model,
            'Target': target.replace('_', ' ').title(),
            'Country_Group': country_group.title(),
            'Avg_RMSE': rmse_values.mean(),
            'Median_RMSE': rmse_values.median(),
            'Max_RMSE': rmse_values.max(),
            'Min_RMSE': rmse_values.min(),
            'Max_RMSE_Country': max_rmse_country,
            'Min_RMSE_Country': min_rmse_country,
            'Avg_MASE': mase_values.mean() if len(mase_values) > 0 else np.nan,
            # Counts every row of the group, including rows whose rmse is NaN.
            'N_Countries': len(group_data)
        })

    # Without rows the frame would have no columns, and the sort and column
    # selection below would raise KeyError.
    if not final_summary_rows:
        print("No valid RMSE values found in collected summaries")
        return None

    final_summary_df = pd.DataFrame(final_summary_rows)

    # Stable, readable ordering for the exported table.
    final_summary_df = final_summary_df.sort_values([
        'Target', 'Country_Group', 'Model', 'Strategy', 'Data_Type'
    ]).reset_index(drop=True)

    # Save to CSV
    summary_path = os.path.join(save_dir, 'final_summary_table.csv')
    final_summary_df.to_csv(summary_path, index=False, float_format='%.4f')

    print(f"Total rows in summary: {len(final_summary_df)}")

    # Show a few sample rows
    display_cols = ['Data_Type', 'Strategy', 'Model', 'Target', 'Country_Group', 'Avg_RMSE', 'Avg_MASE']
    print(final_summary_df[display_cols].head(10).to_string(index=False))

    return final_summary_df

In [29]:
def generate_performance_comparison_table(final_summary_df, save_dir='data_export/04_results/final'):
    """Compare LOCO and single-country average RMSE for every experiment cell.

    Rows of ``final_summary_df`` are grouped by 'Target', 'Model',
    'Country_Group' and 'Strategy'. For each group that has both an
    'All Country (LOCO)' and a 'Single Country' row, the two 'Avg_RMSE'
    values are compared and the relative difference (as a percentage of the
    single-country RMSE) is recorded as an absolute value next to the name of
    the method with the lower RMSE. Ties are attributed to "Single Country".

    A non-empty result is written to
    '<save_dir>/performance_comparison_table.csv' and win counts plus average
    improvements are printed.

    Args:
        final_summary_df: Summary table with the columns 'Data_Type',
            'Target', 'Model', 'Country_Group', 'Strategy' and 'Avg_RMSE',
            or None.
        save_dir: Output directory for the CSV; created if it does not exist.

    Returns:
        The comparison DataFrame (possibly empty), or None when
        ``final_summary_df`` is None.
    """
    if final_summary_df is None:
        print("No summary data available for comparison")
        return None

    # An empty frame may lack the grouping columns entirely, in which case
    # groupby would raise KeyError; there is nothing to compare either way.
    if final_summary_df.empty:
        print("No summary data available for comparison")
        return pd.DataFrame()

    comparison_rows = []

    # Group by target, model, country_group, strategy to compare LOCO vs Single
    grouping_cols = ['Target', 'Model', 'Country_Group', 'Strategy']

    for group_key, group_data in final_summary_df.groupby(grouping_cols):
        target, model, country_group, strategy = group_key

        loco_data = group_data[group_data['Data_Type'] == 'All Country (LOCO)']
        single_data = group_data[group_data['Data_Type'] == 'Single Country']

        # Both sides are required for a comparison.
        if loco_data.empty or single_data.empty:
            continue

        loco_rmse = loco_data['Avg_RMSE'].iloc[0]
        single_rmse = single_data['Avg_RMSE'].iloc[0]

        # Relative change versus the single-country baseline; guard the
        # division when that baseline is exactly zero.
        if single_rmse != 0:
            improvement_pct = ((single_rmse - loco_rmse) / single_rmse) * 100
        else:
            improvement_pct = 0

        better_method = "LOCO" if loco_rmse < single_rmse else "Single Country"

        comparison_rows.append({
            'Target': target,
            'Model': model,
            'Country_Group': country_group,
            'Strategy': strategy,
            'LOCO_Avg_RMSE': loco_rmse,
            'Single_Avg_RMSE': single_rmse,
            'Better_Method': better_method,
            # Magnitude only; the direction is carried by 'Better_Method'.
            'Improvement_Pct': abs(improvement_pct)
        })

    comparison_df = pd.DataFrame(comparison_rows)

    if not comparison_df.empty:
        # Make sure the target directory exists so that a standalone call
        # with a fresh save_dir does not fail inside to_csv.
        os.makedirs(save_dir, exist_ok=True)
        comparison_path = os.path.join(save_dir, 'performance_comparison_table.csv')
        comparison_df.to_csv(comparison_path, index=False, float_format='%.4f')

        better_counts = comparison_df['Better_Method'].value_counts()
        total_comparisons = len(comparison_df)

        for method, count in better_counts.items():
            pct = (count / total_comparisons) * 100
            print(f"{method}: {count}/{total_comparisons} ({pct:.1f}%)")

        # Show average improvement by method
        loco_wins = comparison_df[comparison_df['Better_Method'] == 'LOCO']
        single_wins = comparison_df[comparison_df['Better_Method'] == 'Single Country']

        if not loco_wins.empty:
            avg_loco_improvement = loco_wins['Improvement_Pct'].mean()
            print(f"\nAverage improvement when LOCO wins: {avg_loco_improvement:.2f}%")

        if not single_wins.empty:
            avg_single_improvement = single_wins['Improvement_Pct'].mean()
            print(f"Average improvement when Single Country wins: {avg_single_improvement:.2f}%")

    return comparison_df

In [30]:

# Run the full experiment grid with the default output directory and keep the
# aggregated table (None if no summary data was collected).
final_summary = create_final_summary()
# Derive the LOCO vs single-country comparison from that table; a None summary
# is handled inside the function.
performance_comparison = generate_performance_comparison_table(final_summary)


Running co2 - LSTM - developed

LOCO vs SINGLE DEVELOPED COUNTRY MODELS
Target: co2
Model: build_lstm
Test size: 9 samples
Data Shape: (1138, 992)
TESTING COUNTRY: United States

LEAVE-ONE-COUNTRY-OUT MODEL
Leave-One-Country-Out Comparison for: United States
Total training samples from other countries: 279
Test country train samples: 26
Test country test samples: 9
Training sequences: 272
    Iter 0: Using last 5 train values
    RMSE: 866.8773, MASE: 10.0527
    Iter 1: Using last 2 train + test[0:3]
    RMSE: 1264.1968, MASE: 3.5191
    Iter 2: Using test[1:6]
    RMSE: 2198.0867, MASE: 19.6282
    Iter 0: Using last 5 training values
    RMSE: 866.8773, MASE: 10.0527
    Iter 1: Using last 2 training + pred[0:3]
    RMSE: 1439.2034, MASE: 4.2169
    Iter 2: Using pred[1:6]
    RMSE: 2443.7190, MASE: 22.6018

Summary for United States

Non-Recursive (actual values):
  Overall RMSE: 1547.1754
  Overall MASE: 7.1603
  Predictions made: 9

Recursive (actual values):
  Overall RMSE: 171