In [1]:
from sklearn.metrics import mean_squared_error
import numpy as np


def evaluate_rmse_forecast(df_forecast, df_real, col_forecast='rate_pred', col_actual='rate'):
    """
    Compute RMSE between forecasted and actual exchange rates (or log_rates).

    Parameters:
    - df_forecast: DataFrame with predicted values (indexed by date)
    - df_real: DataFrame with actual values (indexed by date)
    - col_forecast: column name in df_forecast (e.g. 'rate_pred' or 'log_rate_pred')
    - col_actual: column name in df_real (e.g. 'rate' or 'log_rate')

    Returns:
    - rmse: root mean square error
    """
    # Align both dataframes on index (date)
    df_eval = df_forecast[[col_forecast]].join(df_real[[col_actual]], how='inner')

    # Extract arrays
    y_pred = df_eval[col_forecast].values
    y_true = df_eval[col_actual].values

    # Compute RMSE
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"✅ RMSE between simulated forecast and real data: {rmse:.6f}")
    
    return rmse

def evaluate_rmse_horizons(df_forecast, df_real,
                           col_forecast='rate_pred', col_actual='rate'):
    """
    Compute RMSE over 6 months, 12 months, and full forecast period.
    """
    # Align the dataframes
    df = df_forecast[[col_forecast]].join(df_real[[col_actual]], how='inner')
    df = df.sort_index()
    
    # Define horizon lengths (in days)
    horizons = {
        '6_months':  6 * 30,   # ≈180 days
        '12_months': 12 * 30,  # ≈360 days
        'all':       len(df)
    }
    results = {}

    for name, days in horizons.items():
        sub = df.iloc[:days]
        if sub.empty:
            results[name] = np.nan
            continue

        rmse_val = np.sqrt(mean_squared_error(sub[col_actual], sub[col_forecast]))
        results[name] = rmse_val
        print(f"✅ RMSE @ {name.replace('_', ' ')} ({days} days): {rmse_val:.6f}")

    return results

In [2]:
# For rates
import pandas as pd
df_test = pd.read_csv('../2. preprocesing/test_df_dexuseu.csv', index_col='date', parse_dates=True)
df_forecast = pd.read_csv('../3. modelling/df_forecast_dexuseu.csv', index_col='date', parse_dates=True)
# evaluate_rmse_forecast(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')
evaluate_rmse_horizons(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')


✅ RMSE @ 6 months (180 days): 0.020879
✅ RMSE @ 12 months (360 days): 0.018025
✅ RMSE @ all (520 days): 0.018991


{'6_months': 0.0208793127545774,
 '12_months': 0.01802515317846195,
 'all': 0.018990833999870064}

In [3]:
df_test = pd.read_csv('../2. preprocesing/test_df_dexussf.csv', index_col='date', parse_dates=True)
df_forecast = pd.read_csv('../3. modelling/df_forecast_dexussf.csv', index_col='date', parse_dates=True)
# evaluate_rmse_forecast(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')
evaluate_rmse_horizons(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')

✅ RMSE @ 6 months (180 days): 0.003884
✅ RMSE @ 12 months (360 days): 0.004013
✅ RMSE @ all (520 days): 0.003490


{'6_months': 0.0038842822478595065,
 '12_months': 0.004012982368597237,
 'all': 0.003490467628075406}

In [4]:
df_test = pd.read_csv('../2. preprocesing/test_df_usdpen.csv', index_col='date', parse_dates=True)
df_forecast = pd.read_csv('../3. modelling/df_forecast_usdpen.csv', index_col='date', parse_dates=True)
# evaluate_rmse_forecast(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')
evaluate_rmse_horizons(df_forecast, df_test, col_forecast='rate_mean', col_actual='rate_interpolated')

✅ RMSE @ 6 months (180 days): 0.012981
✅ RMSE @ 12 months (360 days): 0.014837
✅ RMSE @ all (520 days): 0.017537


{'6_months': 0.01298068660524391,
 '12_months': 0.014836522486164173,
 'all': 0.017536832704797882}