In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import itertools
import matplotlib.pyplot as plt
import sklearn
from statsmodels.tsa.stattools import adfuller
from arch import arch_model
from sklearn.preprocessing import StandardScaler
from arch import arch_model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

# Load data
df = pd.read_excel('data/Monthly Mastersheet with Original Data.xlsx')
print(df.head())
print(df.columns.tolist())

# Ensure date is datetime and set index
df['Month'] = pd.to_datetime(df['Month'])
df.set_index('Month', inplace=True)
df.index = pd.date_range(start=df.index[0], periods=len(df), freq='MS')
df.columns = df.columns.str.strip()

       Month  Bitcoin  Litecoin       XRP  Ethereum  Dogecoin  Cardano  \
0 2017-09-01 -1.39892  -1.02849 -0.898590  -1.32094  -1.40502      NaN   
1 2017-10-01 -1.33954  -0.77151 -1.145490  -0.83798  -0.46490      NaN   
2 2017-11-01 -1.62299  -1.44830 -1.192080  -1.83787  -1.37105      NaN   
3 2017-12-01 -1.26767  -1.11575 -1.254630  -1.31040  -0.70209      NaN   
4 2018-01-01 -0.89251  -0.24164  0.038975  -0.81128  -0.33444      NaN   

    Tether  USD Coin  LFPR  ...     r      M1        IM        EX     CC  \
0 -3.82830       NaN  63.1  ...  1.15  3535.6  2916.022  2535.501   95.1   
1 -3.73861       NaN  62.7  ...  1.15  3615.0  3034.004  2479.107  100.7   
2 -4.01951       NaN  62.7  ...  1.16  3587.6  3034.004  2479.107   98.5   
3 -2.55687       NaN  62.7  ...  1.30  3630.5  3034.004  2479.107   95.9   
4 -2.75193       NaN  62.7  ...  1.41  3673.4  3093.299  2517.268   95.7   

        GDP  PC1_crypto  PC2_crypto    PC1_macro    PC2_macro  
0  19743.84         NaN         Na

In [None]:
from arch import arch_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd

# Forecasting utility
def safe_mape(actual, predicted):
    actual, predicted = np.array(actual), np.array(predicted)
    mask = actual != 0
    return np.mean(np.abs((actual[mask] - predicted[mask]) / actual[mask])) * 100 if np.any(mask) else np.nan

# Updated AR forecast (unchanged)
def rolling_forecast_ar(y_train_scaled, y_test_scaled, p, q, l):
    history = list(y_train_scaled)
    preds = []
    for t in range(len(y_test_scaled)):
        model = arch_model(history, mean='AR', lags=l, vol='GARCH', p=p, q=q, dist='normal', rescale=False)
        res = model.fit(disp='off')
        forecast = res.forecast(horizon=1, method='analytic')
        pred = forecast.mean.values[-1, 0]
        preds.append(pred)
        history.append(y_test_scaled[t])
    return np.array(preds)

# Updated ARX forecast for multivariate x
def rolling_forecast_arx(y_train_scaled, x_train_scaled, y_test_scaled, x_test_scaled, p, q, l):
    history_y = list(y_train_scaled)
    history_x = list(x_train_scaled)
    preds = []
    for t in range(len(y_test_scaled)):
        x_arr = np.array(history_x)
        model = arch_model(history_y, mean='ARX', lags=l, vol='GARCH', p=p, q=q, x=x_arr, dist='normal', rescale=False)
        res = model.fit(disp='off')
        const = res.params.get('Const', 0)
        phi = res.params.get('y[1]', 0)
        betas = np.array([res.params.get(f'x{i}', 0) for i in range(x_arr.shape[1])])
        y_t = history_y[-1]
        x_t1 = x_test_scaled[t]
        pred = const + phi * y_t + np.dot(betas, x_t1)
        preds.append(pred)
        history_y.append(y_test_scaled[t])
        history_x.append(x_t1)
    return np.array(preds)

# === PARAMETERS ===
macro_vars = ['LFPR', 'CPI', 'r', 'M1', 'GDP', 'IM', 'EX', 'CC']
crypto_assets = ['Tether', 'USD Coin', 'Bitcoin', 'Litecoin', 'XRP', 'Ethereum', 'Dogecoin', 'Cardano']
cutoff_date = pd.to_datetime('2024-01-01')

# === RESULTS STORAGE ===
all_results = []
best_results = []

# === GRID SEARCH OVER CRYPTO ASSET TARGETS ===
for asset in crypto_assets:
    for p in range(1, 3):
        for q in range(0, 3):
            for lag in range(0, 7):
                df_temp = df[macro_vars + [asset]].copy()
                df_temp[macro_vars] = df_temp[macro_vars]
                df_temp[asset] = df_temp[asset].shift(lag)
                df_temp.dropna(inplace=True)

                if len(df_temp) < 30 or df_temp[asset].var() == 0:
                    continue

                df_train = df_temp[df_temp.index < cutoff_date]
                df_test = df_temp[df_temp.index >= cutoff_date]

                if len(df_test) == 0 or len(df_train) < 10:
                    continue

                y_col = asset
                x_cols = macro_vars

                scaler_y = StandardScaler()
                scaler_x = StandardScaler()
                y_train_scaled = scaler_y.fit_transform(df_train[[y_col]]).flatten()
                y_test_scaled = scaler_y.transform(df_test[[y_col]]).flatten()
                x_train_scaled = scaler_x.fit_transform(df_train[x_cols])
                x_test_scaled = scaler_x.transform(df_test[x_cols])

                try:
                    pred_ar_scaled = rolling_forecast_ar(y_train_scaled, y_test_scaled, p, q, 1)
                    pred_arx_scaled = rolling_forecast_arx(y_train_scaled, x_train_scaled, y_test_scaled, x_test_scaled, p, q, 1)

                    pred_ar = scaler_y.inverse_transform(pred_ar_scaled.reshape(-1, 1)).flatten()
                    pred_arx = scaler_y.inverse_transform(pred_arx_scaled.reshape(-1, 1)).flatten()
                    y_actual = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).flatten()

                    mse_ar = mean_squared_error(y_actual, pred_ar)
                    mse_arx = mean_squared_error(y_actual, pred_arx)
                    r2_ar = r2_score(y_actual, pred_ar)
                    r2_arx = r2_score(y_actual, pred_arx)
                    mape_ar = safe_mape(y_actual, pred_ar)
                    mape_arx = safe_mape(y_actual, pred_arx)
                    mape_change = ((mape_arx - mape_ar) / mape_ar) * 100 if mape_ar != 0 else np.nan

                    result = {
                        'Crypto': asset,
                        'p': p,
                        'q': q,
                        'Exog Lag': lag,
                        'MAPE_AR': mape_ar,
                        'MAPE_ARX': mape_arx,
                        'R2_AR': r2_ar,
                        'R2_ARX': r2_arx,
                        'MAPE_Improvement(%)': mape_change,
                    }
                    all_results.append(result)

                    if len(best_results) == 0 or (
                        asset == best_results[-1]['Crypto'] and
                        mape_change < best_results[-1]['MAPE_Improvement(%)']
                    ):
                        if len(best_results) > 0 and best_results[-1]['Crypto'] == asset:
                            best_results.pop()
                        best_results.append(result)

                except Exception as e:
                    print(f"⚠️ Failed for {asset} p={p}, q={q}, lag={lag}: {e}")

# === DISPLAY TABLES ===
all_results_df = pd.DataFrame(all_results)
best_results_df = pd.DataFrame(best_results)

# Sort and display
display(best_results_df.sort_values('MAPE_Improvement(%)'))
display(all_results_df.sort_values(['Crypto', 'MAPE_Improvement(%)']))


Unnamed: 0,Crypto,p,q,Exog Lag,MAPE_AR,MAPE_ARX,R2_AR,R2_ARX,MAPE_Improvement(%)
0,Tether,2,1,6,5.947591,4.316994,-0.806367,0.110696,-27.416089


Unnamed: 0,Crypto,p,q,Exog Lag,MAPE_AR,MAPE_ARX,R2_AR,R2_ARX,MAPE_Improvement(%)
104,Bitcoin,1,2,6,10.534912,9.652817,-0.581992,-0.040519,-8.373062
125,Bitcoin,2,2,6,10.535025,9.653034,-0.581965,-0.040477,-8.371988
111,Bitcoin,2,0,6,10.508644,10.326934,-0.575223,-0.221766,-1.729152
118,Bitcoin,2,1,6,10.508404,10.327402,-0.575248,-0.221812,-1.722451
90,Bitcoin,1,0,6,10.508391,10.327653,-0.575199,-0.221824,-1.719944
...,...,...,...,...,...,...,...,...,...
169,XRP,1,0,1,26.971881,32.064021,-0.324795,-0.572697,18.879440
183,XRP,1,2,1,26.971831,32.064733,-0.324807,-0.572575,18.882299
197,XRP,2,1,1,26.971838,32.065520,-0.324821,-0.572678,18.885187
190,XRP,2,0,1,26.971798,32.633973,-0.324789,-0.608344,20.992947
