In [23]:
import pandas as pd
import numpy as np
from arch import arch_model
from sklearn.preprocessing import StandardScaler

# === Load data ===
macro_path = 'data/Monthly Mastersheet with Original Data.xlsx'
crypto_lagged_path = 'data/All Exogenous Variables.xlsx'

df_macro = pd.read_excel(macro_path)
df_crypto_lagged = pd.read_excel(crypto_lagged_path)

# Format datetime
df_macro['Month'] = pd.to_datetime(df_macro['Month'])
df_macro.set_index('Month', inplace=True)
df_macro.index = pd.date_range(start=df_macro.index[0], periods=len(df_macro), freq='MS')

df_crypto_lagged['Month'] = pd.to_datetime(df_crypto_lagged['Month'])
df_crypto_lagged.set_index('Month', inplace=True)
df_crypto_lagged.index = pd.date_range(start=df_crypto_lagged.index[0], periods=len(df_crypto_lagged), freq='MS')

# Merge datasets
df = pd.merge(df_macro, df_crypto_lagged, left_index=True, right_index=True, how='inner')

# === Config ===
macro_vars = ['LFPR','CPI', 'r', 'M1', 'IM', 'EX', 'CC', 'GDP']
crypto_assets = ['Bitcoin', 'Litecoin', 'XRP', 'Ethereum', 'Dogecoin', 'Cardano', 'USD Coin', 'Tether']
included_lags = range(4)  # 0–3
cutoff_date = pd.to_datetime('2024-01-01')

# === Modeling ===
for macro in macro_vars:
    selected_columns = [f"{coin}_lag{lag}" for coin in crypto_assets for lag in included_lags if f"{coin}_lag{lag}" in df.columns]
    df_full = df[[macro] + selected_columns].copy()
    df_full.dropna(inplace=True)

    df_train = df_full[df_full.index < cutoff_date]

    if len(df_train) < 40:
        print(f"⚠️ Skipping {macro} due to insufficient training data.")
        continue

    y_train = df_train[[macro]]
    X_train = df_train[selected_columns]

    scaler_y = StandardScaler()
    scaler_x = StandardScaler()
    y_train_scaled = scaler_y.fit_transform(y_train).flatten()
    x_train_scaled = scaler_x.fit_transform(X_train)

    print(f"\n=== Fitting ARX-GARCH for {macro} using lags 0–3 of 8 crypto assets (32 vars) ===")

    try:
        model = arch_model(
            y_train_scaled,
            mean='ARX',
            lags=1,
            vol='GARCH',
            p=1,
            q=1,
            x=x_train_scaled,
            dist='normal',
            rescale=False
        )
        res = model.fit(disp='off', options={'maxiter': 500})

        # Map p-values to column names
        pvals = res.pvalues
        exog_pvals = {}
        for param, p in pvals.items():
            if param.startswith("x"):
                idx = int(param[1:])
                varname = X_train.columns[idx]
                exog_pvals[varname] = p

        pval_df = pd.DataFrame.from_dict(exog_pvals, orient='index', columns=['p-value'])
        pval_df.index.name = 'Variable'
        pval_df.sort_values(by='p-value', inplace=True)

        # print(f"\n=== P-values for exogenous crypto lags predicting {macro} ===")
        # print(pval_df.to_string())

    except Exception as e:
        print(f"❌ Model fitting failed for {macro}: {e}")

print(pval_df)



=== Fitting ARX-GARCH for LFPR using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for CPI using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for r using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for M1 using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for IM using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for EX using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for CC using lags 0–3 of 8 crypto assets (32 vars) ===

=== Fitting ARX-GARCH for GDP using lags 0–3 of 8 crypto assets (32 vars) ===
                p-value
Variable               
Litecoin_lag1  0.000036
Dogecoin_lag2  0.000182
Ethereum_lag2  0.000316
Bitcoin_lag2   0.003305
Ethereum_lag1  0.006107
Bitcoin_lag3   0.007078
Tether_lag3    0.009607
XRP_lag1       0.015434
Cardano_lag3   0.025256
Dogecoin_lag0  0.030936
XRP_lag0       0.111738
Ethereum_lag0  0.112371
USD Coin_lag0  0.122621
Tether_lag0  

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.



In [21]:
# Filter significant predictors (p < 0.05)
significant_vars = pval_df[pval_df['p-value'] < 0.05].index.tolist()

print(significant_vars)


correlation_df = df[[col for col in df.columns if "lag" in col]].corr()

# === Check for high correlations among significant crypto lags ===
# === Check for high correlations among significant crypto lags ===
if significant_vars:
    # Drop missing rows/columns safely
    valid_vars = [var for var in significant_vars if var in correlation_df.index and var in correlation_df.columns]
    corr_subset = correlation_df.loc[valid_vars, valid_vars]

    high_corr_pairs = []
    for macro in macro_vars:
            for i, var1 in enumerate(valid_vars):
                for var2 in valid_vars[i+1:]:
                    try:
                        corr = corr_subset.loc[var1, var2]
                        if pd.notna(corr) and abs(corr) > 0.60:
                            high_corr_pairs.append((var1, var2, corr))
                    except KeyError:
                        continue

            if high_corr_pairs:
                print(f"\n🚨 High Correlation (>|0.60|) among significant predictors for {macro}:")
                for var1, var2, corr in high_corr_pairs:
                    print(f"{var1} & {var2}: {corr:.2f}")
            else:
                print(f"\n✅ No high correlation (>|0.60|) among significant predictors for {macro}.")
    else:
        print(f"\nℹ️ No significant crypto lags (p < 0.05) found for {macro}.")

['Litecoin_lag1', 'Dogecoin_lag2', 'Ethereum_lag2', 'Bitcoin_lag2', 'Ethereum_lag1', 'Bitcoin_lag3', 'Tether_lag3', 'XRP_lag1', 'Cardano_lag3', 'Dogecoin_lag0']

🚨 High Correlation (>|0.60|) among significant predictors for LFPR:
Litecoin_lag1 & Ethereum_lag1: 0.81
Litecoin_lag1 & XRP_lag1: 0.78
Ethereum_lag2 & Bitcoin_lag2: 0.80
Ethereum_lag1 & XRP_lag1: 0.69
Bitcoin_lag3 & Cardano_lag3: 0.67

🚨 High Correlation (>|0.60|) among significant predictors for CPI:
Litecoin_lag1 & Ethereum_lag1: 0.81
Litecoin_lag1 & XRP_lag1: 0.78
Ethereum_lag2 & Bitcoin_lag2: 0.80
Ethereum_lag1 & XRP_lag1: 0.69
Bitcoin_lag3 & Cardano_lag3: 0.67
Litecoin_lag1 & Ethereum_lag1: 0.81
Litecoin_lag1 & XRP_lag1: 0.78
Ethereum_lag2 & Bitcoin_lag2: 0.80
Ethereum_lag1 & XRP_lag1: 0.69
Bitcoin_lag3 & Cardano_lag3: 0.67

🚨 High Correlation (>|0.60|) among significant predictors for r:
Litecoin_lag1 & Ethereum_lag1: 0.81
Litecoin_lag1 & XRP_lag1: 0.78
Ethereum_lag2 & Bitcoin_lag2: 0.80
Ethereum_lag1 & XRP_lag1: 0.69
Bi

In [24]:
import pandas as pd
import numpy as np
from arch import arch_model
from sklearn.preprocessing import StandardScaler

# === Load data ===
macro_path = 'data/Monthly Mastersheet with Original Data.xlsx'
crypto_lagged_path = 'data/All Exogenous Variables.xlsx'

df_macro = pd.read_excel(macro_path)
df_crypto_lagged = pd.read_excel(crypto_lagged_path)

# Format datetime and set as index with monthly frequency
df_macro['Month'] = pd.to_datetime(df_macro['Month'])
df_macro.set_index('Month', inplace=True)
df_macro.index = pd.date_range(start=df_macro.index[0], periods=len(df_macro), freq='MS')

df_crypto_lagged['Month'] = pd.to_datetime(df_crypto_lagged['Month'])
df_crypto_lagged.set_index('Month', inplace=True)
df_crypto_lagged.index = pd.date_range(start=df_crypto_lagged.index[0], periods=len(df_crypto_lagged), freq='MS')

# Merge datasets on index (Month)
df = pd.merge(df_macro, df_crypto_lagged, left_index=True, right_index=True, how='inner')

# === Config ===
macro_vars = ['LFPR', 'CPI', 'r', 'M1', 'IM', 'EX', 'CC', 'GDP']
crypto_assets = ['Bitcoin', 'Litecoin', 'XRP', 'Ethereum', 'Dogecoin', 'Cardano', 'USD Coin', 'Tether']
included_lags = range(4)  # 0 to 3 lags
cutoff_date = pd.to_datetime('2024-01-01')

# Precompute correlation matrix once (for all crypto lag vars)
crypto_lag_cols = [f"{coin}_lag{lag}" for coin in crypto_assets for lag in included_lags if f"{coin}_lag{lag}" in df.columns]
correlation_df = df[crypto_lag_cols].corr()

for macro in macro_vars:
    selected_columns = crypto_lag_cols.copy()
    df_full = df[[macro] + selected_columns].copy()
    df_full.dropna(inplace=True)

    df_train = df_full[df_full.index < cutoff_date]

    if len(df_train) < 40:
        print(f"⚠️ Skipping {macro} due to insufficient training data.")
        continue

    y_train = df_train[[macro]]
    X_train = df_train[selected_columns]

    scaler_y = StandardScaler()
    scaler_x = StandardScaler()
    y_train_scaled = scaler_y.fit_transform(y_train).flatten()
    x_train_scaled = scaler_x.fit_transform(X_train)

    print(f"\n=== Fitting ARX-GARCH for {macro} using lags 0–3 of 8 crypto assets (32 vars) ===")

    try:
        model = arch_model(
            y_train_scaled,
            mean='ARX',
            lags=1,
            vol='GARCH',
            p=1,
            q=1,
            x=x_train_scaled,
            dist='normal',
            rescale=False
        )
        res = model.fit(disp='off', options={'maxiter': 500})

        # Extract p-values for exogenous variables (x0, x1, ...)
        pvals = res.pvalues
        exog_pvals = {}
        for param, p in pvals.items():
            if param.startswith("x"):
                idx = int(param[1:])
                varname = X_train.columns[idx]
                exog_pvals[varname] = p

        pval_df = pd.DataFrame.from_dict(exog_pvals, orient='index', columns=['p-value'])
        pval_df.index.name = 'Variable'
        pval_df.sort_values(by='p-value', inplace=True)

        print(f"\n=== P-values for exogenous crypto lags predicting {macro} ===")
        print(pval_df.to_string())

        # Filter significant predictors (p < 0.05)
        significant_vars = pval_df[pval_df['p-value'] < 0.05].index.tolist()

        if significant_vars:
            print(f"\nSignificant predictors (p < 0.05) for {macro}: {significant_vars}")

            # Check correlations among significant predictors
            valid_vars = [var for var in significant_vars if var in correlation_df.index and var in correlation_df.columns]

            if len(valid_vars) > 1:
                corr_subset = correlation_df.loc[valid_vars, valid_vars]

                high_corr_pairs = []
                for i, var1 in enumerate(valid_vars):
                    for var2 in valid_vars[i+1:]:
                        try:
                            corr = corr_subset.loc[var1, var2]
                            if pd.notna(corr) and abs(corr) > 0.60:
                                high_corr_pairs.append((var1, var2, corr))
                        except KeyError:
                            continue

                if high_corr_pairs:
                    print(f"\n🚨 High Correlation (>|0.60|) among significant predictors for {macro}:")
                    for var1, var2, corr in high_corr_pairs:
                        print(f"{var1} & {var2}: {corr:.2f}")
                else:
                    print(f"\n✅ No high correlation (>|0.60|) among significant predictors for {macro}.")
            else:
                print(f"\nℹ️ Not enough significant variables for correlation check for {macro}.")
        else:
            print(f"\nℹ️ No significant crypto lags (p < 0.05) found for {macro}.")

    except Exception as e:
        print(f"❌ Model fitting failed for {macro}: {e}")



=== Fitting ARX-GARCH for LFPR using lags 0–3 of 8 crypto assets (32 vars) ===

=== P-values for exogenous crypto lags predicting LFPR ===
                     p-value
Variable                    
Cardano_lag0   4.325312e-109
Tether_lag3     8.819589e-76
Bitcoin_lag0    1.408926e-66
XRP_lag0        1.018581e-45
Cardano_lag2    3.396444e-40
Tether_lag2     1.606340e-36
USD Coin_lag3   3.925690e-34
Bitcoin_lag3    2.501625e-31
Bitcoin_lag1    3.092009e-29
Ethereum_lag3   6.092198e-27
USD Coin_lag0   6.037131e-24
Dogecoin_lag1   2.986306e-22
USD Coin_lag2   1.254580e-19
Ethereum_lag1   1.767212e-18
Tether_lag0     6.661697e-18
Litecoin_lag1   7.857785e-17
Litecoin_lag2   9.428422e-16
XRP_lag2        9.277416e-15
Cardano_lag3    4.084170e-10
Dogecoin_lag3   2.677009e-09
Ethereum_lag2   3.735673e-09
Dogecoin_lag2   1.642819e-07
XRP_lag3        2.068114e-06
Cardano_lag1    1.969962e-04
Dogecoin_lag0   3.733020e-04
Litecoin_lag3   9.816210e-04
Litecoin_lag0   1.970232e-02
Ethereum_lag0   3.8

Positive directional derivative for linesearch
See scipy.optimize.fmin_slsqp for code meaning.

