## Prediction of Two Market Regimes

In [1]:
import torch.nn as nn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import os
from tqdm import tqdm
import math

import logging.config
logging.config.dictConfig({'version': 1, 'disable_existing_loggers': True})

In [2]:
df = pd.read_csv('/home/lmilo_ext/Data/labeled_data_2states.csv')
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
df.head()

Unnamed: 0_level_0,AdrActCnt,AdrBal1in100KCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,AdrBalNtv100Cnt,AdrBalNtv100KCnt,AdrBalNtv10Cnt,AdrBalNtv10KCnt,...,Realized Cap (7d)_btc,Realized Cap (30d)_btc,Realized Cap (90d)_btc,Realized Cap (60d)_btc,Realized Cap (180d)_btc,Realized Cap (365d)_btc,Stock to Flow ratio_btc,ReferenceRate_ma,ReferenceRate_log_return,MarketRegime
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-09,9889732.0,871727.857143,166387.428571,17672550.0,21509.857143,5316332.0,14545550.0,494483.857143,29033740.0,2124377.0,...,882601800.0,1656658000.0,2542547000.0,2175202000.0,3259974000.0,4264022000.0,23.050918,34.808657,0.0233,0
2017-01-16,9845421.0,876928.142857,166228.0,17674220.0,21499.857143,5314798.0,14562190.0,494053.714286,29065920.0,2124969.0,...,858441500.0,1747843000.0,2640501000.0,2259350000.0,3337645000.0,4339008000.0,23.20617,34.013896,-0.023097,0
2017-01-23,9841003.0,882297.857143,166445.0,17710580.0,21526.428571,5318879.0,14567410.0,493311.142857,29087590.0,2122200.0,...,675291400.0,1736224000.0,2654568000.0,2270288000.0,3359355000.0,4354986000.0,23.586007,33.330191,-0.020306,0
2017-01-30,10049790.0,886092.142857,166745.857143,17728730.0,21664.285714,5324168.0,14533960.0,493560.142857,29060410.0,2121745.0,...,644385800.0,1723002000.0,2694323000.0,2305593000.0,3404450000.0,4415296000.0,22.095225,33.232512,-0.002935,0
2017-02-06,10178710.0,887899.857143,167186.571429,17746700.0,21655.285714,5327967.0,14504120.0,494062.714286,29042190.0,2125465.0,...,673302300.0,1673595000.0,2772407000.0,2353981000.0,3454941000.0,4504209000.0,20.314899,33.370602,0.004147,0


### Split for Statistical Evaluation

In [3]:
stat_eval = df.copy()
# Define the split date (06 February 2022), temporeal 80/20 split of the data
split_date = '2022-02-06'

stat_eval_test = stat_eval.loc[stat_eval.index >= split_date]
stat_eval_test.head()

Unnamed: 0_level_0,AdrActCnt,AdrBal1in100KCnt,AdrBal1in10KCnt,AdrBal1in10MCnt,AdrBal1in1KCnt,AdrBal1in1MCnt,AdrBalNtv100Cnt,AdrBalNtv100KCnt,AdrBalNtv10Cnt,AdrBalNtv10KCnt,...,Realized Cap (7d)_btc,Realized Cap (30d)_btc,Realized Cap (90d)_btc,Realized Cap (60d)_btc,Realized Cap (180d)_btc,Realized Cap (365d)_btc,Stock to Flow ratio_btc,ReferenceRate_ma,ReferenceRate_log_return,MarketRegime
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-02-07,49847700.0,2658781.0,289920.857143,55006250.0,48211.142857,14655040.0,58327020.0,994741.428571,120256900.0,6688534.0,...,24374190000.0,55833350000.0,127509500000.0,87266540000.0,184420100000.0,341181300000.0,55.668834,540.09289,-0.03333,0
2022-02-14,50550090.0,2656008.0,289738.0,54995730.0,48201.428571,14640940.0,58301190.0,992627.571429,120406200.0,6683030.0,...,22482680000.0,56196840000.0,123860100000.0,85776440000.0,185366300000.0,338889500000.0,57.686155,528.728625,-0.021266,0
2022-02-21,50483880.0,2655017.0,289279.285714,54968870.0,48141.571429,14628110.0,58156750.0,987176.0,120327800.0,6649993.0,...,21352430000.0,56347520000.0,119746000000.0,84689330000.0,185113100000.0,332257600000.0,54.343513,515.819066,-0.024719,0
2022-02-28,49577440.0,2655316.0,288743.0,55023310.0,48196.285714,14628200.0,58302490.0,987802.714286,120615700.0,6663153.0,...,21249670000.0,54608370000.0,113934100000.0,83263540000.0,184485200000.0,323217700000.0,57.352742,502.690902,-0.025781,0
2022-03-07,50835540.0,2655837.0,288185.428571,55099400.0,48153.428571,14634240.0,58493160.0,991081.571429,120953000.0,6695600.0,...,26474080000.0,55766230000.0,110769100000.0,84718750000.0,185722700000.0,315058300000.0,61.099037,498.0493,-0.009276,0


### Train Test Split

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Separate features and target variable
X = df.drop("MarketRegime", axis=1)  # Features
y = df["MarketRegime"]  # Target variable

# Split the data into training and testing sets
X_train, X_test = X.loc[X.index < split_date], X.loc[X.index >= split_date]
y_train, y_test = y[X.index < split_date], y[X.index >= split_date]

### Random Forest Approach

In [5]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
rf_all_y_pred = []
rf_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train):
    X_train_cv, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # Initialize the Random Forest classifier
    rf_classifier = RandomForestClassifier(random_state=42)
    
    # Fit the Random Forest classifier on the training data
    rf_classifier.fit(X_train_cv, y_train_cv)

    # Make predictions on the validation set
    y_pred_rf = rf_classifier.predict(X_val)

    # Append predictions and true labels for this fold
    rf_all_y_pred.extend(y_pred_rf)
    rf_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_rf = classification_report(rf_all_y_true, rf_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_rf)

              precision    recall  f1-score   support

           0     0.8154    0.8908    0.8514       119
           1     0.9037    0.8356    0.8683       146

    accuracy                         0.8604       265
   macro avg     0.8595    0.8632    0.8599       265
weighted avg     0.8640    0.8604    0.8607       265



In [6]:
rf_final = RandomForestClassifier(random_state=42)

rf_final.fit(X_train, y_train)

# Make predictions on the test data
y_pred_test_rf = rf_final.predict(X_test)

# Calculate and print a classification report for the test data
classification_report_rf_test = classification_report(y_test, y_pred_test_rf, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_rf_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     1.0000    0.9592    0.9792        49
           1     0.9000    1.0000    0.9474        18

    accuracy                         0.9701        67
   macro avg     0.9500    0.9796    0.9633        67
weighted avg     0.9731    0.9701    0.9706        67



In [7]:
stat_rf_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_rf_test["MarketRegime"] = y_pred_test_rf.copy()
# Count of Bulls/Bears
count_of_zeros_rf = sum(1 for item in y_pred_test_rf if item == 0)
print("Bear:", count_of_zeros_rf)
count_of_ones_rf = sum(1 for item in y_pred_test_rf if item == 1)
print("Bull:", count_of_ones_rf)
# Statistics
stat_rf_test[['target']] = stat_rf_test[['ReferenceRate']].pct_change().shift(-1)
stat_rf_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 47
Bull: 20


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.015589,0.060698
1,0.029324,0.059603


### Decision Tree approach

In [8]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
dt_all_y_pred = []
dt_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train):
    X_train_cv, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # Initialize the Random Forest classifier
    dt_classifier = DecisionTreeClassifier(random_state=42)
    
    # Fit the Random Forest classifier on the training data
    dt_classifier.fit(X_train_cv, y_train_cv)

    # Make predictions on the validation set
    y_pred_dt = dt_classifier.predict(X_val)

    # Append predictions and true labels for this fold
    dt_all_y_pred.extend(y_pred_dt)
    dt_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_dt = classification_report(dt_all_y_true, dt_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_dt)

              precision    recall  f1-score   support

           0     0.5690    0.8319    0.6758       119
           1     0.7802    0.4863    0.5992       146

    accuracy                         0.6415       265
   macro avg     0.6746    0.6591    0.6375       265
weighted avg     0.6854    0.6415    0.6336       265



In [9]:
dt_final = DecisionTreeClassifier(random_state=42)

dt_final.fit(X_train, y_train)

# Make predictions on the test data
y_pred_test_dt = dt_final.predict(X_test)

# Calculate and print a classification report for the test data
classification_report_dt_test = classification_report(y_test, y_pred_test_dt, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_dt_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     1.0000    0.4490    0.6197        49
           1     0.4000    1.0000    0.5714        18

    accuracy                         0.5970        67
   macro avg     0.7000    0.7245    0.5956        67
weighted avg     0.8388    0.5970    0.6067        67



In [10]:
stat_dt_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_dt_test["MarketRegime"] = y_pred_test_dt.copy()
# Count of Bulls/Bears
count_of_zeros_dt = sum(1 for item in y_pred_test_dt if item == 0)
print("Bear:", count_of_zeros_dt)
count_of_ones_dt = sum(1 for item in y_pred_test_dt if item == 1)
print("Bull:", count_of_ones_dt)
# Statistics
stat_dt_test[['target']] = stat_dt_test[['ReferenceRate']].pct_change().shift(-1)
stat_dt_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 22
Bull: 45


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.020312,0.072956
1,0.006168,0.056785


### SVM approach

In [11]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert X_train_scaled to a Pandas DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
svm_all_y_pred = []
svm_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train_scaled):
    X_train_cv, X_val = X_train_scaled.iloc[train_index], X_train_scaled.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # Initialize the SVM classifier
    svm_classifier = SVC(random_state=42)

    # Train the classifier on the training data
    svm_classifier.fit(X_train_cv, y_train_cv)
    
    # Make predictions on the validation set
    y_pred_svm = svm_classifier.predict(X_val)

    # Append predictions and true labels for this fold
    svm_all_y_pred.extend(y_pred_svm)
    svm_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_svm = classification_report(svm_all_y_true, svm_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_svm)

              precision    recall  f1-score   support

           0     0.7820    0.8739    0.8254       119
           1     0.8864    0.8014    0.8417       146

    accuracy                         0.8340       265
   macro avg     0.8342    0.8377    0.8336       265
weighted avg     0.8395    0.8340    0.8344       265



In [12]:
svm_final = SVC(random_state=42)

svm_final.fit(X_train_scaled, y_train)

# Make predictions on the test data
y_pred_test_svm = svm_final.predict(X_test_scaled)

# Calculate and print a classification report for the test data
classification_report_svm_test = classification_report(y_test, y_pred_test_svm, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_svm_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     0.5854    0.4898    0.5333        49
           1     0.0385    0.0556    0.0455        18

    accuracy                         0.3731        67
   macro avg     0.3119    0.2727    0.2894        67
weighted avg     0.4384    0.3731    0.4023        67



In [13]:
stat_svm_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_svm_test["MarketRegime"] = y_pred_test_svm.copy()
# Count of Bulls/Bears
count_of_zeros_svm = sum(1 for item in y_pred_test_svm if item == 0)
print("Bear:", count_of_zeros_svm)
count_of_ones_svm = sum(1 for item in y_pred_test_svm if item == 1)
print("Bull:", count_of_ones_svm)
# Statistics
stat_svm_test[['target']] = stat_svm_test[['ReferenceRate']].pct_change().shift(-1)
stat_svm_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 41
Bull: 26


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.003787,0.067011
1,-0.000923,0.058466


### Logistic Regression Approach

In [14]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert X_train_scaled to a Pandas DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
logreg_all_y_pred = []
logreg_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train_scaled):
    X_train_cv, X_val = X_train_scaled.iloc[train_index], X_train_scaled.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # Initialize the Logistic Regression classifier
    logreg_classifier = LogisticRegression(random_state=42, solver='liblinear')
    
    # Train the classifier on the training data
    logreg_classifier.fit(X_train_cv, y_train_cv)

    # Make predictions on the validation set
    y_pred_logreg = logreg_classifier.predict(X_val)

    # Append predictions and true labels for this fold
    logreg_all_y_pred.extend(y_pred_logreg)
    logreg_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_logreg = classification_report(logreg_all_y_true, logreg_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_logreg)

              precision    recall  f1-score   support

           0     0.7697    0.9832    0.8635       119
           1     0.9823    0.7603    0.8571       146

    accuracy                         0.8604       265
   macro avg     0.8760    0.8717    0.8603       265
weighted avg     0.8868    0.8604    0.8600       265



In [15]:
logreg_final = LogisticRegression(random_state=42, solver='liblinear')

logreg_final.fit(X_train_scaled, y_train)

# Make predictions on the test data
y_pred_test_logreg = logreg_final.predict(X_test_scaled)

# Calculate and print a classification report for the test data
classification_report_logreg_test = classification_report(y_test, y_pred_test_logreg, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_logreg_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     1.0000    0.3673    0.5373        49
           1     0.3673    1.0000    0.5373        18

    accuracy                         0.5373        67
   macro avg     0.6837    0.6837    0.5373        67
weighted avg     0.8300    0.5373    0.5373        67



In [16]:
stat_logreg_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_logreg_test["MarketRegime"] = y_pred_test_logreg.copy()
# Count of Bulls/Bears
count_of_zeros_logreg = sum(1 for item in y_pred_test_logreg if item == 0)
print("Bear:", count_of_zeros_logreg)
count_of_ones_logreg = sum(1 for item in y_pred_test_logreg if item == 1)
print("Bull:", count_of_ones_logreg)
# Statistics
stat_logreg_test[['target']] = stat_logreg_test[['ReferenceRate']].pct_change().shift(-1)
stat_logreg_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 18
Bull: 49


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.014117,0.073333
1,0.001638,0.059437


### XGBoost Approach

In [17]:
import pandas as pd
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import classification_report
import xgboost as xgb

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
xgb_all_y_pred = []
xgb_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train):
    X_train_cv, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
    
    # Initialize the XGBoost classifier
    xgb_classifier = xgb.XGBClassifier(random_state=42)
   
    # Train the classifier on the training data
    xgb_classifier.fit(X_train_cv, y_train_cv)

    # Make predictions on the validation set
    y_pred_xgb = xgb_classifier.predict(X_val)

    # Append predictions and true labels for this fold
    xgb_all_y_pred.extend(y_pred_xgb)
    xgb_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_xgb = classification_report(xgb_all_y_true, xgb_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_xgb)

              precision    recall  f1-score   support

           0     0.8527    0.9244    0.8871       119
           1     0.9338    0.8699    0.9007       146

    accuracy                         0.8943       265
   macro avg     0.8933    0.8971    0.8939       265
weighted avg     0.8974    0.8943    0.8946       265



In [18]:
xgb_classifier_final = xgb.XGBClassifier(random_state=42)

xgb_classifier_final.fit(X_train, y_train)

# Make predictions on the test data
y_pred_test_xgb = xgb_classifier_final.predict(X_test)

# Calculate and print a classification report for the test data
classification_report_xgb_test = classification_report(y_test, y_pred_test_xgb, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_xgb_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     1.0000    0.8571    0.9231        49
           1     0.7200    1.0000    0.8372        18

    accuracy                         0.8955        67
   macro avg     0.8600    0.9286    0.8801        67
weighted avg     0.9248    0.8955    0.9000        67



In [19]:
stat_xgb_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_xgb_test["MarketRegime"] = y_pred_test_xgb.copy()
# Count of Bulls/Bears
count_of_zeros_xgb = sum(1 for item in y_pred_test_xgb if item == 0)
print("Bear:", count_of_zeros_xgb)
count_of_ones_xgb = sum(1 for item in y_pred_test_xgb if item == 1)
print("Bull:", count_of_ones_xgb)
# Statistics
stat_xgb_test[['target']] = stat_xgb_test[['ReferenceRate']].pct_change().shift(-1)
stat_xgb_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 42
Bull: 25


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.011319,0.061506
1,0.012496,0.064925


### ANN Approach

In [20]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
ann_all_y_pred = []
ann_all_y_true = []

# Define ANN model
class ANNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
    
input_size = X_train_scaled.shape[1]
hidden_size = 50
num_classes = 2

for train_index, val_index in kf.split(X_train):
    X_train_cv, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_cv)
    X_val_scaled = scaler.transform(X_val)

    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_cv.values, dtype=torch.long)
    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)

    # Create DataLoader for training data
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Initialize the model
    model = ANNModel(input_size, hidden_size, num_classes)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = 100
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        y_pred_probs = model(X_val_tensor)
        _, y_pred_ann = torch.max(y_pred_probs, 1)

    # Convert predictions to numpy arrays
    y_pred_ann = y_pred_ann.numpy()

    # Append predictions and true labels for this fold
    ann_all_y_pred.extend(y_pred_ann)
    ann_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_ann = classification_report(ann_all_y_true, ann_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_ann)

              precision    recall  f1-score   support

           0     0.6257    0.9412    0.7517       119
           1     0.9186    0.5411    0.6810       146

    accuracy                         0.7208       265
   macro avg     0.7722    0.7411    0.7164       265
weighted avg     0.7871    0.7208    0.7128       265



In [21]:
# Standardize the test features
X_test_scaled = scaler.transform(X_test)

# Convert numpy arrays to PyTorch tensors
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Initialize a list to store predictions on the test data
y_pred_test_ann = []

# Set the model to evaluation mode
model.eval()
with torch.no_grad():
    y_pred_probs_test = model(X_test_tensor)
    _, y_pred_test = torch.max(y_pred_probs_test, 1)

    # Convert predictions to numpy arrays
    y_pred_test_ann = y_pred_test.numpy()

# Calculate and print a classification report for the test data
classification_report_ann_test = classification_report(y_test, y_pred_test_ann, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_ann_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     0.9524    0.8163    0.8791        49
           1     0.6400    0.8889    0.7442        18

    accuracy                         0.8358        67
   macro avg     0.7962    0.8526    0.8117        67
weighted avg     0.8685    0.8358    0.8429        67



In [22]:
stat_ann_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_ann_test["MarketRegime"] = y_pred_test_ann.copy()
# Count of Bulls/Bears
count_of_zeros_ann = sum(1 for item in y_pred_test_ann if item == 0)
print("Bear:", count_of_zeros_ann)
count_of_ones_ann = sum(1 for item in y_pred_test_ann if item == 1)
print("Bull:", count_of_ones_ann)
# Statistics
stat_ann_test[['target']] = stat_ann_test[['ReferenceRate']].pct_change().shift(-1)
stat_ann_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 42
Bull: 25


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.009453,0.066281
1,0.00923,0.057204


### LSTM Approach

In [23]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert X_train_scaled to a Pandas DataFrame
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)


# Define LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2)  # Two layers
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, :])
        return out

input_size = X_train_scaled.shape[1]
hidden_size = 50
num_classes = 2  # Assuming binary classification

# Define the number of splits
kf = KFold(n_splits=3)

# Initialize lists to store predictions and true labels for each fold
lstm_all_y_pred = []
lstm_all_y_true = []

# Perform cross-validation
for train_index, val_index in kf.split(X_train):
    X_train_cv, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_cv)
    X_val_scaled = scaler.transform(X_val)

    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_cv.values, dtype=torch.long)
    X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val.values, dtype=torch.long)

    # Create DataLoader for training data
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    
    # Initialize the LSTM model
    model = LSTMModel(input_size, hidden_size, num_classes)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    num_epochs = 100
    for epoch in range(num_epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    model.eval()
    with torch.no_grad():
        y_pred_probs = model(X_val_tensor)
        _, y_pred_lstm = torch.max(y_pred_probs, 1)

    # Convert predictions to numpy arrays
    y_pred_lstm = y_pred_lstm.numpy()

    # Append predictions and true labels for this fold
    lstm_all_y_pred.extend(y_pred_lstm)
    lstm_all_y_true.extend(y_val)

# Calculate the overall classification report
classification_report_lstm = classification_report(lstm_all_y_true, lstm_all_y_pred, target_names=df['MarketRegime'].unique().astype(str), digits=4)

# Print the classification report
print(classification_report_lstm)

              precision    recall  f1-score   support

           0     0.6369    0.8992    0.7456       119
           1     0.8763    0.5822    0.6996       146

    accuracy                         0.7245       265
   macro avg     0.7566    0.7407    0.7226       265
weighted avg     0.7688    0.7245    0.7203       265



In [24]:
# Standardize the test features
X_test_scaled = scaler.transform(X_test)

# Convert numpy arrays to PyTorch tensors
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Initialize a list to store predictions on the test data
y_pred_test_lstm = []

# Set the model to evaluation mode
model.eval()
with torch.no_grad():
    y_pred_probs_test = model(X_test_tensor)
    _, y_pred_test = torch.max(y_pred_probs_test, 1)

    # Convert predictions to numpy arrays
    y_pred_test_lstm = y_pred_test.numpy()

# Calculate and print a classification report for the test data
classification_report_lstm_test = classification_report(y_test, y_pred_test_lstm, digits=4)

# Print the classification report for the test data
print("Classification Report for Test Data:\n", classification_report_lstm_test)

Classification Report for Test Data:
               precision    recall  f1-score   support

           0     0.7385    0.9796    0.8421        49
           1     0.5000    0.0556    0.1000        18

    accuracy                         0.7313        67
   macro avg     0.6192    0.5176    0.4711        67
weighted avg     0.6744    0.7313    0.6427        67



In [25]:
stat_lstm_test = stat_eval_test.copy()
# Replace the values for the actual market regime with the predicted values
stat_lstm_test["MarketRegime"] = y_pred_test_lstm.copy()
# Count of Bulls/Bears
count_of_zeros_lstm = sum(1 for item in y_pred_test_lstm if item == 0)
print("Bear:", count_of_zeros_lstm)
count_of_ones_lstm = sum(1 for item in y_pred_test_lstm if item == 1)
print("Bull:", count_of_ones_lstm)
# Statistics
stat_lstm_test[['target']] = stat_lstm_test[['ReferenceRate']].pct_change().shift(-1)
stat_lstm_test.groupby('MarketRegime').agg({'target':['mean', 'std']})

Bear: 65
Bull: 2


Unnamed: 0_level_0,target,target
Unnamed: 0_level_1,mean,std
MarketRegime,Unnamed: 1_level_2,Unnamed: 2_level_2
0,-0.004688,0.062824
1,0.062259,0.058072
