# Config

In [1]:
# Use dark themes by default for both plotting libraries imported here:
# matplotlib through its "dark_background" style sheet and plotly through
# the "plotly_dark" template.
import matplotlib.pyplot as plt
import plotly.io as pio
plt.style.use("dark_background")  # matplotlib: set before any figure is created
pio.templates.default = "plotly_dark"  # plotly: template used when none is passed explicitly

# Imports
from warnings import catch_warnings, filterwarnings
from itertools import product
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
from statsmodels.tools.sm_exceptions import InterpolationWarning
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import f1_score, balanced_accuracy_score, confusion_matrix, classification_report

from market_regime.data import get_data
from market_regime.features import MarkovFeatureBuilder
from market_regime.models import MarkovOccupancyModel
from market_regime.pipelines.base import RegimePipeline

# Data

### Data import

In [2]:
# Load the full dataset (with labels) through the project's data loader.
# Later cells read the "delivery_date" and "target" columns from this frame.
df = get_data()

In [3]:
# Assemble the regime pipeline from its two collaborators: a feature builder
# and an estimator, passed positionally in that order.
feature_builder = MarkovFeatureBuilder()
estimator = MarkovOccupancyModel()
rp = RegimePipeline(feature_builder, estimator)

In [4]:
# Fit the pipeline on the full dataset (no train/test split at this point).
rp.fit(df)

RegimePipeline(feature_builder=<market_regime.features.markov_features.MarkovFeatureBuilder object at 0x000001AF7EC4F230>, estimator=<market_regime.models.markov_occupancy.MarkovOccupancyModel object at 0x000001AF7EC4F380>)

In [8]:
# In-sample predictions: `df` is the same frame the pipeline was fitted on,
# so this output is a sanity check, not a performance estimate.
rp.predict(df)

trading_date
2015-11-30    0
2015-12-01    0
2015-12-02    0
2015-12-03    0
2015-12-04    0
             ..
2024-11-22    0
2024-11-25    0
2024-11-26    0
2024-11-27    0
2024-11-28    0
Name: prediction, Length: 2305, dtype: int64

# Out-of-sample (OOS) prediction

In [None]:
# Walk-forward out-of-sample prediction: for every OOS day, refit a fresh
# pipeline state on all rows strictly before that day, then predict that day.
OOS_START = "2019-01-01"  # first delivery date that belongs to the OOS window

# The label slice `df.loc[: d - 1 day]` only yields a strict "past only"
# training set when the index is sorted and has one row per date.
assert df.index.is_monotonic_increasing, "df index must be sorted for walk-forward slicing"
assert df.index.is_unique, "df index must contain one row per date"

rp = RegimePipeline(MarkovFeatureBuilder(), MarkovOccupancyModel())

# The OOS window is selected on `delivery_date`, but the loop walks the
# frame's index values (the dates used for slicing below).
oos_days = df.loc[df["delivery_date"] >= OOS_START].index

preds = []
for d in tqdm(oos_days, desc="OOS Prediction:"):
    # Train on everything up to and including the previous calendar day.
    df_train = df.loc[: d - pd.Timedelta(days=1), :]
    # Double brackets keep the test day as a one-row DataFrame.
    df_test = df.loc[[d], :]
    rp.fit(df_train)
    # NOTE(review): predict only sees a single row here. If the feature
    # builder needs history (lags / rolling windows), this may degrade the
    # features at prediction time — confirm against MarkovFeatureBuilder.
    preds.append(rp.predict(df_test))

if not preds:
    # Fail with an actionable message instead of pd.concat's generic error.
    raise ValueError(f"No OOS rows found with delivery_date >= {OOS_START}")

oos_pred = pd.concat(preds).sort_index()

OOS Prediction::   0%|          | 0/1537 [00:00<?, ?it/s]

KeyboardInterrupt: 

: 

In [None]:
# Evaluate the out-of-sample predictions against the true labels.
# The class set is pinned once so that every metric below scores the same
# three classes, even if one of them is missing from the data at hand.
LABELS = [-1, 0, 1]

# Align the truth to the prediction index so both series cover the same rows.
y_true = df.loc[oos_pred.index, "target"].astype(int)
y_pred = oos_pred.astype(int)
assert y_true.index.equals(y_pred.index), "y_true and y_pred must cover the same dates"

# Explicit `labels` keeps the macro average over all classes in LABELS,
# consistent with the confusion matrix and classification report below.
macro_f1 = f1_score(y_true, y_pred, labels=LABELS, average="macro", zero_division=0)
bal_acc = balanced_accuracy_score(y_true, y_pred)

cm = confusion_matrix(y_true, y_pred, labels=LABELS)
cm_df = pd.DataFrame(
    cm,
    index=[f"true_{label}" for label in LABELS],
    columns=[f"pred_{label}" for label in LABELS],
)

print(f"OOS Macro-F1:          {macro_f1:.4f}")
print(f"OOS Balanced Accuracy: {bal_acc:.4f}\n")

print("Confusion matrix (rows=true, cols=pred):")
display(cm_df)

# Comparing the two distributions exposes a model that collapses onto one class.
print("\nClass distribution:")
print("y_true:\n", y_true.value_counts(normalize=True).sort_index())
print("\ny_pred:\n", y_pred.value_counts(normalize=True).sort_index())

print("\nClassification report:")
print(classification_report(y_true, y_pred, labels=LABELS, zero_division=0))

# small preview
oos_eval = pd.DataFrame({"y_true": y_true, "y_pred": y_pred})
display(oos_eval.head(20))

OOS Macro-F1:          0.1617
OOS Balanced Accuracy: 0.3327

Confusion matrix (rows=true, cols=pred):


Unnamed: 0,pred_-1,pred_0,pred_1
true_-1,0,611,4
true_0,0,491,1
true_1,0,430,0



Class distribution:
y_true:
 target
-1    0.400130
 0    0.320104
 1    0.279766
Name: proportion, dtype: float64

y_pred:
 prediction
0    0.996747
1    0.003253
Name: proportion, dtype: float64

Classification report:
              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       615
           0       0.32      1.00      0.49       492
           1       0.00      0.00      0.00       430

    accuracy                           0.32      1537
   macro avg       0.11      0.33      0.16      1537
weighted avg       0.10      0.32      0.16      1537



Unnamed: 0_level_0,y_true,y_pred
trading_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-30,-1,0
2018-12-03,-1,1
2018-12-04,-1,1
2018-12-05,-1,0
2018-12-06,0,0
2018-12-07,-1,0
2018-12-10,0,0
2018-12-11,0,0
2018-12-12,-1,0
2018-12-13,-1,0
