⚠️ NOTE:
This notebook is for research and validation only.
Production logic lives in the `src/` directory.

In [1]:
# Project path
import sys
from pathlib import Path

PROJECT_ROOT = Path(r"C:\Users\shubh\crypto-market-opportunity-engine")
sys.path.insert(0, str(PROJECT_ROOT))

import pandas as pd
import numpy as np


In [5]:
# Load risk-managed backtest data
DATA_RISK = r"C:\Users\shubh\crypto-market-opportunity-engine\data\processed\BTCUSDT_5m_risk_managed.parquet"

df = pd.read_parquet(DATA_RISK)



In [6]:
# ensure time index
df.index = pd.to_datetime(df.index)
df.sort_index(inplace=True)


In [7]:
# monthly returns
df["monthly_return"] = df["capital"].pct_change()

monthly_returns = (
    df["monthly_return"]
    .resample("M")
    .apply(lambda x: (1 + x).prod() - 1)
)

monthly_returns


open_time
2025-01-31    0.053059
2025-02-28    0.060211
2025-03-31    0.014546
2025-04-30    0.030627
2025-05-31   -0.010831
2025-06-30    0.000000
2025-07-31   -0.000167
2025-08-31    0.024457
2025-09-30   -0.011373
2025-10-31    0.068655
2025-11-30    0.009278
2025-12-31   -0.005638
Freq: M, Name: monthly_return, dtype: float64

In [8]:
# monthly return summary
summary = pd.DataFrame({
    "monthly_return": monthly_returns,
    "positive": monthly_returns > 0
})

summary


Unnamed: 0_level_0,monthly_return,positive
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-01-31,0.053059,True
2025-02-28,0.060211,True
2025-03-31,0.014546,True
2025-04-30,0.030627,True
2025-05-31,-0.010831,False
2025-06-30,0.0,False
2025-07-31,-0.000167,False
2025-08-31,0.024457,True
2025-09-30,-0.011373,False
2025-10-31,0.068655,True


In [9]:
# quick diagonstics
print("Winning months %:", summary["positive"].mean() * 100)
print("Best month %:", monthly_returns.max() * 100)
print("Worst month %:", monthly_returns.min() * 100)


Winning months %: 58.333333333333336
Best month %: 6.865503152445895
Worst month %: -1.137294161943292


In [10]:
# define market regimes
df = df.copy()

# Rolling volatility
df["rolling_vol"] = df["log_return"].rolling(288).std()  # ~1 day (5-min candles)

# Trend proxy (EMA diff sign)
df["market_trend"] = np.where(df["ema_diff"] > 0, "UP", "DOWN")

# Volatility regime (quantiles)
low_vol = df["rolling_vol"].quantile(0.33)
high_vol = df["rolling_vol"].quantile(0.66)

def vol_regime(v):
    if v < low_vol:
        return "LOW_VOL"
    elif v > high_vol:
        return "HIGH_VOL"
    else:
        return "MID_VOL"

df["vol_regime"] = df["rolling_vol"].apply(vol_regime)

df[["rolling_vol", "market_trend", "vol_regime"]].tail()


Unnamed: 0_level_0,rolling_vol,market_trend,vol_regime
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-12-31 23:35:00,0.000792,DOWN,LOW_VOL
2025-12-31 23:40:00,0.000792,DOWN,LOW_VOL
2025-12-31 23:45:00,0.000792,DOWN,LOW_VOL
2025-12-31 23:50:00,0.000792,DOWN,LOW_VOL
2025-12-31 23:55:00,0.000792,DOWN,LOW_VOL


In [11]:
# strategy performance by regime
regime_perf = (
    df[df["pnl"] != 0]
    .groupby(["market_trend", "vol_regime"])
    .agg(
        trades=("pnl", "count"),
        win_rate=("pnl", lambda x: (x > 0).mean()),
        avg_pnl=("pnl", "mean"),
        total_pnl=("pnl", "sum"),
    )
)

regime_perf


Unnamed: 0_level_0,Unnamed: 1_level_0,trades,win_rate,avg_pnl,total_pnl
market_trend,vol_regime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DOWN,HIGH_VOL,9,0.444444,812.623151,7313.60836
DOWN,LOW_VOL,8,0.0,-1408.095723,-11264.765786
DOWN,MID_VOL,3,0.0,-1565.189513,-4695.568538
UP,HIGH_VOL,8,1.0,2384.448427,19075.587415
UP,LOW_VOL,3,1.0,2524.134751,7572.404252
UP,MID_VOL,3,1.0,2469.838372,7409.515117


In [12]:
# equity curve by regime
df["equity_by_regime"] = (
    df.groupby(["market_trend", "vol_regime"])["capital"]
      .transform(lambda x: x / x.iloc[0])
)

df[["equity_by_regime", "market_trend", "vol_regime"]].tail()


Unnamed: 0_level_0,equity_by_regime,market_trend,vol_regime
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-12-31 23:35:00,1.254108,DOWN,LOW_VOL
2025-12-31 23:40:00,1.254108,DOWN,LOW_VOL
2025-12-31 23:45:00,1.254108,DOWN,LOW_VOL
2025-12-31 23:50:00,1.254108,DOWN,LOW_VOL
2025-12-31 23:55:00,1.254108,DOWN,LOW_VOL


In [13]:
# load the trained model
from src.inference import load_model
import src.config as config

MODEL_NAME = "random_forest"
VERSION = "1"

model = load_model(MODEL_NAME, VERSION)

FEATURE_COLS = config.FEATURE_COLS


In [14]:
# static feature importance
import pandas as pd

feature_importance = pd.DataFrame({
    "feature": FEATURE_COLS,
    "importance": model.feature_importances_
}).sort_values("importance", ascending=False)

feature_importance


Unnamed: 0,feature,importance
1,rsi_14,0.179717
6,rsi_14_lag_1,0.141155
7,rsi_14_lag_2,0.11189
0,ema_diff,0.099393
5,ema_diff_lag_3,0.098781
2,vol_ratio,0.096415
3,ema_diff_lag_1,0.093566
4,ema_diff_lag_2,0.092197
8,vol_ratio_lag_1,0.085165
9,trend,0.001722


In [15]:
# rolling feature importance
WINDOW = int(len(df) * 0.3)   # 30% rolling window
STEP = int(len(df) * 0.1)     # move window by 10%

drift_records = []

for start in range(0, len(df) - WINDOW, STEP):
    end = start + WINDOW

    df_slice = df.iloc[start:end]
    X_slice = df_slice[FEATURE_COLS]
    y_slice = df_slice["target"]

    from sklearn.ensemble import RandomForestClassifier
    rf = RandomForestClassifier(
        n_estimators=200,
        max_depth=6,
        random_state=42
    )
    rf.fit(X_slice, y_slice)

    for f, imp in zip(FEATURE_COLS, rf.feature_importances_):
        drift_records.append({
            "window_start": df_slice.index[0],
            "window_end": df_slice.index[-1],
            "feature": f,
            "importance": imp
        })


In [16]:
# drift summary
drift_df = pd.DataFrame(drift_records)

drift_summary = (
    drift_df
    .groupby("feature")["importance"]
    .agg(["mean", "std"])
    .sort_values("mean", ascending=False)
)

drift_summary


Unnamed: 0_level_0,mean,std
feature,Unnamed: 1_level_1,Unnamed: 2_level_1
rsi_14,0.152284,0.017643
rsi_14_lag_1,0.140825,0.016211
vol_ratio,0.111513,0.014741
rsi_14_lag_2,0.109393,0.006468
vol_ratio_lag_1,0.104122,0.01083
ema_diff,0.09934,0.005571
ema_diff_lag_3,0.095957,0.004729
ema_diff_lag_1,0.094713,0.004266
ema_diff_lag_2,0.090261,0.003954
trend,0.001593,0.00046


In [None]:
#  completed:

# Monthly performance

# Regime analysis

# Feature drift detection

# This is advanced ML + trading analysis.