In [1]:
from pathlib import Path
import sys

# Go 4 levels up to reach topquartile module from the notebook
root = Path().resolve().parents[2]

# Add it to the top of sys.path
if str(root) not in sys.path:
    sys.path.insert(0, str(root))

print("✅ Project root added to sys.path:", root)

✅ Project root added to sys.path: /Users/admin/RR_Project_Regime_Prediction/topquartile


In [2]:
import pandas as pd
import numpy as np
from ta import add_all_ta_features
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from BorutaShap import BorutaShap
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
from arch.__future__ import reindexing
from topquartile.modules.datamodule.partitions import PurgedGroupTimeSeriesPartition
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform,
                                                                 FundamentalCovariateTransform,
                                                                 MacroeconomicCovariateTransform)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def frac_diff(series, d, thres=0.01):
    w = [1.]
    for k in range(1, len(series)):
        w_ = -w[-1] * (d - k + 1) / k
        if abs(w_) < thres:
            break
        w.append(w_)
    w = np.array(w[::-1]).reshape(-1, 1)
    df = pd.Series(series)
    result = pd.Series(dtype='float64')

    for i in range(len(w), len(df)):
        val = np.dot(w.T, df.iloc[i - len(w):i])[0]
        result.at[df.index[i]] = val

    return result

In [4]:
covtrans_config = [
    (TechnicalCovariateTransform, dict(
        sma=[10, 20, 50, 100],
        ema=[10, 20, 50],
        rsi=[14],
        macd=True,
        macd_signal=True,
        macd_histogram=True,
        roc=[6, 10, 20],
        cmo=[14],
        atr=True,
        trix=[21],
        obv=True,
        mfi=True,
        force_index=True,
        stc=True,
        bb=True,
        ultimate=True,
        awesome=True,
        plus_di=True,
        minus_di=True,
        max_return=[5, 10, 20],
        price_gap=[20],
        price_vs_sma=[20],
        momentum_change=True,
        ulcer=True,
        mean_price_volatility=[21, 252],
        approximate_entropy=True,
        adfuller=True,
        binned_entropy=True,
        cid_ce=True,
        count_above_mean=True,
        count_below_mean=True,
        energy_ratio_chunks=True,
        fft_aggregated=True,
        first_location_maximum=True,
        first_location_minimum=True,
        fourier_entropy=True,
        index_mass_quantile=0.5,
        kurtosis=True,
        last_location_of_maximum=True,
        lempel_ziv_complexity=True,
        linear_trend_timewise=True,
        longest_strike_above_mean=True,
        longest_strike_below_mean=True,
        mean_change=True,
        mean_abs_change=True,
        mean_second_derivative_central=True,
        number_cwt_peaks=True,
        permutation_entropy=True,
        sample_entropy=True,
        skewness=True,
        spkt_welch_density=True,
        time_reversal_asymmetry_statistic=True,
        variation_coefficient=True,
    )),
    (FundamentalCovariateTransform, dict(
        pe_ratio=True,
        earnings_yield=True,
        pe_band=([60, 120], [25, 50, 75]),
        debt_to_assets=True,
        debt_to_capital=True,
        equity_ratio=True,
        market_to_book=True,
        adjusted_roic=True,
        operating_efficiency=True,
        levered_roa=True,
        eps_growth=True,
        price_to_sales=True,
        price_to_book=True,
        dividend_yield=True,
        fx_rate=True,
        credit_ytw=True,
        global_credit_ytw=True,
        vix=True
    )),
    (MacroeconomicCovariateTransform, dict(
        vix_index=True,
        indo_10y_yield=True,
        bi_rate=True,
        fed_funds_rate=True,
        indo_cpi_yoy=True,
        usd_idr=True,
        dxy_index=True
    ))
]

In [5]:
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransformWrapper

In [6]:
labeltrans_config = [
    (BinaryLabelTransformWrapper, dict(label_duration=20, quantile=0.9))
]

In [7]:
macro_df = pd.read_csv("/Users/admin/Desktop/macro_may2025v2.csv", parse_dates=["Dates"])
macro_df = macro_df.loc[:, ~macro_df.columns.str.contains('^Unnamed')]

In [8]:
macro_df

Unnamed: 0,Dates,VIX Index,GTIDR10Y Govt,IDBIRRPO Index,FEDL01 Index,IDCPIY Index,IDR Curncy,DXY Curncy
0,2015-04-28,12.41,7.736,,0.13,6.38,12982,96.092
1,2015-04-29,13.39,7.695,,0.13,6.38,12944,95.209
2,2015-04-30,14.55,7.667,,0.08,6.79,12964,94.600
3,2015-01-05,12.70,7.665,,0.13,6.79,12964,95.297
4,2015-04-05,12.85,7.746,,0.13,6.79,12983,95.480
...,...,...,...,...,...,...,...,...
2625,2025-05-20,18.09,6.830,5.75,4.33,1.95,16415,100.118
2626,2025-05-21,20.87,6.814,5.50,4.33,1.95,16395,99.559
2627,2025-05-22,20.28,6.832,5.50,4.33,1.95,16330,99.960
2628,2025-05-23,22.29,6.811,5.50,4.33,1.95,16222,99.112


In [9]:
print ("✅ Macro data loaded:", macro_df.shape)

✅ Macro data loaded: (2630, 8)


In [10]:
print(macro_df.columns)

Index(['Dates', 'VIX Index', 'GTIDR10Y Govt', 'IDBIRRPO Index', 'FEDL01 Index',
       'IDCPIY Index', 'IDR Curncy', 'DXY Curncy'],
      dtype='object')


In [11]:
macro_df.isnull().sum()

Dates               0
VIX Index           0
GTIDR10Y Govt       0
IDBIRRPO Index    257
FEDL01 Index        0
IDCPIY Index        0
IDR Curncy          0
DXY Curncy          0
dtype: int64

In [12]:
macro_df = macro_df.drop(columns=["IDBIRRPO Index"])

In [13]:
macro_df

Unnamed: 0,Dates,VIX Index,GTIDR10Y Govt,FEDL01 Index,IDCPIY Index,IDR Curncy,DXY Curncy
0,2015-04-28,12.41,7.736,0.13,6.38,12982,96.092
1,2015-04-29,13.39,7.695,0.13,6.38,12944,95.209
2,2015-04-30,14.55,7.667,0.08,6.79,12964,94.600
3,2015-01-05,12.70,7.665,0.13,6.79,12964,95.297
4,2015-04-05,12.85,7.746,0.13,6.79,12983,95.480
...,...,...,...,...,...,...,...
2625,2025-05-20,18.09,6.830,4.33,1.95,16415,100.118
2626,2025-05-21,20.87,6.814,4.33,1.95,16395,99.559
2627,2025-05-22,20.28,6.832,4.33,1.95,16330,99.960
2628,2025-05-23,22.29,6.811,4.33,1.95,16222,99.112


In [14]:
data = DataLoader(
    data_id='dec2024',
    partition_class=PurgedGroupTimeSeriesPartition,
    covariate_transform=covtrans_config,
    label_transform=labeltrans_config
)

data._process_data()
main_df = data.data 

Reading data from: /Users/admin/RR_Project_Regime_Prediction/topquartile/topquartile/data/dec2024.csv
Found 342 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [10, 20, 50, 100], 'ema': [10, 20, 50], 'rsi': [14], 'macd': True, 'macd_signal': True, 'macd_histogram': True, 'roc': [6, 10, 20], 'cmo': [14], 'atr': True, 'trix': [21], 'obv': True, 'mfi': True, 'force_index': True, 'stc': True, 'bb': True, 'ultimate': True, 'awesome': True, 'plus_di': True, 'minus_di': True, 'max_return': [5, 10, 20], 'price_gap': [20], 'price_vs_sma': [20], 'momentum_change': True, 'ulcer': True, 'mean_price_volatility': [21, 252], 'approximate_entropy': True, 'adfuller': True, 'binned_entropy': True, 'cid_ce': True, 'count_above_mean': True, 'count_below_mean': True, 'energy_ratio_chunks': True, 'fft_aggregated': True, 'first_location_maximum': True, 'first_location_minimum': True, 'fourier_entropy': True, 'index_mass_quantile': 0.5, 'kurtosis': True, 'last_location_of_maximum':

KeyError: 'Requested level (ticker) does not match index name (Dates)'