In [1]:
from pathlib import Path
import sys

# Go 4 levels up to reach topquartile module from the notebook
root = Path().resolve().parents[2]

# Add it to the top of sys.path
if str(root) not in sys.path:
    sys.path.insert(0, str(root))

print("✅ Project root added to sys.path:", root)

✅ Project root added to sys.path: /Users/admin/RR_Project_Regime_Prediction/topquartile


In [2]:
import pandas as pd
import numpy as np
from ta import add_all_ta_features
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from BorutaShap import BorutaShap
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
from arch.__future__ import reindexing
from topquartile.modules.datamodule.partitions import PurgedGroupTimeSeriesPartition
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform,
                                                                 FundamentalCovariateTransform,
                                                                 MacroeconomicCovariateTransform)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def frac_diff(series, d, thres=0.01):
    """
    Apply fractional differencing to a time series to make it stationary,
    while retaining memory of past information.

    The weights follow the binomial recursion w_0 = 1,
    w_k = -w_{k-1} * (d - k + 1) / k and are truncated at the first weight
    whose absolute value drops below ``thres``. The output at label ``t`` is
    sum_k w_k * x_{t-k}, so the weight of 1 is applied to the observation at
    ``t`` itself. With ``d=0`` the series is returned unchanged and with
    ``d=1`` the result is the ordinary first difference, both aligned to the
    input index.

    :param series: pandas Series (or anything ``pd.Series`` accepts) of numeric values
    :param d: differencing order (fractional)
    :param thres: minimum weight threshold; smaller values keep more lags
    :return: fractionally differenced float64 Series; the first
             ``n_weights - 1`` labels are dropped because their window is
             incomplete. An empty input yields an empty Series.
    """
    values = pd.Series(series)
    n_obs = len(values)

    # 1. Compute weights for fractional differencing (newest lag first).
    weights = [1.0]
    for k in range(1, n_obs):
        next_weight = -weights[-1] * (d - k + 1) / k
        if abs(next_weight) < thres:
            break
        weights.append(next_weight)
    width = len(weights)

    # Nothing to compute when there are fewer observations than weights
    # (this only happens for empty input, since width <= max(n_obs, 1)).
    if n_obs < width:
        return pd.Series(dtype='float64')

    # 2. Apply weights to values.
    # A 'valid' convolution evaluates sum_k weights[k] * x[t - k] for every t
    # with a full window, i.e. t = width - 1 .. n_obs - 1. This replaces the
    # former element-by-element Series growth, which was quadratic and used a
    # window that ended one step before the label it was written to.
    data = values.to_numpy(dtype='float64')
    kernel = np.asarray(weights, dtype='float64')
    diffed = np.convolve(data, kernel, mode='valid')

    return pd.Series(diffed, index=values.index[width - 1:], dtype='float64')

In [4]:
# Covariate pipeline configuration: a list of (transform class, keyword
# arguments) pairs, passed to DataLoader as `covariate_transform`.
# How each flag or list is interpreted is defined by the transform classes,
# which are not visible in this notebook.
covtrans_config = [
    (
        TechnicalCovariateTransform,
        {
            "sma": [10, 20, 50, 100],
            "ema": [10, 20, 50],
            "rsi": [14],
            "macd": True,
            "macd_signal": True,
            "macd_histogram": True,
            "roc": [6, 10, 20],
            "cmo": [14],
            "atr": True,
            "trix": [21],
            "obv": True,
            "mfi": True,
            "force_index": True,
            "stc": True,
            "bb": True,
            "ultimate": True,
            "awesome": True,
            "plus_di": True,
            "minus_di": True,
            "max_return": [5, 10, 20],
            "price_gap": [20],
            "price_vs_sma": [20],
            "momentum_change": True,
            "ulcer": True,
            "mean_price_volatility": [21, 252],
            "approximate_entropy": True,
            "adfuller": True,
            "binned_entropy": True,
            "cid_ce": True,
            "count_above_mean": True,
            "count_below_mean": True,
            "energy_ratio_chunks": True,
            "fft_aggregated": True,
            "first_location_maximum": True,
            "first_location_minimum": True,
            "fourier_entropy": True,
            # The only scalar (non-bool, non-list) setting in this group.
            "index_mass_quantile": 0.5,
            "kurtosis": True,
            "last_location_of_maximum": True,
            "lempel_ziv_complexity": True,
            "linear_trend_timewise": True,
            "longest_strike_above_mean": True,
            "longest_strike_below_mean": True,
            "mean_change": True,
            "mean_abs_change": True,
            "mean_second_derivative_central": True,
            "number_cwt_peaks": True,
            "permutation_entropy": True,
            "sample_entropy": True,
            "skewness": True,
            "spkt_welch_density": True,
            "time_reversal_asymmetry_statistic": True,
            "variation_coefficient": True,
        },
    ),
    (
        FundamentalCovariateTransform,
        {
            "pe_ratio": True,
            "earnings_yield": True,
            # Tuple of two lists; presumably (windows, percentiles) — TODO confirm.
            "pe_band": ([60, 120], [25, 50, 75]),
            "debt_to_assets": True,
            "debt_to_capital": True,
            "equity_ratio": True,
            "market_to_book": True,
            "adjusted_roic": True,
            "operating_efficiency": True,
            "levered_roa": True,
            "eps_growth": True,
            "price_to_sales": True,
            "price_to_book": True,
            "dividend_yield": True,
            "fx_rate": True,
            "credit_ytw": True,
            "global_credit_ytw": True,
            "vix": True,
        },
    ),
    (
        MacroeconomicCovariateTransform,
        {
            "vix_index": True,
            "indo_10y_yield": True,
            "bi_rate": True,
            "fed_funds_rate": True,
            "indo_cpi_yoy": True,
            "usd_idr": True,
            "dxy_index": True,
        },
    ),
]

In [5]:
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransformWrapper

In [6]:
# Label pipeline configuration: (transform class, keyword arguments) pairs,
# passed to DataLoader as `label_transform`.
labeltrans_config = [
    (
        BinaryLabelTransformWrapper,
        {"label_duration": 20, "quantile": 0.9},
    ),
]

In [7]:
# Configure the loader for the 'dec2024' dataset with the covariate and label
# pipelines defined in the cells above, then run every transform.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: Invalid input, x is constant" — possibly a feature computed on
# a constant price window; confirm which transform raises it.
dec2024_loader = DataLoader(
    label_transform=labeltrans_config,  # <- if you use KAMA+MSR or BinaryLabelTransform
    covariate_transform=covtrans_config,
    partition_class=PurgedGroupTimeSeriesPartition,
    data_id='dec2024',
)
data = dec2024_loader.transform_data()

Reading data from: /Users/admin/RR_Project_Regime_Prediction/topquartile/topquartile/data/dec2024.csv
Found 342 raw ticker names.
INFO: Reindexed data with MultiIndex ['ticker', 'Dates']: ['ticker', 'Dates']
 Applying TechnicalCovariateTransform with params {'sma': [10, 20, 50, 100], 'ema': [10, 20, 50], 'rsi': [14], 'macd': True, 'macd_signal': True, 'macd_histogram': True, 'roc': [6, 10, 20], 'cmo': [14], 'atr': True, 'trix': [21], 'obv': True, 'mfi': True, 'force_index': True, 'stc': True, 'bb': True, 'ultimate': True, 'awesome': True, 'plus_di': True, 'minus_di': True, 'max_return': [5, 10, 20], 'price_gap': [20], 'price_vs_sma': [20], 'momentum_change': True, 'ulcer': True, 'mean_price_volatility': [21, 252], 'approximate_entropy': True, 'adfuller': True, 'binned_entropy': True, 'cid_ce': True, 'count_above_mean': True, 'count_below_mean': True, 'energy_ratio_chunks': True, 'fft_aggregated': True, 'first_location_maximum': True, 'first_location_minimum': True, 'fourier_entropy': T

ValueError: Invalid input, x is constant