In [1]:
# Run only if you have errors loading the topquartile module.
# Adds the directory four levels above the current working directory to the
# import search path (presumably the repository root — confirm for your checkout).

from pathlib import Path
import sys

root = Path().resolve().parent.parent.parent.parent
# sys.path entries must be plain strings: the import machinery skips any
# other type, so appending the Path object itself has no effect.
# The membership check keeps the cell idempotent when it is re-run.
if str(root) not in sys.path:
    sys.path.append(str(root))

In [2]:
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform, FundamentalCovariateTransform)
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransform
from topquartile.modules.datamodule.partitions import PurgedTimeSeriesPartition

In [3]:
# Covariate pipeline: a list of (transform class, keyword arguments) pairs.
covtrans_config = [
    (
        TechnicalCovariateTransform,
        {
            "sma": [20, 30],
            "ema": [20, 30],
            "momentum_change": True,
            "volatility": [20, 30],
        },
    ),
]

# Label pipeline: same (transform class, keyword arguments) layout.
labeltrans_config = [
    (BinaryLabelTransform, {"label_duration": 20, "quantile": 0.75}),
]

# Keyword arguments handed to the loader as `partition_kwargs`.
# NOTE(review): `gap` (20) equals `label_duration` (20) above — presumably
# intentional so the purge window covers the label horizon; confirm and keep
# the two values in sync if either changes.
partition_config = {
    "n_splits": 5,
    "gap": 20,
    "max_train_size": 504,
    "test_size": 60,
    "verbose": False,
}

In [4]:
# Wire the dataset id, the covariate/label transform configs and the
# partitioning scheme into a single loader object.
dataloader = DataLoader(
    data_id='dec2024',
    covariate_transform=covtrans_config,
    label_transform=labeltrans_config,
    partition_class=PurgedTimeSeriesPartition,
    partition_kwargs=partition_config,
)

In [5]:
# Build the cross-validation folds. Per the log printed below, the first call
# also triggers data processing (reading the CSV and applying the transforms)
# before partitioning; the result is indexable per fold (see `folds[0][1]` later).
folds = dataloader.get_cv_folds()

Data not yet processed. Processing now...
Reading data from: /Users/gregruyoga/gmoneycodes/topquartile/topquartile/data/dec2024.csv
Found 342 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [20, 30], 'ema': [20, 30], 'momentum_change': True, 'volatility': [20, 30]}
 Applying BinaryLabelTransform with params {'label_duration': 20, 'quantile': 0.75} (globally)


  df_copy.groupby(level=self.ticker_level_name, group_keys=False)[self.price_column]


Data processing complete.
Partitioning data using PurgedTimeSeriesPartition for 5 splits across 113 tickers.


  data_grouped_by_ticker = self.data.groupby("ticker")


Fold 0: Train shape (56952, 39), Test shape (6780, 39)
Fold 1: Train shape (56952, 39), Test shape (6780, 39)
Fold 2: Train shape (56952, 39), Test shape (6780, 39)
Fold 3: Train shape (56952, 39), Test shape (6780, 39)
Fold 4: Train shape (56952, 39), Test shape (6780, 39)
Partitioning complete. Generated 5 CV folds.


In [7]:
# Display the second element of the first fold.
# NOTE(review): presumably the test split, since the partition log lists
# "Train shape" before "Test shape" — confirm the tuple ordering.
folds[0][1]

Unnamed: 0_level_0,Unnamed: 1_level_0,EQY_DVD_YLD_IND,PX_HIGH,PX_LOW,RETURN_COM_EQY,PX_TO_BOOK_RATIO,NEWS_SENTIMENT_DAILY_AVG,NEWS_HEAT_PUB_DAVG,TOT_DEBT_TO_TOT_EQY,VWAP_VOLUME,EQY_WEIGHTED_AVG_PX,...,ema_20,ema_30,volatility_20,volatility_30,roc_126,momentum_change,20d_stock_return,INDEX_RETURN,EXCESS_RETURN,label
TickerIndex,DateIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AALI,2023-10-17,5.5694,7250.0,7175.0,6.1205,0.6375,0.0000,0.0,18.0673,322000.0,7210.3649,...,7539.134504,7572.506889,0.033217,0.033994,-8.571429,-1.766695,-2.430556,-1.117642,-1.312913,0
AALI,2023-10-18,5.7286,7300.0,7000.0,6.1205,0.6198,0.0000,0.0,18.0673,2826800.0,7095.6099,...,7487.788361,7535.570961,0.033752,0.033027,-11.111111,-3.758170,0.357143,0.437352,-0.080209,0
AALI,2023-10-19,5.6479,7175.0,7025.0,6.1205,0.6286,0.0000,0.0,18.0673,1188600.0,7079.7724,...,7450.856136,7507.469608,0.033953,0.032399,-7.491857,16.705674,-1.056338,1.629771,-2.686109,0
AALI,2023-10-20,5.6678,7125.0,7025.0,6.1205,0.6264,0.0000,0.0,18.0673,327700.0,7068.7595,...,7415.060313,7479.568343,0.033564,0.031592,-7.516340,16.553139,-0.706714,1.876140,-2.582854,0
AALI,2023-10-23,5.6879,7125.0,7000.0,6.1205,0.6242,0.0000,0.0,18.0673,1001300.0,7029.6614,...,7380.292664,7451.854257,0.032853,0.030640,-8.737864,25.517455,-0.354610,3.751493,-4.106103,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WSKT,2024-01-02,0.3200,212.0,199.0,-52.4353,0.6671,0.0000,0.0,556.7789,18553300.0,202.0616,...,203.206163,204.104712,0.000000,0.099989,0.000000,31.292517,0.000000,-1.793821,1.793821,0
WSKT,2024-01-03,0.3200,212.0,199.0,-52.4353,0.6671,0.0000,0.0,556.7789,18553300.0,202.0616,...,203.091290,203.968924,0.000000,0.099989,0.000000,31.756757,0.000000,-0.977456,0.977456,0
WSKT,2024-01-04,0.3200,212.0,199.0,-65.9300,0.6671,-0.1250,1.0,597.7109,18553300.0,202.0616,...,202.987358,203.841896,0.000000,0.099989,0.000000,30.821918,0.000000,-2.147724,2.147724,0
WSKT,2024-01-05,0.3200,212.0,199.0,-65.9300,0.6671,0.0000,0.0,597.7109,18553300.0,202.0616,...,202.893324,203.723064,0.000000,0.099989,0.000000,32.214765,0.000000,-1.521423,1.521423,0
