In [1]:
import pandas as pd
import numpy as np
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform, FundamentalCovariateTransform)
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransform, ExcessReturnTransform, NaryLabelTransform
from topquartile.modules.datamodule.partitions import PurgedTimeSeriesPartition
from topquartile.modules.evaluation import Evaluation
from pathlib import Path

# Anchor project paths two directory levels above the current working directory.
root_dir = Path.cwd().resolve().parent.parent

In [2]:
# Echo the resolved path so it can be checked against the expected directory.
root_dir

PosixPath('/Users/gregruyoga/gmoneycodes/topquartile/topquartile')

In [3]:
# Passed to the label transform as `label_duration`
# (presumably a horizon in trading days -- TODO confirm).
LABEL_DURATION = 20

# Keyword arguments for TechnicalCovariateTransform, one entry per indicator family.
technical_params = {
    "sma": [20, 40, 60],
    "ema": [20, 40, 60],
    "turnover": [20, 40, 60, 120, 240],
    "macd": [(12, 26, 9)],
    "price_gap": [20, 40, 60],
    "price_ratio": [9, 19, 39, 59, 119],
    "acceleration_rate": True,
    "volatility": [10, 20, 40, 60, 120],
    "volume_std": [10, 20, 40, 60, 120],
}
# Each config entry is a (transform class, keyword arguments) pair.
covtrans_config = [(TechnicalCovariateTransform, technical_params)]

# NOTE(review): the recorded output of this cell logs ExcessReturnTransform rather
# than NaryLabelTransform -- confirm the saved output matches this configuration.
label_params = {"label_duration": LABEL_DURATION, "index_csv": "ihsg_may2025"}
labeltrans_config = [(NaryLabelTransform, label_params)]

# NOTE(review): gap=2 is smaller than LABEL_DURATION=20. If `gap` is counted in the
# same units as the label horizon, labels near the end of each training window may
# overlap the test window -- confirm against PurgedTimeSeriesPartition.
partition_config = {
    "n_splits": 5,
    "gap": 2,
    "max_train_size": 504,
    "test_size": 60,
}

dataloader = DataLoader(
    data_id="covariates_may2025v2",
    covariate_transform=covtrans_config,
    label_transform=labeltrans_config,
    partition_class=PurgedTimeSeriesPartition,
    partition_kwargs=partition_config,
)

# Build the cross-validation folds; this triggers data processing on first use
# (see the "Data not yet processed" log line in the recorded output).
folds = dataloader.get_cv_folds()

Data not yet processed. Processing now...
Reading data from: /Users/gregruyoga/gmoneycodes/topquartile/topquartile/data/covariates_may2025v2.csv
Found 134 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [20, 40, 60], 'ema': [20, 40, 60], 'turnover': [20, 40, 60, 120, 240], 'macd': [(12, 26, 9)], 'price_gap': [20, 40, 60], 'price_ratio': [9, 19, 39, 59, 119], 'acceleration_rate': True, 'volatility': [10, 20, 40, 60, 120], 'volume_std': [10, 20, 40, 60, 120]}
THIS IS COLUMNS Index(['TOTAL_EQUITY', 'BOOK_VAL_PER_SH', 'REVENUE_PER_SH', 'RETURN_COM_EQY',
       'CUR_MKT_CAP', 'PX_LAST', 'TOT_DEBT_TO_TOT_ASSET',
       'TOT_DEBT_TO_TOT_EQY', 'BS_TOT_LIAB2', 'BS_TOT_ASSET', 'IS_EPS',
       'PX_HIGH', 'PX_LOW', 'PX_CLOSE_1D', 'PX_VOLUME', 'TURNOVER', 'ticker',
       'DVD_SH_12M'],
      dtype='object')
Applying label transformations globally to the dataset (before partitioning).
 Applying ExcessReturnTransform with params {'label_duration': 20, 'index_csv': 'ihsg_

  self.ihsg.index = pd.to_datetime(self.ihsg.index)


Fold 0: Train shape (42840, 66), Test shape (5100, 66)
Fold 1: Train shape (42840, 66), Test shape (5100, 66)
Fold 2: Train shape (42840, 66), Test shape (5100, 66)
Fold 3: Train shape (42840, 66), Test shape (5100, 66)
Fold 4: Train shape (42840, 66), Test shape (5100, 66)
Partitioning complete. Generated 5 CV folds.


In [6]:
# Stack the frames of the first CV fold row-wise into a single DataFrame
# (presumably `folds[0]` is the (train, test) pair of fold 0 -- TODO confirm).
#
# This cell rebinds `folds` from the fold collection to a DataFrame. Without the
# guard, running the cell a second time would evaluate `folds[0]` on the DataFrame
# (a column lookup) and fail, so the rebind is skipped once it has happened.
if not isinstance(folds, pd.DataFrame):
    folds = pd.concat(folds[0], axis=0)

In [7]:
# Display the concatenated fold-0 frame (the repr is truncated for large frames).
folds

Unnamed: 0_level_0,Unnamed: 1_level_0,TOTAL_EQUITY,BOOK_VAL_PER_SH,REVENUE_PER_SH,RETURN_COM_EQY,CUR_MKT_CAP,PX_LAST,TOT_DEBT_TO_TOT_ASSET,TOT_DEBT_TO_TOT_EQY,BS_TOT_LIAB2,BS_TOT_ASSET,...,volume_sma_60,acceleration_10/20,acceleration_10/40,acceleration_10/60,acceleration_20/40,acceleration_20/60,acceleration_40/60,eq_returns_20,index_returns_20,excess_returns_20
TickerIndex,DateIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ADHI,2022-04-25,,,,,2.563812e+06,659.0,,,,,...,1.728154e+07,0.975157,0.646884,0.586762,1.0,0.601710,0.907059,1.365706,-5.199627,6.565333
ADHI,2022-04-26,,,,,2.510399e+06,646.0,,,,,...,1.728141e+07,0.756872,0.517015,0.458720,1.0,0.606074,0.887247,2.786378,-4.397197,7.183575
ADHI,2022-04-27,,,,,2.546007e+06,655.0,,,,,...,1.729681e+07,0.762631,0.516251,0.459596,1.0,0.602645,0.890258,0.610687,-4.352790,4.963477
ADHI,2022-04-28,,,,,2.546007e+06,655.0,,,,,...,1.642053e+07,0.835187,0.541379,0.468859,1.0,0.561382,0.866045,8.396947,,
ADHI,2022-04-29,,,,,2.563812e+06,659.0,,,,,...,1.637810e+07,0.833876,0.557534,0.476374,1.0,0.571277,0.854430,6.980273,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UNVR,2024-06-17,,,,,1.186465e+08,3110.0,,,,,...,1.861954e+07,1.293922,1.312541,1.375643,1.0,1.063158,1.048076,-4.823151,,
UNVR,2024-06-18,,,,,1.190280e+08,3120.0,,,,,...,2.014640e+07,1.331979,1.492862,1.607158,1.0,1.206594,1.076562,-4.807692,,
UNVR,2024-06-19,,,,,1.175020e+08,3080.0,,,,,...,2.010743e+07,1.308556,1.518325,1.626830,1.0,1.243226,1.071464,-4.545455,7.392671,-11.938126
UNVR,2024-06-20,,,,,1.194095e+08,3130.0,,,,,...,2.003588e+07,1.180353,1.407140,1.510302,1.0,1.279535,1.073313,-6.709265,7.357771,-14.067036
