In [1]:
import pandas as pd
import numpy as np
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform, FundamentalCovariateTransform)
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransform, ExcessReturnTransform, NaryLabelTransform
from topquartile.modules.datamodule.partitions import PurgedTimeSeriesPartition
from topquartile.modules.evaluation import Evaluation
from pathlib import Path

# Resolve the kernel's current working directory to an absolute path, then
# climb two levels. The result therefore depends on where the kernel was started.
_working_dir = Path().resolve()
root_dir = _working_dir.parent.parent

In [15]:
# Value forwarded to the label transform as `label_duration`.
LABEL_DURATION = 20

# Covariate pipeline: a list of (transform class, keyword arguments) pairs.
# The lists are handed to TechnicalCovariateTransform unchanged; presumably they
# are look-back window lengths — TODO confirm against the transform itself.
covtrans_config = [
    (
        TechnicalCovariateTransform,
        {
            "sma": [20, 40, 60],
            "ema": [20, 40, 60],
            "turnover": [20, 40, 60, 120, 240],
            "macd": [(12, 26, 9)],
            "price_gap": [20, 40, 60],
            "price_ratio": [9, 19, 39, 59, 119],
            "acceleration_rate": True,
            "volatility": [10, 20, 40, 60, 120],
            "volume_std": [10, 20, 40, 60, 120],
        },
    ),
]

# Label pipeline, using the same (class, kwargs) layout as the covariates.
labeltrans_config = [
    (
        ExcessReturnTransform,
        {"label_duration": LABEL_DURATION, "index_csv": "ihsg_may2025"},
    ),
]

# Keyword arguments forwarded to the partition class when folds are built.
# NOTE(review): gap=2 is smaller than LABEL_DURATION=20. If `gap` is counted in
# the same time steps as the label horizon, labels near the end of a training
# window may overlap the test window — confirm against PurgedTimeSeriesPartition.
partition_config = {
    "n_splits": 5,
    "gap": 2,
    "max_train_size": 504,
    "test_size": 60,
}

# Gather the loader's keyword arguments in one mapping, then build the loader.
loader_kwargs = {
    "data_id": "covariates_may2025v2",
    "covariate_transform": covtrans_config,
    "label_transform": labeltrans_config,
    "partition_class": PurgedTimeSeriesPartition,
    "partition_kwargs": partition_config,
}
dataloader = DataLoader(**loader_kwargs)

# Request the cross-validation folds. In the recorded run this call also
# triggered data processing and logged five folds with train/test shapes.
folds = dataloader.get_cv_folds()


# Stack the pieces of the first fold row-wise into a single frame.
first_fold = folds[0]
fold_concat = pd.concat(first_fold, axis=0)

# NOTE(review): the name `eval` shadows Python's builtin `eval` for the rest of
# the notebook; it is kept here because other cells may already refer to it.
eval = Evaluation(df=fold_concat, n_train=252, n_valid=1)
data = eval.partition_data()

# Walk the first 252 entries of `data`. Each pass only rebinds `train` and
# `preds`, so once the loop ends they hold the pair taken from data[251].
# NOTE(review): 252 equals n_train above — confirm whether the loop bound is
# meant to be the number of partitions instead.
for i in range(252):
    train, preds = data[i][0], data[i][1]

Data not yet processed. Processing now...
Reading data from: /Users/gregruyoga/gmoneycodes/topquartile/topquartile/data/covariates_may2025v2.csv
Found 134 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [20, 40, 60], 'ema': [20, 40, 60], 'turnover': [20, 40, 60, 120, 240], 'macd': [(12, 26, 9)], 'price_gap': [20, 40, 60], 'price_ratio': [9, 19, 39, 59, 119], 'acceleration_rate': True, 'volatility': [10, 20, 40, 60, 120], 'volume_std': [10, 20, 40, 60, 120]}
THIS IS COLUMNS Index(['TOTAL_EQUITY', 'BOOK_VAL_PER_SH', 'REVENUE_PER_SH', 'RETURN_COM_EQY',
       'CUR_MKT_CAP', 'PX_LAST', 'TOT_DEBT_TO_TOT_ASSET',
       'TOT_DEBT_TO_TOT_EQY', 'BS_TOT_LIAB2', 'BS_TOT_ASSET', 'IS_EPS',
       'PX_HIGH', 'PX_LOW', 'PX_CLOSE_1D', 'PX_VOLUME', 'TURNOVER', 'ticker',
       'DVD_SH_12M'],
      dtype='object')
Applying label transformations globally to the dataset (before partitioning).
 Applying ExcessReturnTransform with params {'label_duration': 20, 'index_csv': 'ihsg_

  self.ihsg.index = pd.to_datetime(self.ihsg.index)
  self.data = self.data.apply(pd.to_numeric, errors='ignore')


Data processing complete.
Partitioning data using PurgedTimeSeriesPartition for 5 splits across 85 tickers.
Fold 0: Train shape (42840, 66), Test shape (5100, 66)
Fold 1: Train shape (42840, 66), Test shape (5100, 66)
Fold 2: Train shape (42840, 66), Test shape (5100, 66)
Fold 3: Train shape (42840, 66), Test shape (5100, 66)
Fold 4: Train shape (42840, 66), Test shape (5100, 66)
Partitioning complete. Generated 5 CV folds.


In [16]:
# Show the index of `preds` as left by the final iteration of the loop in the previous cell.
preds.index

MultiIndex([('ADHI', '2024-03-28'),
            ('ADMR', '2024-03-28'),
            ('ADRO', '2024-03-28'),
            ('AKRA', '2024-03-28'),
            ('AMMN', '2024-03-28'),
            ('AMRT', '2024-03-28'),
            ('ANTM', '2024-03-28'),
            ('ARTO', '2024-03-28'),
            ('ASII', '2024-03-28'),
            ('AUTO', '2024-03-28'),
            ('AVIA', '2024-03-28'),
            ('BBCA', '2024-03-28'),
            ('BBNI', '2024-03-28'),
            ('BBRI', '2024-03-28'),
            ('BBTN', '2024-03-28'),
            ('BBYB', '2024-03-28'),
            ('BFIN', '2024-03-28'),
            ('BIRD', '2024-03-28'),
            ('BMRI', '2024-03-28'),
            ('BNGA', '2024-03-28'),
            ('BRIS', '2024-03-28'),
            ('BRMS', '2024-03-28'),
            ('BRPT', '2024-03-28'),
            ('BSDE', '2024-03-28'),
            ('BTPS', '2024-03-28'),
            ('BUKA', '2024-03-28'),
            ('CMRY', '2024-03-28'),
            ('CPIN', '2024-0