In [14]:
import pandas as pd
import numpy as np
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform, FundamentalCovariateTransform)
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransform, ExcessReturnTransform, NaryLabelTransform
from topquartile.modules.datamodule.partitions import PurgedTimeSeriesPartition
from topquartile.modules.evaluation import Evaluation
from pathlib import Path

# Project root: two directory levels above the current working directory.
root_dir = Path.cwd().resolve().parent.parent

In [12]:
# Display the resolved project root to sanity-check the working-directory assumption.
# NOTE(review): the rendered output exposes an absolute local path; consider clearing
# this cell's output before sharing the notebook.
root_dir

PosixPath('/Users/gregruyoga/gmoneycodes/topquartile')

In [2]:
# Forward horizon used to build the label (unit is presumably trading days/rows — TODO confirm).
# The partition's purge gap is derived from the same constant so the two values cannot
# drift apart when one of them is tuned.
# NOTE(review): presumably the gap must be >= the label horizon so that training labels
# do not overlap the test window — confirm against PurgedTimeSeriesPartition.
LABEL_DURATION = 20

# Each entry is a (transform class, kwargs) pair handed to DataLoader.
covtrans_config = [
    (
        TechnicalCovariateTransform,
        dict(
            sma=[20, 30],
            ema=[20, 30],
            momentum_change=True,
            volatility=[20, 30],
        ),
    )
]

# Binary label built with a 0.25 quantile threshold over the LABEL_DURATION horizon.
labeltrans_config = [
    (BinaryLabelTransform, dict(label_duration=LABEL_DURATION, quantile=0.25)),
]

# Keyword arguments forwarded to the partition class.
partition_config = dict(
    n_splits=5,
    gap=LABEL_DURATION,
    max_train_size=504,
    test_size=60,
    verbose=False,
)

dataloader = DataLoader(
    data_id='covariates_may2025v2',
    covariate_transform=covtrans_config,
    label_transform=labeltrans_config,
    partition_class=PurgedTimeSeriesPartition,
    partition_kwargs=partition_config,
)

In [3]:
# Build the cross-validation folds (the captured log shows this also triggers data
# processing when it has not been done yet).
# The folds are kept under a dedicated name because `data` is rebound by a later cell,
# which would otherwise leave the remaining folds unreachable without re-processing.
cv_folds = dataloader.get_cv_folds()
data = cv_folds  # alias retained: subsequent cells read the folds through `data`

Data not yet processed. Processing now...
Reading data from: /Users/gregruyoga/gmoneycodes/topquartile/topquartile/data/covariates_may2025v2.csv
Found 134 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [20, 30], 'ema': [20, 30], 'momentum_change': True, 'volatility': [20, 30]}
THIS IS COLUMNS Index(['TOTAL_EQUITY', 'BOOK_VAL_PER_SH', 'REVENUE_PER_SH', 'RETURN_COM_EQY',
       'CUR_MKT_CAP', 'PX_LAST', 'TOT_DEBT_TO_TOT_ASSET',
       'TOT_DEBT_TO_TOT_EQY', 'BS_TOT_LIAB2', 'BS_TOT_ASSET', 'IS_EPS',
       'PX_HIGH', 'PX_LOW', 'PX_CLOSE_1D', 'PX_VOLUME', 'TURNOVER', 'ticker',
       'DVD_SH_12M'],
      dtype='object')
Applying label transformations globally to the dataset (before partitioning).
 Applying BinaryLabelTransform with params {'label_duration': 20, 'quantile': 0.25} (globally)


  self.ihsg.index = pd.to_datetime(self.ihsg.index)
  self.data = self.data.apply(pd.to_numeric, errors='ignore')


Data processing complete.
Partitioning data using PurgedTimeSeriesPartition for 5 splits across 85 tickers.
Fold 0: Train shape (42840, 29), Test shape (5100, 29)
Fold 1: Train shape (42840, 29), Test shape (5100, 29)
Fold 2: Train shape (42840, 29), Test shape (5100, 29)
Fold 3: Train shape (42840, 29), Test shape (5100, 29)
Fold 4: Train shape (42840, 29), Test shape (5100, 29)
Partitioning complete. Generated 5 CV folds.


In [4]:
# Merge the pieces of the first fold into a single frame by stacking rows (axis=0).
# NOTE(review): presumably data[0] is the (train, test) pair of fold 0 — confirm.
# The guard keeps this cell safe to re-run: once `data` has been rebound to the merged
# DataFrame, `data[0]` would be a column lookup rather than a fold selection.
if not isinstance(data, pd.DataFrame):
    data = pd.concat(data[0], axis=0)

In [10]:
# Inspect the merged frame (pandas truncates the repr to the first/last rows).
# NOTE(review): the rendered output shows NaN in `index_returns_20`, `excess_returns_20`
# and `label` on some rows, and NaN volatility on the trailing rows — confirm these are
# handled before modelling.
# NOTE(review): the DateIndex in the output includes consecutive calendar days
# (e.g. 2022-04-02, 2022-04-03) — verify whether non-trading days are expected.
data

Unnamed: 0_level_0,Unnamed: 1_level_0,TOTAL_EQUITY,BOOK_VAL_PER_SH,REVENUE_PER_SH,RETURN_COM_EQY,CUR_MKT_CAP,PX_LAST,TOT_DEBT_TO_TOT_ASSET,TOT_DEBT_TO_TOT_EQY,BS_TOT_LIAB2,BS_TOT_ASSET,...,ema_20,ema_30,volatility_20,volatility_30,roc_126,momentum_change,eq_returns_20,index_returns_20,excess_returns_20,label
TickerIndex,DateIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ADHI,2022-03-31,5582902.864,1428.8344,598.2535,0.3762,2.635029e+06,678.0,28.1580,196.3527,33348099.57,38931002.43,...,698.210249,712.087594,0.114333,0.098490,-46.529968,-70.116713,-3.392330,2.226873,-5.619203,1
ADHI,2022-04-01,5582902.864,1428.8344,598.2535,0.3762,3.275981e+06,842.0,28.1580,196.3527,33348099.57,38931002.43,...,711.904511,720.469040,0.125182,0.108159,-19.656489,,-22.209026,,,
ADHI,2022-04-02,5582902.864,1428.8344,598.2535,0.3762,2.884288e+06,742.0,28.1580,196.3527,33348099.57,38931002.43,...,714.770748,721.858134,0.126538,0.110384,-16.909295,-10.221729,-11.185984,2.925651,-14.111635,0
ADHI,2022-04-03,5582902.864,1428.8344,598.2535,0.3762,2.724050e+06,700.0,28.1580,196.3527,33348099.57,38931002.43,...,713.364010,720.447932,0.127487,0.110900,9.204368,46.113817,19.000000,2.171260,16.828740,1
ADHI,2022-04-04,6022184.135,1513.8990,973.8728,0.9710,2.635029e+06,678.0,27.0484,174.8732,32912410.85,38934594.98,...,709.996009,717.709355,0.124405,0.110795,3.511450,35.353282,-2.064897,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UNVR,2024-06-17,4844243,126.9788,264.2082,95.1092,1.186465e+08,3110.0,4.0924,15.4957,13498184.00,18342427.00,...,2701.516595,2690.010287,,,-11.142857,8.948467,-4.823151,,,
UNVR,2024-06-18,4844243,126.9788,264.2082,95.1092,1.190280e+08,3120.0,4.0924,15.4957,13498184.00,18342427.00,...,2741.372158,2717.751559,,,-9.037901,20.240450,-4.807692,,,
UNVR,2024-06-19,4844243,126.9788,264.2082,95.1092,1.175020e+08,3080.0,4.0924,15.4957,13498184.00,18342427.00,...,2773.622428,2741.122426,,,-10.724638,12.266434,-4.545455,7.392671,-11.938126,0
UNVR,2024-06-20,4844243,126.9788,264.2082,95.1092,1.194095e+08,3130.0,4.0924,15.4957,13498184.00,18342427.00,...,2807.563150,2766.211302,,,-7.941176,12.804744,-6.709265,7.357771,-14.067036,0
