In [1]:
# Run only if you have errors loading the topquartile module

from pathlib import Path
import sys

root = Path().resolve().parent.parent.parent.parent
sys.path.append(root)

In [2]:
from topquartile.modules.datamodule.dataloader import DataLoader
from topquartile.modules.datamodule.transforms.covariate import (TechnicalCovariateTransform, FundamentalCovariateTransform)
from topquartile.modules.datamodule.transforms.label import BinaryLabelTransform
from topquartile.modules.datamodule.partitions import PurgedTimeSeriesPartition

In [3]:
covtrans_config = [((TechnicalCovariateTransform, dict(sma = [20, 30],
                                                       ema = [20, 30],
                                                       momentum_change=True,
                                                       volatility = [20, 30],)))]

labeltrans_config = [(BinaryLabelTransform, dict(label_duration=20,
                                                quantile=0.75))]

partition_config = dict(n_splits=5, gap=20, max_train_size=504, test_size=60, verbose=False)

In [4]:
dataloader = DataLoader(data_id='dec2024', covariate_transform=covtrans_config,
                  label_transform=labeltrans_config, partition_class=PurgedTimeSeriesPartition,
                  partition_kwargs=partition_config)

In [5]:
folds = dataloader.get_cv_folds()

Data not yet processed. Processing now...
Reading data from: /Users/gregruyoga/gmoneycodes/topquartile/topquartile/data/dec2024.csv
Found 342 raw ticker names.
 Applying TechnicalCovariateTransform with params {'sma': [20, 30], 'ema': [20, 30], 'momentum_change': True, 'volatility': [20, 30]}
 Applying BinaryLabelTransform with params {'label_duration': 20, 'quantile': 0.75} (globally)


  df_copy.groupby(level=self.ticker_level_name, group_keys=False)[self.price_column]


Data processing complete.
Partitioning data using PurgedTimeSeriesPartition for 5 splits across 113 tickers.


  data_grouped_by_ticker = self.data.groupby("ticker")


Fold 0: Train shape (56952, 39), Test shape (6780, 39)
Fold 1: Train shape (56952, 39), Test shape (6780, 39)
Fold 2: Train shape (56952, 39), Test shape (6780, 39)
Fold 3: Train shape (56952, 39), Test shape (6780, 39)
Fold 4: Train shape (56952, 39), Test shape (6780, 39)
Partitioning complete. Generated 5 CV folds.


In [6]:
train = folds[0][0]
valid = folds[0][1]


In [7]:
train = train.dropna(how='any', inplace=False)
valid = valid.dropna(how='any', inplace=False)

train.drop('ticker', axis=1, inplace=True)
valid.drop('ticker', axis=1, inplace=True)

to_remove = ['INDEX_RETURN', 'EXCESS_RETURN', '20d_stock_return', 'label']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train.drop('ticker', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid.drop('ticker', axis=1, inplace=True)


In [8]:
train_label = train['EXCESS_RETURN']
train_covariates = train.drop(to_remove, axis=1, inplace=False)

In [9]:
valid_label = valid['EXCESS_RETURN']
valid_covariates = valid.drop(to_remove, axis=1, inplace=False)

In [10]:
from quantile_forest import RandomForestQuantileRegressor
qrf = RandomForestQuantileRegressor()
qrf.fit(train_covariates.to_numpy(), train_label.to_numpy())
y_pred = qrf.predict(valid_covariates.to_numpy(), quantiles=[0.025, 0.5, 0.975])

In [11]:
y_pred

array([[ -1.95869523,   4.89947832,  15.28221841],
       [ -4.41215982,   4.89947832,  49.92450134],
       [ -2.62403666,   4.89947832,  15.28221841],
       ...,
       [-10.67199978,   0.19225049,  22.92077153],
       [-34.31479399,   1.2387457 ,  49.32271189],
       [-34.31479399,   0.99046944,  49.32271189]], shape=(4940, 3))