## Setup and Dependencies

In [1]:
# --- Extension Setup ---
# Load the IPython extensions used by this notebook (line profiling and module autoreload).
%load_ext line_profiler
%load_ext autoreload

# Autoreload mode 2: re-import edited modules before each cell runs; -p prints reload activity.
%autoreload 2 -p

# --- Autoreload Target ---
# NOTE(review): %aimport already imports afml and marks it for autoreload; confirm it is
# still needed under mode 2, which reloads every module anyway.
%aimport afml

# --- AFML Initialization ---
# Importing afml emits the afml.cache DEBUG log lines shown in this cell's output.
import afml

# --- Module Imports ---
# NOTE(review): this path tweak runs AFTER `import afml`, so it cannot influence how afml
# itself is located; presumably it exists for the later `tools.module_reloader` import - confirm.
import sys
sys.path.append("..")  # Adjust if your afml repo is nested differently

[32m2025-12-01 13:43:57.824[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m669[0m - [34m[1mEnhanced cache features available:[0m
[32m2025-12-01 13:43:57.826[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m670[0m - [34m[1m  - Robust cache keys for NumPy/Pandas[0m
[32m2025-12-01 13:43:57.827[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m671[0m - [34m[1m  - MLflow integration: ✓[0m
[32m2025-12-01 13:43:57.832[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m672[0m - [34m[1m  - Backtest caching: ✓[0m
[32m2025-12-01 13:43:57.838[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m673[0m - [34m[1m  - Cache monitoring: ✓[0m
[32m2025-12-01 13:43:57.841[0m | [34m[1mDEBUG   [0m | [36mafml.cache[0m:[36m<module>[0m:[36m674[0m - [34m[1m  - Cache size analysis: ✓[0m
[32m2025-12-01 13:43:57.844[0m | [34m[1mDEBUG   [0m | [36mafml.cache

In [2]:
import re
import time
import warnings
import winsound
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    log_loss,
    precision_score,
    recall_score,
)
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

from afml.cross_validation import (
    PurgedKFold,
    PurgedSplit,
    analyze_cross_val_scores,
    analyze_cross_val_scores_calibrated,
    clf_hyper_fit,
    probability_weighted_accuracy,
)
from afml.data_structures.bars import *
from afml.ensemble import (
    SequentiallyBootstrappedBaggingClassifier,
    compute_custom_oob_metrics,
    estimate_ensemble_size,
)
from afml.labeling.triple_barrier import (
    add_vertical_barrier,
    get_event_weights,
    triple_barrier_labels,
)
from afml.sample_weights.optimized_attribution import (
    get_weights_by_time_decay_optimized,
)
from afml.strategies import (
    BollingerStrategy,
    ForexFeatureEngine,
    MACrossoverStrategy,
    create_bollinger_features,
    get_entries,
)
from afml.util import get_daily_vol, value_counts_data
from afml.util.misc import expand_params, value_counts_data
from tools.module_reloader import reload_with_dependencies

# NOTE(review): this silences every warning category for the whole kernel, including
# deprecation and convergence warnings from the libraries imported above.
warnings.filterwarnings("ignore")
# Alternative light theme: plt.style.use("seaborn-v0_8-whitegrid")
plt.style.use("dark_background")

In [3]:
# from afml.cache import clear_data_access_log, clear_afml_cache


# clear_afml_cache()
# clear_data_access_log()

## 0. Data Collection and Cleaning

In [4]:
# MT5 account name. Presumably used to look up the matching credentials in your
# .env configuration - confirm against the data loader.
account_name = "FUNDEDNEXT_STLR2_6K"

In [5]:
from afml.cache.cv_cache import cv_cacheable


@cv_cacheable
def train_rf(classifier, X, y, sample_weight=None):
    """Fit a fresh clone of ``classifier`` with OOB scoring enabled and print the fit time.

    Parameters
    ----------
    classifier : estimator
        Template estimator. It is cloned, so the object passed in is never fitted.
        It must accept an ``oob_score`` parameter.
    X, y : training features and labels, forwarded to ``fit``.
    sample_weight : optional
        Per-sample weights forwarded to ``fit``.

    Returns
    -------
    The fitted clone.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock adjustments.
    started = time.perf_counter()
    clf = clone(classifier).set_params(oob_score=True).fit(X, y, sample_weight)
    elapsed = pd.Timedelta(time.perf_counter() - started, unit="s").round("s")

    # Strip only a leading "0 days " prefix. A plain replace("0 days", "") left a stray
    # leading space and would also turn "10 days 00:00:00" into "1 00:00:00".
    duration = str(elapsed)
    zero_day_prefix = "0 days "
    if duration.startswith(zero_day_prefix):
        duration = duration[len(zero_day_prefix):]

    print(f"{clf.__class__.__name__} trained in {duration}.")
    return clf

## 1. Bollinger Band Strategy

In [6]:
# Instrument and date window handed to develop_production_model in the grid-search cell.
symbol = "EURUSD"
start_date = "2022-01-01"
end_date = "2023-12-31"
# NOTE(review): same value as the account_name assignment in the data-collection section.
account_name = "FUNDEDNEXT_STLR2_6K"

In [7]:
# Bollinger Band parameters: rolling window and band width in standard deviations.
bb_period = 20
bb_std = 1.5
bb_strategy = BollingerStrategy(window=bb_period, num_std=bb_std)

# Settings passed to prepare_training_data in the tick-bar section
# (volatility look-back/multiplier, barrier levels, holding horizon).
bb_lookback = 100
bb_vol_multiplier = 1
bb_pt_barrier = 1
bb_sl_barrier = 2
bb_time_horizon = {"days": 1}

### Time-Bars

In [8]:
from scipy.stats import uniform


def uniform_range(low, high):
    """Return a frozen SciPy uniform distribution spanning ``low`` to ``high``.

    ``scipy.stats.uniform`` is parameterised by ``loc`` and ``scale`` (the support is
    ``[loc, loc + scale]``), so the interval width is derived from the two end points.
    """
    width = high - low
    return uniform(loc=low, scale=width)


# Example: between 0.5 and 0.9
dist = uniform_range(0.5, 0.9)

In [15]:
from os import cpu_count
from feature_engine.selection import DropConstantFeatures, DropDuplicateFeatures
from scipy.stats import randint, uniform
from scipy.stats.distributions import rv_continuous
from sklearn import preprocessing
from sklearn.pipeline import Pipeline

from afml.cross_validation.hyperfit import MyPipeline

# Leave one core free for interactive work. os.cpu_count() can return None, and on a
# single-core machine "count - 1" is 0, which joblib rejects, so clamp to at least 1.
n_jobs = max(1, (cpu_count() or 1) - 1)

# Feature clean-up steps followed by the classifier whose hyper-parameters are searched.
pipe = MyPipeline([
    ("drop_constant", DropConstantFeatures(tol=0.998)),  # remove constant / quasi-constant features
    ("drop_duplicates", DropDuplicateFeatures()),        # remove duplicate features
    ("clf", RandomForestClassifier(n_jobs=n_jobs, random_state=42, class_weight="balanced_subsample")),
])

# Randomised-search distributions, keyed by "<pipeline step>__<parameter>".
# scipy's randint excludes its upper bound; uniform_range(low, high) covers [low, high].
param_distributions = {
    "clf__n_estimators": randint(100, 500),                       # integers in [100, 500)
    "clf__max_depth": randint(3, 20),                             # integers in [3, 20)
    "clf__max_features": uniform_range(0.1, 1),                   # floats between 0.1 and 1.0
    "clf__max_samples": uniform_range(0.5, 1),                    # floats between 0.5 and 1.0
    "clf__min_weight_fraction_leaf": uniform_range(0.01, 0.05),   # floats between 0.01 and 0.05
}

# Model/search settings passed to develop_production_model in the grid-search cell.
# NOTE(review): n_jobs=-1 here combined with n_jobs on the forest itself may
# oversubscribe the CPU - confirm how the search uses this value.
# NOTE(review): bagging_max_samples / bagging_max_features are the int 1, not the float 1.0;
# confirm how they are interpreted if bagging_n_estimators is ever raised above 0.
model_params = dict(
    pipe_clf=pipe,
    param_grid=param_distributions,
    cv_splits=5,
    bagging_n_estimators=0,
    bagging_max_samples=1,
    bagging_max_features=1,
    rnd_search_iter=1,
    n_jobs=-1,
    pct_embargo=0.01,
    random_state=42,
    verbose=False,
)

# Sample-weight grid: one attribution method crossed with five decay factors.
sample_weight_config = dict(
    attribution=["uniqueness"],
    decay_factor=[0.0, 0.25, 0.5, 0.75, 1.0],
    linear=[True],
)

# Bollinger feature grid: periods 15, 20, 25 and band widths 1.5, 1.75, 2.0.
feature_params = dict(
    bb_period=np.arange(15, 30, 5),
    bb_std=np.arange(1.5, 2.1, 0.25),
)

# Data-source grid: time bars and tick bars for the same account, bar size and price.
data_config = dict(
    account_name=[account_name],
    bar_type=["time", "tick"],
    bar_size=["M5"],
    price=["mid_price"],
)

# Labeling grid: 5 look-backs x 5 profit targets x 4 stop losses.
label_params = dict(
    target_lookback=np.arange(20, 101, 20),
    profit_target=np.arange(0, 2.5, 0.5),
    stop_loss=np.arange(0.5, 2.5, 0.5),
    max_holding_period=[dict(days=1)],
    min_ret=[0],
    vertical_barrier_zero=[True],
)

In [16]:
# Total number of configurations in the full grid (product of the four expanded grids).
# np.prod replaces np.product, which was deprecated and then removed in NumPy 2.0.
np.prod([len(expand_params(x)) for x in [sample_weight_config, feature_params, label_params, data_config]])

9000

In [None]:
# Results store filled by the grid-search loop: {run number: {"key": configs, "result": outputs}}.
# Re-running this cell discards everything collected so far.
all_cv_metrics = {}

In [21]:
# Inspect the collected results.
# NOTE(review): this cell sits above the loop that populates all_cv_metrics and carries a later
# execution count, so a fresh top-to-bottom run would display an empty dict here.
all_cv_metrics

{1: {'key': [{'account_name': 'FUNDEDNEXT_STLR2_6K',
    'bar_type': 'time',
    'bar_size': 'M5',
    'price': 'mid_price'},
   {'bb_period': 15, 'bb_std': 1.5},
   {'target_lookback': 20,
    'profit_target': 0.0,
    'stop_loss': 0.5,
    'max_holding_period': {'days': 1},
    'min_ret': 0,
    'vertical_barrier_zero': True,
    'strategy': <afml.strategies.signals.BollingerStrategy at 0x2344079bd30>},
   {'pipe_clf': MyPipeline(steps=[('drop_constant', DropConstantFeatures(tol=0.998)),
                      ('drop_duplicates', DropDuplicateFeatures()),
                      ('clf',
                       RandomForestClassifier(class_weight='balanced_subsample',
                                              n_jobs=3, random_state=42))]),
    'param_grid': {'clf__n_estimators': <scipy.stats._distn_infrastructure.rv_discrete_frozen at 0x2343090c790>,
     'clf__max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen at 0x2343090cbb0>,
     'clf__max_features': <scipy.stats._

In [18]:
from afml.filters.filters import cusum_filter
from afml.production.model_development import (
    develop_production_model,
    load_and_prepare_training_data,
)


# Walk the configuration grid (data x features x labels x sample weights), run
# develop_production_model for each visited combination and store the outcome in all_cv_metrics.
# Run counter; also the key under which each run is stored.
i = 0

for data_cfg in expand_params(data_config):
    for feature_cfg in expand_params(feature_params):
        # Feature function plus the Bollinger parameters for this grid point.
        feat_params = {"func": create_bollinger_features, "params": feature_cfg}
        for label_cfg in expand_params(label_params):
            # The labeling strategy uses the same Bollinger settings as the features.
            label_cfg["strategy"] = BollingerStrategy(window=feature_cfg["bb_period"], num_std=feature_cfg["bb_std"])
            for sample_weight_cfg in expand_params(sample_weight_config):
                i += 1
                # NOTE(review): sample_weight_cfg is not part of this printout.
                print(f"{i}. \n  {data_cfg} \n  {feature_cfg} \n  {label_cfg} \n  {model_params}\n")
                all_cv_metrics.setdefault(i, {})
                all_cv_metrics[i]["key"] = [data_cfg, feature_cfg, label_cfg, model_params, sample_weight_cfg]
                best_model, model_features, metrics = develop_production_model(
                    symbol, start_date, end_date, data_cfg, feat_params, label_cfg, model_params, sample_weight_cfg
                )
                all_cv_metrics[i]["result"] = best_model, model_features, metrics
                # NOTE(review): this break leaves only the innermost loop, so just the first
                # sample-weight configuration is evaluated per label configuration and the
                # remaining decay factors are never run. Confirm whether this is intentional.
                break

[32m2025-12-01 14:21:40.010[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.load_and_prepare_training_data[0m
[32m2025-12-01 14:21:40.157[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.create_feature_engineering_pipeline[0m


1. 
  {'account_name': 'FUNDEDNEXT_STLR2_6K', 'bar_type': 'time', 'bar_size': 'M5', 'price': 'mid_price'} 
  {'bb_period': 15, 'bb_std': 1.5} 
  {'target_lookback': 20, 'profit_target': 0.0, 'stop_loss': 0.5, 'max_holding_period': {'days': 1}, 'min_ret': 0, 'vertical_barrier_zero': True, 'strategy': <afml.strategies.signals.BollingerStrategy object at 0x000002344079BD30>} 
  {'pipe_clf': MyPipeline(steps=[('drop_constant', DropConstantFeatures(tol=0.998)),
                  ('drop_duplicates', DropDuplicateFeatures()),
                  ('clf',
                   RandomForestClassifier(class_weight='balanced_subsample',
                                          n_jobs=3, random_state=42))]), 'param_grid': {'clf__n_estimators': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000002343090C790>, 'clf__max_depth': <scipy.stats._distn_infrastructure.rv_discrete_frozen object at 0x000002343090CBB0>, 'clf__max_features': <scipy.stats._distn_infrastructure.rv_continuous_froze

[32m2025-12-01 14:21:40.812[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.generate_events_triple_barrier[0m
[32m2025-12-01 14:21:40.916[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.compute_sample_weights_time_decay[0m


✓ Generated 59 features

[Step 3/6] Generating events...
✓ Generated events: 
     count  proportion
bin                   
0    8,616    0.866888
1    1,323    0.133112

[Step 4/6] Computing sample weights...
✓ Computed time-decay weights

[Step 5/6] Training model with cross-validation...


[32m2025-12-01 14:21:41.474[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.train_model_with_cv[0m


✓ Best CV score: 0.2119
✓ Best params: {'clf__max_depth': 9, 'clf__max_features': 0.8168886881742096, 'clf__max_samples': 0.5917173949330818, 'clf__min_weight_fraction_leaf': 0.04118764001091077, 'clf__n_estimators': 120}

[Step 6/6] Analyzing feature importance...

Top 10 Features:
    feature  importance
     d1_vol    0.259825
        vol    0.044798
     spread    0.042814
ret_1_lag_3    0.033937
truerange_1    0.029784
 bbb_15_1.5    0.028060
     h1_vol    0.027820
   ret_skew    0.019933
ret_5_lag_1    0.019810
ret_1_lag_2    0.019607
2. 
  {'account_name': 'FUNDEDNEXT_STLR2_6K', 'bar_type': 'time', 'bar_size': 'M5', 'price': 'mid_price'} 
  {'bb_period': 15, 'bb_std': 1.5} 
  {'target_lookback': 20, 'profit_target': 0.0, 'stop_loss': 1.0, 'max_holding_period': {'days': 1}, 'min_ret': 0, 'vertical_barrier_zero': True, 'strategy': <afml.strategies.signals.BollingerStrategy object at 0x00000234380FC3A0>} 
  {'pipe_clf': MyPipeline(steps=[('drop_constant', DropConstantFeatures(tol=

[32m2025-12-01 14:21:42.996[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.load_and_prepare_training_data[0m
[32m2025-12-01 14:21:43.177[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.create_feature_engineering_pipeline[0m


✓ Loaded 147,860 samples from 2022-01-01 to 2023-12-31

[Step 2/6] Computing features...


[32m2025-12-01 14:21:43.504[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.generate_events_triple_barrier[0m


✓ Generated 59 features

[Step 3/6] Generating events...
✓ Generated events: 
     count  proportion
bin                   
0    7,911    0.796116
1    2,026    0.203884

[Step 4/6] Computing sample weights...


[32m2025-12-01 14:21:43.667[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.compute_sample_weights_time_decay[0m
[32m2025-12-01 14:21:43.942[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.train_model_with_cv[0m


✓ Computed time-decay weights

[Step 5/6] Training model with cross-validation...


[32m2025-12-01 14:21:44.996[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.load_and_prepare_training_data[0m
[32m2025-12-01 14:21:45.071[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.create_feature_engineering_pipeline[0m


✓ Best CV score: 0.3391
✓ Best params: {'clf__max_depth': 9, 'clf__max_features': 0.8168886881742096, 'clf__max_samples': 0.5917173949330818, 'clf__min_weight_fraction_leaf': 0.04118764001091077, 'clf__n_estimators': 120}

[Step 6/6] Analyzing feature importance...

Top 10 Features:
     feature  importance
      d1_vol    0.397741
         vol    0.040409
      h1_vol    0.028852
      spread    0.026054
     atrr_14    0.025496
 ret_1_lag_2    0.024160
  bbb_15_1.5    0.023745
ret_10_lag_3    0.022784
 ret_1_lag_3    0.020761
      h4_vol    0.019431
3. 
  {'account_name': 'FUNDEDNEXT_STLR2_6K', 'bar_type': 'time', 'bar_size': 'M5', 'price': 'mid_price'} 
  {'bb_period': 15, 'bb_std': 1.5} 
  {'target_lookback': 20, 'profit_target': 0.0, 'stop_loss': 1.5, 'max_holding_period': {'days': 1}, 'min_ret': 0, 'vertical_barrier_zero': True, 'strategy': <afml.strategies.signals.BollingerStrategy object at 0x00000234409C7670>} 
  {'pipe_clf': MyPipeline(steps=[('drop_constant', DropConstantFe

[32m2025-12-01 14:21:45.302[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.generate_events_triple_barrier[0m
[32m2025-12-01 14:21:45.389[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.compute_sample_weights_time_decay[0m


✓ Generated 59 features

[Step 3/6] Generating events...
✓ Generated events: 
     count  proportion
bin                   
0    7,355    0.740312
1    2,580    0.259688

[Step 4/6] Computing sample weights...
✓ Computed time-decay weights

[Step 5/6] Training model with cross-validation...


[32m2025-12-01 14:21:46.066[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for afml.production.model_development.train_model_with_cv[0m


KeyboardInterrupt: 

In [None]:
# from afml.cache import clear_afml_cache
# from afml.cache.cv_cache import clear_cv_cache


# clear_afml_cache(), clear_cv_cache()

[Memory(location=C:\Users\JoeN\AppData\Local\afml\afml\Cache\joblib_cache\joblib)]: Flushing completely the cache
[32m2025-12-01 02:41:06.667[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mclear_cv_cache[0m:[36m445[0m - [1mCleared 257 CV cache files[0m


(None, 257)

#### CV of Weighting Methods

In [None]:
from os import cpu_count

# Reserve one CPU for other work during training (use -1 instead to take every core).
# os.cpu_count() may return None, and a single-core host would give 0, which joblib
# rejects as an n_jobs value, so the result is clamped to at least 1.
N_JOBS = max(1, (cpu_count() or 1) - 1)
N_ESTIMATORS = 100   # trees per ensemble
seed = 7             # random_state for the estimators
min_w_leaf = 0.05    # min_weight_fraction_leaf
max_depth = 4        # maximum tree depth
n_splits = 5         # PurgedKFold folds
pct_embargo = 0.01   # embargo fraction passed to PurgedKFold
test_size = 0.2      # hold-out fraction passed to PurgedSplit

In [None]:
# Keep only the events that also have a feature row, then align both frames on them.
# NOTE(review): bb_events_tb_time, bb_feat_time and bb_time_bars are not created in any
# visible cell of this notebook; this cell depends on earlier kernel state.
train_idx = bb_events_tb_time.index.intersection(bb_feat_time.index)
cont = bb_events_tb_time.reindex(train_idx)
X = bb_feat_time.reindex(train_idx)
y, t1 = cont["bin"], cont["t1"]

# Purged train/test split: positional indices for the training and hold-out rows.
train, test = PurgedSplit(t1, test_size).split(X)
X_train, y_train = X.iloc[train], y.iloc[train]
X_test, y_test = X.iloc[test], y.iloc[test]

# Event weights for the training rows (provides the "tW" and "w" columns used below).
cont_train = get_event_weights(cont.iloc[train], bb_time_bars.close)

# Purged, embargoed K-fold generator over the training events.
cv_gen = PurgedKFold(n_splits, cont_train["t1"], pct_embargo)

In [None]:
# Mean of the per-event uniqueness column; reused later as the ensembles' max_samples.
avg_u = cont_train["tW"].mean()
print(f"Average Uniqueness in Training Set: {avg_u:.4f}")

# Candidate sample-weight vectors, keyed by scheme name.
weighting_schemes = {}
weighting_schemes["unweighted"] = pd.Series(1.0, index=cont_train.index)
weighting_schemes["uniqueness"] = cont_train["tW"]
weighting_schemes["return"] = cont_train["w"]

# Linear time-decay weights for each last_weight value. 1.0 is omitted here; the decay
# comparison cell reuses the undecayed scores for its "decay_1.0" column.
decay_factors = [0.0, 0.25, 0.5, 0.75]
time_decay_weights = {}
for time_decay in decay_factors:
    decay_w = get_weights_by_time_decay_optimized(
        triple_barrier_events=cont_train,
        close_index=bb_time_bars.index,
        last_weight=time_decay,
        linear=True,
        av_uniqueness=cont_train["tW"],
    )
    time_decay_weights[f"decay_{time_decay}"] = decay_w

weighting_schemes.keys()

Average Uniqueness in Training Set: 0.4350


dict_keys(['unweighted', 'uniqueness', 'return'])

##### Selection of Best Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest template shared by the CV comparisons below.
clf = RandomForestClassifier(
    n_estimators=N_ESTIMATORS,
    criterion='entropy',
    max_depth=max_depth,
    min_weight_fraction_leaf=min_w_leaf,
    max_samples=avg_u,  # each tree draws a fraction of rows equal to the average uniqueness
    class_weight="balanced_subsample",
    n_jobs=N_JOBS,  # worker count set in the constants cell
    random_state=seed,
)

- Analyze all CV scores for all weighting schemes to find the best scheme

In [None]:
# Cross-validate the classifier once per weighting scheme and keep the best-scoring scheme.
all_cv_scores_df = pd.DataFrame()   # "mean ± std" table: metric rows x scheme columns
all_cv_scores_d = {}                # raw CV scores per scheme
all_cms = {}                        # third return value of analyze_cross_val_scores per scheme
# Start below every attainable score. neg_log_loss is never positive, so seeding with 0
# meant no scheme could ever win and "unweighted" was reported with a score of 0.
best_score = -np.inf
best_scheme = "unweighted"

if set(y_train.values) == {0, 1}:
    scoring = "f1"  # f1 for meta-labeling
else:
    scoring = "neg_log_loss"  # symmetric towards all cases

for scheme, w in tqdm(weighting_schemes.items()):
    # The same weights are used for fitting and for scoring each fold.
    cv_scores, cv_scores_df, cms = analyze_cross_val_scores(
        clf, X_train, y_train, cv_gen,
        sample_weight_train=w,
        sample_weight_score=w,
    )
    all_cms[scheme] = cms
    all_cv_scores_d[scheme] = cv_scores
    score = cv_scores[scoring].mean()

    for idx, row in cv_scores_df.iterrows():
        all_cv_scores_df.loc[idx, scheme] = f"{row['mean']:.4f} ± {row['std']:.4f}"

    # ">=" keeps the previous tie-breaking rule: on equal scores the later scheme wins.
    if score >= best_score:
        best_score = score
        best_scheme = scheme

print(f"{best_scheme.title()} is the best weighting scheme with {scoring} = {best_score:.4f}")
print("\nWeighting Scheme CV:")
all_cv_scores_df

  0%|          | 0/3 [00:00<?, ?it/s][32m2025-11-28 21:00:04.684[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:04.698[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:04.710[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
100%|██████████| 3/3 [00:00<00:00, 81.08it/s]

Uniqueness is the best weighting scheme with f1 = 0.6565

Weighting Scheme CV:





Unnamed: 0,unweighted,uniqueness,return
accuracy,0.5359 ± 0.0177,0.5532 ± 0.0299,0.6084 ± 0.0157
pwa,0.5483 ± 0.0309,0.5718 ± 0.0181,0.6164 ± 0.0139
neg_log_loss,-0.6910 ± 0.0058,-0.6867 ± 0.0028,-0.6753 ± 0.0092
precision,0.5936 ± 0.0170,0.6040 ± 0.0139,0.2582 ± 0.3876
recall,0.6180 ± 0.0404,0.7213 ± 0.0650,0.0033 ± 0.0045
f1,0.6047 ± 0.0185,0.6565 ± 0.0330,0.0063 ± 0.0088


- Test if time-decay improves performance of best model

In [None]:
# Re-run the CV with the best scheme's weights multiplied by each time-decay weight vector.
best_model_decay_cv_scores = pd.DataFrame()

# Resolve the base (undecayed) scheme once, before the loop reassigns best_scheme.
# Stripping any "_decay..." suffix keeps the cell re-runnable after a decayed scheme has won,
# and the name stays defined even if there are no decay weights to test.
best_scheme_o = best_scheme.split("_decay")[0]
base_weights = weighting_schemes[best_scheme_o]

for decay_name, decay_w in tqdm(time_decay_weights.items()):
    # decay_w is the decay weight vector built for this key, not a scalar factor.
    sample_weight = base_weights * decay_w
    cv_scores, cv_scores_df, cms = analyze_cross_val_scores(
        clf, X_train, y_train, cv_gen,
        sample_weight_train=sample_weight,
        sample_weight_score=sample_weight,
    )
    score = cv_scores[scoring].mean()
    scheme = f"{best_scheme_o}_{decay_name}"
    all_cv_scores_d[scheme] = cv_scores
    all_cms[scheme] = cms
    for idx, row in cv_scores_df.iterrows():
        best_model_decay_cv_scores.loc[idx, scheme] = f"{row['mean']:.4f} ± {row['std']:.4f}"
    all_cv_scores_df[scheme] = best_model_decay_cv_scores[scheme]

    # A decayed variant that matches or beats the running best becomes the new best scheme.
    # NOTE: this adds a key to weighting_schemes, so re-running the scheme-comparison cell
    # afterwards would also iterate over the decayed scheme.
    if score >= best_score:
        best_score = score
        best_scheme = scheme
        weighting_schemes[best_scheme] = sample_weight

# The undecayed scheme serves as the decay = 1.0 reference column.
best_model_decay_cv_scores[f"{best_scheme_o}_decay_1.0"] = all_cv_scores_df[best_scheme_o]

print(f"\n{best_scheme.title()} model achieved the best {scoring} score of {best_score:.4f}")
best_model_decay_cv_scores

  0%|          | 0/4 [00:00<?, ?it/s][32m2025-11-28 21:00:05.129[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:05.151[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:05.168[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:05.187[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
100%|██████████| 4/4 [00:00<00:00, 51.28it/s]


Uniqueness_Decay_0.0 model achieved the best f1 score of 0.6708





Unnamed: 0,uniqueness_decay_0.0,uniqueness_decay_0.25,uniqueness_decay_0.5,uniqueness_decay_0.75,uniqueness_decay_1.0
accuracy,0.5556 ± 0.0186,0.5542 ± 0.0177,0.5560 ± 0.0213,0.5559 ± 0.0257,0.5532 ± 0.0299
pwa,0.5779 ± 0.0152,0.5760 ± 0.0157,0.5755 ± 0.0141,0.5723 ± 0.0186,0.5718 ± 0.0181
neg_log_loss,-0.6858 ± 0.0024,-0.6859 ± 0.0024,-0.6860 ± 0.0022,-0.6866 ± 0.0027,-0.6867 ± 0.0028
precision,0.6033 ± 0.0129,0.6043 ± 0.0113,0.6051 ± 0.0119,0.6052 ± 0.0127,0.6040 ± 0.0139
recall,0.7577 ± 0.0548,0.7330 ± 0.0431,0.7354 ± 0.0636,0.7303 ± 0.0667,0.7213 ± 0.0650
f1,0.6708 ± 0.0235,0.6619 ± 0.0208,0.6626 ± 0.0263,0.6607 ± 0.0304,0.6565 ± 0.0330


##### Sequential Bootstrap

In [None]:
# Random Forest default of max_features is sqrt, which means I don't have to calculate or set it.
base_rf = clone(clf).set_params(
    n_estimators=1,
    bootstrap=False,
    n_jobs=None,
    max_samples=None,
    random_state=None,
    )

seq_rf = SequentiallyBootstrappedBaggingClassifier(
    samples_info_sets=cont_train.t1,
    price_bars_index=bb_time_bars.index,
    estimator=base_rf,
    n_estimators=N_ESTIMATORS, # set low to save time
    max_samples=avg_u, # Set to average uniqueness
    oob_score=True,
    n_jobs=N_JOBS,
    random_state=seed,
    verbose=False,
)
seq_rf

In [None]:
# Fit both ensembles with the best weighting scheme, then compare hold-out scores and
# their absolute gap to the out-of-bag metrics.
w = weighting_schemes[best_scheme]
rf = clone(clf).set_params(oob_score=True)

# Both models are trained with the best scheme's weights, so the log line names that
# scheme (it previously said "Unweighted" while `w` was being passed).
print(f"Training: Standard RF (max_samples={avg_u:.3f}) - {best_scheme}...")
rf = train_rf(rf, X_train, y_train, w)

print(f"Training: Sequential Bootstrap RF (max_samples={avg_u:.3f}) - {best_scheme}...")
seq_rf = train_rf(seq_rf, X_train, y_train, w)

ensembles = {
    "standard_rf": rf,
    "sequential_rf": seq_rf,  # max_samples=avg_u
}

scoring_methods = {
    "f1": f1_score,
    "precision": precision_score,
    "recall": recall_score,
    "neg_log_loss": log_loss,
    "pwa": probability_weighted_accuracy,
    "accuracy": accuracy_score,
}
# Metrics that are fed positive-class probabilities rather than hard 0/1 predictions.
proba_metrics = (probability_weighted_accuracy, log_loss)

all_scores_oos = pd.DataFrame()

for name, classifier in ensembles.items():
    prob = classifier.predict_proba(X_test)[:, 1]
    pred = (prob > 0.5).astype("int8")
    oob_metrics = compute_custom_oob_metrics(classifier, X_train, y_train, w)
    # The loop variable is named score_func so it does not overwrite the global `scoring`
    # metric name that the CV cells read when they are re-run.
    for method, score_func in scoring_methods.items():
        y_pred = prob if score_func in proba_metrics else pred
        score = score_func(y_test, y_pred)
        if method == "neg_log_loss":
            score *= -1  # log_loss is a loss; flip the sign so higher is better
        all_scores_oos.loc[method, name] = score
        all_scores_oos.loc[f"{method}_oob_gap", name] = abs(score - oob_metrics[method])

print(f"\nBest weighting scheme: {best_scheme}")
bb_all_scores_oos = all_scores_oos.copy()
bb_all_scores_oos_time = bb_all_scores_oos.copy()

winsound.Beep(1000, 1000) # Alert (winsound is a Windows-only module)

all_scores_oos.round(4)

[32m2025-11-28 21:00:06.216[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for train_rf[0m
[32m2025-11-28 21:00:06.318[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for train_rf[0m


Training: Standard RF (max_samples=0.435) - Unweighted...
Training: Sequential Bootstrap RF (max_samples=0.435) - uniqueness_decay_0.0...

Best weighting scheme: uniqueness_decay_0.0


Unnamed: 0,standard_rf,sequential_rf
f1,0.6985,0.6869
f1_oob_gap,0.1704,0.1618
precision,0.5819,0.5899
precision_oob_gap,0.053,0.0666
recall,0.8735,0.822
recall_oob_gap,0.3119,0.268
neg_log_loss,-0.6834,-0.6865
neg_log_loss_oob_gap,0.0026,0.0005
pwa,0.5846,0.57
pwa_oob_gap,0.0049,0.0032


### Tick-Bars

In [None]:
# Build the tick-bar feature matrix and triple-barrier events from the Bollinger settings above.
# NOTE(review): prepare_training_data and bb_tick_bars are not defined in any visible cell;
# the cache log shows `__main__.prepare_training_data`, so it was defined in this notebook's
# kernel at some point. A fresh Restart & Run All would fail here - confirm and restore.
bb_feat_tick, bb_events_tb_tick = prepare_training_data(
    df=bb_tick_bars, 
    strategy=bb_strategy, 
    feature_engine=create_bollinger_features, 
    feature_params=dict(bb_period=bb_period, bb_std=bb_std), 
    vol_lookback=bb_lookback, 
    vol_multiplier=bb_vol_multiplier, 
    time_horizon=bb_time_horizon, 
    pt_barrier=bb_pt_barrier, 
    sl_barrier=bb_sl_barrier, 
    vertical_barrier_zero=True,
)

[32m2025-11-28 21:00:15.415[0m | [34m[1mDEBUG   [0m | [36mafml.cache.robust_cache_keys[0m:[36mwrapper[0m:[36m336[0m - [34m[1mCache HIT for __main__.prepare_training_data[0m


#### CV of Weighting Methods

In [None]:
from os import cpu_count

# Reserve one CPU for other work during training (use -1 instead to take every core).
# os.cpu_count() may return None, and a single-core host would give 0, which joblib
# rejects as an n_jobs value, so the result is clamped to at least 1.
N_JOBS = max(1, (cpu_count() or 1) - 1)
N_ESTIMATORS = 100   # trees per ensemble
seed = 7             # random_state for the estimators
min_w_leaf = 0.05    # min_weight_fraction_leaf
max_depth = 4        # maximum tree depth
n_splits = 5         # PurgedKFold folds
pct_embargo = 0.01   # embargo fraction passed to PurgedKFold
test_size = 0.2      # hold-out fraction passed to PurgedSplit

In [None]:
# Keep only the tick-bar events that also have a feature row, then align both frames on them.
train_idx = bb_events_tb_tick.index.intersection(bb_feat_tick.index)
cont = bb_events_tb_tick.reindex(train_idx)
X = bb_feat_tick.reindex(train_idx)
y, t1 = cont["bin"], cont["t1"]

# Purged train/test split: positional indices for the training and hold-out rows.
train, test = PurgedSplit(t1, test_size).split(X)
X_train, y_train = X.iloc[train], y.iloc[train]
X_test, y_test = X.iloc[test], y.iloc[test]

# Event weights for the training rows (provides the "tW" and "w" columns used below).
cont_train = get_event_weights(cont.iloc[train], bb_tick_bars.close)

# Purged, embargoed K-fold generator over the training events.
cv_gen = PurgedKFold(n_splits, cont_train["t1"], pct_embargo)

In [None]:
# Mean of the per-event uniqueness column for the tick-bar training set.
avg_u = cont_train["tW"].mean()
print(f"Average Uniqueness in Training Set: {avg_u:.4f}")

# Candidate sample-weight vectors, keyed by scheme name.
weighting_schemes = {}
weighting_schemes["unweighted"] = pd.Series(1.0, index=cont_train.index)
weighting_schemes["uniqueness"] = cont_train["tW"]
weighting_schemes["return"] = cont_train["w"]

# Linear time-decay weights for each last_weight value, keyed "decay_<value>".
decay_factors = [0.0, 0.25, 0.5, 0.75]
time_decay_weights = {}
for time_decay in decay_factors:
    decay_w = get_weights_by_time_decay_optimized(
        triple_barrier_events=cont_train,
        close_index=bb_tick_bars.index,
        last_weight=time_decay,
        linear=True,
        av_uniqueness=cont_train["tW"],
    )
    time_decay_weights[f"decay_{time_decay}"] = decay_w

weighting_schemes.keys()

Average Uniqueness in Training Set: 0.3786


dict_keys(['unweighted', 'uniqueness', 'return'])

##### Selection of Best Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Base random forest reused by the CV cells below.
# max_samples receives avg_u (a float), so each tree is fitted on that
# fraction of the training rows.
clf = RandomForestClassifier(
    n_estimators=N_ESTIMATORS,
    criterion="entropy",
    max_depth=max_depth,
    min_weight_fraction_leaf=min_w_leaf,
    max_samples=avg_u,
    class_weight="balanced_subsample",
    random_state=seed,
    n_jobs=N_JOBS,  # worker count comes from the configuration cell
)

- Analyze all CV scores for all weighting schemes to find the best scheme

In [None]:
# Cross-validate the base classifier once per weighting scheme and keep the
# scheme with the highest mean score for the chosen metric.
all_cv_scores_df = pd.DataFrame()  # "mean ± std" strings: metrics x schemes
all_cv_scores_d = {}  # scheme -> raw CV scores
all_cms = {}  # scheme -> third return value of analyze_cross_val_scores
# Start below every attainable score. neg_log_loss is never positive, so a
# starting value of 0 would prevent any scheme from ever being selected.
best_score = float("-inf")
best_scheme = "unweighted"

if set(y_train.values) == {0, 1}:
    scoring = "f1"  # f1 for meta-labeling
else:
    scoring = "neg_log_loss"  # symmetric towards all cases

for scheme, w in tqdm(weighting_schemes.items()):
    # The same weights are used for fitting and for scoring each fold.
    cv_scores, cv_scores_df, cms = analyze_cross_val_scores(
        clf, X_train, y_train, cv_gen,
        sample_weight_train=w,
        sample_weight_score=w,
    )
    all_cms[scheme] = cms
    all_cv_scores_d[scheme] = cv_scores

    for idx, row in cv_scores_df.iterrows():
        all_cv_scores_df.loc[idx, scheme] = f"{row['mean']:.4f} ± {row['std']:.4f}"

    # ">=" keeps the original tie-breaking: on equal scores the later scheme wins.
    score = cv_scores[scoring].mean()
    if score >= best_score:
        best_score = score
        best_scheme = scheme

print(f"{best_scheme.title()} is the best weighting scheme with {scoring} = {best_score:.4f}")
print("\nWeighting Scheme CV:")
all_cv_scores_df

  0%|          | 0/3 [00:00<?, ?it/s][32m2025-11-28 21:00:19.283[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:19.299[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:19.314[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
100%|██████████| 3/3 [00:00<00:00, 69.75it/s]

Uniqueness is the best weighting scheme with f1 = 0.6619

Weighting Scheme CV:





Unnamed: 0,unweighted,uniqueness,return
accuracy,0.5330 ± 0.0186,0.5601 ± 0.0334,0.5838 ± 0.0094
pwa,0.5490 ± 0.0304,0.5904 ± 0.0339,0.5868 ± 0.0098
neg_log_loss,-0.6899 ± 0.0033,-0.6851 ± 0.0039,-0.6957 ± 0.0072
precision,0.6269 ± 0.0209,0.6320 ± 0.0181,0.0000 ± 0.0000
recall,0.5825 ± 0.0550,0.6975 ± 0.0736,0.0000 ± 0.0000
f1,0.6029 ± 0.0345,0.6619 ± 0.0419,0.0000 ± 0.0000


- Test if time-decay improves performance of best model

In [None]:
# Re-run the CV with the best scheme's weights multiplied by each time-decay
# weight vector, and promote a decayed variant if it scores at least as well.
best_model_decay_cv_scores = pd.DataFrame()

# Name of the non-decayed base scheme. Stripping a "_decay..." suffix keeps the
# cell re-runnable after best_scheme has been replaced by a decayed variant.
# Resolved before the loop so it exists even if time_decay_weights is empty.
best_scheme_o = best_scheme.split("_decay")[0]
base_weights = weighting_schemes[best_scheme_o]

for decay_name, decay_weights in tqdm(time_decay_weights.items()):
    sample_weight = base_weights * decay_weights
    cv_scores, cv_scores_df, cms = analyze_cross_val_scores(
        clf, X_train, y_train, cv_gen,
        sample_weight_train=sample_weight,
        sample_weight_score=sample_weight,
    )

    scheme = f"{best_scheme_o}_{decay_name}"
    all_cv_scores_d[scheme] = cv_scores
    all_cms[scheme] = cms
    for idx, row in cv_scores_df.iterrows():
        best_model_decay_cv_scores.loc[idx, scheme] = f"{row['mean']:.4f} ± {row['std']:.4f}"
    all_cv_scores_df[scheme] = best_model_decay_cv_scores[scheme]

    # ">=" keeps the original tie-breaking: on equal scores the later variant wins.
    score = cv_scores[scoring].mean()
    if score >= best_score:
        best_score = score
        best_scheme = scheme
        # Register the winning weights so later cells can look them up by name.
        weighting_schemes[best_scheme] = sample_weight

# The base scheme is shown as the "decay_1.0" column for comparison.
best_model_decay_cv_scores[f"{best_scheme_o}_decay_1.0"] = all_cv_scores_df[best_scheme_o]

print(f"\n{best_scheme.title()} model achieved the best {scoring} score of {best_score:.4f}")
best_model_decay_cv_scores

  0%|          | 0/4 [00:00<?, ?it/s][32m2025-11-28 21:00:19.759[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:19.779[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:19.794[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
[32m2025-11-28 21:00:19.812[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for analyze_cross_val_scores[0m
100%|██████████| 4/4 [00:00<00:00, 57.97it/s]


Uniqueness_Decay_0.0 model achieved the best f1 score of 0.6841





Unnamed: 0,uniqueness_decay_0.0,uniqueness_decay_0.25,uniqueness_decay_0.5,uniqueness_decay_0.75,uniqueness_decay_1.0
accuracy,0.5680 ± 0.0163,0.5535 ± 0.0273,0.5641 ± 0.0315,0.5579 ± 0.0294,0.5601 ± 0.0334
pwa,0.6011 ± 0.0079,0.6010 ± 0.0222,0.5954 ± 0.0302,0.5912 ± 0.0323,0.5904 ± 0.0339
neg_log_loss,-0.6838 ± 0.0012,-0.6841 ± 0.0029,-0.6848 ± 0.0035,-0.6853 ± 0.0036,-0.6851 ± 0.0039
precision,0.6312 ± 0.0154,0.6201 ± 0.0137,0.6293 ± 0.0159,0.6286 ± 0.0141,0.6320 ± 0.0181
recall,0.7481 ± 0.0417,0.7285 ± 0.0631,0.7250 ± 0.0644,0.7047 ± 0.0689,0.6975 ± 0.0736
f1,0.6841 ± 0.0203,0.6690 ± 0.0329,0.6730 ± 0.0366,0.6633 ± 0.0372,0.6619 ± 0.0419


##### Sequential Bootstrap

In [None]:
# Fit both ensembles with the best weighting scheme, then compare their
# test-set scores and the gap between each score and its OOB counterpart.
w = weighting_schemes[best_scheme]
rf = clone(clf).set_params(oob_score=True)

# The forest is fitted with the best-scheme weights, so the message reports that scheme.
print(f"Training: Standard RF (max_samples={avg_u:.3f}) - {best_scheme}...")
rf = train_rf(rf, X_train, y_train, w)

# NOTE(review): seq_rf is not created in this cell; it has to exist already
# from an earlier cell. Confirm this still holds after Restart & Run All.
print(f"Training: Sequential Bootstrap RF (max_samples={avg_u:.3f}) - {best_scheme}...")
seq_rf = train_rf(seq_rf, X_train, y_train, w)

ensembles = {
    "standard_rf": rf,
    "sequential_rf": seq_rf,  # max_samples=avg_u
}
scoring_methods = {
    "f1": f1_score,
    "precision": precision_score,
    "recall": recall_score,
    "neg_log_loss": log_loss,
    "pwa": probability_weighted_accuracy,
    "accuracy": accuracy_score,
}

all_scores_oos = pd.DataFrame()

for name, classifier in ensembles.items():
    prob = classifier.predict_proba(X_test)[:, 1]
    pred = (prob > 0.5).astype("int8")
    oob_metrics = compute_custom_oob_metrics(classifier, X_train, y_train, w)
    # The loop variable is deliberately not called "scoring": that name holds the
    # CV metric string used by the cross-validation cells and must not be overwritten.
    for method, metric_fn in scoring_methods.items():
        # Probability-based metrics receive the class-1 probabilities, the rest hard labels.
        y_pred = prob if metric_fn in (probability_weighted_accuracy, log_loss) else pred
        score = metric_fn(y_test, y_pred)
        if method == "neg_log_loss":
            score *= -1  # log_loss is a loss; flip the sign to match the metric name
        all_scores_oos.loc[method, name] = score
        # Absolute difference between the test-set score and the OOB metric.
        all_scores_oos.loc[f"{method}_oob_gap", name] = abs(score - oob_metrics[method])

print(f"\nBest weighting scheme: {best_scheme}")
bb_all_scores_oos = all_scores_oos.copy()
bb_all_scores_oos_tick = bb_all_scores_oos.copy()

winsound.Beep(1000, 1000) # Alert

all_scores_oos.round(4)

[32m2025-11-28 21:00:20.709[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for train_rf[0m
[32m2025-11-28 21:00:20.779[0m | [1mINFO    [0m | [36mafml.cache.cv_cache[0m:[36mwrapper[0m:[36m251[0m - [1mCV cache hit for train_rf[0m


Training: Standard RF (max_samples=0.379) - Unweighted...
Training: Sequential Bootstrap RF (max_samples=0.379) - uniqueness_decay_0.0...

Best weighting scheme: uniqueness_decay_0.0


Unnamed: 0,standard_rf,sequential_rf
f1,0.6327,0.6442
f1_oob_gap,0.0899,0.1083
precision,0.5595,0.5653
precision_oob_gap,0.0235,0.0365
recall,0.7278,0.7489
recall_oob_gap,0.1685,0.1897
neg_log_loss,-0.6917,-0.6906
neg_log_loss_oob_gap,0.006,0.0037
pwa,0.5338,0.5448
pwa_oob_gap,0.0593,0.0485


### Cache Analysis

In [None]:
# from afml.cache import clear_afml_cache

# clear_afml_cache()

In [14]:
# get_cache_size_info is imported here and used by a later cell.
from afml.cache import get_cache_size_info, print_cache_health

# Print the cache health report (overall statistics, best/worst hit rates, recommendations).
print_cache_health()

[32m2025-12-01 14:08:52.198[0m | [34m[1mDEBUG   [0m | [36mafml.cache.cache_monitoring[0m:[36m_get_function_cache_size[0m:[36m444[0m - [34m[1mLooking for cache in: C:\Users\JoeN\AppData\Local\afml\afml\Cache\joblib_cache[0m
[32m2025-12-01 14:08:52.203[0m | [34m[1mDEBUG   [0m | [36mafml.cache.cache_monitoring[0m:[36m_get_function_cache_size[0m:[36m459[0m - [34m[1mSearching for patterns: ['afml_production_model_development_load_and_prepare_training_data', 'load_and_prepare_training_data'][0m
[32m2025-12-01 14:08:52.286[0m | [34m[1mDEBUG   [0m | [36mafml.cache.cache_monitoring[0m:[36m_get_function_cache_size[0m:[36m478[0m - [34m[1mFound matching cache file: func_code.py - 662 bytes[0m
[32m2025-12-01 14:08:52.288[0m | [34m[1mDEBUG   [0m | [36mafml.cache.cache_monitoring[0m:[36m_get_function_cache_size[0m:[36m478[0m - [34m[1mFound matching cache file: metadata.json - 263 bytes[0m
[32m2025-12-01 14:08:52.290[0m | [34m[1mDEBUG   [0m 


CACHE HEALTH REPORT

Overall Statistics:
  Total Functions:     6
  Total Calls:         250
  Overall Hit Rate:    80.4%
  Total Cache Size:    87.41 MB

Top Performers (by hit rate):
  1. load_and_prepare_training_data: 100.0% (50 calls)
  2. create_feature_engineering_pipeline: 100.0% (50 calls)
  3. create_bollinger_features: 100.0% (1 calls)
  4. generate_events_triple_barrier: 100.0% (50 calls)
  5. compute_sample_weights_time_decay: 100.0% (50 calls)

Worst Performers (by hit rate):
  1. create_feature_engineering_pipeline: 100.0% (50 calls)
  2. create_bollinger_features: 100.0% (1 calls)
  3. generate_events_triple_barrier: 100.0% (50 calls)
  4. compute_sample_weights_time_decay: 100.0% (50 calls)
  5. train_model_with_cv: 0.0% (49 calls)

Recommendations:
  1. Functions with low hit rate: train_model_with_cv. Review cache key generation for these functions.




In [None]:
# Pretty-print the cache size summary; sort_dicts=False keeps the keys in their original order.
pprint(get_cache_size_info(), sort_dicts=False)

{'base': {'size_bytes': 695752560, 'size_mb': 663.52, 'file_count': 597},
 'joblib': {'size_bytes': 344528483, 'size_mb': 328.57, 'file_count': 68},
 'numba': {'size_bytes': 6217168, 'size_mb': 5.93, 'file_count': 164},
 'backtest': {'size_bytes': 0, 'size_mb': 0.0, 'file_count': 0}}


In [None]:
# Detailed analysis of cache patterns
from afml.cache.cache_monitoring import analyze_cache_patterns

analyze_cache_patterns()

[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mLooking for cache in: C:\Users\JoeN\AppData\Local\afml\afml\Cache\joblib_cache[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mSearching for patterns: ['__main___load_data', 'load_data'][0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: func_code.py - 662 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: metadata.json - 1014 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: output.pkl - 9083888 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: metadata.json - 1165 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: output.pkl - 4437488 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: metadata.json - 1016 bytes[0m
[32m19:59:50[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: out

{'high_miss_rate_functions': [{'function': '__main__.load_data',
   'hit_rate': 0.16666666666666666,
   'calls': 24},
  {'function': 'afml.cross_validation.cross_validation.analyze_cross_val_scores',
   'hit_rate': 0.21875,
   'calls': 32},
  {'function': '__main__.train_rf',
   'hit_rate': 0.2222222222222222,
   'calls': 18}],
 'unused_caches': [],
 'large_caches': [{'function': '__main__.load_data',
   'size_mb': 298.2985153198242,
   'hit_rate': 0.16666666666666666}],
 'frequently_accessed': [],
 'optimization_candidates': []}

In [None]:
from afml.cache import diagnose_cache_issues

# Run diagnostics: prints the cache diagnostics report.
diagnose_cache_issues()


CACHE DIAGNOSTICS REPORT

1. BASIC STATS:
   Tracked functions: 4
   Total calls: 82
   Overall hit rate: 25.6%

2. CACHE EFFICIENCY:
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mLooking for cache in: C:\Users\JoeN\AppData\Local\afml\afml\Cache\joblib_cache[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mSearching for patterns: ['__main___load_data', 'load_data'][0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: func_code.py - 662 bytes[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: metadata.json - 1014 bytes[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: output.pkl - 9083888 bytes[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: metadata.json - 1165 bytes[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound matching cache file: output.pkl - 4437488 bytes[0m
[32m19:59:51[0m | [34m[1mDEBUG   [0m | [34m[1mFound 