In [1]:
import os
# The notebook loads 'configs/experiment.yaml' relative to the working directory.
# Step up one level only when that file is not already reachable, so that
# re-running this cell does not climb one more directory on every execution.
if not os.path.exists('configs/experiment.yaml'):
    os.chdir('..')

from typing import Any, Dict, Optional

import pandas as pd
import numpy as np

import xtx.utils.dev_utils as dev_utils
# from xtx.features.feature_extractor import FeatureExtractor
from xtx.modeling.runners import CrossValClassificationRunner, CrossValRunner
# from xtx.modeling.stacking import RunnersStacking
from xtx.modeling.time_folds import TimeFolds

# Render up to 100 columns when displaying wide DataFrames.
pd.options.display.max_columns = 100
# Experiment settings are read from YAML, relative to the working directory.
experiment = dev_utils.load_yaml('configs/experiment.yaml')

from xtx.modeling.evaluation import get_mse_and_corr_scores
def score_cols(fold_processor, usecols, prefix=None, model_runner=None):
    """Fit a fresh model on a column subset and score it on the validation split.

    Args:
        fold_processor: object exposing ``train_data``, ``train_target``,
            ``valid_data`` and ``valid_target``. The data members are indexed
            as ``[:, usecols]``, so they are assumed to be 2-D arrays
            (TODO confirm against ``prepare_fold``).
        usecols: integer column positions to keep.
        prefix: optional label forwarded to the score printer.
        model_runner: object whose ``init_model()`` builds the estimator.
            Defaults to the notebook-level ``runner`` so existing calls keep
            working; pass it explicitly to avoid relying on hidden state.

    Returns:
        Whatever ``get_mse_and_corr_scores`` returns; callers in this notebook
        unpack it as ``(mse, corr)``.
    """
    if model_runner is None:
        # Backward-compatible fallback to the global created in a later cell.
        model_runner = runner
    model = model_runner.init_model()
    model.fit(fold_processor.train_data[:, usecols], fold_processor.train_target)
    val_predicted = model.predict(fold_processor.valid_data[:, usecols])
    return get_mse_and_corr_scores(fold_processor.valid_target, val_predicted, verbose=True, prefix=prefix)

# Display the loaded experiment configuration for reference.
experiment


{'train_data_path': 'data/xtx_data.csv',
 'test_data_path': None,
 'train_topk_features': 'topk_artefacts',
 'test_topk_features': 'test_topk_artefacts',
 'cached_features': 'data/__extended_features.pkl',
 'cached_test_features': 'data/__extended_test_features.pkl',
 'from_pool': False,
 'usecols': ['usecols/usecols_fold_0.txt', 'usecols/usecols_fold_1.txt'],
 'predictions_dir': 'predictions/debug',
 'runners_dir': 'runners/5_folds_extended',
 'model_zoo': 'configs/models_zoo.yaml',
 'use_regression_models': ['default_ridge', 'default_dart'],
 'use_classification_models': [],
 'stacking_model': 'stacking_ridge',
 'TimeFolds': {'n_folds': 5,
  'minifold_size': 60000,
  'neutral_ratio': 0.025,
  'test_ratio': 0.2,
  'train_test_gap': 2000},
 'random_seed': 42,
 'pseudo_target': [40]}

In [2]:
# Load the model catalogue whose path is given in the experiment config.
model_zoo_path = experiment['model_zoo']
model_zoo = dev_utils.load_yaml(model_zoo_path)
# The feature search uses the 'selection_ridge' entry
# (alternative kept for reference: model_zoo['train_zoo']['default_ridge']).
model_config = model_zoo['selection_ridge']
model_config

{'model_module': 'sklearn.linear_model',
 'model_cls': 'Ridge',
 'model_params': {'alpha': 10}}

In [3]:
# Cached feature matrix; pickle executes code on load, so only read files you produced yourself.
train_features = pd.read_pickle(experiment['cached_features'])
# Only the target column 'y' is needed from the raw CSV.
target = pd.read_csv('data/xtx_data.csv', usecols=['y'])['y']
# The last `holdout_rows` rows of the target form the test part, the rest is train.
holdout_rows = 699533
train_target, test_target = target.iloc[:-holdout_rows], target.iloc[-holdout_rows:]
train_features.head()

Unnamed: 0,ask_rate_0,bid_rate_0,mid_price,mid_price_log,spread,bid_ask_spread,ask_size_0,bid_size_0,ask_len,bid_len,len_ratio,cum_volume_imbalance_0,volume_imbalance_0,wap0,cum_volume_imbalance_1,volume_imbalance_1,wap1,cum_volume_imbalance_2,volume_imbalance_2,wap2,cum_volume_imbalance_3,volume_imbalance_3,wap3,cum_volume_imbalance_4,volume_imbalance_4,wap4,cum_volume_imbalance_5,volume_imbalance_5,wap5,cum_volume_imbalance_6,volume_imbalance_6,wap6,cum_volume_imbalance_7,volume_imbalance_7,wap7,cum_volume_imbalance_8,volume_imbalance_8,wap8,cum_volume_imbalance_9,volume_imbalance_9,wap9,ask_rate_moda_spread,bid_rate_moda_spread,increased_ask_counts,increased_ask_rank,decreased_ask_counts,decreased_ask_rank,increased_bid_counts,increased_bid_rank,decreased_bid_counts,...,wap1_10_mean,wap1_10_std,wap1_10_max,volume_imbalance_10_mean,volume_imbalance_10_max,volume_imbalance_10_std,volume_imbalance_10_skew,len_ratio_10_mean,len_ratio_10_std,wap0_20_mean,wap0_20_std,wap0_20_max,wap1_20_mean,wap1_20_std,wap1_20_max,volume_imbalance_20_mean,volume_imbalance_20_max,volume_imbalance_20_std,volume_imbalance_20_skew,len_ratio_20_mean,len_ratio_20_std,wap0_40_mean,wap0_40_std,wap0_40_max,wap1_40_mean,wap1_40_std,wap1_40_max,volume_imbalance_40_mean,volume_imbalance_40_max,volume_imbalance_40_std,volume_imbalance_40_skew,len_ratio_40_mean,len_ratio_40_std,wap0_80_mean,wap0_80_std,wap0_80_max,wap1_80_mean,wap1_80_std,wap1_80_max,volume_imbalance_80_mean,volume_imbalance_80_max,volume_imbalance_80_std,volume_imbalance_80_skew,len_ratio_80_mean,len_ratio_80_std,wap0_ewm_80,wap1_ewm_80,mid_price_ewm_80,volume_imbalance_ewm_80,volume_imbalance_1_ewm_80
0,1619.5,1615.0,1617.25,7.389101,4.5,0.002786,1.0,7.0,35.0,192.0,0.182292,0.75,0.75,1618.9375,0.214286,0.0,1617.0,-0.320755,-0.92,1613.319946,-0.111111,1.0,0.0,0.156627,1.0,0.0,0.186047,1.0,0.0,0.339623,1.0,0.0,0.473684,1.0,0.0,0.513889,1.0,0.0,0.556962,1.0,0.0,1.5,12.0,0,15,0,15,0,15,0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1618.9375,1617.0,1617.25,0.75,0.0
1,1619.5,1615.0,1617.25,7.389101,4.5,0.002786,1.0,7.0,40.0,192.0,0.208333,0.75,0.75,1618.9375,0.214286,0.0,1617.0,-0.320755,-0.92,1613.319946,-0.176471,0.333333,1618.333374,0.090909,1.0,0.0,0.120879,1.0,0.0,0.279279,1.0,0.0,0.42029,1.0,0.0,0.463087,1.0,0.0,0.509202,1.0,0.0,1.5,12.0,1,3,0,15,0,15,0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1618.9375,1617.0,1617.25,0.75,0.0
2,1619.5,1615.0,1617.25,7.389101,4.5,0.002786,1.0,7.0,42.0,192.0,0.21875,0.75,0.75,1618.9375,0.214286,0.0,1617.0,-0.320755,-0.92,1613.319946,-0.176471,0.333333,1618.333374,0.066667,0.818182,1621.0,0.096774,1.0,0.0,0.256637,1.0,0.0,0.4,1.0,0.0,0.443709,1.0,0.0,0.490909,1.0,0.0,1.5,12.0,1,4,0,15,0,15,0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1618.9375,1617.0,1617.25,0.75,0.0
3,1619.5,1615.0,1617.25,7.389101,4.5,0.002786,1.0,7.0,62.0,192.0,0.322917,0.75,0.75,1618.9375,0.214286,0.0,1617.0,-0.320755,-0.92,1613.319946,-0.176471,0.333333,1618.333374,-0.127273,-0.047619,1616.238037,-0.097345,1.0,0.0,0.067669,1.0,0.0,0.225,1.0,0.0,0.274854,1.0,0.0,0.32973,1.0,0.0,1.5,12.0,1,4,0,15,0,15,0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1618.9375,1617.0,1617.25,0.75,0.0
4,1619.5,1615.0,1617.25,7.389101,4.5,0.002786,1.0,7.0,72.0,192.0,0.375,0.75,0.75,1618.9375,0.214286,0.0,1617.0,-0.320755,-0.92,1613.319946,-0.176471,0.333333,1618.333374,-0.2,-0.230769,1615.230713,-0.170732,1.0,0.0,-0.006993,1.0,0.0,0.152941,1.0,0.0,0.20442,1.0,0.0,0.261538,1.0,0.0,2.5,12.0,1,4,0,15,0,15,0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1618.9375,1617.0,1617.25,0.75,0.0


In [5]:
# Build the fold splitter from the experiment settings and fit it on all features.
time_folds_params = experiment['TimeFolds']
time_folds = TimeFolds(**time_folds_params)
print(time_folds_params)
time_folds.fit(train_features, target)

{'n_folds': 5, 'minifold_size': 60000, 'neutral_ratio': 0.025, 'test_ratio': 0.2, 'train_test_gap': 2000}


NameError: name 'target' is not defined

In [3]:
# This section searches features on fold 0 (its result is written to
# 'usecols/usecols_fold_0.txt'). `fold_id` was used here without ever being
# defined, so the cell only worked with leftover kernel state.
fold_id = 0
runner = CrossValRunner(time_folds, **model_config)
fold_processor = runner.preprocessor.prepare_fold(fold_id)

{'model_module': 'sklearn.linear_model', 'model_cls': 'Ridge', 'model_params': {'alpha': 10, 'positive': True}}


NameError: name 'time_folds' is not defined

In [66]:
n_features = train_features.shape[1]
colname2idx = {name: idx for idx, name in enumerate(train_features.columns)}

dev_utils.init_seed(42)
# Start from a random half of the columns. Convert to a plain list: the search
# loop extends the selection with `best_usecols + [col]`, which on a NumPy array
# broadcast-adds the index to every element instead of appending it.
best_usecols = np.random.choice(n_features, n_features // 2, replace=False).tolist()
best_mse, best_corr = score_cols(fold_processor, usecols=best_usecols)

 MSE score: 0.4863;  Corr score: 0.1643


In [67]:
# Stochastic greedy feature search on the prepared fold.
# Even iterations try to drop one random selected column, odd iterations try to
# add one random unselected column; a move is kept only when it lowers the
# validation MSE returned by `score_cols`. There is no stopping rule besides the
# iteration cap, so the cell is expected to be interrupted manually.
#
# Normalise the selection to a plain list of ints first: while `best_usecols` is
# still a NumPy array, `best_usecols + [adding_col]` broadcast-adds the index to
# every element instead of appending it.
best_usecols = [int(col) for col in best_usecols]
for current_iter in range(100000):
    is_removal = current_iter % 2 == 0
    if is_removal:
        if len(best_usecols) < 2:
            continue  # never drop the last remaining column
        removing_col = int(np.random.choice(best_usecols))
        use_cols = [col for col in best_usecols if col != removing_col]
        prefix = f'---{removing_col}'
    else:
        selected = set(best_usecols)  # O(1) membership instead of scanning the list
        candidates = [idx for idx in range(n_features) if idx not in selected]
        if not candidates:
            continue  # every column is already selected
        adding_col = int(np.random.choice(candidates))
        use_cols = best_usecols + [adding_col]
        prefix = f'+++{adding_col}'
    current_mse, current_corr = score_cols(fold_processor, usecols=use_cols, prefix=prefix)
    if current_mse < best_mse:
        best_usecols = use_cols
        best_mse, best_corr = current_mse, current_corr
        if is_removal:
            print(f'SUCCESSFULL REMOVE OF {train_features.columns[removing_col]}. Len: {len(use_cols)}')
        else:
            print(f'SUCCESSFULL ADDING OF {train_features.columns[adding_col]}. Len: {len(use_cols)}')
            print(f'Last added cols: {[train_features.columns[col] for col in best_usecols[-5:]]}')

---170 MSE score: 0.4863; ---170 Corr score: 0.1643
+++122 MSE score: 0.4863; +++122 Corr score: 0.1642
---153 MSE score: 0.4863; ---153 Corr score: 0.1643
+++33 MSE score: 0.4864; +++33 Corr score: 0.1642
---182 MSE score: 0.4864; ---182 Corr score: 0.1641
+++174 MSE score: 0.4863; +++174 Corr score: 0.1645
SUCCESSFULL ADDING OF mid_price_min_diff_80. Len: 123
Last added cols: ['volume_imbalance_80_max', 'len_ratio_80_mean', 'wap0_ewm_80', 'volume_imbalance_ewm_80', 'mid_price_min_diff_80']
---98 MSE score: 0.4863; ---98 Corr score: 0.1645
+++44 MSE score: 0.4862; +++44 Corr score: 0.1652
SUCCESSFULL ADDING OF increased_ask_rank. Len: 124
Last added cols: ['len_ratio_80_mean', 'wap0_ewm_80', 'volume_imbalance_ewm_80', 'mid_price_min_diff_80', 'increased_ask_rank']
---170 MSE score: 0.4862; ---170 Corr score: 0.1651
+++194 MSE score: 0.4863; +++194 Corr score: 0.1649
---140 MSE score: 0.4862; ---140 Corr score: 0.1652
+++184 MSE score: 0.4862; +++184 Corr score: 0.1650
---126 MSE score

KeyboardInterrupt: 

In [74]:
# Persist the selected column names, one per line.
best_usenames = train_features.columns[best_usecols]
# Make sure the output directory exists so the search result is not lost.
os.makedirs('usecols', exist_ok=True)
with open('usecols/usecols_fold_0.txt', 'w') as f:
    # `write` is the right call for a single string; `writelines` would iterate
    # over it character by character.
    f.write('\n'.join(best_usenames))

In [75]:
# Display the names of the columns kept by the search.
best_usenames

Index(['ask_rate_0', 'bid_rate_0', 'mid_price', 'spread', 'bid_ask_spread',
       'bid_size_0', 'bid_len', 'len_ratio', 'cum_volume_imbalance_1',
       'volume_imbalance_1',
       ...
       'wap1_40_max', 'len_ratio_40_mean', 'wap0_80_mean', 'wap0_80_std',
       'wap1_80_std', 'volume_imbalance_80_max', 'volume_imbalance_80_skew',
       'len_ratio_80_mean', 'wap0_ewm_80', 'volume_imbalance_ewm_80'],
      dtype='object', length=129)

In [77]:
# Fit a separate fold splitter on the selected columns only.
prod_folds_params = experiment['TimeFolds']
time_folds_prod = TimeFolds(**prod_folds_params)
print(prod_folds_params)
selected_features = train_features.loc[:, best_usenames]
time_folds_prod.fit(selected_features, target)


{'n_folds': 5, 'minifold_size': 60000, 'neutral_ratio': 0.025, 'test_ratio': 0.2, 'train_test_gap': 2000}


In [80]:
# Reload the model catalogue and pick the two ridge configurations used below.
model_zoo = dev_utils.load_yaml(experiment['model_zoo'])
train_zoo = model_zoo['train_zoo']
model_config = train_zoo['default_ridge']
model_config10 = model_zoo['selection_ridge']
print(model_config)
# Run the default ridge over all folds of the selected-feature splitter.
runner = CrossValRunner(time_folds_prod, **model_config)
runner.fit(verbose=True)

{'model_module': 'sklearn.linear_model', 'model_cls': 'Ridge', 'model_params': {'alpha': 100}}


Ridge:   0%|          | 0/5 [00:00<?, ?it/s]

|    | dataset   | metric_name   |   fold_0 |   fold_1 |   fold_2 |   fold_3 |   fold_4 |
|---:|:----------|:--------------|---------:|---------:|---------:|---------:|---------:|
|  0 | val       | mse           |    0.486 |    0.467 |    0.452 |    0.567 |    0.526 |
|  1 | val       | corr          |    0.167 |    0.164 |    0.154 |    0.149 |    0.145 |
|  2 | test      | mse           |    0.331 |    0.332 |    0.331 |    0.331 |    0.331 |
|  3 | test      | corr          |    0.155 |    0.152 |    0.151 |    0.156 |    0.155 |
             	Val  corr score averaged: 0.156
            	Val   MSE score averaged: 0.500
        
             	Test corr score averaged: 0.154
            	Test  MSE score averaged: 0.331
            ------------------------------------------------------------------
            	Test  MSE predicts averaged: 0.331
            	Test corr predicts averaged: 0.156


# Fold 1

In [86]:
# Fresh fold splitter on the full feature set for the fold-1 search.
folds_params = experiment['TimeFolds']
time_folds = TimeFolds(**folds_params)
print(folds_params)
time_folds.fit(train_features, target)

fold_id = 1
runner = CrossValRunner(time_folds, **model_config)
preprocessor = runner.preprocessor
fold_processor = preprocessor.prepare_fold(fold_id)


In [91]:
# Build an independent configuration with alpha=10. Plain assignment would alias
# `model_config`, so writing into `model_params` would also change `model_config`
# (and the `model_zoo` entry it was taken from) for every later cell.
model_config10 = {
    **model_config,
    'model_params': {**model_config['model_params'], 'alpha': 10},
}

In [92]:
# Runner built from `model_config10`, plus the prepared data of the current fold.
runner = CrossValRunner(time_folds, **model_config10)
preprocessor = runner.preprocessor
fold_processor = preprocessor.prepare_fold(fold_id)


In [93]:
n_features = len(train_features.columns)
colname2idx = dict(zip(train_features.columns, range(n_features)))

dev_utils.init_seed(42 + fold_id)
# No random re-initialisation here: `best_usecols` is carried over from the
# previous search and is only re-scored on the current fold.
best_mse, best_corr = score_cols(fold_processor, usecols=best_usecols)

 MSE score: 0.4656;  Corr score: 0.1731


In [94]:
# Stochastic greedy feature search on the prepared fold.
# Even iterations try to drop one random selected column, odd iterations try to
# add one random unselected column; a move is kept only when it lowers the
# validation MSE returned by `score_cols`. There is no stopping rule besides the
# iteration cap, so the cell is expected to be interrupted manually.
#
# Normalise the selection to a plain list of ints first: if `best_usecols` is a
# NumPy array, `best_usecols + [adding_col]` broadcast-adds the index to every
# element instead of appending it.
best_usecols = [int(col) for col in best_usecols]
for current_iter in range(100000):
    is_removal = current_iter % 2 == 0
    if is_removal:
        if len(best_usecols) < 2:
            continue  # never drop the last remaining column
        removing_col = int(np.random.choice(best_usecols))
        use_cols = [col for col in best_usecols if col != removing_col]
        prefix = f'---{removing_col}'
    else:
        selected = set(best_usecols)  # O(1) membership instead of scanning the list
        candidates = [idx for idx in range(n_features) if idx not in selected]
        if not candidates:
            continue  # every column is already selected
        adding_col = int(np.random.choice(candidates))
        use_cols = best_usecols + [adding_col]
        prefix = f'+++{adding_col}'
    current_mse, current_corr = score_cols(fold_processor, usecols=use_cols, prefix=prefix)
    if current_mse < best_mse:
        best_usecols = use_cols
        best_mse, best_corr = current_mse, current_corr
        if is_removal:
            print(f'SUCCESSFULL REMOVE OF {train_features.columns[removing_col]}. Len: {len(use_cols)}')
        else:
            print(f'SUCCESSFULL ADDING OF {train_features.columns[adding_col]}. Len: {len(use_cols)}')
            print(f'Last added cols: {[train_features.columns[col] for col in best_usecols[-5:]]}')

---198 MSE score: 0.4656; ---198 Corr score: 0.1732
SUCCESSFULL REMOVE OF volume_imbalance_20_std. Len: 125
+++162 MSE score: 0.4656; +++162 Corr score: 0.1732
---56 MSE score: 0.4656; ---56 Corr score: 0.1731
+++53 MSE score: 0.4656; +++53 Corr score: 0.1731
---182 MSE score: 0.4656; ---182 Corr score: 0.1732
SUCCESSFULL REMOVE OF wap1_10_std. Len: 124
+++40 MSE score: 0.4656; +++40 Corr score: 0.1732
---208 MSE score: 0.4654; ---208 Corr score: 0.1739
SUCCESSFULL REMOVE OF volume_imbalance_40_mean. Len: 123
+++47 MSE score: 0.4655; +++47 Corr score: 0.1738
---98 MSE score: 0.4654; ---98 Corr score: 0.1739
SUCCESSFULL REMOVE OF bid_flatten_mean_50. Len: 122
+++202 MSE score: 0.4654; +++202 Corr score: 0.1741
SUCCESSFULL ADDING OF wap0_40_mean. Len: 123
Last added cols: ['ask_flatten_len_25', 'ask_len', 'mid_price_std_10', 'len_ratio_10_mean', 'wap0_40_mean']
---50 MSE score: 0.4657; ---50 Corr score: 0.1726
+++74 MSE score: 0.4654; +++74 Corr score: 0.1739
---106 MSE score: 0.4654; --

KeyboardInterrupt: 

In [95]:
# Persist the selected column names for the current fold, one per line.
best_usenames = train_features.columns[best_usecols]
# Make sure the output directory exists so the search result is not lost.
os.makedirs('usecols', exist_ok=True)
with open(f'usecols/usecols_fold_{fold_id}.txt', 'w') as f:
    # `write` is the right call for a single string; `writelines` would iterate
    # over it character by character.
    f.write('\n'.join(best_usenames))

In [105]:
# Fit a separate fold splitter on the selected columns only.
prod_folds_params = experiment['TimeFolds']
time_folds_prod = TimeFolds(**prod_folds_params)
print(prod_folds_params)
selected_features = train_features.loc[:, best_usenames]
time_folds_prod.fit(selected_features, target)

{'n_folds': 5, 'minifold_size': 60000, 'neutral_ratio': 0.025, 'test_ratio': 0.2, 'train_test_gap': 2000}


In [106]:
# Run the model described by `model_config` over the folds of `time_folds_prod`
# (fitted on the selected columns) and print the per-fold score report.
runner = CrossValRunner(time_folds_prod, **model_config)
runner.fit(verbose=True)

Ridge:   0%|          | 0/5 [00:00<?, ?it/s]

|    | dataset   | metric_name   |   fold_0 |   fold_1 |   fold_2 |   fold_3 |   fold_4 |
|---:|:----------|:--------------|---------:|---------:|---------:|---------:|---------:|
|  0 | val       | mse           |    0.487 |    0.465 |    0.451 |    0.566 |    0.526 |
|  1 | val       | corr          |    0.159 |    0.177 |    0.16  |    0.154 |    0.145 |
|  2 | test      | mse           |    0.332 |    0.331 |    0.331 |    0.332 |    0.331 |
|  3 | test      | corr          |    0.156 |    0.156 |    0.157 |    0.159 |    0.157 |
             	Val  corr score averaged: 0.159
            	Val   MSE score averaged: 0.499
        
             	Test corr score averaged: 0.157
            	Test  MSE score averaged: 0.331
            ------------------------------------------------------------------
            	Test  MSE predicts averaged: 0.331
            	Test corr predicts averaged: 0.158


# Fold 2

In [4]:
# Fresh fold splitter on the full feature set for the fold-2 search.
folds_params = experiment['TimeFolds']
time_folds = TimeFolds(**folds_params)
print(folds_params)
time_folds.fit(train_features, target)

# The search is scored with the 'selection_ridge' configuration.
model_config = model_zoo['selection_ridge']
print(model_config)
fold_id = 2
runner = CrossValRunner(time_folds, **model_config)
preprocessor = runner.preprocessor
fold_processor = preprocessor.prepare_fold(fold_id)


{'n_folds': 5, 'minifold_size': 60000, 'neutral_ratio': 0.025, 'test_ratio': 0.2, 'train_test_gap': 2000}
{'model_module': 'sklearn.linear_model', 'model_cls': 'Ridge', 'model_params': {'alpha': 10}}


In [5]:
n_features = train_features.shape[1]
colname2idx = {name: idx for idx, name in enumerate(train_features.columns)}

dev_utils.init_seed(42 + fold_id)
# Start from a random half of the columns. Convert to a plain list: the search
# loop extends the selection with `best_usecols + [col]`, which on a NumPy array
# broadcast-adds the index to every element instead of appending it.
best_usecols = np.random.choice(n_features, n_features // 2, replace=False).tolist()
best_mse, best_corr = score_cols(fold_processor, usecols=best_usecols)

 MSE score: 0.4523;  Corr score: 0.1513


In [8]:
# Stochastic greedy feature search with early stopping.
# Even iterations try to drop one random selected column, odd iterations try to
# add one random unselected column; a move is kept only when it lowers the
# validation MSE returned by `score_cols`. The search stops after more than 20
# consecutive iterations without improvement.
#
# Normalise the selection to a plain list of ints first: while `best_usecols` is
# still a NumPy array, `best_usecols + [adding_col]` broadcast-adds the index to
# every element instead of appending it.
best_usecols = [int(col) for col in best_usecols]
not_improved_iter = 0
for current_iter in range(100000):
    is_removal = current_iter % 2 == 0
    use_cols = None  # stays None when no move is possible on this iteration
    if is_removal:
        if len(best_usecols) > 1:  # never drop the last remaining column
            removing_col = int(np.random.choice(best_usecols))
            use_cols = [col for col in best_usecols if col != removing_col]
            prefix = f'---{removing_col}'
    else:
        selected = set(best_usecols)  # O(1) membership instead of scanning the list
        candidates = [idx for idx in range(n_features) if idx not in selected]
        if candidates:  # nothing to add once every column is selected
            adding_col = int(np.random.choice(candidates))
            use_cols = best_usecols + [adding_col]
            prefix = f'+++{adding_col}'

    improved = False
    if use_cols is not None:
        current_mse, current_corr = score_cols(fold_processor, usecols=use_cols, prefix=prefix)
        improved = current_mse < best_mse

    if improved:
        best_usecols = use_cols
        best_mse, best_corr = current_mse, current_corr
        not_improved_iter = 0
        if is_removal:
            print(f'SUCCESSFULL REMOVE OF {train_features.columns[removing_col]}. Len: {len(use_cols)}')
        else:
            print(f'SUCCESSFULL ADDING OF {train_features.columns[adding_col]}. Len: {len(use_cols)}')
            print(f'Last added cols: {[train_features.columns[col] for col in best_usecols[-5:]]}')
    else:
        # An impossible move counts as a non-improving iteration.
        not_improved_iter += 1
        print(f'Not improved: {not_improved_iter}')
    if not_improved_iter > 20:
        break

# Persist the selected column names for this fold, one per line.
best_usenames = train_features.columns[best_usecols]
os.makedirs('usecols', exist_ok=True)
with open(f'usecols/usecols_fold_{fold_id}.txt', 'w') as f:
    # `write` is the right call for a single string; `writelines` would iterate
    # over it character by character.
    f.write('\n'.join(best_usenames))

---13 MSE score: 0.4514; ---13 Corr score: 0.1574
Not improved: 1
+++104 MSE score: 0.4513; +++104 Corr score: 0.1578
SUCCESSFULL ADDING OF flatten_spread_50_ewm. Len: 119
Last added cols: ['ask_flatten_mean_5', 'volume_imbalance_20_skew', 'flatten_spread_100_ewm', 'bid_flatten_std_5', 'flatten_spread_50_ewm']
---153 MSE score: 0.4513; ---153 Corr score: 0.1578
SUCCESSFULL REMOVE OF mid_price_max_diff_5. Len: 118
+++34 MSE score: 0.4513; +++34 Corr score: 0.1577
Not improved: 1
---148 MSE score: 0.4513; ---148 Corr score: 0.1578
SUCCESSFULL REMOVE OF mid_price_log_40. Len: 117
+++16 MSE score: 0.4513; +++16 Corr score: 0.1579
SUCCESSFULL ADDING OF wap1. Len: 118
Last added cols: ['volume_imbalance_20_skew', 'flatten_spread_100_ewm', 'bid_flatten_std_5', 'flatten_spread_50_ewm', 'wap1']
---82 MSE score: 0.4513; ---82 Corr score: 0.1580
SUCCESSFULL REMOVE OF bid_flatten_iqr_25. Len: 117
+++216 MSE score: 0.4513; +++216 Corr score: 0.1580
Not improved: 1
---223 MSE score: 0.4512; ---223 C

In [11]:
# Run the same stochastic greedy feature search for folds 3 and 4 and save the
# selected column names of each fold to 'usecols/usecols_fold_<fold_id>.txt'.
for fold_id in (3, 4):
    # The splitter and runner are rebuilt for every fold, as in the per-fold cells.
    time_folds = TimeFolds(**experiment['TimeFolds'])
    print(experiment['TimeFolds'])
    time_folds.fit(train_features, target)

    model_config = model_zoo['selection_ridge']
    print(model_config)

    # `score_cols` reads the notebook-level `runner`, so it must be rebound here.
    runner = CrossValRunner(time_folds, **model_config)
    fold_processor = runner.preprocessor.prepare_fold(fold_id)

    n_features = train_features.shape[1]

    # Fold-specific seed; start from a random half of the columns (plain list).
    dev_utils.init_seed(42 + fold_id)
    best_usecols = np.random.choice(n_features, n_features // 2, replace=False).tolist()
    best_mse, best_corr = score_cols(fold_processor, usecols=best_usecols)

    not_improved_iter = 0
    for current_iter in range(100000):
        # Even iterations try a removal, odd iterations try an addition.
        is_removal = current_iter % 2 == 0
        use_cols = None  # stays None when no move is possible on this iteration
        if is_removal:
            if len(best_usecols) > 1:  # never drop the last remaining column
                removing_col = int(np.random.choice(best_usecols))
                use_cols = [col for col in best_usecols if col != removing_col]
                prefix = f'---{removing_col}'
        else:
            selected = set(best_usecols)  # O(1) membership instead of scanning the list
            candidates = [idx for idx in range(n_features) if idx not in selected]
            if candidates:  # nothing to add once every column is selected
                adding_col = int(np.random.choice(candidates))
                use_cols = best_usecols + [adding_col]
                prefix = f'+++{adding_col}'

        improved = False
        if use_cols is not None:
            current_mse, current_corr = score_cols(fold_processor, usecols=use_cols, prefix=prefix)
            improved = current_mse < best_mse

        if improved:
            # Keep the move only when validation MSE went down.
            best_usecols = use_cols
            best_mse, best_corr = current_mse, current_corr
            not_improved_iter = 0
            if is_removal:
                print(f'SUCCESSFULL REMOVE OF {train_features.columns[removing_col]}. Len: {len(use_cols)}')
            else:
                print(f'SUCCESSFULL ADDING OF {train_features.columns[adding_col]}. Len: {len(use_cols)}')
                print(f'Last added cols: {[train_features.columns[col] for col in best_usecols[-5:]]}')
        else:
            # An impossible move counts as a non-improving iteration.
            not_improved_iter += 1
            print(f'Not improved: {not_improved_iter}')
        # Stop after more than 20 consecutive non-improving iterations.
        if not_improved_iter > 20:
            break

    best_usenames = train_features.columns[best_usecols]
    os.makedirs('usecols', exist_ok=True)
    with open(f'usecols/usecols_fold_{fold_id}.txt', 'w') as f:
        # `write` is the right call for a single string; `writelines` would
        # iterate over it character by character.
        f.write('\n'.join(best_usenames))


{'n_folds': 5, 'minifold_size': 60000, 'neutral_ratio': 0.025, 'test_ratio': 0.2, 'train_test_gap': 2000}
{'model_module': 'sklearn.linear_model', 'model_cls': 'Ridge', 'model_params': {'alpha': 10}}
 MSE score: 0.5678;  Corr score: 0.1465
---47 MSE score: 0.5678; ---47 Corr score: 0.1462
Not improved: 1
+++203 MSE score: 0.5678; +++203 Corr score: 0.1464
Not improved: 2
---104 MSE score: 0.5678; ---104 Corr score: 0.1464
Not improved: 3
+++46 MSE score: 0.5677; +++46 Corr score: 0.1468
SUCCESSFULL ADDING OF decreased_ask_rank. Len: 116
Last added cols: ['increased_ask_counts', 'volume_imbalance_6', 'bid_flatten_len_50', 'decreased_ask_counts_5_volume', 'decreased_ask_rank']
---33 MSE score: 0.5677; ---33 Corr score: 0.1469
SUCCESSFULL REMOVE OF volume_imbalance_7. Len: 115
+++170 MSE score: 0.5677; +++170 Corr score: 0.1468
Not improved: 1
---210 MSE score: 0.5677; ---210 Corr score: 0.1469
SUCCESSFULL REMOVE OF volume_imbalance_40_std. Len: 114
+++60 MSE score: 0.5677; +++60 Corr sco