# Installs

In [1]:
# !pip install -U onetick-ds-framework
# !pip install -U onetick.py

# Imports

In [2]:
%%capture
import os
import yaml
import itertools

from dsframework.utils import build_experiment
import onetick.py as otp

# Mini usecases

## 1. Simple Experiment run

In [3]:
# Build the experiment from the YAML config.
config_path = './min_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)
exp = build_experiment(config)

# get data
df_orig = exp.get_data()

# prepare data
exp.prepare_data()

# train model
exp.init_fit(local_mode=True)

# predict result
prediction = exp.predict(x=exp.x_test)
print(prediction)

# calc metrics (last expression, so the notebook displays the returned value)
exp.calc_metrics()

      VOLUME_PREDICTION
8500        5686.274239
8501        5594.027861
8502        6003.124030
8503        7989.692529
8504        7467.357646
...                 ...
9995        3355.691032
9996        3142.114660
9997        2828.472254
9998        3773.203191
9999        3424.182879

[1500 rows x 1 columns]


{'VOLUME_MAE': 500.0867144249592}

## 2. Hyper-parameters optimization

There are 2 ways to configure hyperparameter tuning:
1) change config: 

    modify yaml-file directly (enable hyperparameter tuning and pass parameter lists):

    ```
    training:
        use: 'CatBoostRegressor'
        search_cv:
          tuning_cv: True
        models:
          CatBoostRegressor:
            init_params:
              iterations: [ 500 ]
              learning_rate: [ 0.09 ]
              l2_leaf_reg: [ 1, 2, 3 ]
              depth: [ 1, 2 ]
            fit_params:
              verbose: 0
    ```
    
    or modify config after loading from yaml file:
    
    ```
    config['training']['search_cv']['tuning_cv'] = True
    config['training']['models']['CatBoostRegressor']['init_params'] = {'iterations': [ 500 ],
                                                                        'learning_rate': [ 0.09 ],
                                                                        'l2_leaf_reg': [ 1, 2, 3 ],
                                                                        'depth': [ 1, 2 ]}
    ```
2) modify the `training_params` and `init_params_grid` parameters in the `init_fit` function:
    ```
    exp.init_fit(training_params={'search_cv':
                                      {'tuning_cv': True}
                                  },
                 init_params_grid={'iterations': [500],
                                   'learning_rate': [0.09],
                                   'l2_leaf_reg': [1, 2, 3],
                                   'depth': [1, 2]
                                   })
    ```

In [4]:
# Example: passing training_params and init_params_grid directly to init_fit.

config_path = './min_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)
exp = build_experiment(config)

exp.get_data()
exp.prepare_data()

# Candidate values for each model init parameter.
init_params_grid = {
    'iterations': [500],
    'learning_rate': [0.09],
    'l2_leaf_reg': [1, 2, 3],
    'depth': [1, 2],
}
exp.init_fit(
    training_params={'search_cv': {'tuning_cv': True}},
    init_params_grid=init_params_grid,
    local_mode=True,
)

# best params
exp.current_model_params

{'iterations': 500, 'learning_rate': 0.09, 'l2_leaf_reg': 3, 'depth': 2}

## 3. Tuning ML model

In [5]:
# Let's list the set of models that we want to test

models = ['XGBRegressor', 'CatBoostRegressor', 'DNNRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor']

# Calculate metrics for each model

config_path = './min_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)

# Maps model name -> value returned by calc_metrics() for that model.
res = {}
for model in models:
    # The shared config dict is updated in place; every iteration overwrites 'use'.
    config['training']['use'] = model
    exp = build_experiment(config)
    exp.get_data()
    exp.prepare_data()
    exp.init_fit(local_mode=True)
    exp.predict(x=exp.x_test)
    res[model] = exp.calc_metrics()

res

{'XGBRegressor': {'VOLUME_MAE': 528.0314935709636},
 'CatBoostRegressor': {'VOLUME_MAE': 500.0867144249592},
 'DNNRegressor': {'VOLUME_MAE': 4979.535737752279},
 'DecisionTreeRegressor': {'VOLUME_MAE': 694.2513333333334},
 'RandomForestRegressor': {'VOLUME_MAE': 538.7566466666666}}

## 4. Tuning of data processing 

In [6]:
# Calculate metrics for each on/off combination of the configured preprocessors

config_path = './demo_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)

preprocessors = config['preprocessors']

# Maps a tuple of "<class> = <flag>" labels -> value returned by calc_metrics().
res = {}
# One boolean per configured preprocessor, so the grid follows the config size
# instead of assuming exactly two preprocessors.
for preproc_combination in itertools.product([False, True], repeat=len(preprocessors)):
    key = []
    for preprocessor, enabled in zip(preprocessors, preproc_combination):
        # The config entry is updated in place before the experiment is rebuilt.
        preprocessor['use'] = enabled
        key.append(f"{preprocessor['class']} = {enabled}")
    exp = build_experiment(config)
    exp.get_data()
    exp.prepare_data()
    exp.init_fit(local_mode=True)
    exp.predict(x=exp.x_test)
    res[tuple(key)] = exp.calc_metrics()

res

{('dsframework.impl.LimitOutliers = False',
  'dsframework.impl.IntradayAveraging = False'): {'VOLUME_MAE': 465.09318226501364},
 ('dsframework.impl.LimitOutliers = False',
  'dsframework.impl.IntradayAveraging = True'): {'VOLUME_MAE': 496.7541489068729},
 ('dsframework.impl.LimitOutliers = True',
  'dsframework.impl.IntradayAveraging = False'): {'VOLUME_MAE': 437.3806200785672},
 ('dsframework.impl.LimitOutliers = True',
  'dsframework.impl.IntradayAveraging = True'): {'VOLUME_MAE': 479.9462812613347}}

## 5. Tuning of features

In [7]:
# Calculate metrics for each set of lag features

config_path = './demo_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)

# Maps a "lags: (...)" label -> value returned by calc_metrics() for that lag set.
res = {}
for max_lag in range(1, 11):
    # Lag sets grow from (1,) up to (1, ..., 10).
    lags = list(range(1, max_lag + 1))
    # Only the first entry of config['features'] gets its 'periods' overwritten.
    config['features'][0]['periods'] = lags
    exp = build_experiment(config)
    exp.get_data()
    exp.prepare_data()
    exp.init_fit(local_mode=True)
    exp.predict(x=exp.x_test)
    res[f'lags: {tuple(lags)}'] = exp.calc_metrics()

res

{'lags: (1,)': {'VOLUME_MAE': 493.8599480387573},
 'lags: (1, 2)': {'VOLUME_MAE': 496.71957163234885},
 'lags: (1, 2, 3)': {'VOLUME_MAE': 464.39686752854914},
 'lags: (1, 2, 3, 4)': {'VOLUME_MAE': 497.90489074955866},
 'lags: (1, 2, 3, 4, 5)': {'VOLUME_MAE': 470.4375251183097},
 'lags: (1, 2, 3, 4, 5, 6)': {'VOLUME_MAE': 503.3914871062161},
 'lags: (1, 2, 3, 4, 5, 6, 7)': {'VOLUME_MAE': 507.2907692761104},
 'lags: (1, 2, 3, 4, 5, 6, 7, 8)': {'VOLUME_MAE': 506.5415381062806},
 'lags: (1, 2, 3, 4, 5, 6, 7, 8, 9)': {'VOLUME_MAE': 481.75941170119785},
 'lags: (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)': {'VOLUME_MAE': 514.2889663624289}}

## 6. Test of various types of validation

There are 2 ways to configure validation:
1) change config: 

    modify yaml-file directly:

    ```
    training:
        use: 'CatBoostRegressor'
        search_cv:
            val_type: 'Simple' # 'Simple', 'Cross', 'WalkForward'
            folds: 5 # is used for 'Cross', 'WalkForward'
            eval_metric: 'MAE'
    ```
    
    or modify config after loading from yaml file:
    
    ```
    config['training']['search_cv']['val_type'] = 'WalkForward'

    ```
2) modify `training_params` in the `init_fit` function:
    ```
    exp.init_fit(training_params={'search_cv':
                                      {'val_type': 'WalkForward'}
                                  })
    ```

In [8]:
# Example: overriding the validation settings via training_params in init_fit.

config_path = './min_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)

exp = build_experiment(config)
exp.get_data()
exp.prepare_data()

# Validation settings handed to init_fit under the 'search_cv' key.
search_cv_settings = {
    'val_type': 'WalkForward',
    'folds': 10,
    'eval_metric': 'MAE',
}
exp.init_fit(training_params={'search_cv': search_cv_settings},
             local_mode=True)

# Last expression, so the notebook displays the cross-validation results.
exp.cv_model.cv_results_

{'params': [{}],
 'split0_test_score': array([-558.65148296]),
 'split1_test_score': array([-496.90452111]),
 'split2_test_score': array([-546.70587834]),
 'split3_test_score': array([-519.76188083]),
 'split4_test_score': array([-462.55707829]),
 'split5_test_score': array([-482.13030472]),
 'split6_test_score': array([-481.96009258]),
 'split7_test_score': array([-543.13324583]),
 'split8_test_score': array([-472.51666105]),
 'split9_test_score': array([-510.26431007]),
 'mean_test_score': array([-507.45854558]),
 'std_test_score': array([31.98930127]),
 'rank_test_score': array([1], dtype=int32),
 'time_total_s': array([9.0647428]),
 'training_iteration': array([1])}

## 7. Benchmark: current prediction = previous actual

In [9]:
# Compare the trained model's metric with the value returned by calc_baseline().
config_path = './min_config.yml'
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only load
# trusted config files with it; yaml.safe_load is the safer choice if the config
# contains plain YAML only.
with open(config_path) as config_file:  # the context manager closes the file handle
    config = yaml.load(config_file, Loader=yaml.Loader)
exp = build_experiment(config)

exp.get_data()
exp.prepare_data()
exp.init_fit(local_mode=True)
exp.predict(x=exp.x_test)

model_metric = exp.calc_metrics()
baseline_metric = exp.calc_baseline()

# Bare tuple as the last expression: the notebook displays labels next to values.
'Model metric', model_metric, 'Baseline metric', baseline_metric

('Model metric',
 {'VOLUME_MAE': 500.0867144249592},
 'Baseline metric',
 {'VOLUME_MAE': 510.7098065376918})

## 8. Test of various trading symbols

## 9. Adding custom data with features (after/instead of get_data)

## 10. Adding custom preprocessing

## 11. Prediction intervals

## 12. Simple examples of overriding experiment pipeline classes

## 13. Simultaneous tuning of models and loss functions

## 14. Local saving and loading of models

## 15. Saving and loading experiments using MLflow