### Libraries import

In [1]:
# Base libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
import functions as fun
import importlib as imp

# Libraries for plotting
import plotly.io as pio
import plotly.graph_objects as go

# Libraries for modelling
import statsmodels.api as sm
import sklearn.model_selection as modsel

# Set additional settings for warnings and templates
import warnings
warnings.filterwarnings("ignore")
pio.templates.default = "plotly_dark"
pd.set_option('display.max_columns', None)

In [26]:
# Reload functions if they were changed
imp.reload(fun)



### Financial models

#### Feature generation

In [22]:
# Read dataset and define columns for feature generation
data = pd.read_parquet('Data/dataset.parquet').drop(columns = 'Variance')
indices = data.groupby(['Ticker', 'Index']).size().index.values
cols = ['Hurst', 'CorrDim', 'Lyapunov', 
        'Skewness', 'Kurtosis', 'PSD', 'ACF_1',
        'WL_C1', 'WL_C2', 'WL_C3']

# Set lag for dynamics and short variance calculation
lag_model = [8]

# Calculate dynamics and short variance
# Original idea about variance was born from the largest Lyapunov exponent's behaviour before the critical transition point:
# is mostly didn't move in nominal values but its variance in some cases decreased signigicantly 
data_logdyn = pd.DataFrame()
for ind in tqdm(indices):
    data_ind = data[(data['Ticker'] == ind[0]) & (data['Index'] == ind[1])]
    for col in cols:
        for lag_m in lag_model:
            data_ind[col + '_' + str(lag_m) + '_dyn'] = data_ind[col] / data_ind[col].shift(lag_m) - 1
            data_ind[col + '_' + str(lag_m) + '_Variance'] = data_ind[col].rolling(lag_m).var()
    data_ind.dropna(inplace = True)
    data_logdyn = pd.concat([data_logdyn, data_ind])

# Reset index to get rid of dates and save final dataset
data_logdyn.reset_index(drop = True, inplace = True)
data_logdyn = data_logdyn[data_logdyn['Distance'] > 0]
data_logdyn.to_parquet('Data/final_dataset.parquet')
data_logdyn

100%|██████████| 548/548 [00:17<00:00, 32.06it/s]


Unnamed: 0,Volume,MA100,MV100,Rise,Distance,Index,Ticker,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
0,23952.0,31070.20,7.130232e+08,False,291,2643,AAN,0.651904,-2.815337e-15,0.049050,4.481086,26.699359,-0.609296,0.374819,0.465914,-0.103608,-0.124637,-0.050382,0.002605,-0.391654,1.096794e-29,0.145294,0.000312,0.030011,0.002971,0.082711,0.754393,0.037030,0.000049,0.019152,0.000045,-0.045022,0.003071,0.285858,0.000373,-0.167933,0.001749
1,159410.0,32093.49,8.715063e+08,False,290,2643,AAN,0.662030,-9.789577e-17,0.048384,4.402063,25.787151,-0.612294,0.367293,0.418223,-0.130413,-0.086043,0.050584,0.002415,224.435763,1.094419e-29,0.111672,0.000318,0.012617,0.002558,0.046669,0.643472,0.038410,0.000053,-0.000925,0.000044,-0.009408,0.003095,0.559503,0.000450,-0.162758,0.001908
2,74965.0,32664.47,8.876980e+08,False,289,2643,AAN,0.690370,-2.362195e-15,0.017922,4.390049,25.683490,-0.614583,0.371478,0.475433,-0.103349,-0.175004,0.204455,0.000927,-1.947039,9.171144e-30,-0.676723,0.000350,0.011164,0.001981,0.044073,0.473688,0.038391,0.000054,0.009061,0.000048,-0.046778,0.002778,0.087733,0.000436,-0.030778,0.001826
3,57275.0,32998.33,8.929255e+08,False,288,2643,AAN,0.693400,-2.719365e-15,0.019476,4.389790,25.681349,-0.618153,0.370974,0.380598,-0.146165,-0.076268,0.038265,0.000894,-0.439684,7.705216e-30,-0.652200,0.000322,0.011460,0.001289,0.044543,0.257182,0.040634,0.000055,0.012792,0.000050,0.045300,0.002463,0.182044,0.000618,-0.023034,0.001851
4,36764.0,32491.61,8.628754e+08,False,287,2643,AAN,0.670705,2.596383e-15,0.050424,4.420547,25.969972,-0.621126,0.373620,0.466517,-0.100117,-0.133767,-0.013487,0.000923,-1.533414,6.951390e-30,-0.057433,0.000305,0.009323,0.001066,0.038771,0.127176,0.040629,0.000055,0.063141,0.000010,-0.045662,0.002215,0.182232,0.000544,-0.128684,0.001742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160010,103982.0,107127.65,4.066301e+09,False,5,1541,ZWS,0.762291,-3.139746e-15,0.002439,3.228418,15.896758,-0.752358,0.577192,0.573424,-0.154147,-0.074996,-0.010378,0.000124,-1.974779,-3.552214e-18,0.288016,0.000002,-0.003889,0.000036,-0.003274,0.001379,-0.021714,0.000026,-0.008208,0.000002,-0.019764,0.001104,0.024043,0.000198,0.047756,0.000524
160011,160370.0,108231.04,4.060776e+09,False,4,1541,ZWS,0.793100,-4.579490e-15,0.001399,3.229276,15.914075,-0.749824,0.576721,0.555407,-0.133274,-0.058564,0.032792,0.000163,-2.177920,-3.552214e-18,-0.411605,0.000003,-0.002980,0.000040,-0.002118,0.001451,-0.022367,0.000028,-0.008702,0.000003,-0.004787,0.001098,-0.038660,0.000224,0.090447,0.000535
160012,1031511.0,117603.93,1.258062e+10,False,3,1541,ZWS,0.767982,-4.548268e-15,0.006788,3.364231,16.524853,-0.747284,0.526691,0.563935,-0.163990,-0.059153,-0.028708,0.000136,-1.868521,-3.552214e-18,0.820707,0.000005,0.037579,0.002156,0.034430,0.045315,-0.023350,0.000031,-0.093385,0.000344,0.008998,0.001116,-0.023602,0.000208,0.327306,0.000546
160013,382418.0,120666.56,1.326214e+10,False,2,1541,ZWS,0.750742,2.858428e-15,0.006570,3.333177,16.262226,-0.744650,0.546938,0.506274,-0.149322,-0.015747,-0.014877,0.000191,-0.462384,-3.552214e-18,7.506477,0.000006,0.030473,0.002933,0.020689,0.052069,-0.024309,0.000034,-0.056829,0.000410,0.021562,0.000969,0.045151,0.000197,0.018425,0.000543


#### Probit modelling

##### Modelling with all variables

In the cell below we are iterating over the three lists of parameters:
- horizons - how many hours before the transition are considered to be close enough to be prediction phase
- sizes - share of the positive observations in the whole modelling dataset - this parameter is important because in the original dataset share of positives for some of the horizons was to small, so we decided to use decrease size of the negative dataset and randomize it
- states - in order to avoid lucky random choices in the sizes randomization we use a list of different random states to average the results

In [23]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
target = 'Flag'
horizons = list(range(1, 9))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 500))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_cols = list(coeffs_cols) + ['const']
res_means = fun.save_results(res, res_cols)
res_means

100%|██████████| 8/8 [17:04<00:00, 128.05s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
1,0.05,0.05016,8740.0,2185.0,0.91705,0.9028,0.0,0.0,0.57445,0.59895,0.44475,0.48455,0.8156,0.8005,,1.607584,6.267405,0.806011,-0.041219,0.668487,1.083096,-0.689425,-0.954452,2.429622,-0.766993,-48.750093,8.734625e-16,-2.48641,0.001889,-388.416154,2.28238,-0.431408,-0.134771,0.0007,-1.446676,719.281892,1.639095,273.559374,-1.260763,-44.110917,,-37.053144,,-34.409319,-3.905914
1,0.1,0.100293,4371.0,1093.0,0.91705,0.9046,0.0,0.0,0.7236,0.70725,0.65555,0.64685,0.81155,0.7948,,0.932452,6.184795,0.832796,-0.04373,0.743014,1.263808,-0.924908,-1.114259,2.349438,,-54.762036,1.683301e-15,-3.644923,0.004566,-529.973318,2.551459,-0.627291,-0.149984,0.001167,-1.719918,864.886362,1.81899,356.849388,-1.508813,-54.288192,,-48.962652,,,-3.571782
1,0.15,0.150425,2914.0,729.0,0.91605,0.9133,0.0,0.0,0.7749,0.78225,0.7435,0.7621,0.81005,0.8092,,-1.840861,7.525784,0.865403,-0.04647,0.677284,1.32092,-1.032421,-1.198083,2.536896,,-56.920876,2.936649e-15,-5.261013,,,2.527806,-0.481449,-0.162402,0.001077,-1.874719,1089.417724,1.847678,408.925355,-1.622408,,,-48.844835,,,-3.367581
1,0.2,0.200512,2186.0,547.0,0.91795,0.9122,0.0,0.0,0.81155,0.80115,0.8135,0.78915,0.811,0.81845,-1.362815,1.186848,8.706753,0.856623,-0.046887,0.637647,1.399337,-1.145474,,2.598529,,-63.904992,4.739529e-15,-3.872457,0.00397,-503.927029,2.651688,-0.636803,-0.170072,0.001722,-2.211436,1102.293919,1.820314,494.315938,-1.778574,-50.472005,,-64.909306,,,-3.050465
2,0.05,0.050328,17421.0,4356.0,0.85525,0.8508,0.0,0.0,0.50195,0.5314,0.3958,0.43455,0.69495,0.69735,,,5.949856,0.576943,-0.029305,0.56574,0.780231,-0.533009,-0.75461,1.529387,,-32.037711,,-1.526988,0.000295,-262.077567,2.252509,-0.245854,-0.109087,0.000323,-0.58015,538.538131,1.677715,284.707936,-0.836844,,,-26.606743,,,-3.132078
2,0.1,0.100615,8714.0,2179.0,0.85725,0.8522,0.0,0.0,0.63565,0.6499,0.5833,0.6182,0.7027,0.6963,,2.064734,6.222017,0.607224,-0.03171,0.626832,0.881601,-0.659062,-0.806744,1.516031,,-34.863703,1.090834e-15,-6.85374,0.000381,-273.87235,2.423935,-0.502257,-0.120826,0.000935,-1.278146,681.664206,1.757455,376.414395,-0.999836,,,-32.822317,,-19.320801,-2.799415
2,0.15,0.150881,5811.0,1453.0,0.8579,0.8502,0.0,0.0,0.70105,0.69735,0.70255,0.69835,0.70215,0.70395,,1.706606,6.015912,0.599213,-0.031346,0.61958,0.88754,-0.728862,-0.858354,1.5459,,-37.872832,1.243321e-15,-3.121328,,-331.085645,2.504764,-0.620129,-0.136071,0.001236,-1.136778,739.938617,1.940004,446.850271,-1.065814,-30.389706,,-35.36893,,,-2.564423
2,0.2,0.201101,4360.0,1090.0,0.86165,0.84565,0.0,0.0,0.739,0.7246,0.7811,0.7724,0.7035,0.6861,,2.366407,6.702149,0.585349,-0.030286,0.666391,0.978562,-0.796565,-0.927206,1.4144,-0.563297,-38.679182,3.823556e-15,-5.297013,,-339.202137,2.503397,-0.544827,-0.130046,0.001316,-1.372624,820.220546,1.919745,501.212278,-1.090282,,,-30.653011,,,-2.339029
3,0.05,0.050495,26046.0,6512.0,0.8069,0.80015,0.0,0.0,0.4785,0.4732,0.401,0.40095,0.5991,0.594,,0.909015,4.71659,0.451179,-0.022806,0.476404,0.579283,-0.444694,-0.591065,1.407781,,-26.337869,,-1.815282,,-212.351931,2.379674,-0.185205,-0.10584,-0.000248,-0.492387,479.679233,1.556019,285.88537,-0.915205,-21.156899,,-19.565594,,,-2.737445
3,0.1,0.100933,13030.0,3258.0,0.8097,0.80575,0.0,0.0,0.58905,0.58295,0.5768,0.56215,0.60485,0.61445,,1.115722,4.418842,0.453154,-0.023059,0.508481,0.619484,-0.464519,-0.693387,1.376691,,-27.267071,,-4.294936,,-236.998279,2.526585,-0.382029,-0.11086,0.000433,-0.829317,565.441235,1.53422,375.499152,-1.034082,-24.16733,,-22.147374,0.008278,,-2.391228


In [24]:
# Get mean metrics for all of the columns to understand what variables are actually used in the final models
pd.DataFrame(round(res_means[np.in1d(res_means.index.get_level_values(0), list(range(1, 5)))].mean(), 2), columns = ['Average coeff'])
# round(res_means.mean(), 2)

Unnamed: 0,Average coeff
Train size,26860.75
Test size,6715.75
Train AUC,0.72
Test AUC,0.71
Train KS-test p-value,0.0
Test KS-test p-value,0.0
Train F1-score,0.42
Test F1-score,0.42
Train precision,0.42
Test precision,0.42


In [17]:
# Count a share of appearances for positive horizons
res = pd.read_parquet('Params/params_Probit.parquet')
res_1_5 = res[res['Horizon'].isin(range(1, 5))]
groups = ['Horizon', '1 Share', '1 Share real']
drops = ['State']
round(res_1_5.groupby(groups)[list(coeffs_cols) + ['const']].count() / 20, 2)
# round(res.groupby(groups)[list(coeffs_cols) + ['const']].count() / 20, 2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.05,0.05016,0.0,0.05,1.0,1.0,1.0,0.8,0.7,0.1,0.25,1.0,0.15,0.5,0.1,0.35,0.1,0.15,1.0,0.6,1.0,0.55,0.2,0.65,1.0,1.0,0.85,0.1,0.0,0.6,0.0,0.05,1.0
1,0.1,0.100293,0.0,0.15,0.9,1.0,1.0,0.75,0.45,0.15,0.15,1.0,0.0,0.2,0.05,0.25,0.1,0.05,1.0,0.7,1.0,0.65,0.35,0.75,1.0,1.0,0.75,0.1,0.0,0.2,0.0,0.0,1.0
1,0.15,0.150425,0.0,0.05,0.75,1.0,1.0,0.65,0.25,0.2,0.2,1.0,0.0,0.3,0.15,0.15,0.0,0.0,1.0,0.8,1.0,0.7,0.4,0.75,1.0,1.0,0.75,0.0,0.0,0.15,0.0,0.0,1.0
1,0.2,0.200512,0.1,0.15,0.65,1.0,1.0,0.75,0.15,0.1,0.0,1.0,0.0,0.2,0.2,0.1,0.05,0.1,1.0,0.75,1.0,0.6,0.45,0.9,1.0,1.0,0.7,0.05,0.0,0.15,0.0,0.0,1.0
2,0.05,0.050328,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.8,0.6,1.0,0.0,0.75,0.0,0.25,0.1,0.6,1.0,0.85,1.0,0.5,0.4,1.0,1.0,1.0,0.9,0.0,0.0,0.25,0.0,0.0,1.0
2,0.1,0.100615,0.0,0.15,0.9,1.0,1.0,1.0,0.9,0.8,0.45,1.0,0.0,0.8,0.15,0.3,0.1,0.4,1.0,0.8,1.0,0.7,0.5,0.95,1.0,1.0,0.9,0.0,0.0,0.05,0.0,0.05,1.0
2,0.15,0.150881,0.0,0.05,0.85,1.0,1.0,0.95,0.75,0.65,0.45,1.0,0.0,0.6,0.3,0.15,0.0,0.25,1.0,0.7,1.0,0.65,0.45,0.85,1.0,1.0,1.0,0.05,0.0,0.1,0.0,0.0,1.0
2,0.2,0.201101,0.0,0.1,0.85,1.0,1.0,0.95,0.65,0.65,0.4,1.0,0.05,0.55,0.15,0.25,0.0,0.4,1.0,0.8,0.95,0.55,0.65,0.9,1.0,1.0,0.9,0.0,0.0,0.1,0.0,0.0,1.0
3,0.05,0.050495,0.0,0.2,0.95,1.0,1.0,1.0,0.9,0.6,0.6,1.0,0.0,1.0,0.0,0.4,0.0,0.85,1.0,0.65,1.0,0.45,0.55,0.9,1.0,1.0,1.0,0.2,0.0,0.3,0.0,0.0,1.0
3,0.1,0.100933,0.0,0.25,0.95,1.0,1.0,1.0,0.85,0.65,0.5,1.0,0.0,1.0,0.0,0.3,0.0,0.7,1.0,0.75,1.0,0.5,0.75,1.0,1.0,1.0,1.0,0.1,0.0,0.25,0.05,0.0,1.0


In [18]:
# Count an aggregated share of appearances
pd.DataFrame(round(res_1_5[list(coeffs_cols) + ['const']].count() / len(res_1_5), 2), columns = ['Share'])
# pd.DataFrame(round(res[list(coeffs_cols) + ['const']].count() / len(res), 2), columns = ['Share'])

Unnamed: 0,Share
Hurst,0.01
CorrDim,0.13
Lyapunov,0.89
Skewness,1.0
Kurtosis,1.0
PSD,0.93
ACF_1,0.71
WL_C1,0.56
WL_C2,0.52
WL_C3,1.0


##### Visualization for the KS-test

In [27]:
# Vizual check of the single model
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')
target = 'Flag'
horizon = 8
share = 0.1
state = 2000
data_testing = data_logdyn.copy()
data_testing['Flag'] = data_testing['Distance'].apply(lambda x: 0 if x >= horizon else 1)
data_testing.drop(columns = ['Volume', 'MA100', 'MA100','Rise', 'Distance', 'Index', 'Ticker'], inplace = True)

data_testing_1 = data_testing[data_testing[target] == 1]
data_testing_0 = data_testing[data_testing[target] == 0]
Y_1 = data_testing_1[target]
X_1 = data_testing_1.drop(columns = [target])
share_1_orig = len(data_testing_1) / (len(data_testing_0) + len(data_testing_1))
_, X_0, _, Y_0 = modsel.train_test_split(data_testing_0.drop(columns = [target]), data_testing_0[target], 
                                                     test_size = min(share_1_orig * (1 - share) / share, 1), random_state = state)
share_1 = len(Y_1) / (len(Y_0) + len(Y_1))
Y = pd.concat([Y_0, Y_1])
X = sm.add_constant(pd.concat([X_0, X_1]))
X_train, X_test, Y_train, Y_test = modsel.train_test_split(X, Y, test_size = 0.2, random_state = state)
results_rs, auc_train_rs, auc_test_rs, ks_train_rs, ks_test_rs, f1_train_rs,\
    f1_test_rs, pr_train_rs, pr_test_rs, rec_train_rs, rec_test_rs\
    = fun.model_optimization(Y_train, Y_test, X_train, X_test, silent = True)
print(results_rs.summary())
Y_test_pred = results_rs.predict(X_test)
ks_samples = pd.DataFrame({'Y': Y_test, 'Y_pred': Y_test_pred})
ks_samples_posi = ks_samples[ks_samples['Y'] == 1]['Y_pred']
ks_samples_nega = ks_samples[ks_samples['Y'] == 0]['Y_pred']
fig = go.Figure()
fig.add_trace(go.Histogram(x = ks_samples_posi, name = 'Posi', nbinsx = 30))
fig.add_trace(go.Histogram(x = ks_samples_nega, name = 'Nega', nbinsx = 100))
fig.update_layout(barmode = 'overlay')
fig.update_traces(opacity = 0.75)
fig.show()

                          Probit Regression Results                           
Dep. Variable:                   Flag   No. Observations:                30024
Model:                         Probit   Df Residuals:                    30004
Method:                           MLE   Df Model:                           19
Date:                Wed, 07 Aug 2024   Pseudo R-squ.:                  0.1584
Time:                        14:16:27   Log-Likelihood:                -8320.4
converged:                       True   LL-Null:                       -9886.2
Covariance Type:            nonrobust   LLR p-value:                     0.000
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  -1.7121      0.074    -23.170      0.000      -1.857      -1.567
Lyapunov                3.0276      0.860      3.520      0.000       1.342       4.714
Skewness        

##### Modelling with separate variables

In [3]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
target = 'Flag'
horizons = list(range(1, 6))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 500))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res_sep = fun.model(data_logdyn, target, horizons, shares, states, separate = True)

# Save results and create pivot based on the horizon and 1 share parameters
res_means = fun.save_results(res, [], sep = True)
res_means[res_means['Test AUC'] >= 0.75].sort_values('Test AUC', ascending = False)

100%|██████████| 5/5 [16:58<00:00, 203.76s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Const,Const_Pvalue,Coef,Coef_Pvalue
Variable,Horizon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Skewness_8_Variance,1,4648.565789,1162.447368,0.940487,0.941184,0.0,0.0,0.703316,0.711487,0.612789,0.625868,0.850697,0.862592,-1.51277,3.296619e-217,0.652842,5.436307e-46
Kurtosis_8_Variance,1,4709.863014,1177.767123,0.936753,0.937918,0.0,0.0,0.666247,0.679959,0.561397,0.583945,0.850932,0.859603,-1.466881,2.058904e-204,0.001631,2.3831820000000003e-43
ACF_1_8_Variance,1,4552.75,1138.5,0.930875,0.931375,0.0,0.0,0.7542,0.738487,0.706313,0.673013,0.8255,0.845675,-1.660177,1.426042e-232,793.14515,4.93482e-73
Skewness_8_Variance,2,9501.5,2375.771429,0.895057,0.896329,0.0,0.0,0.635514,0.614543,0.568829,0.528614,0.7491,0.773514,-1.462848,0.0,0.813546,1.3489950000000002e-84
Kurtosis_8_Variance,2,9918.949153,2480.152542,0.890254,0.890881,0.0,0.0,0.577915,0.598424,0.486559,0.514203,0.759136,0.758661,-1.43851,0.0,0.001947,5.874377e-74
ACF_1_8_Variance,2,9076.5,2269.5,0.882225,0.882237,0.0,0.0,0.649175,0.639925,0.6003,0.584213,0.73,0.747825,-1.558918,0.0,860.292515,4.765923e-118
Skewness_8_Variance,3,14732.015385,3683.446154,0.860585,0.857877,0.0,0.0,0.526231,0.545954,0.439615,0.474785,0.704538,0.697769,-1.429076,0.0,0.765947,1.926762e-104
Kurtosis_8_Variance,3,16676.297872,4169.553191,0.853809,0.850979,0.0,0.0,0.479596,0.488,0.382043,0.393234,0.70183,0.703,-1.460052,0.0,0.001748,3.6093349999999997e-90
ACF_1_8_Variance,3,13572.25,3393.5,0.842025,0.839112,0.0,0.0,0.5547,0.565813,0.494675,0.504663,0.676063,0.683462,-1.477161,0.0,802.732695,1.2669080000000002e-153
Skewness_8_Variance,4,19435.953125,4859.3125,0.820844,0.819078,0.0,0.0,0.469031,0.484,0.393828,0.418391,0.630016,0.624328,-1.367296,0.0,0.64628,1.643859e-108


In [12]:
# Getting highest AUC values for variables
res_sep = pd.read_parquet('Data/params_sep.parquet')
res_sep.groupby(groups)['Test AUC'].mean().to_frame().reset_index(1, drop = False).groupby('Variable')['Test AUC'].max().sort_values(ascending = False).round(2)

Variable
Skewness_8_Variance    0.94
Kurtosis_8_Variance    0.94
ACF_1_8_Variance       0.93
Skewness               0.81
Skewness_8_dyn         0.81
Kurtosis               0.80
Kurtosis_8_dyn         0.77
Lyapunov_8_Variance    0.76
Lyapunov               0.74
CorrDim                0.64
CorrDim_8_Variance     0.58
PSD_8_dyn              0.57
WL_C1_8_dyn            0.57
ACF_1                  0.55
WL_C1                  0.54
PSD_8_Variance         0.54
WL_C2                  0.53
WL_C2_8_Variance       0.53
PSD                    0.52
ACF_1_8_dyn            0.52
Hurst_8_Variance       0.51
WL_C1_8_Variance       0.51
WL_C3                  0.51
Hurst_8_dyn            0.51
WL_C3_8_dyn            0.51
WL_C2_8_dyn            0.51
Hurst                  0.50
WL_C3_8_Variance       0.50
CorrDim_8_dyn          0.50
Lyapunov_8_dyn         0.49
Name: Test AUC, dtype: float64

#### Random forest modelling

In [2]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
model = 'RF'
target = 'Flag'
horizons = list(range(1, 13))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 50))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model)
res_means

100%|██████████| 12/12 [16:24:13<00:00, 4921.12s/it]  


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.97123,0.95024,0.0,0.0,0.650075,0.616825,0.518185,0.4877,0.88828,0.858865,-1.73913e-05,3.844394e-05,0.000298,0.000538,0.000345,7e-06,3.9e-05,-6.864989e-07,4.4e-05,-1.5e-05,4.805492e-06,-2.7e-05,2.3e-05,1.6e-05,4.23341e-05,2.6e-05,0.000651,0.000853,8.9e-05,0.000912,1.4e-05,-1e-05,0.000284,0.001514,1.7e-05,-3e-06,2.494279e-05,-1.487414e-05,-3e-06,6.3e-05
1,0.1,0.100293,4371.0,1093.0,0.97436,0.950715,0.0,0.0,0.776845,0.741195,0.693145,0.656615,0.89363,0.862015,-1.143641e-05,-4.574565e-07,0.000463,0.000924,0.000393,5.8e-05,9.1e-05,0.0001120769,5e-05,7.3e-05,-2.698994e-05,-5.3e-05,0.0001,-1e-05,0.0001061299,0.000333,0.001106,0.00169,0.00022,0.001906,5.2e-05,-3.8e-05,0.00137,0.004398,0.000126,-4.2e-05,-1.829826e-06,5.215005e-05,1.1e-05,0.000119
1,0.15,0.150425,2914.0,729.0,0.976155,0.9525,0.0,0.0,0.821585,0.8069,0.75824,0.761115,0.90431,0.86464,-7.133059e-05,-3.703704e-05,0.00025,0.000486,0.000147,-6.9e-05,-4e-05,-1.303155e-05,-9e-05,-3.5e-05,-0.0001097394,-8.4e-05,1.5e-05,-6.1e-05,-6.241427e-05,0.000434,0.001481,0.00151,0.000173,0.001336,3.4e-05,-0.000112,0.002153,0.006383,8.6e-05,-0.00013,-5.829904e-05,-6.378601e-05,-1.6e-05,-4.7e-05
1,0.2,0.200512,2186.0,547.0,0.97821,0.951085,0.0,0.0,0.855235,0.836185,0.807225,0.813295,0.913325,0.86501,-1.1883e-05,-1.371115e-05,0.000159,0.000591,0.0004,-5.8e-05,6.6e-05,5.210238e-05,-7.2e-05,0.000102,-1.371115e-05,-3.7e-05,0.000113,-0.000134,7.86106e-05,0.000347,0.001378,0.001277,0.000803,0.000682,8.2e-05,2.6e-05,0.002122,0.007578,9e-05,-8e-06,6.124314e-05,-5.850091e-05,0.000135,7.2e-05
2,0.05,0.050328,17421.0,4356.0,0.934575,0.915465,0.0,0.0,0.472875,0.490255,0.33808,0.36422,0.826325,0.79675,-9.641873e-06,5.096419e-05,0.000275,0.000594,0.000299,1.5e-05,5.8e-05,2.215335e-05,1e-05,-1.5e-05,5.165289e-06,4.1e-05,6.8e-05,3.1e-05,3.604224e-05,4.3e-05,0.000805,0.000869,0.000357,0.000978,4.8e-05,6e-06,0.000348,0.001336,-2e-06,9e-06,1.262626e-06,9.182736e-07,3.1e-05,8e-05
2,0.1,0.100615,8714.0,2179.0,0.938615,0.91659,0.0,0.0,0.64031,0.634235,0.52862,0.540395,0.824935,0.796775,2.937127e-05,6.14961e-05,0.000318,0.000755,0.000367,8.9e-05,6.5e-05,4.107389e-05,3.9e-05,4.7e-05,6.883892e-06,2.1e-05,4.3e-05,3.2e-05,8.604865e-05,0.000144,0.001275,0.001355,0.000553,0.001594,0.000138,1.2e-05,0.000895,0.002458,0.000119,4.1e-05,2.753557e-06,4.543369e-05,1.6e-05,0.000153
2,0.15,0.150881,5811.0,1453.0,0.94052,0.919115,0.0,0.0,0.71432,0.70434,0.628585,0.63739,0.832795,0.80633,-2.167928e-05,1.995871e-05,0.000444,0.00082,0.000376,4.7e-05,0.00012,6.331727e-05,6e-05,3.5e-05,1.823813e-05,1e-05,5.7e-05,3.9e-05,4.576738e-05,0.000385,0.001857,0.001807,0.001284,0.002055,0.000199,2.2e-05,0.00226,0.004044,4.7e-05,5e-05,3.441156e-05,5.609085e-05,5.4e-05,0.000124
2,0.2,0.201101,4360.0,1090.0,0.94299,0.91741,0.0,0.0,0.76497,0.746605,0.704445,0.709125,0.840105,0.80435,3.807339e-05,1.100917e-05,0.000345,0.00063,0.000268,7.9e-05,0.000162,6.376147e-05,0.000131,9.8e-05,1.238532e-05,2.7e-05,0.000114,2e-05,4.724771e-05,0.000285,0.001356,0.001653,0.001256,0.001635,0.00018,5.6e-05,0.002114,0.005139,0.000154,6e-06,1.192661e-05,3.944954e-05,0.000102,0.000146
3,0.05,0.050495,26046.0,6512.0,0.90163,0.8835,0.0,0.0,0.35706,0.360995,0.22996,0.239055,0.805785,0.78163,1.297604e-05,5.873771e-05,0.000334,0.00065,0.000382,5.1e-05,5.9e-05,4.990786e-05,3.6e-05,2.7e-05,7.52457e-06,3.3e-05,8.9e-05,4.3e-05,5.328624e-05,6.7e-05,0.000867,0.000802,0.000355,0.000914,5.5e-05,2.8e-05,0.000298,0.001074,7.5e-05,1.8e-05,1.251536e-05,3.754607e-05,3.4e-05,0.000139
3,0.1,0.100933,13030.0,3258.0,0.905925,0.88665,0.0,0.0,0.543815,0.53381,0.415615,0.412955,0.79266,0.77548,-3.83671e-06,2.869859e-05,0.00037,0.000634,0.000296,3.4e-05,5.7e-05,6.629834e-05,3.8e-05,2.2e-05,1.841621e-06,1.6e-05,8.3e-05,2.1e-05,8.670964e-05,0.000158,0.001926,0.001611,0.00093,0.001649,9.7e-05,2.3e-05,0.000775,0.002273,0.000122,1.3e-05,1.058932e-05,5.709024e-05,1.2e-05,0.000172


In [19]:
# Count an average feature importance
res = pd.read_parquet(f'Params/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 10))]
groups = ['Horizon']

# Get the final pivot and multiply by 100000 to make results more readable
means = round(res_viable[coeffs_cols].mean() * 100000, 2)
print(round(means.mean(), 2), round(means.median(), 2))
means

36.79 6.88


Hurst                    0.75
CorrDim                  2.28
Lyapunov                28.72
Skewness                58.21
Kurtosis                28.64
PSD                      7.98
ACF_1                    7.13
WL_C1                    6.62
WL_C2                    3.23
WL_C3                    4.36
Hurst_8_dyn              0.65
Hurst_8_Variance         2.38
CorrDim_8_dyn            4.72
CorrDim_8_Variance       0.85
Lyapunov_8_dyn           4.97
Lyapunov_8_Variance     18.06
Skewness_8_dyn         147.36
Skewness_8_Variance    132.64
Kurtosis_8_dyn          85.83
Kurtosis_8_Variance    123.03
PSD_8_dyn               14.87
PSD_8_Variance           4.12
ACF_1_8_dyn            116.87
ACF_1_8_Variance       256.66
WL_C1_8_dyn             12.80
WL_C1_8_Variance         3.30
WL_C2_8_dyn              1.23
WL_C2_8_Variance         5.60
WL_C3_8_dyn              2.61
WL_C3_8_Variance        17.19
dtype: float64

#### SVM modelling

In [4]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
model = 'SVM'
target = 'Flag'
horizons = [1]
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 50))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model)
res_means

100%|██████████| 1/1 [1:45:53<00:00, 6353.89s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.56989,0.56531,2.2765e-08,0.013104,0.22232,0.221125,0.34747,0.341625,0.250755,0.27472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.1,0.100293,4371.0,1093.0,0.5618,0.56075,1.1238e-07,0.014266,0.27198,0.278935,0.47771,0.476365,0.24239,0.264665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.15,0.150425,2914.0,729.0,0.55749,0.553055,3.4804e-07,0.03026,0.293475,0.300085,0.531065,0.541495,0.24906,0.26631,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.2,0.200512,2186.0,547.0,0.55734,0.55189,9.26685e-07,0.028407,0.31488,0.322255,0.564635,0.586075,0.26012,0.27235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Count an average feature importance
res = pd.read_parquet(f'Params/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 2))]
groups = ['Horizon']

# Get the final pivot and multiply by 100000 to make results more readable
means = round(res_viable[coeffs_cols].mean() * 100000, 2)
print(round(means.mean(), 2), round(means.median(), 2))
means

36.16 0.0


Hurst                     0.00
CorrDim                   0.00
Lyapunov                  0.00
Skewness                  0.00
Kurtosis                  0.00
PSD                       0.00
ACF_1                     0.00
WL_C1                     0.00
WL_C2                     0.00
WL_C3                     0.00
Hurst_8_dyn               0.00
Hurst_8_Variance          0.00
CorrDim_8_dyn          1084.83
CorrDim_8_Variance        0.00
Lyapunov_8_dyn            0.00
Lyapunov_8_Variance       0.00
Skewness_8_dyn            0.00
Skewness_8_Variance       0.00
Kurtosis_8_dyn            0.00
Kurtosis_8_Variance       0.00
PSD_8_dyn                 0.00
PSD_8_Variance            0.00
ACF_1_8_dyn               0.00
ACF_1_8_Variance          0.00
WL_C1_8_dyn               0.00
WL_C1_8_Variance          0.00
WL_C2_8_dyn               0.00
WL_C2_8_Variance          0.00
WL_C3_8_dyn               0.00
WL_C3_8_Variance          0.00
dtype: float64

#### Gradient boosting modelling

##### LightGBM

In [13]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
model = 'LightGBM'
target = 'Flag'
horizons = list(range(1, 16))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 50))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model)
res_means

100%|██████████| 15/15 [1:24:18<00:00, 337.22s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.96503,0.957085,0.0,0.0,0.595425,0.597725,0.451945,0.465995,0.88272,0.870375,3.689461,55.565708,56.535701,359.110749,53.657348,8.165762,15.467618,11.174157,3.379298,6.727479,13.142833,3.997521,10.386102,7.467779,12.463744,11.195115,4098.026721,2563.714153,285.900751,369.556931,29.663953,16.900211,113.438889,3580.325939,24.062805,3.632483,8.567037,6.770136,4.616648,3.528314
1,0.1,0.100293,4371.0,1093.0,0.963585,0.956535,0.0,0.0,0.73885,0.725875,0.64121,0.631695,0.877725,0.87251,3.153985,24.880466,33.436975,260.903106,52.92095,5.761195,8.208002,6.376834,2.555287,2.565292,5.161843,2.24315,5.816305,7.086823,8.661142,9.42294,1138.467215,2488.40531,276.192257,498.019521,23.304669,13.475754,75.322039,4619.102459,16.002727,2.724192,5.1907,5.103548,3.048277,2.370007
1,0.15,0.150425,2914.0,729.0,0.964905,0.957765,0.0,0.0,0.800325,0.79551,0.736035,0.738075,0.880625,0.872495,3.70472,20.369801,24.615643,195.075933,44.809295,4.990406,9.11258,6.434717,2.142296,2.860293,3.633534,2.764123,5.749457,6.799284,7.038731,11.509117,456.384993,2272.018362,291.816641,338.369009,26.468381,14.50891,65.610191,4584.915519,20.447475,3.424834,5.375797,6.473474,3.604691,3.544315
1,0.2,0.200512,2186.0,547.0,0.96542,0.95553,0.0,0.0,0.838965,0.829545,0.803645,0.797945,0.88031,0.870655,3.51244,16.3156,16.101811,140.931781,40.246139,5.334229,7.822971,5.319965,1.70813,2.321003,1.986454,3.076994,4.805404,5.541843,5.087591,9.910014,345.566406,2114.003341,285.343773,333.894084,25.091474,11.896617,57.435062,4034.669029,14.734025,3.342497,4.631655,6.875956,3.115261,2.26768
2,0.05,0.050328,17421.0,4356.0,0.93972,0.922665,0.0,0.0,0.461675,0.472385,0.32015,0.341045,0.836425,0.81129,8.971417,76.903366,137.669843,679.823972,59.579326,45.356028,75.851023,19.535655,16.266865,13.224766,29.456556,36.194252,26.254349,51.558691,52.06518,20.516527,7812.353448,4391.811424,839.884024,429.726576,163.774491,53.079493,255.698398,4888.67308,66.191815,12.98099,24.630603,22.395939,20.723495,36.440328
2,0.1,0.100615,8714.0,2179.0,0.941555,0.92156,0.0,0.0,0.62934,0.62029,0.505615,0.512905,0.83743,0.81116,9.395422,59.732909,92.842852,502.900521,85.358318,39.214827,53.938324,13.969715,14.411012,13.225943,24.090676,35.55038,24.754168,54.763969,49.042275,21.553512,2774.788691,4289.750703,858.032575,1044.369579,161.914222,45.317059,190.803502,6444.967755,59.226787,11.130781,24.322491,20.781218,19.140925,41.398266
2,0.15,0.150881,5811.0,1453.0,0.942005,0.92328,0.0,0.0,0.7117,0.700895,0.619775,0.62585,0.83881,0.812245,10.035821,51.040578,65.532483,367.559772,93.591566,42.052062,43.908706,14.397668,15.77428,12.212533,20.871279,30.768442,20.022894,54.729531,46.38065,20.946061,1192.573195,3814.157843,867.64796,878.748944,160.330325,40.34678,156.375756,6759.804777,51.121227,10.492052,25.725327,19.067122,15.936863,42.152392
2,0.2,0.201101,4360.0,1090.0,0.94351,0.92139,0.0,0.0,0.764905,0.741145,0.703815,0.686955,0.83966,0.81423,10.729012,44.576006,57.826259,284.659387,76.263841,44.12509,40.416114,12.904507,16.40938,10.488502,20.544385,32.016937,19.877466,54.832801,42.869761,20.470509,906.380246,3734.080231,827.481293,639.578033,155.590505,41.207961,131.034945,5976.292466,49.974588,12.282957,26.721957,20.781833,15.978206,41.381309
3,0.05,0.050495,26046.0,6512.0,0.916855,0.893575,0.0,0.0,0.41493,0.390335,0.28205,0.264425,0.79134,0.77452,11.414969,89.72069,193.891654,921.904027,102.762938,117.485629,107.905653,55.201148,35.232698,23.313612,17.953025,55.347104,39.481823,143.73691,23.468996,110.297441,10161.261723,5786.248765,1336.324783,568.330948,361.113376,63.569644,316.59178,5483.358173,133.148298,30.320558,50.473402,59.677188,43.712843,133.311429
3,0.1,0.100933,13030.0,3258.0,0.917745,0.89517,0.0,0.0,0.575975,0.550735,0.45435,0.43246,0.79044,0.77114,11.705996,79.346675,140.490056,607.807942,109.135122,98.185715,78.64083,36.552626,34.152426,17.187876,12.770335,46.190573,29.565735,123.284524,20.785766,80.732655,5941.377532,5180.821079,1357.692494,955.599279,342.055073,50.457853,223.886612,6191.767671,118.912915,30.481529,46.177703,39.52838,44.734673,128.850717


In [23]:
# Count an average feature importance
res = pd.read_parquet(f'Params/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 14))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(round(means.mean(), 2), round(means.median(), 2))
means

794.44 167.46


Hurst                    22.86
CorrDim                  46.15
Lyapunov                224.01
Skewness                782.72
Kurtosis                149.56
PSD                     382.55
ACF_1                   185.35
WL_C1                    47.60
WL_C2                   188.48
WL_C3                    64.75
Hurst_8_dyn               7.09
Hurst_8_Variance         55.64
CorrDim_8_dyn            16.61
CorrDim_8_Variance      469.18
Lyapunov_8_dyn           11.48
Lyapunov_8_Variance     136.60
Skewness_8_dyn         6881.82
Skewness_8_Variance    4016.62
Kurtosis_8_dyn         1497.51
Kurtosis_8_Variance     547.93
PSD_8_dyn              1152.50
PSD_8_Variance          147.14
ACF_1_8_dyn             404.47
ACF_1_8_Variance       5466.74
WL_C1_8_dyn             412.06
WL_C1_8_Variance         89.66
WL_C2_8_dyn              24.32
WL_C2_8_Variance        119.24
WL_C3_8_dyn              16.34
WL_C3_8_Variance        266.21
dtype: float64

##### XGBoost

In [24]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
model = 'XGBoost'
target = 'Flag'
horizons = list(range(1, 16))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 50))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model)
res_means

100%|██████████| 15/15 [1:52:28<00:00, 449.92s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.965655,0.957605,0.0,0.0,0.598585,0.594765,0.45509,0.462275,0.887175,0.8726,3.353556,5.817527,5.267846,14.672528,6.727177,3.452366,4.098412,3.543328,3.582201,3.154433,3.306322,3.21222,3.602224,3.611179,4.010669,3.841106,103.233725,56.00796,8.382118,37.968978,5.217821,4.459786,7.916582,100.984789,4.502708,3.232231,3.214632,3.702088,3.405642,3.412679
1,0.1,0.100293,4371.0,1093.0,0.9664,0.956785,0.0,0.0,0.7439,0.71763,0.64318,0.617005,0.88728,0.877845,3.12083,4.858677,4.534715,11.94806,7.195119,3.192029,3.525644,3.115566,3.276862,2.789397,2.991917,2.93631,3.324789,3.271208,3.74328,3.566305,39.487573,66.00997,8.6389,44.27139,4.695281,4.096785,6.332323,134.348912,3.977993,2.957415,3.002004,3.505512,3.009674,2.98131
1,0.15,0.150425,2914.0,729.0,0.966845,0.958055,0.0,0.0,0.80733,0.794445,0.74369,0.736625,0.886605,0.873485,2.991208,4.335059,3.919076,9.966953,6.620854,3.030116,3.394166,2.893196,2.978255,2.763973,2.867471,2.814949,3.11528,3.026329,3.419793,3.217707,14.041361,61.086724,8.828114,30.639377,4.41285,3.736884,5.348838,148.240901,3.690595,2.765258,2.822377,3.432025,2.845269,2.826853
1,0.2,0.200512,2186.0,547.0,0.96641,0.955255,0.0,0.0,0.841035,0.82761,0.80176,0.793625,0.88721,0.872185,2.799003,3.831761,3.458541,8.081617,5.621507,2.885729,3.056807,2.66889,2.812951,2.468968,2.73912,2.704747,2.956383,2.902647,3.090135,3.014597,10.243525,60.722739,8.930286,20.798109,4.243931,3.613091,4.827606,130.550659,3.423627,2.590654,2.658554,3.252127,2.637597,2.592656
2,0.05,0.050328,17421.0,4356.0,0.954865,0.92577,0.0,0.0,0.508515,0.47018,0.3622,0.336885,0.863985,0.816435,4.128213,6.619333,9.293149,21.347727,7.627507,5.447912,7.145346,5.278411,4.545286,5.005584,4.694712,4.433541,4.937935,6.43296,5.854332,6.169264,165.34848,93.234996,16.982584,63.55851,8.634895,6.047501,11.676594,132.627102,6.897936,5.055456,4.473557,5.608345,4.341151,5.214837
2,0.1,0.100615,8714.0,2179.0,0.956005,0.9243,0.0,0.0,0.6738,0.624,0.554255,0.516125,0.863275,0.813135,3.873481,5.489085,7.522375,17.400371,7.637137,5.088829,6.044259,4.830464,4.006038,4.58496,4.486916,4.210081,4.540614,6.042526,5.515586,5.667831,81.723186,118.816807,18.32293,61.437231,8.186795,5.409755,8.655232,196.407294,6.171045,4.469389,4.203335,5.285492,4.170033,5.001377
2,0.15,0.150881,5811.0,1453.0,0.95692,0.92569,0.0,0.0,0.748075,0.7034,0.65906,0.626995,0.867945,0.813305,3.70116,4.956338,6.013432,14.088312,8.856224,4.864938,5.504215,4.676879,4.01477,4.331431,4.314524,4.083422,4.256781,5.951095,5.153853,5.308746,27.302983,101.707975,19.587341,58.173197,7.924942,4.999841,7.556967,258.476866,5.758051,4.295136,4.014221,4.908197,4.028721,4.838445
2,0.2,0.201101,4360.0,1090.0,0.959145,0.923405,0.0,0.0,0.801135,0.74911,0.744285,0.703925,0.86912,0.809035,3.835669,4.597776,5.402836,11.911917,7.173551,4.76168,5.109103,4.366299,3.870797,4.285112,4.151618,3.93224,4.094188,5.664981,5.092432,5.007958,19.295038,112.729655,19.542386,40.311424,7.719307,4.837133,6.600387,228.090545,5.333616,4.19183,3.832928,4.859101,3.941761,4.485078
3,0.05,0.050495,26046.0,6512.0,0.941325,0.90215,0.0,0.0,0.44142,0.405725,0.30011,0.27835,0.842545,0.780465,5.673102,7.57561,13.017172,29.034658,9.54401,8.06747,9.072469,6.720861,5.348208,5.959819,5.767725,6.567055,6.171644,9.132984,6.799817,8.363147,189.623482,119.598874,26.244856,72.949104,12.617104,7.697622,14.04677,159.494724,9.887264,6.885912,5.73394,7.109133,5.482285,8.614897
3,0.1,0.100933,13030.0,3258.0,0.94339,0.902135,0.0,0.0,0.61296,0.564585,0.48313,0.44829,0.84349,0.77885,5.222284,6.183639,10.117351,19.625281,8.788931,7.288457,7.864241,5.749861,4.969696,5.626064,5.295969,6.211219,5.688711,8.640113,6.388505,7.356998,160.793368,152.551567,27.929645,37.758966,11.846069,6.798458,10.255562,136.0796,8.431082,6.213651,5.324501,6.711506,5.092793,7.88327


In [25]:
# Count an average feature importance
res = pd.read_parquet(f'Params/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(round(means.mean(), 2), round(means.median(), 2))
means

33.41 16.29


Hurst                   12.82
CorrDim                 10.80
Lyapunov                18.29
Skewness                25.77
Kurtosis                17.52
PSD                     16.46
ACF_1                   17.85
WL_C1                   12.15
WL_C2                   12.48
WL_C3                   11.69
Hurst_8_dyn              7.25
Hurst_8_Variance        14.35
CorrDim_8_dyn            8.56
CorrDim_8_Variance      20.57
Lyapunov_8_dyn           7.99
Lyapunov_8_Variance     14.26
Skewness_8_dyn         184.81
Skewness_8_Variance    112.35
Kurtosis_8_dyn          43.51
Kurtosis_8_Variance     34.67
PSD_8_dyn               29.34
PSD_8_Variance          16.12
ACF_1_8_dyn             22.65
ACF_1_8_Variance       245.05
WL_C1_8_dyn             23.22
WL_C1_8_Variance        12.18
WL_C2_8_dyn              8.53
WL_C2_8_Variance        15.98
WL_C3_8_dyn              6.31
WL_C3_8_Variance        18.89
dtype: float64

##### CatBoost

In [26]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
model = 'CatBoost'
target = 'Flag'
horizons = list(range(1, 16))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 50))

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model)
res_means

100%|██████████| 15/15 [18:29:39<00:00, 4438.65s/it]  


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.962275,0.954885,0.0,0.0,0.62508,0.618865,0.488985,0.49143,0.87473,0.86684,0.096798,0.881951,0.957115,2.003388,0.949791,0.263823,0.267861,0.546535,0.369724,0.235135,0.265869,0.131981,0.470035,0.339937,0.487279,0.462914,20.312529,15.504169,12.62139,12.23422,0.904363,0.541771,3.878795,22.769514,0.8399,0.13679,0.237304,0.258515,0.109724,0.920882
1,0.1,0.100293,4371.0,1093.0,0.961345,0.955195,0.0,0.0,0.74829,0.7325,0.656175,0.640395,0.875185,0.86943,0.080173,0.647821,0.643526,1.285855,0.942232,0.218338,0.159896,0.294357,0.167231,0.139847,0.198446,0.113901,0.334871,0.287548,0.368719,0.383785,18.884239,19.479604,9.348431,12.38492,0.681738,0.410254,2.508247,28.253749,0.564677,0.118309,0.185452,0.195058,0.093776,0.624999
1,0.15,0.150425,2914.0,729.0,0.960635,0.956435,0.0,0.0,0.80171,0.801385,0.741905,0.749695,0.87407,0.869235,0.076014,0.489629,0.534333,0.895407,0.707544,0.159444,0.130071,0.212389,0.125517,0.114015,0.161851,0.075463,0.216396,0.270516,0.25836,0.319375,17.857282,20.666866,7.478939,13.80314,0.575269,0.317324,1.778313,31.437643,0.450109,0.087205,0.110739,0.200922,0.063062,0.426863
1,0.2,0.200512,2186.0,547.0,0.96028,0.95466,0.0,0.0,0.835185,0.83018,0.80312,0.7989,0.87136,0.86952,0.083295,0.396591,0.41413,0.695743,0.578709,0.124187,0.10621,0.152384,0.086986,0.079077,0.117948,0.059692,0.135215,0.225788,0.180161,0.257007,17.184397,21.602789,7.541711,14.573776,0.438739,0.242078,1.238339,32.417658,0.338773,0.07744,0.108409,0.171884,0.054437,0.316451
2,0.05,0.050328,17421.0,4356.0,0.94373,0.923665,0.0,0.0,0.51197,0.486755,0.371975,0.355795,0.838715,0.80892,0.337652,0.918391,1.635978,2.396628,1.471022,1.43445,1.424869,0.612829,0.674649,0.625448,0.58488,0.820463,0.537673,0.893444,1.110767,0.994145,15.712108,13.406294,15.437935,8.049631,3.475746,1.165967,3.556131,17.522522,0.962441,0.493764,0.970036,0.790634,0.4689,1.514603
2,0.1,0.100615,8714.0,2179.0,0.9378,0.92106,0.0,0.0,0.65,0.618695,0.538015,0.50894,0.831715,0.811195,0.196357,0.634532,1.119274,1.629863,0.960527,0.835939,0.806178,0.354008,0.415682,0.344352,0.354625,0.555045,0.34436,0.689695,0.756637,0.662728,19.364734,16.982601,13.063937,9.238137,2.305088,0.784379,2.308862,21.738729,0.659259,0.380184,0.570575,0.580009,0.324907,1.038796
2,0.15,0.150881,5811.0,1453.0,0.9406,0.923815,0.0,0.0,0.72835,0.70154,0.648945,0.62533,0.835155,0.81422,0.249985,0.658381,0.977363,1.367889,0.913643,0.908803,0.748655,0.383307,0.470587,0.365017,0.392997,0.507853,0.342505,0.74727,0.810377,0.640379,17.630693,15.054065,12.319657,11.68058,2.277467,0.768769,1.848104,24.353955,0.67445,0.377538,0.598285,0.538848,0.345421,1.047156
2,0.2,0.201101,4360.0,1090.0,0.941345,0.92196,0.0,0.0,0.77246,0.74629,0.717145,0.699925,0.840345,0.809595,0.237086,0.626833,0.933737,1.194137,0.813812,0.777515,0.658896,0.319264,0.446238,0.300029,0.366693,0.485902,0.278041,0.716615,0.757573,0.586801,18.44608,17.165888,10.618366,12.007079,2.061081,0.730436,1.576682,24.629495,0.61811,0.315457,0.523072,0.594476,0.313296,0.901311
3,0.05,0.050495,26046.0,6512.0,0.938665,0.902265,0.0,0.0,0.46837,0.416385,0.329275,0.28881,0.82491,0.776475,0.483948,1.008304,2.475068,2.539241,1.556176,3.324321,2.007795,1.198234,1.277006,1.171881,0.54149,1.368965,0.721091,1.787594,0.768233,2.796031,13.74325,8.917151,13.797296,5.675455,5.527603,1.812075,3.54814,11.676834,1.351916,1.862292,1.360587,2.100154,1.003227,2.59864
3,0.1,0.100933,13030.0,3258.0,0.94327,0.904335,0.0,0.0,0.630105,0.574015,0.506885,0.46134,0.837365,0.776505,0.557191,0.989632,2.208559,2.167004,1.456376,2.965363,1.727128,1.245821,1.299305,1.203849,0.575851,1.291435,0.723327,1.669427,0.855869,2.600236,15.057678,11.205801,11.968239,5.763735,4.836986,1.506418,2.775847,13.571312,1.367276,1.730259,1.325511,1.876064,1.090013,2.38849


In [27]:
# Count an average feature importance
res = pd.read_parquet(f'Params/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(round(means.mean(), 2), round(means.median(), 2))
means

3.33 2.65


Hurst                   0.55
CorrDim                 1.15
Lyapunov                1.87
Skewness                3.92
Kurtosis                3.09
PSD                     6.00
ACF_1                   3.08
WL_C1                   2.24
WL_C2                   2.49
WL_C3                   1.61
Hurst_8_dyn             0.33
Hurst_8_Variance        1.08
CorrDim_8_dyn           0.35
CorrDim_8_Variance      3.11
Lyapunov_8_dyn          0.49
Lyapunov_8_Variance     2.81
Skewness_8_dyn         10.89
Skewness_8_Variance     7.92
Kurtosis_8_dyn          7.93
Kurtosis_8_Variance     5.02
PSD_8_dyn               7.25
PSD_8_Variance          2.40
ACF_1_8_dyn             2.28
ACF_1_8_Variance        9.46
WL_C1_8_dyn             1.82
WL_C1_8_Variance        2.82
WL_C2_8_dyn             0.58
WL_C2_8_Variance        2.97
WL_C3_8_dyn             0.55
WL_C3_8_Variance        3.92
dtype: float64

#### Gradient boosting with Optuna and SHAP

In [27]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose horizon and set random state
horizons = list(range(1, 2))
state = 2025

# Iterate over the chosen parameters and optimize gradient boosting classification models
fun.optuna_and_boosting(data_logdyn, horizons, state)


 LightGBM, 1 horizon:


  0%|          | 0/5 [00:00<?, ?it/s]


 XGBoost, 1 horizon:


  0%|          | 0/5 [00:00<?, ?it/s]


 CatBoost, 1 horizon:


  0%|          | 0/5 [00:00<?, ?it/s]

### Solar data

#### Data and params

In [8]:
# Fix params for the boostrap
target = 'Flag'
horizons = list(range(4, 9))
shares = np.linspace(0.05, 0.07, 3)
states = list(range(0, 10000, 100))
# horizons = [4]
# shares = [0.05]
# states = list(range(0, 10000, 1000))
# states = [0]
path = 'Params_solar'

# Read dataset
data_logdyn = pd.read_parquet('Data/solar_metrics_smoothed.parquet')

##### Probit

In [9]:
# Choose binary target and other parameters
model = 'Probit'

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols) + ['const'],
                             model = model,
                             path = path)
res_means

100%|██████████| 5/5 [49:34<00:00, 594.94s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Validation size,Test size,Train AUC,Validation AUC,Test AUC,Train KS-test p-value,Validation KS-test p-value,Test KS-test p-value,Train F1-score,Validation F1-score,Test F1-score,Train precision,Validation precision,Test precision,Train recall,Validation recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
4,0.05,0.050679,8856.0,1688.0,2637.0,0.92077,0.91462,0.91675,0.0,0.0,0.0,0.54601,0.54445,0.53972,0.41344,0.41493,0.40833,0.81799,0.82421,0.82388,3.024825,0.426348,7.231688,0.234211,-0.074103,-0.852499,-50.029031,,-0.70953,-0.546672,-3.360205,,-1.26846,-21.418252,,-1428.776539,,-0.727701,-0.003875,0.01884,15.621426,-1563.121682,-42.135298,-3386.37103,,47.925949,,-32.162416,,18.153407,43.976924
4,0.06,0.060805,7381.0,1407.0,2198.0,0.92188,0.9181,0.91399,0.0,0.0,0.0,0.58176,0.59071,0.58642,0.45376,0.46794,0.46456,0.8201,0.83075,0.81807,3.064345,0.367916,8.282198,0.223167,-0.076221,-0.860648,-49.459457,,-0.745192,-0.092434,-3.070593,-1154.956611,-1.508745,-28.595115,,-1555.235562,,-0.923186,-0.003768,0.021952,16.496143,-1639.247429,-45.898315,-3699.803765,-1.077492,47.998679,,-33.44873,-0.004578,13.778143,43.631429
4,0.07,0.070928,6328.0,1206.0,1884.0,0.92312,0.91541,0.91498,0.0,0.0,0.0,0.6188,0.61128,0.61153,0.50094,0.49462,0.49407,0.81988,0.82898,0.82362,3.369387,0.383253,7.379423,0.225341,-0.077256,-0.873325,-50.606325,0.794791,-0.721385,,-4.03805,-1287.28908,-1.712977,-33.948639,,-1279.303479,,-1.015212,-0.001567,0.024098,17.179423,-1707.655128,-48.182475,-3983.587216,-1.088454,53.621025,,-37.821344,,15.811566,44.785803
5,0.05,0.050853,11034.0,2102.0,3284.0,0.90661,0.90142,0.90283,0.0,0.0,0.0,0.52501,0.54131,0.53163,0.39278,0.41778,0.40597,0.79933,0.79865,0.79786,2.616714,0.340574,6.203194,0.234486,-0.070628,-0.864651,-52.515876,0.624763,-0.621679,,-2.993448,,-0.773574,-14.215853,,,,-0.749068,-0.003553,0.015661,13.513277,-1471.614849,-38.038318,-2724.818876,-1.015233,38.992562,,-41.249669,,12.99613,46.472525
5,0.06,0.061011,9196.0,1752.0,2738.0,0.90874,0.90268,0.90023,0.0,0.0,0.0,0.5636,0.57352,0.57105,0.43727,0.45215,0.45185,0.80118,0.8068,0.79442,2.465237,0.348075,6.307549,0.239328,-0.068767,-0.894723,-52.950138,,-0.638777,-0.441951,,-887.94785,-0.58599,-18.322783,,-1388.905964,,-0.832977,-0.002192,0.01761,14.252529,-1537.217441,-40.721209,-2926.093902,-1.030316,41.9175,,-31.304168,,14.99197,46.916307
5,0.07,0.071167,7884.0,1502.0,2347.0,0.90953,0.90286,0.90105,0.0,0.0,0.0,0.59616,0.60759,0.60637,0.4763,0.49638,0.49569,0.80301,0.80233,0.79604,2.844688,0.352057,6.710042,0.227575,-0.072115,-0.876517,-52.306585,,-0.685701,,-3.59817,,-0.851169,-23.662671,,-1361.771706,,-0.943808,-0.002867,0.019437,15.648458,-1608.034067,-43.972467,-3136.957649,-0.958741,45.598838,,-35.875728,,14.853942,46.311864
6,0.05,0.051026,13195.0,2514.0,3928.0,0.89092,0.88451,0.88884,0.0,0.0,0.0,0.49349,0.51578,0.50748,0.36376,0.39507,0.38358,0.77672,0.77315,0.7786,2.332087,0.318287,5.864219,0.236685,-0.0773,-0.877113,-55.976317,0.5144,-0.605325,-0.473097,-2.333139,651.479594,0.218636,-11.964762,,-932.802432,,-0.692898,-0.003104,0.012848,12.029413,-1363.732222,-35.377818,-2321.745593,,35.507412,,-32.392955,,13.5389,49.69711
6,0.06,0.061217,10998.0,2096.0,3274.0,0.89148,0.88284,0.88742,0.0,0.0,0.0,0.53246,0.5554,0.54153,0.40631,0.44077,0.42193,0.77774,0.7739,0.77601,2.313997,0.348534,6.269611,0.235575,-0.075362,-0.888961,-57.068829,,-0.610472,-0.49733,-2.708466,,0.246156,-14.421259,,,,-0.701425,-0.003192,0.013027,12.639308,-1490.310672,-36.536303,-2444.036712,,35.689052,,,,15.033587,50.79167
6,0.07,0.071403,9429.0,1797.0,2807.0,0.89236,0.88803,0.88877,0.0,0.0,0.0,0.57452,0.56807,0.58343,0.45777,0.45268,0.47465,0.77652,0.78467,0.77275,2.489769,0.339752,6.080741,0.231028,-0.072345,-0.884443,-55.158738,-0.496942,-0.656125,0.514026,,773.753473,0.273812,-16.789069,,-993.454927,,-0.799724,-0.00416,0.014749,13.328835,-1476.105161,-40.578384,-2585.735041,,37.564104,,-35.13917,,14.370258,49.078782
7,0.05,0.051202,15341.0,2923.0,4567.0,0.87481,0.86907,0.86612,0.0,0.0,0.0,0.46673,0.4829,0.4742,0.33914,0.36196,0.35419,0.75387,0.75034,0.74489,2.046831,0.318935,5.332884,0.237825,-0.08555,-0.861304,-58.938178,,-0.534059,-0.545779,,,0.353884,-11.970816,,-849.356256,,-0.655909,-0.00243,0.012146,11.035624,-1229.885088,-33.301083,-2151.255817,,33.142802,,-29.990068,-0.001157,14.886526,52.548575


In [10]:
# Count a share of appearances for positive horizons
res = pd.read_parquet(f'{path}/params_Probit.parquet')
groups = ['Horizon', '1 Share', '1 Share real']
drops = ['State']
round(res.groupby(groups)[list(coeffs_cols) + ['const']].count() / 20, 2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
4,0.05,0.050679,0.4,0.55,0.25,5.0,4.4,5.0,5.0,0.0,1.05,0.15,0.15,0.0,1.1,3.35,0.0,0.05,0.0,3.4,0.15,4.95,1.75,4.55,5.0,5.0,0.0,0.7,0.0,0.05,0.0,0.3,5.0
4,0.06,0.060805,0.15,0.3,0.3,5.0,3.75,5.0,5.0,0.0,0.8,0.25,0.15,0.1,1.65,3.35,0.0,0.15,0.0,3.45,0.25,5.0,2.0,4.15,5.0,5.0,0.15,0.8,0.0,0.05,0.05,0.25,5.0
4,0.07,0.070928,0.1,0.5,0.15,5.0,3.65,5.0,5.0,0.05,0.25,0.0,0.1,0.15,1.4,3.2,0.0,0.2,0.0,3.25,0.2,5.0,2.45,4.65,5.0,5.0,0.1,0.8,0.0,0.05,0.0,0.25,5.0
5,0.05,0.050853,0.3,1.0,0.75,5.0,4.7,5.0,5.0,0.05,1.25,0.0,0.15,0.0,1.5,3.1,0.0,0.0,0.0,4.35,0.05,5.0,2.05,4.85,5.0,5.0,0.1,0.25,0.0,0.05,0.0,1.2,5.0
5,0.06,0.061011,0.3,0.7,0.4,5.0,4.5,5.0,5.0,0.0,0.95,0.05,0.0,0.05,1.0,3.2,0.0,0.05,0.0,4.6,0.25,5.0,2.7,4.9,5.0,5.0,0.1,0.4,0.0,0.05,0.0,0.6,5.0
5,0.07,0.071167,0.45,0.85,0.45,5.0,3.85,5.0,5.0,0.0,0.45,0.0,0.05,0.0,1.05,3.0,0.0,0.1,0.0,4.35,0.3,5.0,2.5,4.9,5.0,5.0,0.05,0.4,0.0,0.1,0.0,0.9,5.0
6,0.05,0.051026,0.45,2.45,0.25,5.0,4.8,5.0,5.0,0.05,1.9,0.15,0.05,0.05,0.9,3.65,0.0,0.05,0.0,4.85,0.15,5.0,2.55,5.0,5.0,5.0,0.0,0.2,0.0,0.1,0.0,2.05,5.0
6,0.06,0.061217,0.4,2.75,0.3,5.0,5.0,5.0,5.0,0.0,1.3,0.05,0.05,0.0,0.75,3.15,0.0,0.0,0.0,4.6,0.05,5.0,2.6,4.95,5.0,5.0,0.0,0.2,0.0,0.0,0.0,2.4,5.0
6,0.07,0.071403,0.4,1.95,0.2,5.0,4.5,5.0,5.0,0.05,1.0,0.05,0.0,0.15,0.7,3.35,0.0,0.25,0.0,4.8,0.1,5.0,3.15,4.95,5.0,5.0,0.0,0.15,0.0,0.1,0.0,2.0,5.0
7,0.05,0.051202,0.6,3.85,0.5,5.0,5.0,5.0,5.0,0.0,2.35,0.05,0.0,0.0,0.5,4.0,0.0,0.2,0.0,4.8,0.5,5.0,2.3,5.0,5.0,5.0,0.0,0.1,0.0,0.4,0.2,3.55,5.0


In [11]:
# Count an aggregated share of appearances
pd.DataFrame(round(res[list(coeffs_cols) + ['const']].count() / len(res), 2), columns = ['Share'])

Unnamed: 0,Share
Hurst,0.14
CorrDim,0.45
Lyapunov,0.11
Skewness,1.0
Kurtosis,0.92
PSD,1.0
ACF_1,1.0
WL_C1,0.03
WL_C2,0.25
WL_C3,0.02


##### LightGBM

In [12]:
# Choose binary target and other parameters
model = 'LightGBM'

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model, target_metric = 'Precision')

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model,
                             path = path)
res_means

100%|██████████| 5/5 [16:18<00:00, 195.77s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Validation size,Test size,Train AUC,Validation AUC,Test AUC,Train KS-test p-value,Validation KS-test p-value,Test KS-test p-value,Train F1-score,Validation F1-score,Test F1-score,Train precision,Validation precision,Test precision,Train recall,Validation recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
4,0.05,0.050679,8856.0,1688.0,2637.0,0.9864,0.97037,0.9698,0.0,0.0,0.0,0.61831,0.58427,0.58207,0.45963,0.43432,0.43128,0.95197,0.91672,0.91372,132.917091,60.243765,82.287186,382.849355,363.905717,263.82719,1264.255016,61.369464,23.938891,15.625145,58.374112,32.211968,66.541382,63.549559,5.076453,66.015948,53.292789,148.072848,94.570237,208.093004,93.612653,73.327534,8542.241078,551.104182,30.971713,48.098924,20.486909,39.812804,9.235466,82.120856
4,0.06,0.060805,7381.0,1407.0,2198.0,0.98651,0.97153,0.96979,0.0,0.0,0.0,0.66168,0.62997,0.62428,0.5089,0.48714,0.48078,0.95205,0.91936,0.91061,129.523973,52.049196,85.757668,356.667834,359.143053,245.765288,1105.249281,56.295997,23.068211,16.938252,61.117772,32.999993,55.558943,69.427325,5.206159,61.442669,46.56144,126.841436,84.533949,170.742267,86.672907,66.647564,8387.914435,615.802328,29.415393,47.722303,19.192896,42.49316,9.164976,79.591514
4,0.07,0.070928,6328.0,1206.0,1884.0,0.98686,0.96992,0.97002,0.0,0.0,0.0,0.69374,0.67128,0.65203,0.54657,0.53544,0.51276,0.95376,0.91564,0.91213,126.622587,51.619977,84.117345,337.423569,344.711265,222.976063,1025.311572,54.367673,20.220869,13.104821,62.330744,33.777585,51.066199,66.100988,5.337378,62.011198,40.47963,112.119014,79.576287,161.581089,86.237097,63.618057,8195.522227,560.301542,33.834567,47.044766,20.597489,33.256146,12.208575,79.272022
5,0.05,0.050853,11034.0,2102.0,3284.0,0.98388,0.96737,0.96654,0.0,0.0,0.0,0.58024,0.5733,0.56332,0.42066,0.42683,0.41514,0.94431,0.90198,0.90018,185.541159,97.261612,90.022164,526.679026,517.19215,358.77686,1703.320547,62.248909,33.671249,16.924039,41.59528,43.27971,61.444488,66.228732,11.742488,79.345038,37.002939,112.4005,97.851511,153.358162,123.874028,106.774969,10174.030308,897.741232,21.047445,29.303651,17.842672,50.525065,8.063211,111.629575
5,0.06,0.061011,9196.0,1752.0,2738.0,0.98354,0.96814,0.96641,0.0,0.0,0.0,0.62293,0.61826,0.60299,0.46692,0.47679,0.45786,0.94327,0.90721,0.90074,183.239926,86.082864,90.650768,499.264381,465.484215,334.162305,1514.075632,59.064753,28.964077,15.747491,43.97357,40.652553,48.669886,60.612342,11.849374,72.631929,31.729167,108.373778,87.556265,158.743319,114.441978,95.629568,9871.024764,893.599695,21.112226,29.835738,19.855387,36.963148,9.039915,105.226117
5,0.07,0.071167,7884.0,1502.0,2347.0,0.98432,0.96836,0.96728,0.0,0.0,0.0,0.65789,0.64213,0.63281,0.50673,0.50378,0.49186,0.9454,0.90668,0.90228,189.177804,81.443273,87.703751,470.925931,468.475841,313.09404,1255.067443,55.521345,27.169845,13.598513,46.826197,41.895757,51.205883,64.959595,8.920503,67.333027,29.782036,102.44358,78.701279,129.727133,111.645522,97.094306,9744.017712,904.330285,19.83705,28.483548,18.35824,37.215747,11.223119,109.721652
6,0.05,0.051026,13195.0,2514.0,3928.0,0.97755,0.961,0.96234,0.0,0.0,0.0,0.55175,0.55248,0.53662,0.39646,0.4104,0.38991,0.91948,0.87995,0.8834,222.141129,141.046908,93.160052,641.33508,685.823073,475.538628,2006.05718,75.365814,28.470387,24.23798,20.492511,67.375722,52.62921,60.203961,22.529397,100.88158,31.211786,90.360047,78.017231,85.478854,146.291294,127.356009,11001.216955,1508.682214,13.228438,36.768929,26.172768,23.518032,8.653764,150.070628
6,0.06,0.061217,10998.0,2096.0,3274.0,0.97771,0.96038,0.96246,0.0,0.0,0.0,0.5836,0.58732,0.57787,0.42759,0.44813,0.4334,0.92628,0.8795,0.88597,213.210205,120.198449,87.939764,614.026823,633.268201,435.01539,1460.101603,69.78839,24.633267,21.717097,24.551287,65.653198,40.789208,62.948034,22.230733,98.871231,27.233942,66.113106,69.770873,91.129136,138.994334,133.430269,10951.746281,1607.01136,18.511017,42.717434,25.177442,18.270208,7.861735,149.733211
6,0.07,0.071403,9429.0,1797.0,2807.0,0.97841,0.96142,0.96161,0.0,0.0,0.0,0.62574,0.60916,0.62143,0.47428,0.47052,0.48547,0.92717,0.8904,0.88044,213.907193,113.749375,88.129426,608.623897,574.936954,402.546479,1453.629093,61.282884,28.18225,19.932343,24.518404,71.245488,40.135841,62.076666,19.939331,90.049626,28.800823,73.297056,70.061719,89.984338,135.605747,117.289163,10622.547515,1482.017949,17.53581,37.911456,25.060819,18.817978,9.353988,149.682483
7,0.05,0.051202,15341.0,2923.0,4567.0,0.96996,0.95251,0.95151,0.0,0.0,0.0,0.51273,0.51911,0.50983,0.35932,0.38008,0.36747,0.90311,0.86075,0.85613,220.393207,149.838745,86.969559,803.337923,766.411763,585.285048,2007.976912,98.475325,25.640571,49.127226,27.604661,73.496137,47.986892,67.609278,30.183028,120.822169,36.808847,53.203155,68.045629,58.818997,187.398897,126.718833,11784.546609,2062.724382,18.29475,38.979347,9.562755,9.23552,4.581462,187.742124


In [13]:
# Count an average feature importance
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 563.13, Median feature importance: 78.47


Hurst                    191.73
CorrDim                  113.25
Lyapunov                  88.74
Skewness                 636.15
Kurtosis                 590.66
PSD                      435.97
ACF_1                   1548.49
WL_C1                     74.40
WL_C2                     27.09
WL_C3                     32.49
Hurst_8_dyn               35.07
Hurst_8_Variance          60.42
CorrDim_8_dyn             47.08
CorrDim_8_Variance        68.80
Lyapunov_8_dyn            18.60
Lyapunov_8_Variance       94.64
Skewness_8_dyn            40.72
Skewness_8_Variance       82.54
Kurtosis_8_dyn            72.64
Kurtosis_8_Variance      114.30
PSD_8_dyn                151.35
PSD_8_Variance           106.13
ACF_1_8_dyn            10504.70
ACF_1_8_Variance        1505.82
WL_C1_8_dyn               21.24
WL_C1_8_Variance          37.74
WL_C2_8_dyn               15.41
WL_C2_8_Variance          23.99
WL_C3_8_dyn                7.33
WL_C3_8_Variance         146.34
dtype: float64

##### XGBoost

In [14]:
# Choose binary target and other parameters
model = 'XGBoost'

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model, target_metric = 'Precision')

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model,
                             path = path)
res_means

100%|██████████| 5/5 [07:12<00:00, 86.43s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Validation size,Test size,Train AUC,Validation AUC,Test AUC,Train KS-test p-value,Validation KS-test p-value,Test KS-test p-value,Train F1-score,Validation F1-score,Test F1-score,Train precision,Validation precision,Test precision,Train recall,Validation recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
4,0.05,0.050679,8856.0,1688.0,2637.0,0.97251,0.96023,0.9599,0.0,0.0,0.0,0.61859,0.59144,0.59014,0.46615,0.44386,0.44329,0.92956,0.90802,0.90044,7.234739,6.079145,6.324395,8.643131,10.896064,11.150415,27.958858,6.541324,6.284287,4.348164,4.505948,4.723221,5.938097,5.609486,3.793903,5.308463,6.648251,7.383414,6.66369,13.379435,6.608807,5.649793,159.46811,16.443991,5.675431,5.128346,6.23999,4.986618,4.482046,5.922075
4,0.06,0.060805,7381.0,1407.0,2198.0,0.96869,0.95957,0.95504,0.0,0.0,0.0,0.65912,0.63943,0.6216,0.51615,0.503,0.48209,0.92096,0.90034,0.8968,6.856171,5.472859,6.149178,8.131777,10.953118,10.645266,27.686311,6.148901,5.654944,4.400307,4.866891,4.678157,5.47658,5.622469,3.752407,5.18482,6.20755,6.522238,6.231639,12.019291,6.453197,5.518876,161.824564,17.124558,5.423229,5.013627,5.761316,4.820624,4.927972,5.847453
4,0.07,0.070928,6328.0,1206.0,1884.0,0.96555,0.95361,0.95181,0.0,0.0,0.0,0.68559,0.6678,0.65741,0.55207,0.53983,0.52791,0.91399,0.89578,0.88787,6.533762,5.492979,5.759761,7.749074,10.966061,10.277269,24.064482,5.878254,5.437374,4.292465,4.464796,4.420546,5.362861,5.393164,3.276843,5.193039,6.056307,6.126542,6.050717,11.167547,6.189452,5.197487,159.759046,15.537124,5.599737,4.761945,5.754083,4.294363,4.566583,5.745439
5,0.05,0.050853,11034.0,2102.0,3284.0,0.97121,0.95898,0.95785,0.0,0.0,0.0,0.60792,0.57425,0.5741,0.45785,0.4312,0.42945,0.92174,0.89599,0.89275,8.966197,7.108235,6.411893,11.099252,13.314649,13.397683,37.388206,7.954293,7.876171,5.081003,5.245847,5.951099,7.401507,6.472025,4.438801,6.291924,7.272276,6.653921,7.652502,12.340259,7.564943,6.835752,174.407174,19.248205,6.508754,6.168791,6.448739,5.803892,4.795375,7.039063
5,0.06,0.061011,9196.0,1752.0,2738.0,0.9664,0.95525,0.95274,0.0,0.0,0.0,0.64495,0.61836,0.6156,0.50175,0.48071,0.47816,0.91356,0.8944,0.88252,8.537982,6.864315,6.298682,10.373173,13.125355,12.889476,33.15938,7.617936,7.327994,5.131254,5.408711,5.601982,6.665248,6.2918,4.525034,6.088768,6.833529,6.223176,7.338329,11.826526,7.153948,6.730359,176.501366,20.325837,5.897113,6.041694,6.06129,5.628367,4.726097,7.091986
5,0.07,0.071167,7884.0,1502.0,2347.0,0.96387,0.95144,0.95008,0.0,0.0,0.0,0.66466,0.65033,0.64018,0.5281,0.52073,0.50769,0.90522,0.88581,0.88114,8.520825,6.676986,6.227122,9.638327,13.461485,12.165772,27.46715,7.179237,7.192083,4.834639,5.242173,5.588341,6.381738,5.757609,4.194878,5.707154,6.911628,6.009413,6.839828,10.894205,7.226636,6.481546,183.195885,18.424697,5.827183,5.546711,6.105381,5.700929,5.038551,6.746206
6,0.05,0.051026,13195.0,2514.0,3928.0,0.96489,0.95037,0.95132,0.0,0.0,0.0,0.58489,0.55239,0.55329,0.43672,0.41151,0.41109,0.90332,0.87669,0.87561,10.461692,8.584307,7.302159,13.205098,16.789455,15.729183,37.731685,9.215211,7.933794,5.506513,6.662791,6.732741,8.951299,8.000733,5.965294,7.426788,8.798278,6.480421,8.407722,10.646206,8.566219,8.076862,195.299493,26.662652,7.06709,6.880932,7.189001,6.421693,6.272445,8.01688
6,0.06,0.061217,10998.0,2096.0,3274.0,0.96217,0.94888,0.94949,0.0,0.0,0.0,0.61202,0.58504,0.58802,0.46727,0.44746,0.44906,0.89765,0.87079,0.87458,9.686148,8.195689,7.241714,12.716809,16.447879,14.807723,31.446867,9.274256,7.878136,5.560096,6.639843,6.616412,8.575411,8.417146,5.558947,7.215989,8.436936,6.71855,7.915453,10.690588,8.431209,7.745644,196.497388,26.523467,6.830071,6.722118,6.576014,6.181653,6.013613,7.484506
6,0.07,0.071403,9429.0,1797.0,2807.0,0.95394,0.94019,0.94133,0.0,0.0,0.0,0.64226,0.61914,0.62469,0.50806,0.49027,0.49634,0.88672,0.86344,0.86089,9.651547,8.028187,6.952093,12.173752,15.690868,13.805347,30.446076,9.057349,7.588844,5.423142,6.134346,6.822965,7.888129,7.897763,5.810739,6.868782,8.175017,6.471035,7.916963,10.126564,8.328271,7.52683,193.135654,24.848236,6.556052,6.809548,6.721469,6.068842,5.944744,7.684999
7,0.05,0.051202,15341.0,2923.0,4567.0,0.95053,0.93578,0.93442,0.0,0.0,0.0,0.54341,0.52867,0.51597,0.39805,0.39278,0.37724,0.8712,0.84285,0.8411,11.297927,9.927871,8.351823,15.872321,20.211218,17.446844,37.826739,11.188672,9.558412,6.282755,7.819092,7.857041,9.880256,9.284519,7.448959,8.486922,9.591493,7.350013,9.036407,10.328846,9.538635,9.061955,212.674903,32.502414,7.742212,7.691688,7.282473,6.869352,5.639321,9.330214


In [15]:
# Count an average feature importance
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 15.86, Median feature importance: 7.96


Hurst                    9.56
CorrDim                  8.19
Lyapunov                 7.48
Skewness                12.97
Kurtosis                16.15
PSD                     14.51
ACF_1                   31.66
WL_C1                    9.09
WL_C2                    7.90
WL_C3                    5.65
Hurst_8_dyn              6.42
Hurst_8_Variance         6.58
CorrDim_8_dyn            8.30
CorrDim_8_Variance       7.98
Lyapunov_8_dyn           5.69
Lyapunov_8_Variance      7.20
Skewness_8_dyn           8.47
Skewness_8_Variance      7.01
Kurtosis_8_dyn           7.95
Kurtosis_8_Variance     11.47
PSD_8_dyn                8.39
PSD_8_Variance           7.68
ACF_1_8_dyn            193.43
ACF_1_8_Variance        26.22
WL_C1_8_dyn              6.78
WL_C1_8_Variance         6.60
WL_C2_8_dyn              6.86
WL_C2_8_Variance         6.06
WL_C3_8_dyn              5.43
WL_C3_8_Variance         8.01
dtype: float64

##### CatBoost

In [16]:
# Choose binary target and other parameters
model = 'CatBoost'

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
res = fun.model(data_logdyn, target, horizons, shares, states = list(range(0, 10000, 500)), model = model, target_metric = 'Precision')

# Save results and create pivot based on the horizon and 1 share parameters
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, 
                             list(coeffs_cols),
                             model = model,
                             path = path)
res_means

100%|██████████| 5/5 [04:00<00:00, 48.15s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Validation size,Test size,Train AUC,Validation AUC,Test AUC,Train KS-test p-value,Validation KS-test p-value,Test KS-test p-value,Train F1-score,Validation F1-score,Test F1-score,Train precision,Validation precision,Test precision,Train recall,Validation recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
4,0.05,0.050679,8856.0,1688.0,2637.0,0.923,0.90775,0.91105,0.0,0.0,0.0,0.5949,0.6072,0.5983,0.4689,0.4857,0.4751,0.8384,0.8252,0.828,0.407017,0.134613,0.692766,1.236498,2.056513,1.222121,28.55784,0.240029,0.151875,0.049542,0.083724,0.045289,0.228442,0.137221,0.124818,0.248684,0.086844,5.350952,0.295343,2.248749,0.791752,0.322006,41.642814,12.680341,0.108642,0.156943,0.024527,0.073534,0.011363,0.589197
4,0.06,0.060805,7381.0,1407.0,2198.0,0.9361,0.93455,0.9334,0.0,0.0,0.0,0.62925,0.67225,0.652,0.50375,0.55745,0.53195,0.8608,0.85795,0.8543,1.323466,0.656495,0.839866,1.76994,2.857715,1.881572,21.292758,0.65401,0.242511,0.127923,0.197963,0.132927,0.511646,0.526695,0.114748,0.383683,0.249462,6.418919,0.91742,2.35219,1.829624,0.412788,36.401289,16.356391,0.398143,0.184084,0.085784,0.29895,0.157324,0.423715
4,0.07,0.070928,6328.0,1206.0,1884.0,0.90205,0.8974,0.89225,0.0,0.0,0.0,0.66,0.66075,0.6752,0.58095,0.59065,0.6071,0.8,0.79635,0.79105,0.496653,0.130012,0.418509,1.047218,1.3708,1.046235,33.07407,0.260936,0.314735,0.005544,0.023948,0.063442,0.139237,0.177037,0.146512,0.182537,0.053312,6.283523,0.285237,0.280873,0.944357,0.146667,29.273602,22.831017,0.091902,0.115468,0.03113,0.140729,0.002796,0.621962
5,0.05,0.050853,11034.0,2102.0,3284.0,0.9101,0.90515,0.90505,0.0,0.0,0.0,0.60125,0.6068,0.57285,0.50105,0.5112,0.46935,0.80545,0.80635,0.8005,1.11895,0.53292,0.335293,1.59816,2.502029,1.613209,30.889385,0.418803,0.103001,0.035199,0.163608,0.04553,0.289741,0.310675,0.092807,0.456532,0.194948,5.109356,0.491099,1.248074,1.317489,0.529728,39.490626,10.079957,0.228255,0.089394,0.0545,0.092832,0.043343,0.524557
5,0.06,0.061011,9196.0,1752.0,2738.0,0.94225,0.93605,0.9374,0.0,0.0,0.0,0.6232,0.6399,0.653,0.4942,0.51635,0.53345,0.8581,0.8574,0.84995,1.846672,0.787554,0.488704,2.921061,3.394914,2.352613,19.165397,0.323973,0.167097,0.114765,0.329622,0.145638,0.410863,0.496802,0.121068,0.568509,0.361971,3.63199,0.76171,0.72174,1.883245,0.680829,44.382794,12.598469,0.260588,0.14485,0.094342,0.19429,0.135008,0.512924
5,0.07,0.071167,7884.0,1502.0,2347.0,0.9118,0.89935,0.90515,0.0,0.0,0.0,0.6338,0.646,0.65955,0.5422,0.56515,0.5838,0.81085,0.80075,0.79625,1.094157,0.268833,0.171622,1.654234,2.086035,1.925414,30.314621,0.197962,0.039982,0.018246,0.019851,0.040759,0.140772,0.094,0.084693,0.222804,0.030995,2.063098,0.388682,0.360882,0.511527,0.599363,38.56961,18.362865,0.133051,0.016185,0.122257,0.069277,0.02475,0.373475
6,0.05,0.051026,13195.0,2514.0,3928.0,0.9292,0.92345,0.9229,0.0,0.0,0.0,0.5409,0.57805,0.5509,0.40965,0.45325,0.4228,0.82875,0.8273,0.8338,1.976487,0.884136,0.403315,2.814774,4.156648,3.712669,18.082704,0.5942,0.257461,0.112985,0.070958,0.130152,0.374116,0.465445,0.127828,0.538155,0.418991,1.871613,0.555304,1.110729,2.580552,0.638816,36.856592,20.198628,0.239492,0.070714,0.069188,0.127024,0.084322,0.476003
6,0.06,0.061217,10998.0,2096.0,3274.0,0.93015,0.9215,0.9238,0.0,0.0,0.0,0.61425,0.5818,0.5873,0.4926,0.4635,0.4662,0.8332,0.82255,0.83165,2.511141,0.903386,0.514531,3.068952,3.15675,2.957017,19.390873,0.374732,0.228816,0.219777,0.173112,0.285244,0.534418,0.481393,0.216406,0.631653,0.682215,4.704905,0.610373,0.896013,1.905711,1.065911,36.320551,16.584825,0.225227,0.273673,0.188902,0.17771,0.009435,0.706347
6,0.07,0.071403,9429.0,1797.0,2807.0,0.9403,0.932,0.937,0.0,0.0,0.0,0.6235,0.6165,0.62545,0.49775,0.4901,0.5005,0.8523,0.8509,0.85405,2.367482,1.222404,0.551902,3.040679,3.334579,3.333051,19.771973,0.42345,0.172816,0.159187,0.228171,0.302598,0.441085,0.706335,0.274564,0.686626,0.641353,0.814654,0.518296,0.728598,2.206889,0.967362,41.59286,14.172645,0.140841,0.241902,0.042617,0.190735,0.057682,0.666663
7,0.05,0.051202,15341.0,2923.0,4567.0,0.9295,0.9197,0.9178,0.0,0.0,0.0,0.52305,0.5398,0.5343,0.3888,0.41585,0.4063,0.836,0.81015,0.814,2.676106,1.186991,0.388729,4.058804,3.790065,4.057492,17.073092,0.826016,0.261891,0.317506,0.232766,0.226788,0.447826,0.814731,0.250539,0.777174,1.146601,0.788763,0.527091,1.061001,2.870217,1.04401,33.533485,19.655649,0.387994,0.354796,0.05242,0.116457,0.024052,1.05095


In [17]:
# Count an average feature importance
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Get the final pivot
means = round(res_viable[coeffs_cols].mean(), 2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 3.33, Median feature importance: 0.5


Hurst                   1.62
CorrDim                 0.75
Lyapunov                0.43
Skewness                2.61
Kurtosis                3.09
PSD                     2.83
ACF_1                  22.22
WL_C1                   0.41
WL_C2                   0.18
WL_C3                   0.15
Hurst_8_dyn             0.13
Hurst_8_Variance        0.13
CorrDim_8_dyn           0.34
CorrDim_8_Variance      0.49
Lyapunov_8_dyn          0.20
Lyapunov_8_Variance     0.53
Skewness_8_dyn          0.52
Skewness_8_Variance     2.72
Kurtosis_8_dyn          0.47
Kurtosis_8_Variance     1.00
PSD_8_dyn               2.07
PSD_8_Variance          0.65
ACF_1_8_dyn            38.72
ACF_1_8_Variance       16.42
WL_C1_8_dyn             0.19
WL_C1_8_Variance        0.17
WL_C2_8_dyn             0.06
WL_C2_8_Variance        0.13
WL_C3_8_dyn             0.05
WL_C3_8_Variance        0.68
dtype: float64