### Libraries import

In [1]:
# Import libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
import functions as fun
import plotly.io as pio
import importlib as imp
import statsmodels.api as sm
import plotly.graph_objects as go
import sklearn.model_selection as modsel

# Set additional settings for warnings and templates
import warnings
# NOTE(review): this silences every warning category for the whole kernel,
# including pandas/NumPy deprecation and chained-assignment warnings raised by
# the cells below - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Use the dark plotly template for every figure created in this notebook
pio.templates.default = "plotly_dark"
# Display all columns of wide dataframes instead of truncating them
pd.set_option('display.max_columns', None)

In [9]:
# Reload functions if they were changed
# (`imp` is the alias given to importlib in the import cell, and `fun` is the
# project-local `functions` module; re-run this cell after editing that module)
imp.reload(fun)



### Financial models

#### Feature generation

In [22]:
# Read dataset and define columns for feature generation
data = pd.read_parquet('Data/dataset.parquet').drop(columns = 'Variance')
indices = data.groupby(['Ticker', 'Index']).size().index.values
cols = ['Hurst', 'CorrDim', 'Lyapunov',
        'Skewness', 'Kurtosis', 'PSD', 'ACF_1',
        'WL_C1', 'WL_C2', 'WL_C3']

# Set lag for dynamics and short variance calculation
lag_model = [8]

# Calculate dynamics and short variance for every (Ticker, Index) series.
# The original idea of using the variance came from the behaviour of the largest
# Lyapunov exponent before the critical transition point: it mostly didn't move
# in nominal values, but its variance in some cases decreased significantly.
# NOTE(review): the '_dyn' ratio divides by the lagged value; for columns whose
# values are close to zero (CorrDim is ~1e-15 in the displayed output) the ratio
# can explode or become +/-inf, and dropna() below does not remove infinities.
frames = []
for ind in tqdm(indices):
    # .copy() gives an independent frame, so the new columns are not written
    # into a filtered view of `data` (chained assignment)
    data_ind = data[(data['Ticker'] == ind[0]) & (data['Index'] == ind[1])].copy()
    for col in cols:
        for lag_m in lag_model:
            # Relative change of the indicator over `lag_m` observations
            data_ind[f'{col}_{lag_m}_dyn'] = data_ind[col] / data_ind[col].shift(lag_m) - 1
            # Rolling variance of the indicator over the last `lag_m` observations
            data_ind[f'{col}_{lag_m}_Variance'] = data_ind[col].rolling(lag_m).var()
    # The first rows of every series have no lagged value / full window yet
    frames.append(data_ind.dropna())

# Concatenate once instead of growing the dataframe inside the loop
data_logdyn = pd.concat(frames)

# Reset index to get rid of dates and save final dataset
data_logdyn = data_logdyn.reset_index(drop = True)
data_logdyn = data_logdyn[data_logdyn['Distance'] > 0]
data_logdyn.to_parquet('Data/final_dataset.parquet')
data_logdyn

100%|██████████| 548/548 [00:17<00:00, 32.06it/s]


Unnamed: 0,Volume,MA100,MV100,Rise,Distance,Index,Ticker,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
0,23952.0,31070.20,7.130232e+08,False,291,2643,AAN,0.651904,-2.815337e-15,0.049050,4.481086,26.699359,-0.609296,0.374819,0.465914,-0.103608,-0.124637,-0.050382,0.002605,-0.391654,1.096794e-29,0.145294,0.000312,0.030011,0.002971,0.082711,0.754393,0.037030,0.000049,0.019152,0.000045,-0.045022,0.003071,0.285858,0.000373,-0.167933,0.001749
1,159410.0,32093.49,8.715063e+08,False,290,2643,AAN,0.662030,-9.789577e-17,0.048384,4.402063,25.787151,-0.612294,0.367293,0.418223,-0.130413,-0.086043,0.050584,0.002415,224.435763,1.094419e-29,0.111672,0.000318,0.012617,0.002558,0.046669,0.643472,0.038410,0.000053,-0.000925,0.000044,-0.009408,0.003095,0.559503,0.000450,-0.162758,0.001908
2,74965.0,32664.47,8.876980e+08,False,289,2643,AAN,0.690370,-2.362195e-15,0.017922,4.390049,25.683490,-0.614583,0.371478,0.475433,-0.103349,-0.175004,0.204455,0.000927,-1.947039,9.171144e-30,-0.676723,0.000350,0.011164,0.001981,0.044073,0.473688,0.038391,0.000054,0.009061,0.000048,-0.046778,0.002778,0.087733,0.000436,-0.030778,0.001826
3,57275.0,32998.33,8.929255e+08,False,288,2643,AAN,0.693400,-2.719365e-15,0.019476,4.389790,25.681349,-0.618153,0.370974,0.380598,-0.146165,-0.076268,0.038265,0.000894,-0.439684,7.705216e-30,-0.652200,0.000322,0.011460,0.001289,0.044543,0.257182,0.040634,0.000055,0.012792,0.000050,0.045300,0.002463,0.182044,0.000618,-0.023034,0.001851
4,36764.0,32491.61,8.628754e+08,False,287,2643,AAN,0.670705,2.596383e-15,0.050424,4.420547,25.969972,-0.621126,0.373620,0.466517,-0.100117,-0.133767,-0.013487,0.000923,-1.533414,6.951390e-30,-0.057433,0.000305,0.009323,0.001066,0.038771,0.127176,0.040629,0.000055,0.063141,0.000010,-0.045662,0.002215,0.182232,0.000544,-0.128684,0.001742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160010,103982.0,107127.65,4.066301e+09,False,5,1541,ZWS,0.762291,-3.139746e-15,0.002439,3.228418,15.896758,-0.752358,0.577192,0.573424,-0.154147,-0.074996,-0.010378,0.000124,-1.974779,-3.552214e-18,0.288016,0.000002,-0.003889,0.000036,-0.003274,0.001379,-0.021714,0.000026,-0.008208,0.000002,-0.019764,0.001104,0.024043,0.000198,0.047756,0.000524
160011,160370.0,108231.04,4.060776e+09,False,4,1541,ZWS,0.793100,-4.579490e-15,0.001399,3.229276,15.914075,-0.749824,0.576721,0.555407,-0.133274,-0.058564,0.032792,0.000163,-2.177920,-3.552214e-18,-0.411605,0.000003,-0.002980,0.000040,-0.002118,0.001451,-0.022367,0.000028,-0.008702,0.000003,-0.004787,0.001098,-0.038660,0.000224,0.090447,0.000535
160012,1031511.0,117603.93,1.258062e+10,False,3,1541,ZWS,0.767982,-4.548268e-15,0.006788,3.364231,16.524853,-0.747284,0.526691,0.563935,-0.163990,-0.059153,-0.028708,0.000136,-1.868521,-3.552214e-18,0.820707,0.000005,0.037579,0.002156,0.034430,0.045315,-0.023350,0.000031,-0.093385,0.000344,0.008998,0.001116,-0.023602,0.000208,0.327306,0.000546
160013,382418.0,120666.56,1.326214e+10,False,2,1541,ZWS,0.750742,2.858428e-15,0.006570,3.333177,16.262226,-0.744650,0.546938,0.506274,-0.149322,-0.015747,-0.014877,0.000191,-0.462384,-3.552214e-18,7.506477,0.000006,0.030473,0.002933,0.020689,0.052069,-0.024309,0.000034,-0.056829,0.000410,0.021562,0.000969,0.045151,0.000197,0.018425,0.000543


#### Probit modelling

##### Modelling with all variables

In the cell below we iterate over three lists of parameters:
- horizons - how many hours before the transition are considered close enough to count as the prediction phase
- shares - the share of positive observations in the whole modelling dataset - this parameter is important because in the original dataset the share of positives for some of the horizons was too small, so we decided to decrease the size of the negative dataset by randomly subsampling it
- states - to avoid lucky random choices in the negative subsampling, we use a list of different random states and average the results

In [23]:
# Load the final feature dataset produced by the feature generation step
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Binary target name and the parameter grid: horizons 1..8, four positive-class
# shares between 5% and 20%, and twenty random states (0, 500, ..., 9500)
target = 'Flag'
horizons = [*range(1, 9)]
shares = np.linspace(0.05, 0.2, 4)
states = [*range(0, 10000, 500)]

# Iterate over the grid and optimize the classification models; all the
# results are collected in a single dataframe
res = fun.model(data_logdyn, target, horizons, shares, states)

# Every column that is not descriptive metadata is a model regressor; together
# with the intercept these are the coefficient columns of the results
meta_cols = ['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker']
coeffs_cols = data_logdyn.columns.drop(meta_cols)
res_cols = [*coeffs_cols, 'const']

# Save results and create pivot based on the horizon and 1 share parameters
res_means = fun.save_results(res, res_cols)
res_means

100%|██████████| 8/8 [17:04<00:00, 128.05s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
1,0.05,0.05016,8740.0,2185.0,0.91705,0.9028,0.0,0.0,0.57445,0.59895,0.44475,0.48455,0.8156,0.8005,,1.607584,6.267405,0.806011,-0.041219,0.668487,1.083096,-0.689425,-0.954452,2.429622,-0.766993,-48.750093,8.734625e-16,-2.48641,0.001889,-388.416154,2.28238,-0.431408,-0.134771,0.0007,-1.446676,719.281892,1.639095,273.559374,-1.260763,-44.110917,,-37.053144,,-34.409319,-3.905914
1,0.1,0.100293,4371.0,1093.0,0.91705,0.9046,0.0,0.0,0.7236,0.70725,0.65555,0.64685,0.81155,0.7948,,0.932452,6.184795,0.832796,-0.04373,0.743014,1.263808,-0.924908,-1.114259,2.349438,,-54.762036,1.683301e-15,-3.644923,0.004566,-529.973318,2.551459,-0.627291,-0.149984,0.001167,-1.719918,864.886362,1.81899,356.849388,-1.508813,-54.288192,,-48.962652,,,-3.571782
1,0.15,0.150425,2914.0,729.0,0.91605,0.9133,0.0,0.0,0.7749,0.78225,0.7435,0.7621,0.81005,0.8092,,-1.840861,7.525784,0.865403,-0.04647,0.677284,1.32092,-1.032421,-1.198083,2.536896,,-56.920876,2.936649e-15,-5.261013,,,2.527806,-0.481449,-0.162402,0.001077,-1.874719,1089.417724,1.847678,408.925355,-1.622408,,,-48.844835,,,-3.367581
1,0.2,0.200512,2186.0,547.0,0.91795,0.9122,0.0,0.0,0.81155,0.80115,0.8135,0.78915,0.811,0.81845,-1.362815,1.186848,8.706753,0.856623,-0.046887,0.637647,1.399337,-1.145474,,2.598529,,-63.904992,4.739529e-15,-3.872457,0.00397,-503.927029,2.651688,-0.636803,-0.170072,0.001722,-2.211436,1102.293919,1.820314,494.315938,-1.778574,-50.472005,,-64.909306,,,-3.050465
2,0.05,0.050328,17421.0,4356.0,0.85525,0.8508,0.0,0.0,0.50195,0.5314,0.3958,0.43455,0.69495,0.69735,,,5.949856,0.576943,-0.029305,0.56574,0.780231,-0.533009,-0.75461,1.529387,,-32.037711,,-1.526988,0.000295,-262.077567,2.252509,-0.245854,-0.109087,0.000323,-0.58015,538.538131,1.677715,284.707936,-0.836844,,,-26.606743,,,-3.132078
2,0.1,0.100615,8714.0,2179.0,0.85725,0.8522,0.0,0.0,0.63565,0.6499,0.5833,0.6182,0.7027,0.6963,,2.064734,6.222017,0.607224,-0.03171,0.626832,0.881601,-0.659062,-0.806744,1.516031,,-34.863703,1.090834e-15,-6.85374,0.000381,-273.87235,2.423935,-0.502257,-0.120826,0.000935,-1.278146,681.664206,1.757455,376.414395,-0.999836,,,-32.822317,,-19.320801,-2.799415
2,0.15,0.150881,5811.0,1453.0,0.8579,0.8502,0.0,0.0,0.70105,0.69735,0.70255,0.69835,0.70215,0.70395,,1.706606,6.015912,0.599213,-0.031346,0.61958,0.88754,-0.728862,-0.858354,1.5459,,-37.872832,1.243321e-15,-3.121328,,-331.085645,2.504764,-0.620129,-0.136071,0.001236,-1.136778,739.938617,1.940004,446.850271,-1.065814,-30.389706,,-35.36893,,,-2.564423
2,0.2,0.201101,4360.0,1090.0,0.86165,0.84565,0.0,0.0,0.739,0.7246,0.7811,0.7724,0.7035,0.6861,,2.366407,6.702149,0.585349,-0.030286,0.666391,0.978562,-0.796565,-0.927206,1.4144,-0.563297,-38.679182,3.823556e-15,-5.297013,,-339.202137,2.503397,-0.544827,-0.130046,0.001316,-1.372624,820.220546,1.919745,501.212278,-1.090282,,,-30.653011,,,-2.339029
3,0.05,0.050495,26046.0,6512.0,0.8069,0.80015,0.0,0.0,0.4785,0.4732,0.401,0.40095,0.5991,0.594,,0.909015,4.71659,0.451179,-0.022806,0.476404,0.579283,-0.444694,-0.591065,1.407781,,-26.337869,,-1.815282,,-212.351931,2.379674,-0.185205,-0.10584,-0.000248,-0.492387,479.679233,1.556019,285.88537,-0.915205,-21.156899,,-19.565594,,,-2.737445
3,0.1,0.100933,13030.0,3258.0,0.8097,0.80575,0.0,0.0,0.58905,0.58295,0.5768,0.56215,0.60485,0.61445,,1.115722,4.418842,0.453154,-0.023059,0.508481,0.619484,-0.464519,-0.693387,1.376691,,-27.267071,,-4.294936,,-236.998279,2.526585,-0.382029,-0.11086,0.000433,-0.829317,565.441235,1.53422,375.499152,-1.034082,-24.16733,,-22.147374,0.008278,,-2.391228


In [24]:
# Get mean metrics for all of the columns to understand what variables are actually used in the final models.
# Only horizons 1-4 (first index level of `res_means`) are averaged; drop the
# mask and use `res_means.mean()` to average over every horizon instead.
# Index.isin replaces np.in1d, which is deprecated in NumPy 2.0.
short_horizons = res_means.index.get_level_values(0).isin(range(1, 5))
res_means[short_horizons].mean().round(2).to_frame('Average coeff')

Unnamed: 0,Average coeff
Train size,26860.75
Test size,6715.75
Train AUC,0.72
Test AUC,0.71
Train KS-test p-value,0.0
Test KS-test p-value,0.0
Train F1-score,0.42
Test F1-score,0.42
Train precision,0.42
Test precision,0.42


In [17]:
# Share of runs in which each coefficient is present (non-null), per
# (horizon, share) group, restricted to horizons 1-4
res = pd.read_parquet('Params/params_Probit.parquet')
horizon_mask = res['Horizon'].isin(range(1, 5))
res_1_5 = res[horizon_mask]
groups = ['Horizon', '1 Share', '1 Share real']
drops = ['State']

# Each group is divided by 20 runs
# NOTE(review): 20 matches the number of random states in the Probit grid
# (range(0, 10000, 500)) - confirm it stays in sync if that grid changes.
runs_per_group = 20
appearance_counts = res_1_5.groupby(groups)[[*coeffs_cols, 'const']].count()
(appearance_counts / runs_per_group).round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.05,0.05016,0.0,0.05,1.0,1.0,1.0,0.8,0.7,0.1,0.25,1.0,0.15,0.5,0.1,0.35,0.1,0.15,1.0,0.6,1.0,0.55,0.2,0.65,1.0,1.0,0.85,0.1,0.0,0.6,0.0,0.05,1.0
1,0.1,0.100293,0.0,0.15,0.9,1.0,1.0,0.75,0.45,0.15,0.15,1.0,0.0,0.2,0.05,0.25,0.1,0.05,1.0,0.7,1.0,0.65,0.35,0.75,1.0,1.0,0.75,0.1,0.0,0.2,0.0,0.0,1.0
1,0.15,0.150425,0.0,0.05,0.75,1.0,1.0,0.65,0.25,0.2,0.2,1.0,0.0,0.3,0.15,0.15,0.0,0.0,1.0,0.8,1.0,0.7,0.4,0.75,1.0,1.0,0.75,0.0,0.0,0.15,0.0,0.0,1.0
1,0.2,0.200512,0.1,0.15,0.65,1.0,1.0,0.75,0.15,0.1,0.0,1.0,0.0,0.2,0.2,0.1,0.05,0.1,1.0,0.75,1.0,0.6,0.45,0.9,1.0,1.0,0.7,0.05,0.0,0.15,0.0,0.0,1.0
2,0.05,0.050328,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.8,0.6,1.0,0.0,0.75,0.0,0.25,0.1,0.6,1.0,0.85,1.0,0.5,0.4,1.0,1.0,1.0,0.9,0.0,0.0,0.25,0.0,0.0,1.0
2,0.1,0.100615,0.0,0.15,0.9,1.0,1.0,1.0,0.9,0.8,0.45,1.0,0.0,0.8,0.15,0.3,0.1,0.4,1.0,0.8,1.0,0.7,0.5,0.95,1.0,1.0,0.9,0.0,0.0,0.05,0.0,0.05,1.0
2,0.15,0.150881,0.0,0.05,0.85,1.0,1.0,0.95,0.75,0.65,0.45,1.0,0.0,0.6,0.3,0.15,0.0,0.25,1.0,0.7,1.0,0.65,0.45,0.85,1.0,1.0,1.0,0.05,0.0,0.1,0.0,0.0,1.0
2,0.2,0.201101,0.0,0.1,0.85,1.0,1.0,0.95,0.65,0.65,0.4,1.0,0.05,0.55,0.15,0.25,0.0,0.4,1.0,0.8,0.95,0.55,0.65,0.9,1.0,1.0,0.9,0.0,0.0,0.1,0.0,0.0,1.0
3,0.05,0.050495,0.0,0.2,0.95,1.0,1.0,1.0,0.9,0.6,0.6,1.0,0.0,1.0,0.0,0.4,0.0,0.85,1.0,0.65,1.0,0.45,0.55,0.9,1.0,1.0,1.0,0.2,0.0,0.3,0.0,0.0,1.0
3,0.1,0.100933,0.0,0.25,0.95,1.0,1.0,1.0,0.85,0.65,0.5,1.0,0.0,1.0,0.0,0.3,0.0,0.7,1.0,0.75,1.0,0.5,0.75,1.0,1.0,1.0,1.0,0.1,0.0,0.25,0.05,0.0,1.0


In [18]:
# Aggregated share of appearances: fraction of all selected runs (horizons 1-4)
# in which each coefficient is non-null
appearance_share = res_1_5[[*coeffs_cols, 'const']].count() / len(res_1_5)
appearance_share.round(2).to_frame('Share')

Unnamed: 0,Share
Hurst,0.01
CorrDim,0.13
Lyapunov,0.89
Skewness,1.0
Kurtosis,1.0
PSD,0.93
ACF_1,0.71
WL_C1,0.56
WL_C2,0.52
WL_C3,1.0


##### Visualization for the KS-test

In [27]:
# Visual check of a single model: fit one Probit for a fixed (horizon, share,
# state) combination and compare the distributions of predicted probabilities
# for the positive and the negative class
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')
target = 'Flag'
horizon = 8
share = 0.1
state = 2000
data_testing = data_logdyn.copy()

# Target is 1 when Distance is below `horizon`, 0 otherwise (vectorised
# equivalent of the former row-wise apply)
data_testing[target] = np.where(data_testing['Distance'] >= horizon, 0, 1)

# Drop the descriptive columns so that only regressors and the target remain.
# Fix: the list used to contain 'MA100' twice and no 'MV100', which left MV100
# in the design matrix, unlike the other modelling cells.
data_testing = data_testing.drop(columns = ['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])

# Split by class: all positives are kept, negatives are subsampled
data_testing_1 = data_testing[data_testing[target] == 1]
data_testing_0 = data_testing[data_testing[target] == 0]
Y_1 = data_testing_1[target]
X_1 = data_testing_1.drop(columns = [target])
share_1_orig = len(data_testing_1) / (len(data_testing_0) + len(data_testing_1))

# Fraction of negatives to keep so that positives make up roughly `share` of the sample
neg_fraction = share_1_orig * (1 - share) / share
X_0_all = data_testing_0.drop(columns = [target])
Y_0_all = data_testing_0[target]
if neg_fraction < 1:
    _, X_0, _, Y_0 = modsel.train_test_split(X_0_all, Y_0_all,
                                             test_size = neg_fraction, random_state = state)
else:
    # Fix: min(ratio, 1) used to pass the int 1 to train_test_split, which is
    # read as "exactly one sample" (and a float 1.0 is rejected outright).
    # When the requested share cannot be reached, keep every negative instead.
    # NOTE(review): confirm that fun.model treats this edge case the same way.
    X_0, Y_0 = X_0_all, Y_0_all
share_1 = len(Y_1) / (len(Y_0) + len(Y_1))

# Assemble the modelling sample, add the intercept and make the train/test split
Y = pd.concat([Y_0, Y_1])
X = sm.add_constant(pd.concat([X_0, X_1]))
X_train, X_test, Y_train, Y_test = modsel.train_test_split(X, Y, test_size = 0.2, random_state = state)

# Fit the model; only the fitted results object is used below, the metrics are unpacked for reference
results_rs, auc_train_rs, auc_test_rs, ks_train_rs, ks_test_rs, f1_train_rs,\
    f1_test_rs, pr_train_rs, pr_test_rs, rec_train_rs, rec_test_rs\
    = fun.model_optimization(Y_train, Y_test, X_train, X_test, silent = True)
print(results_rs.summary())

# Predicted probabilities on the test set, split by the true class
Y_test_pred = results_rs.predict(X_test)
ks_samples = pd.DataFrame({'Y': Y_test, 'Y_pred': Y_test_pred})
ks_samples_posi = ks_samples[ks_samples['Y'] == 1]['Y_pred']
ks_samples_nega = ks_samples[ks_samples['Y'] == 0]['Y_pred']

# Overlaid histograms of the two predicted-probability samples
fig = go.Figure()
fig.add_trace(go.Histogram(x = ks_samples_posi, name = 'Posi', nbinsx = 30))
fig.add_trace(go.Histogram(x = ks_samples_nega, name = 'Nega', nbinsx = 100))
fig.update_layout(barmode = 'overlay',
                  title = 'Predicted probabilities on the test set by true class',
                  xaxis_title = 'Predicted probability',
                  yaxis_title = 'Count')
fig.update_traces(opacity = 0.75)
fig.show()

                          Probit Regression Results                           
Dep. Variable:                   Flag   No. Observations:                30024
Model:                         Probit   Df Residuals:                    30004
Method:                           MLE   Df Model:                           19
Date:                Wed, 07 Aug 2024   Pseudo R-squ.:                  0.1584
Time:                        14:16:27   Log-Likelihood:                -8320.4
converged:                       True   LL-Null:                       -9886.2
Covariance Type:            nonrobust   LLR p-value:                     0.000
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  -1.7121      0.074    -23.170      0.000      -1.857      -1.567
Lyapunov                3.0276      0.860      3.520      0.000       1.342       4.714
Skewness        

##### Modelling with separate variables

In [3]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
target = 'Flag'
horizons = list(range(1, 6))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 500))

# Iterate over the chosen parameters and optimize one classification model per
# variable (separate = True), then save all the results to the dataframe
res_sep = fun.model(data_logdyn, target, horizons, shares, states, separate = True)

# Save results and create pivot for the separate-variable models.
# Fix: pass the `res_sep` computed above; the cell used to pass `res`, i.e.
# whatever result set an earlier cell happened to leave in the kernel.
res_means = fun.save_results(res_sep, [], sep = True)

# Show only the (variable, horizon) combinations with a test AUC of at least 0.75
res_means[res_means['Test AUC'] >= 0.75].sort_values('Test AUC', ascending = False)

100%|██████████| 5/5 [16:58<00:00, 203.76s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Const,Const_Pvalue,Coef,Coef_Pvalue
Variable,Horizon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Skewness_8_Variance,1,4648.565789,1162.447368,0.940487,0.941184,0.0,0.0,0.703316,0.711487,0.612789,0.625868,0.850697,0.862592,-1.51277,3.296619e-217,0.652842,5.436307e-46
Kurtosis_8_Variance,1,4709.863014,1177.767123,0.936753,0.937918,0.0,0.0,0.666247,0.679959,0.561397,0.583945,0.850932,0.859603,-1.466881,2.058904e-204,0.001631,2.3831820000000003e-43
ACF_1_8_Variance,1,4552.75,1138.5,0.930875,0.931375,0.0,0.0,0.7542,0.738487,0.706313,0.673013,0.8255,0.845675,-1.660177,1.426042e-232,793.14515,4.93482e-73
Skewness_8_Variance,2,9501.5,2375.771429,0.895057,0.896329,0.0,0.0,0.635514,0.614543,0.568829,0.528614,0.7491,0.773514,-1.462848,0.0,0.813546,1.3489950000000002e-84
Kurtosis_8_Variance,2,9918.949153,2480.152542,0.890254,0.890881,0.0,0.0,0.577915,0.598424,0.486559,0.514203,0.759136,0.758661,-1.43851,0.0,0.001947,5.874377e-74
ACF_1_8_Variance,2,9076.5,2269.5,0.882225,0.882237,0.0,0.0,0.649175,0.639925,0.6003,0.584213,0.73,0.747825,-1.558918,0.0,860.292515,4.765923e-118
Skewness_8_Variance,3,14732.015385,3683.446154,0.860585,0.857877,0.0,0.0,0.526231,0.545954,0.439615,0.474785,0.704538,0.697769,-1.429076,0.0,0.765947,1.926762e-104
Kurtosis_8_Variance,3,16676.297872,4169.553191,0.853809,0.850979,0.0,0.0,0.479596,0.488,0.382043,0.393234,0.70183,0.703,-1.460052,0.0,0.001748,3.6093349999999997e-90
ACF_1_8_Variance,3,13572.25,3393.5,0.842025,0.839112,0.0,0.0,0.5547,0.565813,0.494675,0.504663,0.676063,0.683462,-1.477161,0.0,802.732695,1.2669080000000002e-153
Skewness_8_Variance,4,19435.953125,4859.3125,0.820844,0.819078,0.0,0.0,0.469031,0.484,0.393828,0.418391,0.630016,0.624328,-1.367296,0.0,0.64628,1.643859e-108


In [12]:
# Getting highest AUC values for variables: average the test AUC per
# (variable, horizon) and keep the best horizon for every variable.
# Fix: the grouping keys are spelled out here instead of relying on a global
# `groups` that other cells overwrite with different columns, so the cell no
# longer depends on execution order.
res_sep = pd.read_parquet('Data/params_sep.parquet')
(res_sep.groupby(['Variable', 'Horizon'])['Test AUC'].mean()
        .groupby(level = 'Variable').max()
        .sort_values(ascending = False)
        .round(2))

Variable
Skewness_8_Variance    0.94
Kurtosis_8_Variance    0.94
ACF_1_8_Variance       0.93
Skewness               0.81
Skewness_8_dyn         0.81
Kurtosis               0.80
Kurtosis_8_dyn         0.77
Lyapunov_8_Variance    0.76
Lyapunov               0.74
CorrDim                0.64
CorrDim_8_Variance     0.58
PSD_8_dyn              0.57
WL_C1_8_dyn            0.57
ACF_1                  0.55
WL_C1                  0.54
PSD_8_Variance         0.54
WL_C2                  0.53
WL_C2_8_Variance       0.53
PSD                    0.52
ACF_1_8_dyn            0.52
Hurst_8_Variance       0.51
WL_C1_8_Variance       0.51
WL_C3                  0.51
Hurst_8_dyn            0.51
WL_C3_8_dyn            0.51
WL_C2_8_dyn            0.51
Hurst                  0.50
WL_C3_8_Variance       0.50
CorrDim_8_dyn          0.50
Lyapunov_8_dyn         0.49
Name: Test AUC, dtype: float64

#### Random forest modelling

In [2]:
# Load the final feature dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Model type, binary target name and the parameter grid: horizons 1..12, four
# positive-class shares between 5% and 20%, and 200 random states (0, 50, ..., 9950)
model = 'RF'
target = 'Flag'
horizons = [*range(1, 13)]
shares = np.linspace(0.05, 0.2, 4)
states = [*range(0, 10000, 50)]

# Iterate over the grid and optimize the classification models; all the
# results are collected in a single dataframe (the recorded run took ~16 hours)
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Every column that is not descriptive metadata is a model feature
meta_cols = ['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker']
coeffs_cols = data_logdyn.columns.drop(meta_cols)

# Save results and create pivot based on the horizon and 1 share parameters
res_means = fun.save_results(res, [*coeffs_cols], model = model)
res_means

100%|██████████| 12/12 [16:24:13<00:00, 4921.12s/it]  


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.97123,0.95024,0.0,0.0,0.650075,0.616825,0.518185,0.4877,0.88828,0.858865,-1.73913e-05,3.844394e-05,0.000298,0.000538,0.000345,7e-06,3.9e-05,-6.864989e-07,4.4e-05,-1.5e-05,4.805492e-06,-2.7e-05,2.3e-05,1.6e-05,4.23341e-05,2.6e-05,0.000651,0.000853,8.9e-05,0.000912,1.4e-05,-1e-05,0.000284,0.001514,1.7e-05,-3e-06,2.494279e-05,-1.487414e-05,-3e-06,6.3e-05
1,0.1,0.100293,4371.0,1093.0,0.97436,0.950715,0.0,0.0,0.776845,0.741195,0.693145,0.656615,0.89363,0.862015,-1.143641e-05,-4.574565e-07,0.000463,0.000924,0.000393,5.8e-05,9.1e-05,0.0001120769,5e-05,7.3e-05,-2.698994e-05,-5.3e-05,0.0001,-1e-05,0.0001061299,0.000333,0.001106,0.00169,0.00022,0.001906,5.2e-05,-3.8e-05,0.00137,0.004398,0.000126,-4.2e-05,-1.829826e-06,5.215005e-05,1.1e-05,0.000119
1,0.15,0.150425,2914.0,729.0,0.976155,0.9525,0.0,0.0,0.821585,0.8069,0.75824,0.761115,0.90431,0.86464,-7.133059e-05,-3.703704e-05,0.00025,0.000486,0.000147,-6.9e-05,-4e-05,-1.303155e-05,-9e-05,-3.5e-05,-0.0001097394,-8.4e-05,1.5e-05,-6.1e-05,-6.241427e-05,0.000434,0.001481,0.00151,0.000173,0.001336,3.4e-05,-0.000112,0.002153,0.006383,8.6e-05,-0.00013,-5.829904e-05,-6.378601e-05,-1.6e-05,-4.7e-05
1,0.2,0.200512,2186.0,547.0,0.97821,0.951085,0.0,0.0,0.855235,0.836185,0.807225,0.813295,0.913325,0.86501,-1.1883e-05,-1.371115e-05,0.000159,0.000591,0.0004,-5.8e-05,6.6e-05,5.210238e-05,-7.2e-05,0.000102,-1.371115e-05,-3.7e-05,0.000113,-0.000134,7.86106e-05,0.000347,0.001378,0.001277,0.000803,0.000682,8.2e-05,2.6e-05,0.002122,0.007578,9e-05,-8e-06,6.124314e-05,-5.850091e-05,0.000135,7.2e-05
2,0.05,0.050328,17421.0,4356.0,0.934575,0.915465,0.0,0.0,0.472875,0.490255,0.33808,0.36422,0.826325,0.79675,-9.641873e-06,5.096419e-05,0.000275,0.000594,0.000299,1.5e-05,5.8e-05,2.215335e-05,1e-05,-1.5e-05,5.165289e-06,4.1e-05,6.8e-05,3.1e-05,3.604224e-05,4.3e-05,0.000805,0.000869,0.000357,0.000978,4.8e-05,6e-06,0.000348,0.001336,-2e-06,9e-06,1.262626e-06,9.182736e-07,3.1e-05,8e-05
2,0.1,0.100615,8714.0,2179.0,0.938615,0.91659,0.0,0.0,0.64031,0.634235,0.52862,0.540395,0.824935,0.796775,2.937127e-05,6.14961e-05,0.000318,0.000755,0.000367,8.9e-05,6.5e-05,4.107389e-05,3.9e-05,4.7e-05,6.883892e-06,2.1e-05,4.3e-05,3.2e-05,8.604865e-05,0.000144,0.001275,0.001355,0.000553,0.001594,0.000138,1.2e-05,0.000895,0.002458,0.000119,4.1e-05,2.753557e-06,4.543369e-05,1.6e-05,0.000153
2,0.15,0.150881,5811.0,1453.0,0.94052,0.919115,0.0,0.0,0.71432,0.70434,0.628585,0.63739,0.832795,0.80633,-2.167928e-05,1.995871e-05,0.000444,0.00082,0.000376,4.7e-05,0.00012,6.331727e-05,6e-05,3.5e-05,1.823813e-05,1e-05,5.7e-05,3.9e-05,4.576738e-05,0.000385,0.001857,0.001807,0.001284,0.002055,0.000199,2.2e-05,0.00226,0.004044,4.7e-05,5e-05,3.441156e-05,5.609085e-05,5.4e-05,0.000124
2,0.2,0.201101,4360.0,1090.0,0.94299,0.91741,0.0,0.0,0.76497,0.746605,0.704445,0.709125,0.840105,0.80435,3.807339e-05,1.100917e-05,0.000345,0.00063,0.000268,7.9e-05,0.000162,6.376147e-05,0.000131,9.8e-05,1.238532e-05,2.7e-05,0.000114,2e-05,4.724771e-05,0.000285,0.001356,0.001653,0.001256,0.001635,0.00018,5.6e-05,0.002114,0.005139,0.000154,6e-06,1.192661e-05,3.944954e-05,0.000102,0.000146
3,0.05,0.050495,26046.0,6512.0,0.90163,0.8835,0.0,0.0,0.35706,0.360995,0.22996,0.239055,0.805785,0.78163,1.297604e-05,5.873771e-05,0.000334,0.00065,0.000382,5.1e-05,5.9e-05,4.990786e-05,3.6e-05,2.7e-05,7.52457e-06,3.3e-05,8.9e-05,4.3e-05,5.328624e-05,6.7e-05,0.000867,0.000802,0.000355,0.000914,5.5e-05,2.8e-05,0.000298,0.001074,7.5e-05,1.8e-05,1.251536e-05,3.754607e-05,3.4e-05,0.000139
3,0.1,0.100933,13030.0,3258.0,0.905925,0.88665,0.0,0.0,0.543815,0.53381,0.415615,0.412955,0.79266,0.77548,-3.83671e-06,2.869859e-05,0.00037,0.000634,0.000296,3.4e-05,5.7e-05,6.629834e-05,3.8e-05,2.2e-05,1.841621e-06,1.6e-05,8.3e-05,2.1e-05,8.670964e-05,0.000158,0.001926,0.001611,0.00093,0.001649,9.7e-05,2.3e-05,0.000775,0.002273,0.000122,1.3e-05,1.058932e-05,5.709024e-05,1.2e-05,0.000172


In [19]:
# Average feature importance: load the stored results of the current model and
# keep the runs for horizons 1-9
res = pd.read_parquet(f'Params/params_{model}.parquet')
viable_mask = res['Horizon'].isin(range(1, 10))
res_viable = res[viable_mask]
groups = ['Horizon']

# Mean value per feature, multiplied by 100000 to make results more readable
means = (res_viable[coeffs_cols].mean() * 100000).round(2)

# Mean and median across the features, followed by the per-feature table
overall_mean, overall_median = means.mean(), means.median()
print(round(overall_mean, 2), round(overall_median, 2))
means

36.79 6.88


Hurst                    0.75
CorrDim                  2.28
Lyapunov                28.72
Skewness                58.21
Kurtosis                28.64
PSD                      7.98
ACF_1                    7.13
WL_C1                    6.62
WL_C2                    3.23
WL_C3                    4.36
Hurst_8_dyn              0.65
Hurst_8_Variance         2.38
CorrDim_8_dyn            4.72
CorrDim_8_Variance       0.85
Lyapunov_8_dyn           4.97
Lyapunov_8_Variance     18.06
Skewness_8_dyn         147.36
Skewness_8_Variance    132.64
Kurtosis_8_dyn          85.83
Kurtosis_8_Variance    123.03
PSD_8_dyn               14.87
PSD_8_Variance           4.12
ACF_1_8_dyn            116.87
ACF_1_8_Variance       256.66
WL_C1_8_dyn             12.80
WL_C1_8_Variance         3.30
WL_C2_8_dyn              1.23
WL_C2_8_Variance         5.60
WL_C3_8_dyn              2.61
WL_C3_8_Variance        17.19
dtype: float64

#### SVM modelling

In [4]:
# Load the final feature dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Model type, binary target name and the parameter grid: a single horizon (1),
# four positive-class shares between 5% and 20%, and 200 random states
model = 'SVM'
target = 'Flag'
horizons = [1]
shares = np.linspace(0.05, 0.2, 4)
states = [*range(0, 10000, 50)]

# Iterate over the grid and optimize the classification models; all the
# results are collected in a single dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Every column that is not descriptive metadata is a model feature
meta_cols = ['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker']
coeffs_cols = data_logdyn.columns.drop(meta_cols)

# Save results and create pivot based on the horizon and 1 share parameters
res_means = fun.save_results(res, [*coeffs_cols], model = model)
res_means

100%|██████████| 1/1 [1:45:53<00:00, 6353.89s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.56989,0.56531,2.2765e-08,0.013104,0.22232,0.221125,0.34747,0.341625,0.250755,0.27472,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.1,0.100293,4371.0,1093.0,0.5618,0.56075,1.1238e-07,0.014266,0.27198,0.278935,0.47771,0.476365,0.24239,0.264665,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.15,0.150425,2914.0,729.0,0.55749,0.553055,3.4804e-07,0.03026,0.293475,0.300085,0.531065,0.541495,0.24906,0.26631,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012715,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.2,0.200512,2186.0,547.0,0.55734,0.55189,9.26685e-07,0.028407,0.31488,0.322255,0.564635,0.586075,0.26012,0.27235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Average the per-feature values stored for the current `model`
res = pd.read_parquet(f'Params/params_{model}.parquet')

# range(1, 2) keeps only the rows with Horizon == 1
res_viable = res[res['Horizon'].isin(range(1, 2))]
groups = ['Horizon']

# Mean per feature column, scaled by 100000 so that the small values stay readable after rounding
means = (res_viable[coeffs_cols].mean() * 100000).round(2)

# Overall mean and median across the per-feature means
print(*(round(stat, 2) for stat in (means.mean(), means.median())))
means

36.16 0.0


Hurst                     0.00
CorrDim                   0.00
Lyapunov                  0.00
Skewness                  0.00
Kurtosis                  0.00
PSD                       0.00
ACF_1                     0.00
WL_C1                     0.00
WL_C2                     0.00
WL_C3                     0.00
Hurst_8_dyn               0.00
Hurst_8_Variance          0.00
CorrDim_8_dyn          1084.83
CorrDim_8_Variance        0.00
Lyapunov_8_dyn            0.00
Lyapunov_8_Variance       0.00
Skewness_8_dyn            0.00
Skewness_8_Variance       0.00
Kurtosis_8_dyn            0.00
Kurtosis_8_Variance       0.00
PSD_8_dyn                 0.00
PSD_8_Variance            0.00
ACF_1_8_dyn               0.00
ACF_1_8_Variance          0.00
WL_C1_8_dyn               0.00
WL_C1_8_Variance          0.00
WL_C2_8_dyn               0.00
WL_C2_8_Variance          0.00
WL_C3_8_dyn               0.00
WL_C3_8_Variance          0.00
dtype: float64

#### Gradient boosting modelling

##### LightGBM

In [13]:
# Load the feature table saved by the feature-generation cell
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Experiment grid: binary target column, horizons 1..15, four share values in [0.05, 0.2]
# and 200 state values (0, 50, ..., 9950) - presumably random seeds, confirm in functions.py
target = 'Flag'
model = 'LightGBM'
horizons = [*range(1, 16)]
shares = np.linspace(start = 0.05, stop = 0.2, num = 4)
states = [*range(0, 10000, 50)]

# Run the classification models over the whole grid and collect the results in one dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Every column except the ones dropped here is passed to save_results as a feature column;
# the returned frame is the pivot shown below
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model)
res_means

100%|██████████| 15/15 [1:24:18<00:00, 337.22s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.96503,0.957085,0.0,0.0,0.595425,0.597725,0.451945,0.465995,0.88272,0.870375,3.689461,55.565708,56.535701,359.110749,53.657348,8.165762,15.467618,11.174157,3.379298,6.727479,13.142833,3.997521,10.386102,7.467779,12.463744,11.195115,4098.026721,2563.714153,285.900751,369.556931,29.663953,16.900211,113.438889,3580.325939,24.062805,3.632483,8.567037,6.770136,4.616648,3.528314
1,0.1,0.100293,4371.0,1093.0,0.963585,0.956535,0.0,0.0,0.73885,0.725875,0.64121,0.631695,0.877725,0.87251,3.153985,24.880466,33.436975,260.903106,52.92095,5.761195,8.208002,6.376834,2.555287,2.565292,5.161843,2.24315,5.816305,7.086823,8.661142,9.42294,1138.467215,2488.40531,276.192257,498.019521,23.304669,13.475754,75.322039,4619.102459,16.002727,2.724192,5.1907,5.103548,3.048277,2.370007
1,0.15,0.150425,2914.0,729.0,0.964905,0.957765,0.0,0.0,0.800325,0.79551,0.736035,0.738075,0.880625,0.872495,3.70472,20.369801,24.615643,195.075933,44.809295,4.990406,9.11258,6.434717,2.142296,2.860293,3.633534,2.764123,5.749457,6.799284,7.038731,11.509117,456.384993,2272.018362,291.816641,338.369009,26.468381,14.50891,65.610191,4584.915519,20.447475,3.424834,5.375797,6.473474,3.604691,3.544315
1,0.2,0.200512,2186.0,547.0,0.96542,0.95553,0.0,0.0,0.838965,0.829545,0.803645,0.797945,0.88031,0.870655,3.51244,16.3156,16.101811,140.931781,40.246139,5.334229,7.822971,5.319965,1.70813,2.321003,1.986454,3.076994,4.805404,5.541843,5.087591,9.910014,345.566406,2114.003341,285.343773,333.894084,25.091474,11.896617,57.435062,4034.669029,14.734025,3.342497,4.631655,6.875956,3.115261,2.26768
2,0.05,0.050328,17421.0,4356.0,0.93972,0.922665,0.0,0.0,0.461675,0.472385,0.32015,0.341045,0.836425,0.81129,8.971417,76.903366,137.669843,679.823972,59.579326,45.356028,75.851023,19.535655,16.266865,13.224766,29.456556,36.194252,26.254349,51.558691,52.06518,20.516527,7812.353448,4391.811424,839.884024,429.726576,163.774491,53.079493,255.698398,4888.67308,66.191815,12.98099,24.630603,22.395939,20.723495,36.440328
2,0.1,0.100615,8714.0,2179.0,0.941555,0.92156,0.0,0.0,0.62934,0.62029,0.505615,0.512905,0.83743,0.81116,9.395422,59.732909,92.842852,502.900521,85.358318,39.214827,53.938324,13.969715,14.411012,13.225943,24.090676,35.55038,24.754168,54.763969,49.042275,21.553512,2774.788691,4289.750703,858.032575,1044.369579,161.914222,45.317059,190.803502,6444.967755,59.226787,11.130781,24.322491,20.781218,19.140925,41.398266
2,0.15,0.150881,5811.0,1453.0,0.942005,0.92328,0.0,0.0,0.7117,0.700895,0.619775,0.62585,0.83881,0.812245,10.035821,51.040578,65.532483,367.559772,93.591566,42.052062,43.908706,14.397668,15.77428,12.212533,20.871279,30.768442,20.022894,54.729531,46.38065,20.946061,1192.573195,3814.157843,867.64796,878.748944,160.330325,40.34678,156.375756,6759.804777,51.121227,10.492052,25.725327,19.067122,15.936863,42.152392
2,0.2,0.201101,4360.0,1090.0,0.94351,0.92139,0.0,0.0,0.764905,0.741145,0.703815,0.686955,0.83966,0.81423,10.729012,44.576006,57.826259,284.659387,76.263841,44.12509,40.416114,12.904507,16.40938,10.488502,20.544385,32.016937,19.877466,54.832801,42.869761,20.470509,906.380246,3734.080231,827.481293,639.578033,155.590505,41.207961,131.034945,5976.292466,49.974588,12.282957,26.721957,20.781833,15.978206,41.381309
3,0.05,0.050495,26046.0,6512.0,0.916855,0.893575,0.0,0.0,0.41493,0.390335,0.28205,0.264425,0.79134,0.77452,11.414969,89.72069,193.891654,921.904027,102.762938,117.485629,107.905653,55.201148,35.232698,23.313612,17.953025,55.347104,39.481823,143.73691,23.468996,110.297441,10161.261723,5786.248765,1336.324783,568.330948,361.113376,63.569644,316.59178,5483.358173,133.148298,30.320558,50.473402,59.677188,43.712843,133.311429
3,0.1,0.100933,13030.0,3258.0,0.917745,0.89517,0.0,0.0,0.575975,0.550735,0.45435,0.43246,0.79044,0.77114,11.705996,79.346675,140.490056,607.807942,109.135122,98.185715,78.64083,36.552626,34.152426,17.187876,12.770335,46.190573,29.565735,123.284524,20.785766,80.732655,5941.377532,5180.821079,1357.692494,955.599279,342.055073,50.457853,223.886612,6191.767671,118.912915,30.481529,46.177703,39.52838,44.734673,128.850717


In [23]:
# Average the per-feature values stored for the current `model`
res = pd.read_parquet(f'Params/params_{model}.parquet')

# Keep horizons 1..13 only.
# NOTE(review): the LightGBM run above covers horizons 1..15 and the XGBoost / CatBoost
# summaries use range(1, 16) - confirm that excluding horizons 14 and 15 here is intentional
res_viable = res[res['Horizon'].isin(range(1, 14))]
groups = ['Horizon']

# Mean per feature column, rounded to two decimals
means = res_viable[coeffs_cols].mean().round(2)

# Overall mean and median across the per-feature means
print(*(round(stat, 2) for stat in (means.mean(), means.median())))
means

794.44 167.46


Hurst                    22.86
CorrDim                  46.15
Lyapunov                224.01
Skewness                782.72
Kurtosis                149.56
PSD                     382.55
ACF_1                   185.35
WL_C1                    47.60
WL_C2                   188.48
WL_C3                    64.75
Hurst_8_dyn               7.09
Hurst_8_Variance         55.64
CorrDim_8_dyn            16.61
CorrDim_8_Variance      469.18
Lyapunov_8_dyn           11.48
Lyapunov_8_Variance     136.60
Skewness_8_dyn         6881.82
Skewness_8_Variance    4016.62
Kurtosis_8_dyn         1497.51
Kurtosis_8_Variance     547.93
PSD_8_dyn              1152.50
PSD_8_Variance          147.14
ACF_1_8_dyn             404.47
ACF_1_8_Variance       5466.74
WL_C1_8_dyn             412.06
WL_C1_8_Variance         89.66
WL_C2_8_dyn              24.32
WL_C2_8_Variance        119.24
WL_C3_8_dyn              16.34
WL_C3_8_Variance        266.21
dtype: float64

##### XGBoost

In [24]:
# Load the feature table saved by the feature-generation cell
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Experiment grid: binary target column, horizons 1..15, four share values in [0.05, 0.2]
# and 200 state values (0, 50, ..., 9950) - presumably random seeds, confirm in functions.py
target = 'Flag'
model = 'XGBoost'
horizons = [*range(1, 16)]
shares = np.linspace(start = 0.05, stop = 0.2, num = 4)
states = [*range(0, 10000, 50)]

# Run the classification models over the whole grid and collect the results in one dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Every column except the ones dropped here is passed to save_results as a feature column;
# the returned frame is the pivot shown below
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model)
res_means

100%|██████████| 15/15 [1:52:28<00:00, 449.92s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.965655,0.957605,0.0,0.0,0.598585,0.594765,0.45509,0.462275,0.887175,0.8726,3.353556,5.817527,5.267846,14.672528,6.727177,3.452366,4.098412,3.543328,3.582201,3.154433,3.306322,3.21222,3.602224,3.611179,4.010669,3.841106,103.233725,56.00796,8.382118,37.968978,5.217821,4.459786,7.916582,100.984789,4.502708,3.232231,3.214632,3.702088,3.405642,3.412679
1,0.1,0.100293,4371.0,1093.0,0.9664,0.956785,0.0,0.0,0.7439,0.71763,0.64318,0.617005,0.88728,0.877845,3.12083,4.858677,4.534715,11.94806,7.195119,3.192029,3.525644,3.115566,3.276862,2.789397,2.991917,2.93631,3.324789,3.271208,3.74328,3.566305,39.487573,66.00997,8.6389,44.27139,4.695281,4.096785,6.332323,134.348912,3.977993,2.957415,3.002004,3.505512,3.009674,2.98131
1,0.15,0.150425,2914.0,729.0,0.966845,0.958055,0.0,0.0,0.80733,0.794445,0.74369,0.736625,0.886605,0.873485,2.991208,4.335059,3.919076,9.966953,6.620854,3.030116,3.394166,2.893196,2.978255,2.763973,2.867471,2.814949,3.11528,3.026329,3.419793,3.217707,14.041361,61.086724,8.828114,30.639377,4.41285,3.736884,5.348838,148.240901,3.690595,2.765258,2.822377,3.432025,2.845269,2.826853
1,0.2,0.200512,2186.0,547.0,0.96641,0.955255,0.0,0.0,0.841035,0.82761,0.80176,0.793625,0.88721,0.872185,2.799003,3.831761,3.458541,8.081617,5.621507,2.885729,3.056807,2.66889,2.812951,2.468968,2.73912,2.704747,2.956383,2.902647,3.090135,3.014597,10.243525,60.722739,8.930286,20.798109,4.243931,3.613091,4.827606,130.550659,3.423627,2.590654,2.658554,3.252127,2.637597,2.592656
2,0.05,0.050328,17421.0,4356.0,0.954865,0.92577,0.0,0.0,0.508515,0.47018,0.3622,0.336885,0.863985,0.816435,4.128213,6.619333,9.293149,21.347727,7.627507,5.447912,7.145346,5.278411,4.545286,5.005584,4.694712,4.433541,4.937935,6.43296,5.854332,6.169264,165.34848,93.234996,16.982584,63.55851,8.634895,6.047501,11.676594,132.627102,6.897936,5.055456,4.473557,5.608345,4.341151,5.214837
2,0.1,0.100615,8714.0,2179.0,0.956005,0.9243,0.0,0.0,0.6738,0.624,0.554255,0.516125,0.863275,0.813135,3.873481,5.489085,7.522375,17.400371,7.637137,5.088829,6.044259,4.830464,4.006038,4.58496,4.486916,4.210081,4.540614,6.042526,5.515586,5.667831,81.723186,118.816807,18.32293,61.437231,8.186795,5.409755,8.655232,196.407294,6.171045,4.469389,4.203335,5.285492,4.170033,5.001377
2,0.15,0.150881,5811.0,1453.0,0.95692,0.92569,0.0,0.0,0.748075,0.7034,0.65906,0.626995,0.867945,0.813305,3.70116,4.956338,6.013432,14.088312,8.856224,4.864938,5.504215,4.676879,4.01477,4.331431,4.314524,4.083422,4.256781,5.951095,5.153853,5.308746,27.302983,101.707975,19.587341,58.173197,7.924942,4.999841,7.556967,258.476866,5.758051,4.295136,4.014221,4.908197,4.028721,4.838445
2,0.2,0.201101,4360.0,1090.0,0.959145,0.923405,0.0,0.0,0.801135,0.74911,0.744285,0.703925,0.86912,0.809035,3.835669,4.597776,5.402836,11.911917,7.173551,4.76168,5.109103,4.366299,3.870797,4.285112,4.151618,3.93224,4.094188,5.664981,5.092432,5.007958,19.295038,112.729655,19.542386,40.311424,7.719307,4.837133,6.600387,228.090545,5.333616,4.19183,3.832928,4.859101,3.941761,4.485078
3,0.05,0.050495,26046.0,6512.0,0.941325,0.90215,0.0,0.0,0.44142,0.405725,0.30011,0.27835,0.842545,0.780465,5.673102,7.57561,13.017172,29.034658,9.54401,8.06747,9.072469,6.720861,5.348208,5.959819,5.767725,6.567055,6.171644,9.132984,6.799817,8.363147,189.623482,119.598874,26.244856,72.949104,12.617104,7.697622,14.04677,159.494724,9.887264,6.885912,5.73394,7.109133,5.482285,8.614897
3,0.1,0.100933,13030.0,3258.0,0.94339,0.902135,0.0,0.0,0.61296,0.564585,0.48313,0.44829,0.84349,0.77885,5.222284,6.183639,10.117351,19.625281,8.788931,7.288457,7.864241,5.749861,4.969696,5.626064,5.295969,6.211219,5.688711,8.640113,6.388505,7.356998,160.793368,152.551567,27.929645,37.758966,11.846069,6.798458,10.255562,136.0796,8.431082,6.213651,5.324501,6.711506,5.092793,7.88327


In [25]:
# Average the per-feature values stored for the current `model`
res = pd.read_parquet(f'Params/params_{model}.parquet')

# Keep horizons 1..15, i.e. every horizon of the run above
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Mean per feature column, rounded to two decimals
means = res_viable[coeffs_cols].mean().round(2)

# Overall mean and median across the per-feature means
print(*(round(stat, 2) for stat in (means.mean(), means.median())))
means

33.41 16.29


Hurst                   12.82
CorrDim                 10.80
Lyapunov                18.29
Skewness                25.77
Kurtosis                17.52
PSD                     16.46
ACF_1                   17.85
WL_C1                   12.15
WL_C2                   12.48
WL_C3                   11.69
Hurst_8_dyn              7.25
Hurst_8_Variance        14.35
CorrDim_8_dyn            8.56
CorrDim_8_Variance      20.57
Lyapunov_8_dyn           7.99
Lyapunov_8_Variance     14.26
Skewness_8_dyn         184.81
Skewness_8_Variance    112.35
Kurtosis_8_dyn          43.51
Kurtosis_8_Variance     34.67
PSD_8_dyn               29.34
PSD_8_Variance          16.12
ACF_1_8_dyn             22.65
ACF_1_8_Variance       245.05
WL_C1_8_dyn             23.22
WL_C1_8_Variance        12.18
WL_C2_8_dyn              8.53
WL_C2_8_Variance        15.98
WL_C3_8_dyn              6.31
WL_C3_8_Variance        18.89
dtype: float64

##### CatBoost

In [26]:
# Load the feature table saved by the feature-generation cell
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Experiment grid: binary target column, horizons 1..15, four share values in [0.05, 0.2]
# and 200 state values (0, 50, ..., 9950) - presumably random seeds, confirm in functions.py
target = 'Flag'
model = 'CatBoost'
horizons = [*range(1, 16)]
shares = np.linspace(start = 0.05, stop = 0.2, num = 4)
states = [*range(0, 10000, 50)]

# Run the classification models over the whole grid and collect the results in one dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model)

# Every column except the ones dropped here is passed to save_results as a feature column;
# the returned frame is the pivot shown below
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model)
res_means

100%|██████████| 15/15 [18:29:39<00:00, 4438.65s/it]  


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
1,0.05,0.05016,8740.0,2185.0,0.962275,0.954885,0.0,0.0,0.62508,0.618865,0.488985,0.49143,0.87473,0.86684,0.096798,0.881951,0.957115,2.003388,0.949791,0.263823,0.267861,0.546535,0.369724,0.235135,0.265869,0.131981,0.470035,0.339937,0.487279,0.462914,20.312529,15.504169,12.62139,12.23422,0.904363,0.541771,3.878795,22.769514,0.8399,0.13679,0.237304,0.258515,0.109724,0.920882
1,0.1,0.100293,4371.0,1093.0,0.961345,0.955195,0.0,0.0,0.74829,0.7325,0.656175,0.640395,0.875185,0.86943,0.080173,0.647821,0.643526,1.285855,0.942232,0.218338,0.159896,0.294357,0.167231,0.139847,0.198446,0.113901,0.334871,0.287548,0.368719,0.383785,18.884239,19.479604,9.348431,12.38492,0.681738,0.410254,2.508247,28.253749,0.564677,0.118309,0.185452,0.195058,0.093776,0.624999
1,0.15,0.150425,2914.0,729.0,0.960635,0.956435,0.0,0.0,0.80171,0.801385,0.741905,0.749695,0.87407,0.869235,0.076014,0.489629,0.534333,0.895407,0.707544,0.159444,0.130071,0.212389,0.125517,0.114015,0.161851,0.075463,0.216396,0.270516,0.25836,0.319375,17.857282,20.666866,7.478939,13.80314,0.575269,0.317324,1.778313,31.437643,0.450109,0.087205,0.110739,0.200922,0.063062,0.426863
1,0.2,0.200512,2186.0,547.0,0.96028,0.95466,0.0,0.0,0.835185,0.83018,0.80312,0.7989,0.87136,0.86952,0.083295,0.396591,0.41413,0.695743,0.578709,0.124187,0.10621,0.152384,0.086986,0.079077,0.117948,0.059692,0.135215,0.225788,0.180161,0.257007,17.184397,21.602789,7.541711,14.573776,0.438739,0.242078,1.238339,32.417658,0.338773,0.07744,0.108409,0.171884,0.054437,0.316451
2,0.05,0.050328,17421.0,4356.0,0.94373,0.923665,0.0,0.0,0.51197,0.486755,0.371975,0.355795,0.838715,0.80892,0.337652,0.918391,1.635978,2.396628,1.471022,1.43445,1.424869,0.612829,0.674649,0.625448,0.58488,0.820463,0.537673,0.893444,1.110767,0.994145,15.712108,13.406294,15.437935,8.049631,3.475746,1.165967,3.556131,17.522522,0.962441,0.493764,0.970036,0.790634,0.4689,1.514603
2,0.1,0.100615,8714.0,2179.0,0.9378,0.92106,0.0,0.0,0.65,0.618695,0.538015,0.50894,0.831715,0.811195,0.196357,0.634532,1.119274,1.629863,0.960527,0.835939,0.806178,0.354008,0.415682,0.344352,0.354625,0.555045,0.34436,0.689695,0.756637,0.662728,19.364734,16.982601,13.063937,9.238137,2.305088,0.784379,2.308862,21.738729,0.659259,0.380184,0.570575,0.580009,0.324907,1.038796
2,0.15,0.150881,5811.0,1453.0,0.9406,0.923815,0.0,0.0,0.72835,0.70154,0.648945,0.62533,0.835155,0.81422,0.249985,0.658381,0.977363,1.367889,0.913643,0.908803,0.748655,0.383307,0.470587,0.365017,0.392997,0.507853,0.342505,0.74727,0.810377,0.640379,17.630693,15.054065,12.319657,11.68058,2.277467,0.768769,1.848104,24.353955,0.67445,0.377538,0.598285,0.538848,0.345421,1.047156
2,0.2,0.201101,4360.0,1090.0,0.941345,0.92196,0.0,0.0,0.77246,0.74629,0.717145,0.699925,0.840345,0.809595,0.237086,0.626833,0.933737,1.194137,0.813812,0.777515,0.658896,0.319264,0.446238,0.300029,0.366693,0.485902,0.278041,0.716615,0.757573,0.586801,18.44608,17.165888,10.618366,12.007079,2.061081,0.730436,1.576682,24.629495,0.61811,0.315457,0.523072,0.594476,0.313296,0.901311
3,0.05,0.050495,26046.0,6512.0,0.938665,0.902265,0.0,0.0,0.46837,0.416385,0.329275,0.28881,0.82491,0.776475,0.483948,1.008304,2.475068,2.539241,1.556176,3.324321,2.007795,1.198234,1.277006,1.171881,0.54149,1.368965,0.721091,1.787594,0.768233,2.796031,13.74325,8.917151,13.797296,5.675455,5.527603,1.812075,3.54814,11.676834,1.351916,1.862292,1.360587,2.100154,1.003227,2.59864
3,0.1,0.100933,13030.0,3258.0,0.94327,0.904335,0.0,0.0,0.630105,0.574015,0.506885,0.46134,0.837365,0.776505,0.557191,0.989632,2.208559,2.167004,1.456376,2.965363,1.727128,1.245821,1.299305,1.203849,0.575851,1.291435,0.723327,1.669427,0.855869,2.600236,15.057678,11.205801,11.968239,5.763735,4.836986,1.506418,2.775847,13.571312,1.367276,1.730259,1.325511,1.876064,1.090013,2.38849


In [27]:
# Average the per-feature values stored for the current `model`
res = pd.read_parquet(f'Params/params_{model}.parquet')

# Keep horizons 1..15, i.e. every horizon of the run above
res_viable = res[res['Horizon'].isin(range(1, 16))]
groups = ['Horizon']

# Mean per feature column, rounded to two decimals
means = res_viable[coeffs_cols].mean().round(2)

# Overall mean and median across the per-feature means
print(*(round(stat, 2) for stat in (means.mean(), means.median())))
means

3.33 2.65


Hurst                   0.55
CorrDim                 1.15
Lyapunov                1.87
Skewness                3.92
Kurtosis                3.09
PSD                     6.00
ACF_1                   3.08
WL_C1                   2.24
WL_C2                   2.49
WL_C3                   1.61
Hurst_8_dyn             0.33
Hurst_8_Variance        1.08
CorrDim_8_dyn           0.35
CorrDim_8_Variance      3.11
Lyapunov_8_dyn          0.49
Lyapunov_8_Variance     2.81
Skewness_8_dyn         10.89
Skewness_8_Variance     7.92
Kurtosis_8_dyn          7.93
Kurtosis_8_Variance     5.02
PSD_8_dyn               7.25
PSD_8_Variance          2.40
ACF_1_8_dyn             2.28
ACF_1_8_Variance        9.46
WL_C1_8_dyn             1.82
WL_C1_8_Variance        2.82
WL_C2_8_dyn             0.58
WL_C2_8_Variance        2.97
WL_C3_8_dyn             0.55
WL_C3_8_Variance        3.92
dtype: float64

### Solar data

#### Feature generation

In [3]:
# Read the solar metrics file.
# This cell writes its result back to the SAME path, so after the first run the file
# already holds the generated features and no longer has the raw 'Variance' column.
data = pd.read_parquet('Data/solar_metrics.parquet')

# Columns for feature generation
cols = ['Hurst', 'CorrDim', 'Lyapunov', 
        'Skewness', 'Kurtosis', 'PSD', 'ACF_1',
        'WL_C1', 'WL_C2', 'WL_C3']

# Set lag for dynamics and short variance calculation
lag_model = [8]

if 'Variance' in data.columns:
    # Raw file (first run): drop 'Variance', generate lagged features and persist them
    data = data.drop(columns = 'Variance')
    data_logdyn = fun.generate_features(data, cols, lag_model)
    data_logdyn.to_parquet('Data/solar_metrics.parquet', index = False)
else:
    # The file was already processed by an earlier run of this cell. Previously a re-run
    # failed with KeyError on the 'Variance' drop; now the stored features are reused.
    # NOTE: after changing `cols` or `lag_model`, restore the raw file before re-running,
    # otherwise the stored features will not reflect the new settings.
    data_logdyn = data
data_logdyn

100%|██████████| 948/948 [00:38<00:00, 24.67it/s]


Unnamed: 0,Volume,MA100,MV100,Rise,Distance,Index,Ticker,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
0,5.410,4.97719,0.224913,False,291,2303,Magnitude,0.851766,1.644137,-0.001304,-0.614560,-0.336438,-2.052302,0.976440,0.587527,0.017656,0.090164,0.018559,0.000029,0.059292,0.021189,-2.908052,9.420687e-07,0.184114,0.000767,0.106232,0.000074,0.015342,0.000087,0.000456,7.821216e-08,-0.117862,0.000596,4.695176,0.000973,2.290695,0.001009
1,5.385,4.97546,0.223182,False,290,2303,Magnitude,0.859157,1.635183,-0.001544,-0.620982,-0.333604,-2.055082,0.976154,0.624887,0.042677,0.016826,0.022767,0.000046,0.061415,0.021712,-3.574361,5.973987e-07,0.159794,0.000605,0.057905,0.000018,0.014463,0.000077,0.000380,5.565650e-08,-0.044848,0.000530,-13.463130,0.001520,-2.119461,0.000744
2,5.366,4.97377,0.221557,False,289,2303,Magnitude,0.862140,1.120645,-0.000885,-0.628262,-0.331614,-2.057624,0.975260,0.674733,0.015405,0.018545,0.017479,0.000071,-0.283142,0.047615,-0.402899,5.885533e-07,0.141200,0.000479,0.013820,0.000009,0.013536,0.000068,-0.000458,1.443230e-07,0.018836,0.000665,-1.297177,0.001453,0.457613,0.000716
3,5.199,4.96795,0.215454,False,288,2303,Magnitude,0.875565,1.258753,-0.001421,-0.639840,-0.336115,-2.059943,0.974816,0.643045,0.023586,0.011339,0.034595,0.000150,-0.033824,0.050581,11.726272,4.352876e-07,0.132573,0.000433,0.000117,0.000009,0.012594,0.000058,-0.001259,3.696638e-07,0.013931,0.000682,-1.332033,0.000937,0.165840,0.000705
4,5.027,4.96313,0.212508,False,287,2303,Magnitude,0.883757,1.680480,-0.002242,-0.652057,-0.344222,-2.062051,0.974928,0.608329,-0.006473,0.055882,0.055395,0.000207,0.130212,0.053788,-13.800612,1.759245e-07,0.128490,0.000455,0.014775,0.000016,0.011662,0.000049,-0.001408,4.852196e-07,-0.019953,0.000743,-0.883015,0.000445,1.429574,0.000746
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285333,5.175,3.66068,0.918408,False,5,3402798,Magnitude,0.783584,2.092003,0.000375,0.376564,-1.007110,-1.920372,0.984924,0.508872,-0.167006,-0.132325,0.065357,0.000152,-0.002467,0.000005,-1.289260,5.064098e-06,0.091839,0.000319,-0.077096,0.002166,0.008893,0.000035,0.006131,1.298356e-05,-0.111575,0.000541,0.086490,0.000208,-0.009051,0.001311
285334,7.330,3.69947,1.052443,False,4,3402798,Magnitude,0.801417,2.090483,0.002550,0.393337,-0.962545,-1.923978,0.973708,0.440128,-0.177023,-0.042865,0.076616,0.000092,-0.003113,0.000006,-3.300071,4.236649e-06,0.154792,0.000347,-0.126186,0.002341,0.010509,0.000046,-0.003500,1.631782e-05,-0.167199,0.001736,0.009986,0.000213,0.147831,0.001264
285335,7.294,3.73760,1.181004,False,3,3402798,Magnitude,0.796349,2.085917,0.002208,0.409357,-0.924550,-1.927755,0.973520,0.413900,-0.148128,-0.045837,0.035069,0.000100,-0.005544,0.000011,-4.529561,3.324464e-06,0.210804,0.000365,-0.165194,0.002357,0.011949,0.000057,-0.004041,1.956156e-05,-0.252101,0.003081,-0.136286,0.000253,0.365866,0.001155
285336,7.201,3.77454,1.300251,False,2,3402798,Magnitude,0.792783,2.082975,0.002027,0.424982,-0.887193,-1.931680,0.974622,0.427213,-0.141484,-0.063350,0.025298,0.000091,-0.006671,0.000018,-6.646551,2.363087e-06,0.231863,0.000426,-0.185280,0.002599,0.013196,0.000067,-0.001945,2.088144e-05,-0.219705,0.003555,-0.055002,0.000296,0.042400,0.001148


#### Data and params

In [2]:
# Fixed bootstrap settings used by the solar-data modelling cells
path = 'Params_solar'
target = 'Flag'
horizons = [*range(4, 16)]
shares = np.linspace(start = 0.05, stop = 0.1, num = 3)
states = [*range(0, 10000, 50)]
# Quick smoke-run alternative: horizons = [15], shares = [0.05], states = [0]

# Load the solar feature table
data_logdyn = pd.read_parquet('Data/solar_metrics.parquet')

##### LightGBM

In [24]:
# Model for this run; target, horizons, shares, states and path come from the "Data and params" cell
model = 'LightGBM'

# Run the classification models over the whole grid with 'Precision' as the target metric
# and collect the results in one dataframe
res = fun.model(data_logdyn, target, horizons, shares, states, model = model, target_metric = 'Precision')

# Every column except the ones dropped here is passed to save_results as a feature column;
# results go under `path` and the returned frame is the pivot shown below
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model, path = path)
res_means

100%|██████████| 12/12 [1:56:37<00:00, 583.16s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
4,0.05,0.050661,59880.0,14970.0,0.901475,0.87525,0.0,0.0,0.3403,0.32504,0.2168,0.20849,0.79558,0.75364,291.416415,957.386121,490.462348,5228.135541,1109.277726,1227.806919,1209.562278,116.50326,143.395422,303.605031,105.740811,146.236507,1496.765213,506.883066,42.01979,276.865558,1599.862428,4230.35061,595.124618,3128.452769,204.304391,215.728378,18933.85783,3870.839801,171.875831,85.686775,106.607899,141.20377,33.673423,293.110463
4,0.075,0.075965,39934.0,9984.0,0.90255,0.875615,0.0,0.0,0.43371,0.410155,0.298095,0.282615,0.79946,0.757975,277.439591,886.806892,471.299955,4771.623898,1027.139607,1069.233662,1184.592797,126.872962,142.181701,283.181312,93.028906,140.672202,1275.833298,458.516455,36.756438,258.860871,1386.42015,4054.691651,530.154834,2698.213328,196.468213,179.801582,17609.43836,3778.166029,166.012527,76.585171,106.830052,103.142168,34.922369,226.832264
4,0.1,0.101252,29960.0,7491.0,0.9031,0.87449,0.0,0.0,0.50076,0.47657,0.364385,0.34999,0.80312,0.755965,270.817186,832.672258,444.851161,4304.550129,960.824742,955.812398,1171.36946,131.637793,141.082694,266.612795,91.735025,125.587434,1111.930667,422.403148,33.551309,250.099123,1217.757671,4035.355043,504.604839,2554.171001,183.683198,158.408077,16020.622423,3905.954908,163.437518,65.998852,99.788927,89.24944,32.434091,209.512784
5,0.05,0.050829,74602.0,18651.0,0.88053,0.85428,0.0,0.0,0.314945,0.30124,0.19845,0.19087,0.765975,0.725195,291.195791,1097.336021,669.214941,6324.228201,1247.821937,1560.259814,1169.719955,173.345775,187.955458,320.351554,81.960664,162.382574,1689.277039,597.383127,35.298293,353.868391,1878.641368,4266.006682,672.456995,3690.199809,266.973787,260.274971,17578.518024,5967.788485,174.594793,129.623758,139.786286,165.653286,35.335443,323.31579
5,0.075,0.076211,49756.0,12440.0,0.88136,0.85379,0.0,0.0,0.404505,0.38659,0.274815,0.26451,0.76913,0.726305,245.929563,1032.487865,619.103529,5724.004865,1159.493007,1359.474747,1137.30062,178.94862,182.626032,292.831384,74.939987,153.491616,1478.344001,529.36907,28.721298,346.833296,1616.676007,4191.217542,599.983923,3564.10132,233.91678,216.008667,16130.933186,5583.030214,167.088963,105.057339,135.973239,130.523606,39.191038,261.304543
5,0.1,0.101569,37334.0,9334.0,0.881805,0.85519,0.0,0.0,0.471205,0.449485,0.33965,0.32582,0.771885,0.73024,238.856579,947.769923,591.224597,5276.368227,1089.660826,1204.848758,1129.96393,182.370526,180.527188,282.137188,75.126699,129.329858,1323.332083,495.560468,27.307392,300.442878,1434.811806,4087.489528,566.893093,3389.286457,232.732497,189.421094,14846.308256,5453.059854,161.485565,99.456535,136.087645,113.644345,43.929265,230.891999
6,0.05,0.050999,89225.0,22307.0,0.864355,0.83893,0.0,0.0,0.29529,0.27959,0.18431,0.17465,0.74682,0.71259,323.087225,1255.304667,857.689753,7542.228989,1446.197927,1807.019703,1192.028764,221.737091,212.165174,342.650561,57.539437,136.438548,1809.037452,688.912052,31.297058,415.945277,2275.253096,4404.188901,716.337968,4369.574077,348.26045,293.7516,16334.731549,7185.209357,216.220968,140.748182,135.905266,176.1354,36.300236,410.616437
6,0.075,0.076458,59515.0,14879.0,0.864595,0.839235,0.0,0.0,0.382355,0.36769,0.257325,0.249305,0.74738,0.71019,258.324088,1174.27271,796.525832,6863.903475,1311.397651,1574.240106,1156.687068,222.802112,197.334111,323.004355,61.971032,122.591516,1606.050438,616.348994,33.715596,371.586932,1932.791871,4349.334254,670.336299,4086.381894,290.18517,240.482061,14969.752415,6850.819697,209.476636,115.802796,140.643557,142.541362,39.344274,343.272185
6,0.1,0.10189,44660.0,11165.0,0.86519,0.839285,0.0,0.0,0.447585,0.42914,0.31914,0.3083,0.75149,0.712435,228.629144,1130.622715,755.962986,6341.453563,1242.078876,1441.17739,1126.735238,224.75457,187.739636,325.70755,62.084796,103.237331,1424.589079,575.931832,30.842261,343.570602,1699.497927,4269.777163,632.915576,3827.644893,279.792827,208.662425,13605.042285,6840.662122,199.326668,97.750585,142.158176,129.040564,39.451432,293.322433
7,0.05,0.051169,103750.0,25938.0,0.85244,0.82955,0.0,0.0,0.282695,0.268095,0.175465,0.16599,0.73154,0.70343,335.827818,1377.103028,1013.00692,8743.895894,1708.215205,2087.921262,1304.202064,272.799946,215.487464,365.939758,59.223798,103.891111,2065.17719,801.208721,35.272365,488.729962,2546.209593,4804.19552,739.21653,4225.120929,411.646505,318.112602,15754.42576,8325.766363,271.4699,165.118523,123.221354,147.416466,24.146824,461.904248


In [7]:
# Average every feature's importance over the results stored for the current model
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res.loc[res['Horizon'].isin(range(1, 16))]  # keep horizons 1..15 only
groups = ['Horizon']

# Per-feature means rounded to 2 decimals, summarised by their overall mean and median
means = res_viable[coeffs_cols].mean().round(2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 2095.08, Median feature importance: 556.57


Hurst                    320.53
CorrDim                 1601.80
Lyapunov                1211.28
Skewness               10511.16
Kurtosis                2073.42
PSD                     2491.40
ACF_1                   1486.37
WL_C1                    429.29
WL_C2                    312.52
WL_C3                    407.96
Hurst_8_dyn               71.33
Hurst_8_Variance          98.66
CorrDim_8_dyn           2197.63
CorrDim_8_Variance       952.22
Lyapunov_8_dyn            46.56
Lyapunov_8_Variance      557.48
Skewness_8_dyn          2723.51
Skewness_8_Variance     6626.85
Kurtosis_8_dyn          1082.96
Kurtosis_8_Variance     4271.92
PSD_8_dyn                555.66
PSD_8_Variance           246.64
ACF_1_8_dyn            12668.68
ACF_1_8_Variance        8825.28
WL_C1_8_dyn              187.11
WL_C1_8_Variance         101.85
WL_C2_8_dyn              106.53
WL_C2_8_Variance         136.37
WL_C3_8_dyn               22.89
WL_C3_8_Variance         526.50
dtype: float64

##### XGBoost

In [28]:
# Select the classifier fitted in this section
model = 'XGBoost'

# Run the model optimisation over the chosen horizons, shares and random states;
# every run is collected into a single results dataframe
res = fun.model(
    data_logdyn, target, horizons, shares, states,
    model = model,
    target_metric = 'Precision',
)

# Everything except the service columns is treated as a feature;
# save the results and build the pivot by horizon and 1 share
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model, path = path)
res_means

100%|██████████| 12/12 [2:16:29<00:00, 682.50s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
4,0.05,0.050661,59880.0,14970.0,0.93695,0.897125,0.0,0.0,0.403615,0.34876,0.26491,0.22478,0.851045,0.78905,11.184344,20.978605,15.159844,35.347359,16.709482,19.018925,17.400146,9.454476,10.126951,12.31415,8.612278,10.889811,34.88189,12.817505,7.906404,11.814581,29.442785,49.504277,16.679338,56.797512,11.286758,10.699753,185.095702,38.485889,9.31975,10.009739,9.708454,9.127754,6.752569,10.206486
4,0.075,0.075965,39934.0,9984.0,0.93952,0.89774,0.0,0.0,0.50751,0.438835,0.361235,0.30475,0.856235,0.79326,10.558075,19.16513,14.757573,33.185126,15.978247,17.68854,17.462608,9.07728,9.614447,11.740069,8.175341,10.152002,30.799115,12.130241,7.339138,11.53067,27.389361,51.674719,14.938802,44.435493,10.594272,9.952913,181.332556,39.78141,9.123867,9.058889,8.924067,8.437341,6.409244,9.630957
4,0.1,0.101252,29960.0,7491.0,0.94145,0.897045,0.0,0.0,0.582205,0.51086,0.4408,0.37944,0.859445,0.788515,10.199879,18.264897,14.157709,31.23872,15.18913,16.314286,17.103131,9.019407,9.212259,11.300889,7.892192,9.71663,28.105443,11.728763,7.143472,10.9808,24.951528,51.903473,14.397043,37.811488,10.026266,9.423174,176.412151,41.164795,8.590139,8.332175,8.577962,7.825431,6.598423,9.095807
5,0.05,0.050829,74602.0,18651.0,0.9187,0.87939,0.0,0.0,0.368135,0.3238,0.23748,0.206005,0.82196,0.766555,12.121393,22.463478,18.645995,42.147643,18.201975,22.090056,19.127714,11.346002,12.051681,13.02944,9.082789,11.715463,38.700119,14.509478,7.983639,13.813922,35.81021,54.039513,18.036422,68.579607,13.704919,12.376825,185.463245,60.522623,11.073862,12.427594,11.073174,10.760917,8.576578,12.363094
5,0.075,0.076211,49756.0,12440.0,0.921565,0.87934,0.0,0.0,0.46804,0.41578,0.32657,0.286565,0.82827,0.765335,11.729775,20.928984,17.994147,39.174122,17.276276,20.56217,18.790429,11.055895,11.696008,12.403235,8.872993,10.814198,35.434914,13.904387,7.601691,13.028604,32.75456,55.724952,16.989248,60.868348,12.847776,11.313375,178.208133,57.924239,10.587867,11.374939,10.783457,9.618487,7.903565,11.325196
5,0.1,0.101569,37334.0,9334.0,0.92362,0.880655,0.0,0.0,0.54208,0.47968,0.402405,0.34927,0.83242,0.772295,11.206677,19.86942,17.343508,36.987668,16.749484,19.071549,18.171321,11.095175,11.101522,11.761157,8.477762,10.350718,32.594297,13.347996,7.453656,12.58501,29.840642,58.612342,15.996,49.986236,12.426902,10.619355,174.595844,57.275422,10.149161,10.794366,10.372321,9.119929,7.820595,10.829994
6,0.05,0.050999,89225.0,22307.0,0.9051,0.86704,0.0,0.0,0.34416,0.30613,0.21927,0.192995,0.803325,0.750045,14.111308,24.941644,22.139136,48.312465,20.37197,24.582823,20.213898,13.640452,13.324179,13.899126,9.787447,12.317355,41.604279,16.50812,9.545059,15.203964,41.559913,59.09315,19.942458,81.503501,16.459221,15.00328,186.992084,74.337962,13.248382,13.215631,11.722256,11.696978,8.638513,14.065086
6,0.075,0.076458,59515.0,14879.0,0.908005,0.868635,0.0,0.0,0.44116,0.39542,0.303395,0.26858,0.810035,0.75534,13.262379,23.992849,21.610785,44.789165,19.359625,22.847227,19.755424,13.263654,12.925076,13.335432,9.527225,11.506815,38.14681,15.544325,9.306781,14.039283,37.978944,62.356775,18.822607,69.098436,15.440691,13.609459,186.172725,70.664696,12.705607,12.181162,11.896007,10.381202,8.096077,12.981273
6,0.1,0.10189,44660.0,11165.0,0.9104,0.86902,0.0,0.0,0.515475,0.461715,0.377745,0.33329,0.81334,0.756525,12.648168,22.677989,20.858375,43.229152,18.226066,21.337249,19.262616,13.187961,12.402412,12.849246,9.559436,10.987867,35.179036,14.800214,8.967869,13.778466,34.775831,65.315034,18.210593,56.769191,14.793554,12.68977,174.430209,71.630208,12.296286,11.710243,11.348552,10.003837,7.875661,12.314323
7,0.05,0.051169,103750.0,25938.0,0.89484,0.85944,0.0,0.0,0.3268,0.293755,0.20616,0.18364,0.79041,0.74184,15.934836,27.167846,25.447877,54.032962,23.327502,26.83274,21.740079,15.70132,15.074001,15.320578,12.07759,12.548864,47.242985,18.52048,10.817916,16.365193,45.573606,66.929628,21.983759,76.817489,19.310679,16.661519,191.177279,88.312642,15.671107,15.262715,11.97482,13.194916,8.696475,15.914348


In [9]:
# Average every feature's importance over the results stored for the current model
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res.loc[res['Horizon'].isin(range(1, 16))]  # keep horizons 1..15 only
groups = ['Horizon']

# Per-feature means rounded to 2 decimals, summarised by their overall mean and median
means = res_viable[coeffs_cols].mean().round(2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 35.78, Median feature importance: 21.34


Hurst                   19.01
CorrDim                 31.52
Lyapunov                30.66
Skewness                62.64
Kurtosis                27.05
PSD                     29.73
ACF_1                   24.70
WL_C1                   20.33
WL_C2                   17.74
WL_C3                   17.31
Hurst_8_dyn             13.07
Hurst_8_Variance        13.54
CorrDim_8_dyn           51.57
CorrDim_8_Variance      22.36
Lyapunov_8_dyn          12.06
Lyapunov_8_Variance     19.54
Skewness_8_dyn          47.93
Skewness_8_Variance    107.49
Kurtosis_8_dyn          27.55
Kurtosis_8_Variance     79.85
PSD_8_dyn               22.47
PSD_8_Variance          18.28
ACF_1_8_dyn            162.28
ACF_1_8_Variance       103.71
WL_C1_8_dyn             17.17
WL_C1_8_Variance        16.50
WL_C2_8_dyn             12.33
WL_C2_8_Variance        15.95
WL_C3_8_dyn              9.38
WL_C3_8_Variance        19.57
dtype: float64

##### CatBoost

In [30]:
# Select the classifier fitted in this section
model = 'CatBoost'

# Run the model optimisation over the chosen horizons and shares, here with ten
# random states (0, 100, ..., 900); every run is collected into a single results dataframe
res = fun.model(
    data_logdyn, target, horizons, shares,
    states = list(range(0, 1000, 100)),
    model = model,
    target_metric = 'Precision',
)

# Everything except the service columns is treated as a feature;
# save the results and build the pivot by horizon and 1 share
coeffs_cols = data_logdyn.columns.drop(['Volume', 'MA100', 'MV100', 'Rise', 'Distance', 'Index', 'Ticker'])
res_means = fun.save_results(res, coeffs_cols.tolist(), model = model, path = path)
res_means

100%|██████████| 12/12 [58:15<00:00, 291.31s/it]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Train size,Test size,Train AUC,Test AUC,Train KS-test p-value,Test KS-test p-value,Train F1-score,Test F1-score,Train precision,Test precision,Train recall,Test recall,Hurst,CorrDim,Lyapunov,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,CorrDim_8_dyn,CorrDim_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Horizon,1 Share,1 Share real,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
4,0.05,0.050661,59880.0,14970.0,0.9432,0.9063,0.0,0.0,0.4353,0.3807,0.293,0.2521,0.8529,0.7921,1.890985,2.714148,2.102171,15.722146,4.428424,3.809179,4.612091,1.305616,0.67917,1.294486,0.880665,1.177913,1.693345,2.918713,0.599181,2.37112,9.445242,6.286395,2.034625,7.296457,1.714844,1.454236,11.594127,5.746104,0.966883,0.968864,0.760688,1.395326,0.493282,1.643575
4,0.075,0.075965,39934.0,9984.0,0.9483,0.9078,0.0,0.0,0.526,0.4556,0.3767,0.3178,0.8726,0.8116,2.146784,2.669545,2.022996,15.353861,4.500422,3.963456,4.60991,1.363388,0.870015,1.351137,0.974362,1.242063,1.795063,2.740055,0.669724,2.206163,9.118556,6.158633,2.206455,7.139123,1.597442,1.659624,11.348064,5.85569,0.948441,0.996079,0.878366,1.396795,0.460502,1.757286
4,0.1,0.101252,29960.0,7491.0,0.95,0.9117,0.0,0.0,0.607,0.5321,0.4662,0.3952,0.8724,0.82,2.163672,2.749086,2.028447,15.183641,4.289488,3.601786,4.516015,1.513176,0.889741,1.463031,1.05223,1.277293,1.727627,2.866823,0.659384,2.300891,9.179896,6.247398,2.10323,7.018275,1.67651,1.637568,11.162599,5.558371,1.08041,1.101442,0.917585,1.644368,0.528965,1.861053
5,0.05,0.050829,74602.0,18651.0,0.928,0.8908,0.0,0.0,0.3968,0.3471,0.261,0.2245,0.8297,0.7757,1.859308,2.814501,2.057471,16.292134,4.3169,4.429641,4.433053,1.566839,0.903818,1.266682,0.808931,1.012347,1.733304,2.967291,0.799775,2.760706,9.097061,5.919103,2.386471,6.970646,1.654486,1.518073,10.438973,5.757575,0.800129,1.044824,0.817166,1.435091,0.395957,1.741745
5,0.075,0.076211,49756.0,12440.0,0.9328,0.8919,0.0,0.0,0.492,0.4274,0.3471,0.2922,0.8467,0.7995,1.771643,3.046996,2.114689,15.635966,4.284144,4.390726,4.381615,1.513366,0.952681,1.296626,0.851485,1.20789,1.924482,3.164678,0.811738,2.800624,8.663321,6.090096,2.282824,6.738476,1.825154,1.75022,10.128143,5.811089,0.864562,1.113239,0.78966,1.443572,0.498796,1.851498
5,0.1,0.101569,37334.0,9334.0,0.9368,0.8932,0.0,0.0,0.5778,0.5164,0.438,0.3875,0.8496,0.7774,1.925561,2.854016,2.233879,15.790281,4.541301,4.319894,4.111159,1.493881,1.169046,1.376417,0.828214,1.187891,1.844752,2.934507,0.806482,2.963462,8.728464,5.96458,2.268967,6.820729,1.918849,1.566045,10.05975,5.481888,0.854792,1.092982,0.929542,1.511301,0.620367,1.801001
6,0.05,0.050999,89225.0,22307.0,0.9181,0.8782,0.0,0.0,0.375,0.3272,0.2438,0.2091,0.8142,0.7612,1.731766,2.93539,2.27762,16.374594,4.579993,4.767049,4.57089,1.607627,1.049403,1.273121,0.631415,0.874129,1.787968,3.375928,0.704006,2.944568,8.560495,6.23933,2.280046,6.718373,1.867139,1.479069,9.211616,6.021357,0.683309,1.194802,0.724947,1.447759,0.396178,1.690113
6,0.075,0.076458,59515.0,14879.0,0.9229,0.8845,0.0,0.0,0.4706,0.4279,0.3285,0.297,0.8319,0.7696,1.831464,3.051001,2.200669,15.907976,4.741107,4.739307,4.436283,1.66033,1.14694,1.334479,0.633082,1.033751,1.690998,3.427048,0.737685,3.061836,8.357486,5.757903,2.466386,6.950665,2.057785,1.656243,9.047365,5.352994,0.745258,1.114626,0.876957,1.57398,0.428908,1.979488
6,0.1,0.10189,44660.0,11165.0,0.9262,0.8832,0.0,0.0,0.5577,0.4827,0.4205,0.3516,0.828,0.7754,1.66006,3.026025,2.290816,15.526752,4.528535,4.664429,4.371078,1.68898,1.326174,1.390182,0.816755,1.011363,1.854392,3.410063,0.833829,2.927616,8.108801,5.899719,2.336867,6.71716,2.138427,1.671008,9.084461,5.851085,0.80048,1.198827,0.853946,1.664125,0.428627,1.91942
7,0.05,0.051169,103750.0,25938.0,0.91,0.8776,0.0,0.0,0.355,0.3142,0.2279,0.1974,0.8053,0.7716,1.68869,3.20883,2.169187,16.293253,5.009332,5.39272,4.570618,1.762356,1.154755,1.212883,0.625233,0.70192,1.974904,3.555557,0.83841,3.098645,8.129991,6.046324,2.252794,6.452718,1.887778,1.685741,8.359208,5.686166,0.69629,1.03572,0.609555,1.631573,0.282922,1.985927


In [11]:
# Average every feature's importance over the results stored for the current model
res = pd.read_parquet(f'{path}/params_{model}.parquet')
res_viable = res.loc[res['Horizon'].isin(range(1, 16))]  # keep horizons 1..15 only
groups = ['Horizon']

# Per-feature means rounded to 2 decimals, summarised by their overall mean and median
means = res_viable[coeffs_cols].mean().round(2)
print(f'Mean feature importance: {round(means.mean(), 2)}, Median feature importance: {round(means.median(), 2)}')
means

Mean feature importance: 3.33, Median feature importance: 2.16


Hurst                   1.68
CorrDim                 3.49
Lyapunov                2.26
Skewness               16.46
Kurtosis                5.64
PSD                     6.00
ACF_1                   5.09
WL_C1                   1.87
WL_C2                   1.30
WL_C3                   1.28
Hurst_8_dyn             0.59
Hurst_8_Variance        0.62
CorrDim_8_dyn           1.93
CorrDim_8_Variance      3.72
Lyapunov_8_dyn          0.72
Lyapunov_8_Variance     2.71
Skewness_8_dyn          7.62
Skewness_8_Variance     5.37
Kurtosis_8_dyn          2.36
Kurtosis_8_Variance     6.11
PSD_8_dyn               2.67
PSD_8_Variance          1.94
ACF_1_8_dyn             7.40
ACF_1_8_Variance        5.42
WL_C1_8_dyn             0.55
WL_C1_8_Variance        0.86
WL_C2_8_dyn             0.48
WL_C2_8_Variance        1.50
WL_C3_8_dyn             0.30
WL_C3_8_Variance        2.06
dtype: float64