### Libraries import

In [1]:
import operator
import numpy as np
import scipy as sp
import pandas as pd
from tqdm import tqdm
import plotly.io as pio
import statsmodels.api as sm
import plotly.graph_objects as go
import sklearn.metrics as metrics
import sklearn.model_selection as modsel

import warnings
warnings.filterwarnings("ignore")
pio.templates.default = "plotly_dark"
pd.set_option('display.max_columns', None)

### Functions

In [2]:
def roc_metric(Y, 
               Y_pred):
    """
    Function for the calculation of AUC metric

    Inputs:
    ----------
    Y : DataFrame
        Set of Y for the model
    Y_pred : DataFrame
        Set of predicted Y for the model

    Returns:
    ----------
    auc : float
        AUC for the given series
    thresholds[optimal_index] : float
        Optimal threshold with highest (TPR - FPR)
    """

    fpr, tpr, thresholds = metrics.roc_curve(Y, Y_pred, pos_label=1)
    auc = round(metrics.auc(fpr, tpr), 3)
    optimal_index = np.argmax(tpr - fpr)

    return auc, thresholds[optimal_index]

#---------------------------------------------------------------------------------------

def remove_most_insignificant(X, 
                              X_test, 
                              results):
    """
    Function for the removal of the most insignificant variables from the model

    Inputs:
    ----------
    X : DataFrame
        Set of X for the model
    results : model
        Fitted statsmodels model

    Returns:
    ----------
    X : DataFrame
        Optimized set of X for the validation of the model
    X_test : DataFrame
        Optimized set of X for the testing of the model
    """
    
    # Use operator to find the key which belongs to the maximum value in the dictionary
    max_p_value = max(results.pvalues.iteritems(), key = operator.itemgetter(1))[0]
    # Drop the worst feature
    X.drop(columns = max_p_value, inplace = True)
    X_test.drop(columns = max_p_value, inplace = True)

    return X, X_test

#---------------------------------------------------------------------------------------

def model_optimization(Y_train,
                       Y_test,
                       X_train,
                       X_test,
                       type:str = 'Probit', 
                       p_value_bord:float = 0.05, 
                       silent:bool = False):
    """
    Function for the optimization of OLS

    Inputs:
    ----------
    Y : array
        Target variable for the regression
    X : DataFrame
        Set of X for the model
    type : str = 'Probit'
        Type of the model
    p_value_bord : float = 0.05
        Maximum acceptable p-value for the coefficient
    silent : bool = False
        Whether not to show reports about model

    Returns:
    ----------
    results : model
        Fitted statsmodels model
    auc_train : float
        AUC on the train data
    auc_test : float
        AUC on the test data
    ks_train.pvalue : float
        KS-test p-value on the train data
    ks_test.pvalue : float
        KS-test p-value on the test data
    f1_train : float
        F1-score on the train data
    f1_test : float
        F1-score on the test data
    pr_train : float
        Precision score on the train data
    pr_test : float
        Precision score on the test data
    rec_train : float
        Recall score on the train data
    rec_test : float
        Recall score on the test data
    """
    
    insignificant_feature = True
    while insignificant_feature:
        # Create model
        if type == 'Probit':
            model = sm.Probit(Y_train, X_train)
        else:
            model = sm.Logit(Y_train, X_train)

        # Fit model and get
        results = model.fit(disp = 0)
        significant = [p_value < p_value_bord for p_value in results.pvalues]
        if all(significant):
            insignificant_feature = False
        else:
            # If there's only one insignificant variable left
            if X_train.shape[1] == 1:
                print('No significant features found')
                results = None
                insignificant_feature = False
            else:
                X_train, X_test = remove_most_insignificant(X_train, X_test, results)
    
    Y_train_pred = results.predict(X_train)
    Y_test_pred = results.predict(X_test)
    auc_train, threshold_train = roc_metric(Y_train, Y_train_pred)
    auc_test, threshold_test = roc_metric(Y_test, Y_test_pred)
    Y_train_pred_round = np.where(Y_train_pred < threshold_train, np.floor(Y_train_pred), np.ceil(Y_train_pred))
    Y_test_pred_round = np.where(Y_test_pred < threshold_test, np.floor(Y_test_pred), np.ceil(Y_test_pred))

    ks_samples_train = pd.DataFrame({'Y': Y_train, 'Y_pred': Y_train_pred})
    ks_samples_train_posi = ks_samples_train[ks_samples_train['Y'] == 1]['Y_pred']
    ks_samples_train_nega = ks_samples_train[ks_samples_train['Y'] == 0]['Y_pred']
    ks_train = sp.stats.kstest(ks_samples_train_posi, ks_samples_train_nega)
    ks_samples_test = pd.DataFrame({'Y': Y_test, 'Y_pred': Y_test_pred})
    ks_samples_test_posi = ks_samples_test[ks_samples_test['Y'] == 1]['Y_pred']
    ks_samples_test_nega = ks_samples_test[ks_samples_test['Y'] == 0]['Y_pred']
    ks_test = sp.stats.kstest(ks_samples_test_posi, ks_samples_test_nega)

    f1_train = round(metrics.f1_score(Y_train, Y_train_pred_round), 3)
    f1_test = round(metrics.f1_score(Y_test, Y_test_pred_round), 3)
    pr_train = round(metrics.precision_score(Y_train, Y_train_pred_round), 3)
    pr_test = round(metrics.precision_score(Y_test, Y_test_pred_round), 3)
    rec_train = round(metrics.recall_score(Y_train, Y_train_pred_round), 3)
    rec_test = round(metrics.recall_score(Y_test, Y_test_pred_round), 3)
    if silent == False:
        print(f'''Train AUC score: {auc_train}, Train KS-test p-value: {round(ks_train.pvalue, 3)}, 
              Train F1-score: {f1_train}, Train precision: {pr_train}, Train recall: {rec_train}''')
        print(f'''Test AUC score: {auc_test}, Test KS-test p-value: {round(ks_test.pvalue, 3)}, 
              Test F1-score: {f1_test}, Test precision: {pr_test}, Test recall: {rec_test}''')
        print(results.summary())

    return results, auc_train, auc_test, round(ks_train.pvalue, 9), round(ks_test.pvalue, 9),\
           f1_train, f1_test, pr_train, pr_test, rec_train, rec_test

### Feature generation

In [10]:
# Read dataset and define columns for feature generation
data = pd.read_parquet('Data/dataset.parquet')
indices = data.groupby(['Ticker', 'Index']).size().index.values
cols = ['Hurst', 'Correlation Dimension', 'Lyapunov', 
        'Variance', 'Skewness', 'Kurtosis', 'PSD', 'ACF_1',
        'WL_C1', 'WL_C2', 'WL_C3']

# Set lag for dynamics and short variance calculation
lag_model = [8]

# Calculate dynamics and short variance
# Original idea about variance was born from the largest Lyapunov exponent's behaviour before the critical transition point:
# is mostly didn't move in nominal values but its variance in some cases decreased signigicantly 
data_logdyn = pd.DataFrame()
for ind in tqdm(indices):
    data_ind = data[(data['Ticker'] == ind[0]) & (data['Index'] == ind[1])]
    for col in cols:
        for lag_m in lag_model:
            data_ind[col + '_' + str(lag_m) + '_dyn'] = data_ind[col] / data_ind[col].shift(lag_m) - 1
            data_ind[col + '_' + str(lag_m) + '_Variance'] = data_ind[col].rolling(lag_m).var()
    data_ind.dropna(inplace = True)
    data_logdyn = pd.concat([data_logdyn, data_ind])

# Reset index to get rid of dates and save final dataset
data_logdyn.reset_index(drop = True, inplace = True)
data_logdyn = data_logdyn[data_logdyn['Distance'] > 0]
data_logdyn.to_parquet('Data/final_dataset.parquet')
data_logdyn

100%|██████████| 976/976 [00:43<00:00, 22.43it/s]


Unnamed: 0,Volume,MA100,Rise,Distance,Index,Ticker,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
0,382075.0,186099.90,False,291,2175,A,0.651958,-2.785248e-15,0.005159,9.550465e+09,2.464177,11.200965,-0.771071,0.507490,0.342651,0.022711,0.002669,-0.133859,0.005382,-6.589489,3.136854e-30,0.683168,1.051438e-05,0.072674,6.780709e+14,0.033793,0.000063,-0.005914,0.006870,0.005967,0.000014,-0.030671,0.000010,0.077689,0.000880,-8.733268,0.000429,-1.455061,0.000175
1,292647.0,187908.86,False,290,2175,A,0.613662,1.071124e-15,0.001808,9.585082e+09,2.445272,11.067568,-0.770524,0.511807,0.286024,-0.026283,-0.003199,-0.184678,0.002815,-0.076546,3.105995e-30,-0.196933,1.022248e-05,0.009710,1.774681e+15,-0.013670,0.000217,-0.028593,0.020058,0.005660,0.000015,-0.006026,0.000011,-0.043911,0.000973,1.643379,0.000485,-0.750029,0.000142
2,77650.0,186442.90,False,289,2175,A,0.692141,-2.569802e-15,0.004011,9.586808e+09,2.444799,11.063926,-0.770989,0.510281,0.343892,0.019235,0.016757,0.416782,0.000939,-2.758797,3.331604e-30,-3.830154,1.217756e-05,0.010084,2.591068e+15,-0.013094,0.000337,-0.028612,0.029793,0.007798,0.000015,-0.012033,0.000012,0.030140,0.001035,-3.108573,0.000570,1.116842,0.000160
3,69826.0,185885.98,False,288,2175,A,0.691744,1.381300e-15,0.003034,9.594264e+09,2.448700,11.069584,-0.773628,0.511491,0.296838,-0.009135,0.023979,0.078241,0.001344,0.064045,3.377804e-30,-2.156820,1.133224e-05,0.010890,3.212532e+15,-0.011603,0.000395,-0.028205,0.035370,0.015778,0.000009,-0.009283,0.000012,0.098330,0.000763,-0.748113,0.000410,0.362201,0.000187
4,68277.0,185620.89,False,287,2175,A,0.815697,7.129526e-16,0.001907,9.605297e+09,2.446302,11.047708,-0.776542,0.512073,0.335905,0.016324,0.000646,0.335859,0.004786,-1.262396,2.773670e-30,-1.635121,8.615687e-06,0.012696,3.541439e+15,-0.015236,0.000404,-0.033008,0.036562,0.017787,0.000005,-0.009602,0.000010,-0.021040,0.000724,-6.134960,0.000442,-1.047026,0.000129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284986,305417.0,163643.22,False,5,2923,ZWS,0.629390,9.868228e-16,0.000374,9.862010e+09,2.750077,10.450696,-0.249480,0.509450,0.426579,-0.014910,-0.024646,-0.002823,0.004813,-2.042248,2.193114e-30,0.277281,2.530291e-07,0.002595,2.802480e+16,0.003499,0.000832,0.023218,0.043614,-0.105693,0.000085,0.054352,0.000184,-0.065463,0.000673,-0.651608,0.002305,-0.487456,0.000051
284987,208657.0,164804.55,False,4,2923,ZWS,0.650464,6.793770e-16,0.002001,9.805879e+09,2.767580,10.603502,-0.246640,0.514747,0.377997,-0.090383,-0.032964,0.438099,0.004155,-396.459219,2.105870e-30,-3.571225,5.678088e-07,-0.007021,2.640481e+16,0.017627,0.000655,0.052053,0.017863,-0.105187,0.000075,0.044414,0.000244,0.013370,0.000620,-0.092599,0.002211,-0.182851,0.000050
284988,1369475.0,177527.24,False,3,2923,ZWS,0.592440,8.986417e-16,0.006145,1.290870e+10,4.419049,33.761933,-0.245769,0.407302,0.452935,-0.008462,-0.046249,0.010253,0.004178,-1.668597,1.341674e-30,18.163513,4.696002e-06,0.343350,1.250351e+18,0.620435,0.342839,2.257314,67.236473,-0.092668,0.000062,-0.171816,0.001229,0.033572,0.000754,-0.400974,0.002288,0.036566,0.000054
284989,680336.0,183571.70,False,2,2923,ZWS,0.623110,-1.746746e-15,0.006337,1.351842e+10,4.314847,31.412914,-0.243810,0.490572,0.421536,-0.108931,-0.027726,0.244247,0.003415,0.598780,1.806869e-30,15.533128,7.880168e-06,0.416228,2.518010e+18,0.574922,0.548896,1.988899,104.172504,-0.089003,0.000046,0.009844,0.001233,0.040718,0.000743,-0.075623,0.002146,-0.270012,0.000061


### Feature analysis

In [31]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')
data_mean = data_logdyn.groupby('Distance')[data_logdyn.columns.drop(['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'])].mean().iloc[:30]
data_mean.to_parquet('Data/final_mean.parquet')
data_mean

Unnamed: 0_level_0,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.603056,0.005837,0.012501,217633500000.0,4.546983,41.403589,-0.563664,0.398032,0.368156,-0.056973,-0.029236,0.004378,0.001758,632701700000.0,0.003716752,23.14209,9.7e-05,0.400892,1.075872e+23,0.65292,1.294671,3.152373,409.896915,-0.003642,4.7e-05,0.009741,0.001438,-0.009037,0.001438,0.096811,0.000996,0.282219,0.000722
2,0.604001,0.005469,0.008922,194926800000.0,3.88085,31.021605,-0.564013,0.397508,0.368738,-0.057855,-0.029811,0.010643,0.001754,6452239000000.0,0.00227241,-3.255584,7.6e-05,0.232088,5.348023e+22,0.380032,0.642619,1.87383,200.898529,-0.003563,4.8e-05,-0.001434,0.000806,-0.008727,0.001451,-0.182713,0.000999,-0.021245,0.000714
3,0.604042,0.000773,0.006938,152144600000.0,3.413979,23.68882,-0.564319,0.398848,0.369176,-0.057106,-0.029314,0.008,0.001751,97044840000.0,1.900091e-06,0.946126,6.3e-05,0.107256,3.985438e+21,0.175326,0.243928,0.851138,71.384341,-0.003395,4.9e-05,-0.006255,0.000386,-0.007796,0.001446,-0.017612,0.001016,-0.833779,0.000736
4,0.603347,0.0006,0.00574,141680100000.0,3.143457,19.724781,-0.564644,0.403708,0.37023,-0.058181,-0.03043,0.008122,0.001772,18786540000.0,7.120364e-07,126.609306,5.7e-05,0.044547,2.593423e+20,0.055196,0.080514,0.260778,23.596229,-0.00315,4.9e-05,0.000918,0.000174,-0.009398,0.001448,0.08179,0.001013,0.000821,0.000725
5,0.604243,0.0006,0.005437,140155200000.0,3.031789,18.025415,-0.564961,0.408457,0.369881,-0.055797,-0.028019,0.009722,0.001778,-473061500.0,7.188359e-07,2.443996,5.4e-05,0.015504,1.683057e+20,0.006206,0.021652,0.038207,5.115135,-0.002749,4.9e-05,0.011692,7.4e-05,-0.010324,0.001434,-0.06115,0.001028,-0.127281,0.00073
6,0.6032,0.000605,0.004946,139863300000.0,3.035188,18.074094,-0.565268,0.407284,0.37063,-0.056928,-0.029207,0.007027,0.001778,41405020.0,7.319909e-07,0.105114,5.3e-05,0.013844,1.42082e+20,0.007876,0.02125,0.044956,5.16223,-0.002182,4.9e-05,0.008604,6e-05,-0.007758,0.00144,-0.268455,0.00103,-0.021999,0.000719
7,0.602204,0.000581,0.004544,139713400000.0,3.033561,18.082607,-0.565588,0.405674,0.370118,-0.056174,-0.02917,0.003076,0.001777,-915878300.0,6.292006e-07,0.075937,5.3e-05,0.010293,1.33605e+20,0.005213,0.018616,0.036424,4.675606,-0.001651,5e-05,0.004791,5.2e-05,-0.010329,0.001445,0.166694,0.00105,0.048372,0.000734
8,0.605282,0.000603,0.005002,139011800000.0,3.023676,17.994499,-0.565944,0.405024,0.37156,-0.057363,-0.030197,0.009696,0.001743,-904603400.0,5.281717e-07,-0.445808,5.3e-05,0.006042,1.229105e+20,-0.000505,0.02371,0.009271,7.394982,-0.001316,5e-05,0.001541,4.8e-05,-0.010689,0.001451,-0.024662,0.00105,-0.375856,0.000725
9,0.604454,0.000621,0.004972,138642000000.0,3.023039,17.99301,-0.56633,0.404907,0.371966,-0.055246,-0.027993,0.008838,0.001771,34075440000.0,3.588921e-07,-0.669464,5.2e-05,0.003272,1.153594e+20,-0.002685,0.026046,0.000931,8.808647,-0.001183,5e-05,0.001095,4.5e-05,-0.009027,0.001462,0.053621,0.001057,-0.023222,0.000741
10,0.601572,0.000627,0.005146,138289900000.0,3.024852,18.015478,-0.566718,0.405132,0.372246,-0.056285,-0.02924,0.000509,0.001764,2119195000.0,6.384742e-05,1.178628,5.2e-05,0.001969,1.029291e+20,-0.002802,0.02698,-0.001436,9.462916,-0.001163,5e-05,0.000907,4.5e-05,-0.0087,0.001459,-0.60835,0.001057,-0.164845,0.000735


In [32]:
data_median = data_logdyn.groupby('Distance')[data_logdyn.columns.drop(['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'])].median().iloc[:30]
data_median.to_parquet('Data/final_median.parquet')
data_median

Unnamed: 0_level_0,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.606819,1.663834e-16,0.005575,3568293000.0,4.000564,26.839219,-0.568773,0.404269,0.360123,-0.065452,-0.01866,-0.000224,0.00113,-0.94624,3.286956e-30,-0.030897,1.3e-05,0.280768,5.051273e+16,0.250575,0.097671,0.531971,12.697829,-0.003317,1.6e-05,0.003472,0.000565,-0.006251,0.000941,-0.003898,0.000384,-0.015852,0.00022
2,0.608476,1.463255e-16,0.003775,3228871000.0,3.300212,18.406723,-0.568054,0.401021,0.356581,-0.064034,-0.017622,0.000545,0.001126,-0.964332,3.138417e-30,-0.15614,8e-06,0.107239,6927068000000000.0,0.013278,0.004921,0.0109,0.639277,-0.002669,1.6e-05,-1.1e-05,0.0001,-0.006796,0.000957,-0.005166,0.000388,-0.013383,0.000217
3,0.608283,5.797892e-17,0.003045,2953625000.0,2.946485,14.045176,-0.568593,0.40181,0.354884,-0.062334,-0.017656,-0.001108,0.001127,-0.971887,2.947546e-30,-0.181453,6e-06,0.024541,1076718000000000.0,-0.000634,0.000851,-0.00508,0.091569,-0.00247,1.5e-05,-0.001948,1.7e-05,-0.006049,0.00095,-0.00739,0.000397,-0.022788,0.000217
4,0.60592,4.1962180000000004e-17,0.002561,2824365000.0,2.743402,12.17746,-0.568953,0.407783,0.359123,-0.063152,-0.017595,-0.003143,0.001143,-0.987908,2.9563949999999998e-30,-0.161284,4e-06,0.006145,391270400000000.0,-0.003705,0.000359,-0.006947,0.026566,-0.002813,1.5e-05,-0.000197,7e-06,-0.007316,0.000944,-0.006973,0.000399,-0.014575,0.000208
5,0.607678,7.572993e-17,0.002422,2730263000.0,2.677025,11.49302,-0.568493,0.415069,0.361456,-0.06256,-0.017761,0.000762,0.00112,-0.962666,2.921676e-30,-0.132623,4e-06,0.002522,201729700000000.0,-0.003213,0.000226,-0.006105,0.01677,-0.002505,1.5e-05,0.001177,4e-06,-0.007492,0.000935,-0.013206,0.000398,-0.024563,0.00021
6,0.60976,4.43347e-17,0.00223,2712437000.0,2.668759,11.50976,-0.567763,0.413082,0.359631,-0.063531,-0.017913,-0.000643,0.001178,-0.994348,2.9500859999999997e-30,-0.143667,4e-06,0.00195,184957400000000.0,-0.00297,0.000206,-0.005672,0.015485,-0.0025,1.5e-05,0.001003,4e-06,-0.007092,0.000949,-0.004574,0.0004,-0.023999,0.000212
7,0.609192,1.335177e-16,0.002224,2687019000.0,2.660912,11.381533,-0.568163,0.411409,0.358294,-0.061491,-0.018523,-0.001871,0.001188,-0.933463,2.924742e-30,-0.139398,4e-06,0.001682,164946700000000.0,-0.002342,0.00017,-0.003505,0.012467,-0.002796,1.5e-05,-0.000165,3e-06,-0.007827,0.000937,-0.011666,0.000399,-0.024903,0.000212
8,0.607871,1.066718e-16,0.002186,2655381000.0,2.65706,11.317045,-0.57006,0.411432,0.360383,-0.062719,-0.017705,0.003786,0.001126,-0.951531,2.825803e-30,-0.109053,4e-06,0.001044,112334500000000.0,-0.001831,0.000148,-0.002631,0.009449,-0.00289,1.6e-05,-0.001319,3e-06,-0.011087,0.000972,-0.010563,0.000403,-0.010574,0.000211
9,0.60792,1.413707e-16,0.002236,2640670000.0,2.655424,11.281799,-0.569911,0.411851,0.360205,-0.062962,-0.017414,0.000508,0.001089,-0.94503,2.940634e-30,-0.140801,4e-06,0.000526,99561340000000.0,-0.001533,0.000141,-0.002232,0.008277,-0.002535,1.5e-05,-0.000962,3e-06,-0.007612,0.000957,-0.009655,0.000418,-0.018013,0.000218
10,0.607627,6.913005e-17,0.002391,2638462000.0,2.656581,11.184473,-0.570557,0.411398,0.361148,-0.062306,-0.01809,-0.005231,0.001128,-0.891331,2.9229029999999998e-30,-0.089094,4e-06,0.000265,97117190000000.0,-0.001113,0.000139,-0.00144,0.008366,-0.002234,1.5e-05,-0.000539,3e-06,-0.006434,0.000918,-0.004083,0.000406,-0.024089,0.000211


In [33]:
data_max = data_logdyn.groupby('Distance')[data_logdyn.columns.drop(['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'])].min().iloc[:30]
data_max.to_parquet('Data/final_max.parquet')
data_max

Unnamed: 0_level_0,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.32604,-1.343118e-14,-0.028376,22607.271259,1.324591,1.935069,-1.189238,0.009493,0.124055,-0.25113,-0.430977,-0.358024,3.9e-05,-386540400000000.0,-7.930164e-18,-301.397642,2.680702e-09,-0.279489,569667.429354,-0.351551,5.47412e-06,-0.736261,0.000138,-0.72277,1.868873e-08,-0.731347,1.018988e-07,-0.300028,1e-05,-14.511536,2e-06,-130.100149,3.057218e-06
2,0.319458,-1.052346e-14,-0.020137,21103.736477,1.248784,1.384948,-1.188375,0.008701,0.14226,-0.226988,-0.380236,-0.302337,3.8e-05,-5088563000000000.0,-7.930164e-18,-1946.306429,3.196592e-09,-0.415324,53395.103812,-0.529635,4.179527e-07,-0.854581,4e-05,-0.682982,1.278762e-08,-0.629322,2.252996e-08,-0.350162,1e-05,-147.734664,2e-06,-143.488647,2.868584e-06
3,0.32126,-1.046921e-14,-0.01964,20566.807611,1.258788,1.446212,-1.187284,0.008363,0.116799,-0.254329,-0.37307,-0.377323,3.7e-05,-46709970000000.0,-2.2273820000000002e-17,-225.998642,5.739516e-09,-0.465092,3451.908411,-0.600461,5.537767e-07,-0.874142,1.4e-05,-0.608074,7.82943e-09,-0.616814,2.027579e-08,-0.400263,1.4e-05,-77.156899,3e-06,-497.638135,1.42359e-06
4,0.300309,-1.036913e-14,-0.024967,20485.842902,1.266994,1.465845,-1.18608,0.008982,0.127624,-0.226453,-0.423085,-0.435254,4.3e-05,-1894445000000.0,-2.2273820000000002e-17,-3065.014656,1.707698e-09,-0.472665,1757.015814,-0.597406,1.003392e-06,-0.87054,1.2e-05,-0.572008,2.689938e-08,-0.560396,8.459394e-09,-0.325289,1.9e-05,-40.766845,7e-06,-25.15634,2.17917e-06
5,0.371014,-1.06754e-14,-0.030819,20487.075828,1.192239,1.158285,-1.184904,0.008352,0.141859,-0.251322,-0.408405,-0.40314,4.8e-05,-751389800000.0,-2.2273820000000002e-17,-120.92924,1.649753e-09,-0.525997,1489.966437,-0.572809,9.646691e-07,-0.789724,6e-06,-0.534196,4.87007e-08,-0.368949,1.008909e-08,-0.389037,2.3e-05,-46.389422,7e-06,-46.15643,1.543419e-06
6,0.338589,-1.041528e-14,-0.026512,20468.937539,1.20038,1.172714,-1.183397,0.009184,0.142506,-0.232097,-0.379128,-0.400936,4.6e-05,-1604234000000.0,-2.2273820000000002e-17,-66.561722,1.416337e-09,-0.525422,976.444951,-0.574261,7.444639e-07,-0.790623,1.3e-05,-0.487577,4.300076e-08,-0.285134,8.165302e-09,-0.312484,1.5e-05,-180.630477,1.1e-05,-53.391672,1.209627e-06
7,0.367845,-1.057818e-14,-0.027798,20473.004132,1.199376,1.18985,-1.182158,0.009663,0.13767,-0.231592,-0.419516,-0.416295,4.2e-05,-1540870000000.0,-2.2273820000000002e-17,-51.08072,1.007154e-09,-0.740766,697.58696,-0.804786,4.597834e-07,-0.965559,5e-06,-0.445067,3.973068e-08,-0.23444,6.80634e-09,-0.345561,2.2e-05,-35.736017,1.3e-05,-48.143697,1.277825e-06
8,0.349496,-1.048931e-14,-0.028008,20392.624012,1.201032,1.193698,-1.181001,0.009034,0.107046,-0.217574,-0.38035,-0.445113,5.8e-05,-882892900000.0,-2.2273820000000002e-17,-488.765036,9.914639e-10,-0.721462,265.503867,-0.7901,4.743013e-07,-0.958968,2e-06,-0.419561,2.089252e-08,-0.405799,1.329174e-08,-0.349415,2.4e-05,-18.693662,1e-05,-380.549348,1.166552e-06
9,0.315522,-1.028379e-14,-0.020619,20403.163042,1.202258,1.194137,-1.179959,0.010474,0.145538,-0.210729,-0.460687,-0.452653,4.9e-05,-668.6781,-2.2273820000000002e-17,-418.779412,9.215491e-10,-0.70044,24815.050596,-0.776882,4.866768e-07,-0.953222,2e-06,-0.406811,1.128849e-08,-0.283632,2.607142e-08,-0.358477,1.9e-05,-24.799976,1e-06,-46.286997,1.121388e-06
10,0.311208,-1.036403e-14,-0.026641,20402.245367,1.189666,1.197196,-1.178963,0.009843,0.162928,-0.231914,-0.397882,-0.375389,5.4e-05,-245.4532,-2.2273820000000002e-17,-97.968949,1.028135e-09,-0.696096,59336.436449,-0.779687,5.277421e-07,-0.954761,2e-06,-0.34845,1.173123e-08,-0.382731,2.379961e-08,-0.258354,1.1e-05,-720.118486,3e-06,-52.759149,5.270599e-07


In [34]:
data_min = data_logdyn.groupby('Distance')[data_logdyn.columns.drop(['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'])].max().iloc[:30]
data_min.to_parquet('Data/final_min.parquet')
data_min

Unnamed: 0_level_0,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance
Distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1,0.932477,4.007584,0.105641,48592990000000.0,17.643366,349.412211,0.085211,0.685672,0.779451,0.371777,0.367589,0.896053,0.01862,1768491000000000.0,3.618985,20463.394422,0.0027,18.41758,7.209157e+25,7.438254,55.803768,65.654622,28787.608558,0.357003,0.00069,2.021197,0.022751,0.320533,0.014032,83.247126,0.024093,292.428865,0.022849
2,0.812413,4.208295,0.090446,45037850000000.0,19.52707,413.158513,0.085255,0.692635,0.76471,0.295629,0.210677,0.691325,0.014358,1.123701e+16,2.213719,114.62555,0.00262,16.825637,3.37207e+25,8.12395,37.339642,64.667689,19938.048276,0.352948,0.000699,1.546568,0.020726,0.302638,0.014405,13.593787,0.023611,38.002953,0.020783
3,0.83818,0.128057,0.104875,28510100000000.0,12.963817,231.59384,0.0846,0.692014,0.773235,0.392881,0.366821,0.564995,0.014787,132027200000000.0,0.001134,1425.168761,0.00253,1.82199,3.265353e+24,5.651834,12.990316,49.836321,5139.970783,0.387274,0.000716,1.095861,0.014989,0.328562,0.014013,37.408189,0.024933,39.777903,0.034658
4,0.920992,0.053029,0.104749,28509320000000.0,13.51199,247.624979,0.082602,0.708573,0.776639,0.263685,0.198865,0.805096,0.015414,12655110000000.0,0.00033,125633.637833,0.002515,1.412174,1.062345e+23,4.310337,7.981416,28.630871,3104.291203,0.481094,0.000746,1.010541,0.010293,0.336752,0.013768,57.10035,0.02383,17.225139,0.030135
5,0.875711,0.061487,0.099081,28508130000000.0,13.599766,249.881715,0.082138,0.730789,0.791136,0.372017,0.36507,0.647343,0.01517,700334000000.0,0.000334,2505.58579,0.002502,0.615429,4.903773e+22,1.095738,6.003414,4.409967,2135.713241,0.603978,0.000857,0.861421,0.002658,0.336183,0.013085,12.21968,0.024895,22.59826,0.030645
6,0.876548,0.05258,0.098619,28518080000000.0,13.598177,249.8227,0.079518,0.730795,0.767419,0.288291,0.208256,0.47169,0.01345,1644646000000.0,0.000342,353.357751,0.002656,0.614409,4.136087e+22,1.125954,7.145217,4.684613,2486.878144,0.772879,0.000958,0.674615,0.002598,0.382136,0.013432,23.471403,0.02419,49.2342,0.021882
7,0.865471,0.055411,0.102293,28743150000000.0,13.679009,251.894286,0.079664,0.728123,0.764336,0.392155,0.361263,0.56237,0.015075,646973100000.0,0.000296,112.831189,0.002431,0.615018,4.718565e+22,1.15844,7.443743,5.054389,2557.535698,0.989,0.001052,0.79799,0.00299,0.480839,0.012413,89.899829,0.022753,249.765668,0.024634
8,0.829806,0.055411,0.107557,28751390000000.0,13.659253,251.409869,0.078131,0.728718,0.769509,0.249938,0.203575,0.623072,0.017962,773.2297,0.000184,425.928403,0.002093,0.637109,4.772873e+22,1.03983,8.889535,4.268695,3485.465043,1.085881,0.001147,0.468999,0.003735,0.373063,0.013014,53.147345,0.021785,44.265205,0.021702
9,0.856932,0.093801,0.098554,28778460000000.0,13.64177,250.979962,0.077249,0.729111,0.835928,0.364492,0.358141,0.593646,0.019947,33257630000000.0,0.000153,191.276352,0.002089,0.620694,5.296594e+22,0.621936,14.926558,2.324841,5927.464412,0.889867,0.001253,0.584596,0.004875,0.460602,0.012475,70.118903,0.019756,43.31933,0.021749
10,0.849269,0.084583,0.101933,28700170000000.0,13.642801,250.999956,0.075044,0.729843,0.727432,0.291283,0.217715,0.571844,0.022044,2068334000000.0,0.06195,925.15909,0.002124,0.379217,5.486381e+22,0.59841,18.41847,2.373368,7365.80634,0.696508,0.001367,0.631732,0.006044,0.322827,0.013796,61.691815,0.020846,24.005799,0.019234


### Modelling

In the cell below we are iterating over the three lists of parameters:
- horizons - how many hours before the transition are considered to be close enough to be prediction phase
- sizes - share of the positive observations in the whole modelling dataset - this parameter is important because in the original dataset share of positives for some of the horizons was to small, so we dicided to use decrease size of the negative dataset and randomize it
- states - in order to avoid lucky random choices in the sizes randomization we are use a list of different random states to average the results

In [21]:
# Read dataset
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')

# Choose binary target and other parameters
target = 'Flag'
horizons = list(range(2, 13))
shares = np.linspace(0.05, 0.2, 4)
states = list(range(0, 10000, 500))

# Create dataframe for the results
res = pd.DataFrame(columns = ['Horizon', '1 Share', '1 Share real', 'State',
                              'Train size', 'Test size', 'Train AUC', 'Test AUC',
                              'Train KS-test p-value', 'Test KS-test p-value',
                              'Train F1-score', 'Test F1-score', 
                              'Train precision', 'Test precision', 
                              'Train recall', 'Test recall', 'Coeffs'])

# Iterate over the chosen parameters and optimize classification models, then save all the results to the dataframe
for horizon in tqdm(horizons):
    data_testing = data_logdyn.copy()
    data_testing['Flag'] = data_testing['Distance'].apply(lambda x: 0 if x > horizon else 1)
    data_testing.drop(columns = ['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'], inplace = True)
    
    data_testing_1 = data_testing[data_testing[target] == 1]
    data_testing_0 = data_testing[data_testing[target] == 0]
    Y_1 = data_testing_1[target]
    X_1 = data_testing_1.drop(columns = [target])
    share_1_orig = len(data_testing_1) / (len(data_testing_0) + len(data_testing_1))
    for share in shares:
        for state in states:
            _, X_0, _, Y_0 = modsel.train_test_split(data_testing_0.drop(columns = [target]), data_testing_0[target], 
                                                     test_size = min(share_1_orig * (1 - share) / share, 1), random_state = state)
            share_1 = len(Y_1) / (len(Y_0) + len(Y_1))
            Y = pd.concat([Y_0, Y_1])
            X = sm.add_constant(pd.concat([X_0, X_1]))
            X_train, X_test, Y_train, Y_test = modsel.train_test_split(X, Y, test_size = 0.2, random_state = state)
            results_rs, auc_train_rs, auc_test_rs, ks_train_rs, ks_test_rs, f1_train_rs,\
                f1_test_rs, pr_train_rs, pr_test_rs, rec_train_rs, rec_test_rs\
                = model_optimization(Y_train, Y_test, X_train, X_test, silent = True)
            res.loc[len(res)] = [horizon, share, share_1, state, len(Y_train), len(Y_test),
                                 auc_train_rs, auc_test_rs, ks_train_rs, ks_test_rs,
                                 f1_train_rs, f1_test_rs, pr_train_rs, pr_test_rs,
                                 rec_train_rs, rec_test_rs, results_rs.params]

# OHE-like transformation of the variables' lists
res_coeffs = pd.DataFrame(columns = list(X_1.columns) + ['const'])
for row in res['Coeffs']:
    res_coeffs.loc[len(res_coeffs)] = row
res = res.drop(columns = ['Coeffs']).join(res_coeffs)
res.to_parquet('Data/params.parquet')

# Create pivot based on the horizon and 1 share parameters
groups = ['Horizon', '1 Share', '1 Share real']
drops = ['State']
res_means = res.groupby(groups)[res.columns.drop(groups + drops)].mean()
res_means.to_parquet('Data/params_mean.parquet')
res_means

100%|██████████| 1/1 [02:31<00:00, 151.84s/it]


KeyError: "['Coeffs'] not found in axis"

In [25]:
# Get mean metrics for all of the columns to understand what variables are actually used in the final models
round(res_means[np.in1d(res_means.index.get_level_values(0), list(range(4,9)))].mean(), 2)
# round(res_means.mean(), 2)

Train size                          47839.75
Test size                           11960.60
Train AUC                               0.75
Test AUC                                0.75
Train KS-test p-value                   0.00
Test KS-test p-value                    0.00
Train F1-score                          0.42
Test F1-score                           0.42
Train precision                         0.35
Test precision                          0.36
Train recall                            0.57
Test recall                             0.57
Hurst                                   0.03
Correlation Dimension                   0.00
Lyapunov                                0.44
Variance                                0.00
Skewness                                0.96
Kurtosis                                0.97
PSD                                     0.98
ACF_1                                   0.09
WL_C1                                   0.04
WL_C2                                   0.04
WL_C3     

### Visualization for the KS-test

In [3]:
# Vizual check of the single model
data_logdyn = pd.read_parquet('Data/final_dataset.parquet')
target = 'Flag'
horizon = 8
share = 0.1
state = 2000
data_testing = data_logdyn.copy()
data_testing['Flag'] = data_testing['Distance'].apply(lambda x: 0 if x >= horizon else 1)
data_testing.drop(columns = ['Volume', 'MA100', 'Rise', 'Distance', 'Index', 'Ticker'], inplace = True)

data_testing_1 = data_testing[data_testing[target] == 1]
data_testing_0 = data_testing[data_testing[target] == 0]
Y_1 = data_testing_1[target]
X_1 = data_testing_1.drop(columns = [target])
share_1_orig = len(data_testing_1) / (len(data_testing_0) + len(data_testing_1))
_, X_0, _, Y_0 = modsel.train_test_split(data_testing_0.drop(columns = [target]), data_testing_0[target], 
                                                     test_size = min(share_1_orig * (1 - share) / share, 1), random_state = state)
share_1 = len(Y_1) / (len(Y_0) + len(Y_1))
Y = pd.concat([Y_0, Y_1])
X = sm.add_constant(pd.concat([X_0, X_1]))
X_train, X_test, Y_train, Y_test = modsel.train_test_split(X, Y, test_size = 0.2, random_state = state)
results_rs, auc_train_rs, auc_test_rs, ks_train_rs, ks_test_rs, f1_train_rs,\
    f1_test_rs, pr_train_rs, pr_test_rs, rec_train_rs, rec_test_rs\
    = model_optimization(Y_train, Y_test, X_train, X_test, silent = True)
print(results_rs.summary())
Y_test_pred = results_rs.predict(X_test)
ks_samples = pd.DataFrame({'Y': Y_test, 'Y_pred': Y_test_pred})
ks_samples_posi = ks_samples[ks_samples['Y'] == 1]['Y_pred']
ks_samples_nega = ks_samples[ks_samples['Y'] == 0]['Y_pred']
fig = go.Figure()
fig.add_trace(go.Histogram(x = ks_samples_posi, name = 'Posi'))
fig.add_trace(go.Histogram(x = ks_samples_nega, name = 'Nega'))
fig.update_layout(barmode = 'overlay')
fig.update_traces(opacity = 0.75)
fig.show()

                          Probit Regression Results                           
Dep. Variable:                   Flag   No. Observations:                53472
Model:                         Probit   Df Residuals:                    53455
Method:                           MLE   Df Model:                           16
Date:                Mon, 04 Mar 2024   Pseudo R-squ.:                  0.1468
Time:                        20:30:18   Log-Likelihood:                -15034.
converged:                       True   LL-Null:                       -17621.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  -1.4200      0.057    -25.025      0.000      -1.531      -1.309
Lyapunov               -2.0581      0.696     -2.956      0.003      -3.422      -0.694
Skewness        

In [7]:
dict(results_rs.params)

{'const': -1.420021746016806,
 'Lyapunov': -2.058072841478879,
 'Skewness': 0.08993267519928061,
 'Kurtosis': -0.005326412631214548,
 'PSD': 0.2564106097553771,
 'WL_C3': 0.5046731033419704,
 'Lyapunov_8_Variance': 125.4187491576873,
 'Variance_8_dyn': 7.1986224376182495,
 'Variance_8_Variance': -3.907411238970322e-25,
 'Skewness_8_dyn': -3.6664178684216364,
 'Skewness_8_Variance': 1.1349450821005473,
 'Kurtosis_8_dyn': 0.45272810686247633,
 'Kurtosis_8_Variance': -0.004012952099583372,
 'PSD_8_Variance': 628.0871886835979,
 'ACF_1_8_Variance': 268.6837020398557,
 'WL_C1_8_dyn': -0.948321843912319,
 'WL_C1_8_Variance': -22.740919088273287}

In [13]:
test2 = pd.read_parquet('Data/params.parquet')

In [16]:
test = pd.DataFrame(columns = list(X_1.columns) + ['const'])
test.loc[len(test)] = results_rs.params
test.loc[len(test)] = results_rs.params
test.loc[len(test)] = results_rs.params
test.columns = test.columns + ''
test

Unnamed: 0,Hurst,Correlation Dimension,Lyapunov,Variance,Skewness,Kurtosis,PSD,ACF_1,WL_C1,WL_C2,WL_C3,Hurst_8_dyn,Hurst_8_Variance,Correlation Dimension_8_dyn,Correlation Dimension_8_Variance,Lyapunov_8_dyn,Lyapunov_8_Variance,Variance_8_dyn,Variance_8_Variance,Skewness_8_dyn,Skewness_8_Variance,Kurtosis_8_dyn,Kurtosis_8_Variance,PSD_8_dyn,PSD_8_Variance,ACF_1_8_dyn,ACF_1_8_Variance,WL_C1_8_dyn,WL_C1_8_Variance,WL_C2_8_dyn,WL_C2_8_Variance,WL_C3_8_dyn,WL_C3_8_Variance,const
0,,,-2.058073,,0.089933,-0.005326,0.256411,,,,0.504673,,,,,,125.418749,7.198622,-3.907411e-25,-3.666418,1.134945,0.452728,-0.004013,,628.087189,,268.683702,-0.948322,-22.740919,,,,,-1.420022
1,,,-2.058073,,0.089933,-0.005326,0.256411,,,,0.504673,,,,,,125.418749,7.198622,-3.907411e-25,-3.666418,1.134945,0.452728,-0.004013,,628.087189,,268.683702,-0.948322,-22.740919,,,,,-1.420022
2,,,-2.058073,,0.089933,-0.005326,0.256411,,,,0.504673,,,,,,125.418749,7.198622,-3.907411e-25,-3.666418,1.134945,0.452728,-0.004013,,628.087189,,268.683702,-0.948322,-22.740919,,,,,-1.420022


In [None]:
test2.count()