In [1]:
import os
import sys
import numpy as np
import pandas as pd
import pickle

from feature_engine.imputation import MeanMedianImputer, ArbitraryNumberImputer
from feature_engine.outliers import Winsorizer
from feature_engine.wrappers import SklearnTransformerWrapper

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler , MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.pipeline import Pipeline 

# Resolve the folder that holds the project modules (tm_teoriaMvto_*) and the data folders.
# `__file__` only exists when this code runs as a script / exported module; a regular
# Jupyter kernel does not define it, so fall back to the current working directory
# (assumed to be the notebook's folder - adjust if the kernel is started elsewhere).
try:
  s_path = os.path.dirname(os.path.realpath(__file__))
except NameError:
  s_path = os.getcwd()

# Make the local modules importable; guard so re-running the cell does not add duplicates.
if s_path not in sys.path:
  sys.path.append(s_path)

# Display options. Use the fully qualified 'display.precision' key: the abbreviated
# 'precision' pattern is ambiguous on newer pandas releases and raises OptionError.
pd.set_option('display.max_columns',100)
pd.set_option('display.precision', 3)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

import tm_teoriaMvto_base_prep as base_prep
import tm_teoriaMvto_label as tm_label
import tm_teoriaMvto_train as tm_train
import tm_teoriaMvto_ft_eng as ft_eng
# import tm_teoriaMvto_ft_sel as ft_sel

# models to test
from sklearn.svm import SVC
import xgboost as xgb


In C:\Users\Desktop\Anaconda3\envs\ngym36\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Desktop\Anaconda3\envs\ngym36\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Desktop\Anaconda3\envs\ngym36\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In C:\Users\Desktop\Anaconda3\envs\ngym36\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In C:\Users\Desktop\Anaconda3\envs\n

In [2]:
# Folder with the historical input/output files, located next to the project modules.
path_files = os.path.join(s_path, 'historical_files')

# --- base prep control: when True, the export cells below (re)build X / y
EXPORT_X, EXPORT_Y = False, False

# --- labeling parameters: forwarded to tm_label.Labeling and used to compose y_config
s_prefix, s_lbl_type = 'ft_', 'c_binary'
f_th = 0.2
b_dist = b_percent = b_custom = True

# --- loading parameter: value matched against the 'model' column when selecting X / y rows
s_regime = 'mi_up'



In [3]:
if EXPORT_X:
  # Import the sampled data; the reader is pointed at <path_files>/FeaturesFiles.
  df_data = base_prep.import_sampling(
      path_files=os.path.join(path_files, 'FeaturesFiles'),
      prefix_files=s_prefix,
  )

  # create new features before extracting samples - using full data needed by lag features
  df_data = ft_eng.BasicFeatures().transform(X=df_data)

  # Export features and targets; both exports are pointed at <path_files>/TrainFiles.
  df_X = base_prep.ft_export(
      df_data,
      path_files=os.path.join(path_files, 'TrainFiles'),
      prefix=s_prefix,
  )
  y_sc = base_prep.y_export(
      df_data,
      path_files=os.path.join(path_files, 'TrainFiles'),
      prefix=s_prefix,
  )



In [4]:
if EXPORT_Y:
  # Hand over the y frame exported in this session when available, otherwise None.
  df_label = y_sc if EXPORT_X else None

  # specify df_data = None (default) to load pickle from s_path + 'y_prep_data.pkl'
  label = tm_label.Labeling(
      df_data=df_label,
      label_type=s_lbl_type,
      b_dist_to_high=b_dist,
      s_path=os.path.join(path_files, 'TrainFiles'),
      prefix_files=s_prefix,
      th_label_y1=f_th,
      b_percent_freq=b_percent,
      b_custom_dir=b_custom,
  )

  df_y = label.apply_label(s_model_return=s_regime)  # it will export 4 pickle files to label_obj.s_path
  y_config = label.s_name
else:
  # choose y_config: rebuild the configuration name from the labeling parameters
  y_config = 'prefix_{}_type_{}_th_{}_dist_{}_percent_{}_custom_{}'.format(
      s_prefix, s_lbl_type, f_th, b_dist, b_percent, b_custom)

# Pickle expected for this configuration inside <path_files>/TrainFiles.
y_outfile = os.path.join(path_files, 'TrainFiles', 'y_' + y_config + '.pkl')

# Only report whether the file exists; nothing is loaded or raised here.
if not os.path.exists(y_outfile):
  print('WARNING: y outfile not found for configuration. Export y before continuing {}'.format(y_config))
else:
  print('y outfile ready for configuration: {}'.format(y_config))



y outfile ready for configuration: prefix_ft__type_c_binary_th_0.2_dist_True_percent_True_custom_True


In [5]:
# Column groups used by the feature-preparation cells further down.

# Columns passed to ft_eng.PropImputer together with the value 0.8.
l_col_08 = ['escora_bid_2.5_0.8', 'escora_ask_2.5_0.8', 'escora_bid_3.5_0.8', 
            'escora_ask_3.5_0.8', 'escora_bid_4.5_0.8', 'escora_ask_4.5_0.8', ]

# Columns passed to ft_eng.PropImputer together with the value 1.2.
# NOTE(review): the 4.5 pair is listed ask-before-bid, unlike the other lists - confirm whether order matters.
l_col_12 = ['escora_bid_2.5_1.2', 'escora_ask_2.5_1.2', 'escora_bid_3.5_1.2',
            'escora_ask_3.5_1.2', 'escora_ask_4.5_1.2', 'escora_bid_4.5_1.2', ]

# Columns passed to ft_eng.PropImputer together with the value 2.
l_col_2 = [ 'escora_bid_2.5_2', 'escora_ask_2.5_2', 'escora_bid_3.5_2',
            'escora_ask_3.5_2', 'escora_bid_4.5_2', 'escora_ask_4.5_2', ]

# Columns handed to ft_eng.LogVolume.
l_col_log = ['agg_net_d', 'aggbig_net_d', 'vol_trd', 'vol_big', 'big_v', 'vol_trd_aux', 
   'vol_big_aux', 'big_v_aux', 'loc_agg_net_d', 'big_c', 'big_c_aux',
   'loc_aggbig_net_d', 'agg_net_m', 'agg_net_m_aux', 'abagg', 'abagg_aux',
   'aggbig_net_m', 'aggbig_net_m_aux', 'loc_agg_net_m', 'loc_aggbig_net_m',
   'loc_agg_net_m_aux', 'loc_aggbig_net_m_aux', 'loc_aggbig_c_m', 'loc_aggbig_v_m', 
   'loc_aggbig_c_m_aux', 'loc_aggbig_v_m_aux', 'abs_v', 'abs_c', 'aggpior_v', 'aggpior_v_aux', 
   'aggpior_c', 'aggpior_c_aux', 'agg_net_10', 'agg_net_40', 'agg_net_80', 'loc_agg_net_10',
   'aggbig_net_10', 'aggpior_DIF', 'aggpior_DIF_30', 'abs_DIF', 'abs_DIF_30',
   'abagg_10', 'aggpior_aux_DIF', ]

# Columns dropped from X for every regime (after the transforms have run).
l_side_drop = ['big_c','big_v','aggpior_c','aggpior_v','loc_aggbig_c_m','loc_aggbig_v_m','pagg_c_best',
  'pagg_c_best_0.5','pagg_c_best_0.7','pagg_c_best_0.9','pagg_v_best','pagg_v_best_0.5','pagg_v_best_0.7',
  'pagg_v_best_0.9','abs_c','abs_v','int_c','int_c_0.6','int_c_0.7','int_c_0.8','int_c_0.9','int_dif_c',
  'int_v','int_v_0.6','int_v_0.7','int_v_0.8','int_v_0.9','int_dif_v','imp_c','imp_c_0.6','imp_c_0.7',
  'imp_c_0.8','imp_c_0.9','imp_v','imp_v_0.6','imp_v_0.7','imp_v_0.8','imp_v_0.9','escora_bid_2.5_1.2',
  'escora_bid_2.5_2','escora_ask_2.5_1.2','escora_ask_2.5_2','escora_bid_3.5_1.2','escora_bid_3.5_2',
  'escora_ask_3.5_1.2','escora_ask_3.5_2','escora_bid_4.5_0.8','escora_bid_4.5_2','escora_ask_4.5_0.8',
  'escora_ask_4.5_2','movesc_bid_2.5','movesc_ask_2.5','movesc_bid_2.5_0.5','movesc_ask_2.5_0.5',
  'movesc_bid_2.5_0.7','movesc_ask_2.5_0.7','movesc_bid_3.5','movesc_ask_3.5','movesc_bid_3.5_0.7',
  'movesc_ask_3.5_0.7','movesc_bid_3.5_0.9','movesc_ask_3.5_0.9','movesc_bid_4.5','movesc_ask_4.5',
  'movesc_bid_4.5_0.5','movesc_ask_4.5_0.5','movesc_bid_4.5_0.7','movesc_ask_4.5_0.7','depth_bid7','depth_ask7'
  ]

# '_aux' columns dropped in addition to l_side_drop when s_regime does NOT start with 'mw'.
l_side_drop_aux = ['big_c_aux','big_v_aux','aggpior_c_aux','aggpior_v_aux','loc_aggbig_c_m_aux',
  'loc_aggbig_v_m_aux','pagg_c_best_aux','pagg_c_best_0.5_aux','pagg_c_best_0.7_aux',
  'pagg_c_best_0.9_aux','pagg_v_best_aux','pagg_v_best_0.5_aux', 'pagg_v_best_0.7_aux',
  'pagg_v_best_0.9_aux'
  ]

# '_aux' columns dropped in addition to l_side_drop when s_regime starts with 'mw'.
l_ft_aux = [
  'vol_trd_aux', 'n_trd_aux','vol_big_aux','n_big_aux','vol_big_ratio_aux','big_c_aux','big_v_aux',
  'aggpior_c_aux','aggpior_v_aux','aggimb_aux','aggimb_big_aux','n_aggimb_aux','agg_net_m_aux',
  'aggbig_net_m_aux','loc_aggbig_c_m_aux','loc_aggbig_v_m_aux','loc_agg_net_m_aux','loc_aggbig_net_m_aux',
  'loc_agg_imb_m_aux','loc_aggbig_imb_m_aux','pagg_c_best_aux','pagg_c_best_0.5_aux','pagg_c_best_0.7_aux',
  'pagg_c_best_0.9_aux','pagg_v_best_aux','pagg_v_best_0.5_aux','pagg_v_best_0.7_aux','pagg_v_best_0.9_aux',
  'abagg_aux','n_p_aux','aggpior_aux_DIF','pagg_aux_DIF'
  ]


In [6]:
# Fraction of the rows held out for testing.
test_size = 0.2

# X was not exported in this session: load the stored 'X_samples_<prefix>' object from TrainFiles.
if not EXPORT_X:
  df_X = tm_train.load_models('X_samples_' + s_prefix, os.path.join(path_files, 'TrainFiles'))

# Keep the rows of the selected regime; 'model' is only the selector column.
X = df_X[df_X['model'] == s_regime].drop(columns='model')

if EXPORT_Y:
  # df_y comes from the labeling cell of this session; only the selector column is removed.
  y = df_y.drop(columns='model')
else:
  df_y = tm_train.load_models('y_' + y_config, os.path.join(path_files, 'TrainFiles'))
  y = df_y[df_y['model'] == s_regime].drop(columns='model')

# shuffle=False keeps the row order, so the test set is the trailing part of the data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)

print('load_split: X, y data load and split complete!')



load_split: X, y data load and split complete!


In [7]:
# Train-side feature preparation. The two imputers fitted here (median_inputer,
# nan_imputer) are reused, already fitted, by the test-side cell.
# DEBUG: features containing np.NaN = (pd.isna(X_train).sum()>0).sort_values(ascending=False).head(50)
X_train = ft_eng.PropImputer(0.8, l_col_08).transform(X_train)
X_train = ft_eng.PropImputer(1.2, l_col_12).transform(X_train)
X_train = ft_eng.PropImputer(2, l_col_2).transform(X_train)

# Median imputation for 'PA_down' ('median' is the library default, spelled out for clarity).
median_inputer = MeanMedianImputer(imputation_method='median', variables=['PA_down',])
X_train = median_inputer.fit_transform(X_train)

# Missing 'ohlc_10' / 'ohlc_50' values are replaced by 0.0.
nan_imputer = ArbitraryNumberImputer(0.0, variables=['ohlc_10','ohlc_50'])
X_train = nan_imputer.fit_transform(X_train)

X_train = ft_eng.DifAll().transform(X_train)
X_train = ft_eng.LogVolume(l_col_log).transform(X_train)

if s_regime.startswith('mw'):
  # for now, removing all side columns
  l_cols_drop = l_side_drop + l_ft_aux
else:
  l_cols_drop = l_side_drop + l_side_drop_aux

# Re-bind instead of inplace=True: avoids mutating a frame that may be a view/alias of
# another object (SettingWithCopyWarning) and matches the X = f(X) style of the lines above.
X_train = X_train.drop(columns=l_cols_drop)


In [8]:
# Test-side feature preparation: same steps as for the training data, but the imputers
# are only applied (transform), never re-fitted, so no test statistics are learned.
X_test = ft_eng.PropImputer(0.8, l_col_08).transform(X_test)
X_test = ft_eng.PropImputer(1.2, l_col_12).transform(X_test)
X_test = ft_eng.PropImputer(2, l_col_2).transform(X_test)

# median_inputer / nan_imputer were fitted on X_train in the previous cell.
X_test = median_inputer.transform(X_test)

X_test = nan_imputer.transform(X_test)

X_test = ft_eng.DifAll().transform(X_test)
X_test = ft_eng.LogVolume(l_col_log).transform(X_test)

if s_regime.startswith('mw'):
  # for now, removing all side columns
  l_cols_drop = l_side_drop + l_ft_aux
else:
  l_cols_drop = l_side_drop + l_side_drop_aux

# Re-bind instead of inplace=True: avoids mutating a frame that may be a view/alias of
# another object (SettingWithCopyWarning) and matches the X = f(X) style of the lines above.
X_test = X_test.drop(columns=l_cols_drop)

# Training helper used by the model cells below (e.g. optimized_training / result_opt).
train_obj = tm_train.ModelTraining()


In [9]:

# Preprocessing pipeline: robust scaling -> quantile winsorizing -> min-max to [-1, 1] -> PCA.
pipe_preproc = Pipeline([
                # RobustScaler.quantile_range is expressed in PERCENT (0-100), not as fractions:
                # (0.15, 0.85) would scale by the spread between the 0.15th and 0.85th percentiles.
                # The 15th-85th percentile range is what the values were evidently meant to select.
                ('scaler', SklearnTransformerWrapper(transformer=RobustScaler(quantile_range=(15.0, 85.0)))),
                # Winsorizer 'fold' for capping_method='quantiles' is a fraction: caps both tails at 0.1%.
                ('outliers', Winsorizer(capping_method='quantiles', fold = 0.001, tail = 'both')),
                ('minmax', SklearnTransformerWrapper(transformer=MinMaxScaler(feature_range=(-1, 1)))),
                ('pca', PCA(n_components=30, svd_solver='auto')), 
                # ('isomap', Isomap(n_components=13, n_neighbors=50, n_jobs=-1)),   #  expensive
                ])

# Fit on the training data only; the test data is just transformed with the fitted steps.
X_pca = pipe_preproc.fit_transform(X_train)
X_test_pca = pipe_preproc.transform(X_test)



In [10]:
# Baseline SVM with default C / gamma, class weights balanced by class frequency.
clf_svm = SVC(probability=True, cache_size=1000, verbose=False
              ,class_weight= 'balanced'
              # ,C=1, gamma=0.001
              )

# y_train is a one-column DataFrame; flatten it so sklearn receives the 1-D target it
# expects (avoids the column-vector DataConversionWarning emitted by fit).
clf_svm.fit(X_pca, np.ravel(y_train))
l_results = tm_train.report_results(X_pca, X_test_pca, y_train, y_test, fitted_model=clf_svm, ready_probs = False)

# forest_minimize 

# Hyper-parameter search for the SVM; the outcome is inspected later via train_obj.result_opt.
train_obj.optimized_training(X_pca, y_train, 'svm', clf_svm, pipe = False)



  return f(*args, **kwargs)
train_auc: 0.77, test_auc: 0.55 (overfit: 0.22)
train_f1:  0.54, test_f1:  0.27 (overfit: 0.26)
train_f*:0.65, test_f*:0.37 (overfit: 0.28)
--------
cm_train: 
[[6329  702]
 [3138 2220]]
cm_test: 
[[1512  268]
 [1068  250]]
threshold: 0.5
Iteration No: 1 started. Evaluating function at random point.
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 129.3601
Function value obtained: 0.4295
Current minimum: 0.4295
Iteration No: 2 started. Evaluating function at random point.
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 127.1825
Function value obtained: 0.4294
Current minimum: 0.4294
Iteration No: 3 started. Evaluating function at random point.
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 130.6413
Function value obtained: 0.4304
Current minimum: 0.4294
Iteration No: 4 started. Evaluating function at random point.
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 124.4293
Function 

In [11]:

# Single-step pipeline around an XGBoost classifier with fixed hyper-parameters.
# NOTE(review): the fractional values look like the output of an earlier search - confirm their origin.
pipe_xgb = Pipeline(steps=[
    # TODO: feature_selection()
    (
        'xgb',
        xgb.XGBClassifier(
            max_depth=4,
            subsample=0.4102805752324993,
            colsample_bytree=0.18592295315692304,
            min_child_weight=50,
            learning_rate=0.006666055995539396,
            n_estimators=418,
            gamma=1,
            reg_alpha=9.436346363300686,
            scale_pos_weight=7.107995018980205,
        ),
    ),
])


# training and test scores 
# pipe_xgb.fit(X_train, y_train)
# l_results = tm_train.report_results(X_train, X_test, y_train, y_test, fitted_model=pipe_xgb, ready_probs = False)
# f_imp = tm_train.ft_importance_plot(pipe_xgb[1], pipe_xgb[0].transform(X_train), plot_result=True)



In [12]:

# Candidate grid for an SVC pipeline step named 'svc' (consumed by the commented-out
# rscv_clf call below).
d_params = {}
# d_params['svc__class_weight'] = ['balanced', None]
d_params['svc__C'] = [0.1, 0.5, 1, 10, 100]
d_params['svc__gamma'] = [
    0.001, 0.003, 0.006,
    0.01, 0.03, 0.06,
    0.1, 0.3, 0.6,
]

# l_results, clf_cv = tm_train.rscv_clf(pipe_svm, d_params, X_train, y_train, X_test, y_test)


In [13]:
# Show the optimisation result stored on train_obj (presumably set by optimized_training - confirm);
# the displayed object lists the best value ('fun'), all evaluated values ('func_vals') and the search space.
train_obj.result_opt

          fun: 0.42940571053347776
    func_vals: array([0.42950246, 0.42941107, 0.4303829 , 0.42941095, 0.43855993,
       0.42940571])
       models: []
 random_state: RandomState(MT19937) at 0x25A3D82CBA0
        space: Space([Real(low=1e-06, high=100.0, prior='log-uniform', transform='identity'),
       Real(low=1e-07, high=10.0, prior='log-uniform', transform='identity')])
        specs: {'args': {'model_queue_size': None, 'n_jobs': -1, 'kappa': 2.0, 'xi': 0.01, 'n_restarts_optimizer': 5, 'n_points': 10000, 'callback': <skopt.callbacks.DeltaYStopper object at 0x0000025A3D6A7A20>, 'verbose': 1, 'random_state': None, 'y0': None, 'x0': None, 'acq_optimizer': 'sampling', 'acq_func': 'EI', 'initial_point_generator': 'random', 'n_initial_points': 10, 'n_random_starts': None, 'n_calls': 30, 'base_estimator': 'ET', 'dimensions': [Real(low=1e-06, high=100.0, prior='log-uniform', transform='identity'), Real(low=1e-07, high=10.0, prior='log-uniform', transform='identity')], 'func': <function

In [14]:
# SVM refit with explicit C / gamma.
# NOTE(review): C=1.26, gamma=0.0002 presumably come from the optimisation run above - confirm.
clf_svm = SVC(probability=True, cache_size=1000, verbose=False
              ,class_weight= 'balanced'
              ,C=1.26, gamma=0.0002
              )

# y_train is a one-column DataFrame; flatten it so sklearn receives the 1-D target it
# expects (avoids the column-vector DataConversionWarning emitted by fit).
clf_svm.fit(X_pca, np.ravel(y_train))
l_results = tm_train.report_results(X_pca, X_test_pca, y_train, y_test, fitted_model=clf_svm, ready_probs = False)


  return f(*args, **kwargs)
train_auc: 0.60, test_auc: 0.56 (overfit: 0.04)
train_f1:  0.60, test_f1:  0.56 (overfit: 0.04)
train_f*:0.52, test_f*:0.49 (overfit: 0.03)
--------
cm_train: 
[[2551 4480]
 [1169 4189]]
cm_test: 
[[ 654 1126]
 [ 369  949]]
threshold: 0.4


In [15]:
# Inspect the prepared training features (state after the imputation / transform / drop cell).
X_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,vol_trd,n_trd,vol_big,n_big,vol_big_ratio,aggimb,aggimb_big,n_aggimb,agg_net_m,aggbig_net_m,loc_agg_net_m,loc_aggbig_net_m,loc_agg_imb_m,loc_aggbig_imb_m,abagg,n_p,agg_net_d,aggbig_net_d,s_run,chgfreq,last_d_s,PA_up,PA_down,loc_agg_net_d,loc_aggbig_net_d,vewma,rng_ewma,vewma_g_p,vewma_c_v,vewmag_dif,vol_trd_aux,n_trd_aux,vol_big_aux,n_big_aux,vol_big_ratio_aux,aggimb_aux,aggimb_big_aux,n_aggimb_aux,agg_net_m_aux,aggbig_net_m_aux,loc_agg_net_m_aux,loc_aggbig_net_m_aux,loc_agg_imb_m_aux,loc_aggbig_imb_m_aux,abagg_aux,n_p_aux,smart_price,book_imb,escora_bid_2.5_0.8,escora_ask_2.5_0.8,...,ohlc_10,ohlc_50,rng_ewma_dif,rng_ewma_dif_40,rng_ewma_dif_80,vewma_10,vewma_g_p_10,imp_DIF,imp_DIF_10,imp_DIF_50,agg_net_10,agg_net_40,agg_net_80,loc_agg_net_10,aggbig_net_10,int_DIF,int_DIF_10,int_DIF_50,imp_FCAST_10,imp_FCAST_40,aggpior_DIF,aggpior_DIF_30,abs_DIF,abs_DIF_30,pagg_DIF,abagg_10,book_imb_dif,book_imb_mean_10,book_imb_mean_40,book_imb_mean_dif_lp,book_imb_mean_dif_cp,msg_imb_dif,msg_imb_mean_10,msg_imb_mean_40,msg_imb_mean_dif_lp,msg_imb_mean_dif_cp,sspread_mean,depth_DIF,depth_DIF_10,book_imb_mean_us_5,book_imb_mean_us_20,sspread_mean_us_5,aggpior_aux_DIF,pagg_aux_DIF,escora_2.5_2_DIF,escora_3.5_1.2_DIF,escora_4.5_0.8_DIF,movesc_2.5_0.7_DIF,movesc_3.5_0.7_DIF,movesc_4.5_0.7_DIF
date_trd,update,time,trd_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 
102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1
2020-06-01,49460,09:21:41.989000,764,6.717,87,5.352,6,0.255,-0.042,0.143,0.000,-3.584,3.434,1.905,3.716,0.016,0.393,-5.817,2,5.252,5.252,-1.000,0.250,1.000,0.200,0.290,5.038,4.747,-0.001,5.092,1.620,-0.658,-1.710,5.673,31,4.394,1,0.276,0.138,1.000,0.032,3.714,4.394,3.861,4.111,0.219,1.000,3.258,1,5353.852,-83.835,0.121,0.574,...,-0.400,-0.612,0.788,1.859,-0.207,-0.000,0.451,0.029,0.000,-0.006,0.000,-4.025,-4.111,2.250,0.000,-0.007,0.000,-0.045,0.000,-1.575,-4.263,0.000,0.000,-4.984,-1.000,-3.045,-61.401,-18.922,-20.056,1.134,-64.914,0.364,-0.189,-0.210,0.021,0.396,2.240,-165,-105.000,-24.965,-17.531,2.229,0.000,0.000,-0.245,-0.393,-0.453,-0.434,-0.630,-0.494
2020-06-01,52825,09:23:20.039000,860,6.957,127,5.464,8,0.224,-0.114,0.489,0.016,-4.796,4.754,-4.121,-2.692,-0.439,-0.537,-6.043,7,3.434,5.421,-1.000,0.208,-1.000,1.000,0.529,-3.098,-4.261,-0.002,12.305,5.255,2.542,-6.697,3.045,4,0.000,0,0.000,-0.188,0.111,0.000,0.000,0.000,1.455,0.000,-0.653,-1.000,0.000,1,5343.808,-39.076,0.121,0.238,...,0.092,-0.798,1.189,5.596,6.402,-0.000,0.168,0.039,0.000,0.021,-3.258,-5.050,-5.545,4.426,0.000,0.068,0.000,0.004,-0.972,-6.028,-3.714,-3.045,0.000,0.000,0.000,-3.714,-27.633,-17.584,-13.702,-3.883,-21.492,-0.177,-0.238,-0.187,-0.051,-0.206,2.228,-160,-75.000,-21.177,-12.331,2.222,0.000,0.000,-0.023,-0.074,-0.117,-1.106,-1.100,-0.750
2020-06-01,53165,09:23:31.093000,870,7.035,137,5.545,9,0.225,-0.154,0.373,0.007,-5.170,4.564,-4.552,-3.187,-0.500,-0.723,-6.066,9,-3.258,5.328,-1.000,0.167,-1.000,2.000,0.425,-4.426,-4.623,-0.002,11.370,3.776,4.964,-8.099,4.663,14,3.045,1,0.190,-0.188,0.111,0.000,-4.025,-3.045,-2.996,-1.992,-0.653,-1.000,3.584,3,5343.704,-17.295,0.172,0.211,...,-0.109,-0.776,-0.934,3.014,5.581,0.000,-1.479,0.025,-0.014,0.006,-4.025,-4.710,-5.656,-4.134,-3.045,0.071,0.003,0.006,-1.363,-2.727,-4.111,-3.714,0.000,0.000,0.000,-2.398,21.781,-21.022,-14.731,-6.291,3.727,0.129,-0.322,-0.221,-0.101,0.006,2.305,-135,25.000,-23.676,-15.896,2.347,0.000,0.000,-0.008,-0.025,0.000,0.208,-0.485,0.400
2020-06-01,63011,09:29:48.428000,1137,6.914,132,5.017,7,0.149,-0.015,0.200,0.008,-2.773,3.434,-3.546,1.447,-0.112,0.095,-5.771,8,-4.331,5.352,-1.000,0.167,1.000,3.030,3.030,-4.656,-3.863,-0.000,7.294,2.277,-0.265,-0.682,6.752,114,4.875,6,0.152,0.029,0.077,0.000,3.258,2.398,-3.466,1.447,-0.104,0.095,-5.525,7,5343.095,18.972,0.574,0.211,...,0.331,-0.545,1.317,1.792,1.482,-0.000,-0.358,-0.031,-0.029,-0.008,3.258,3.045,1.792,2.904,3.258,-0.044,-0.010,-0.094,-0.784,-0.627,0.000,0.000,0.000,0.000,0.000,-4.263,15.288,15.062,-3.231,18.293,3.910,-0.269,-0.196,-0.123,-0.074,-0.019,2.294,-5,20.000,10.538,-6.084,2.317,0.000,0.000,0.230,0.393,0.037,2.500,3.898,0.086
2020-06-01,67015,09:30:47.568000,1263,6.321,61,5.464,8,0.423,-0.622,-1.000,-0.131,-5.846,-5.464,-3.993,-3.665,-0.511,-1.000,-5.545,7,-6.704,-5.464,-1.000,0.167,-1.000,0.500,0.166,-3.993,-3.665,-0.002,10.555,3.535,15.909,-9.161,3.932,9,0.000,0,0.000,-0.600,-1.000,-0.150,0.000,0.000,1.063,0.000,-0.836,-1.000,0.000,1,5324.917,-13.745,0.147,0.238,...,0.133,-0.865,1.270,0.919,1.455,0.000,0.600,0.015,0.000,-0.001,-1.792,-5.303,-5.943,-2.550,0.000,-0.411,0.000,-0.150,-0.077,-3.063,-4.454,-4.454,0.000,0.000,-1.000,-3.434,1.621,-13.110,-15.334,2.223,-0.635,0.411,0.183,-0.109,0.292,0.257,2.208,-125,5.000,-15.983,-18.603,2.239,0.000,0.000,-0.020,-0.060,-0.092,-0.425,-0.425,-0.425
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-21,935156,14:51:18.956000,21691,7.812,214,6.995,36,0.441,0.008,0.119,-0.019,3.045,4.875,-5.256,-4.451,-0.275,-0.238,-6.463,8,7.790,6.256,-1.000,0.222,1.000,2.000,0.667,-6.044,-5.772,-0.000,2.894,0.202,1.678,-1.420,4.796,8,3.932,2,0.417,-0.147,-0.091,-0.074,-4.394,-3.932,-3.639,-3.355,-0.125,-0.060,4.111,1,5381.529,-33.593,0.276,0.574,...,-0.585,-0.509,-0.001,-0.594,-0.978,0.000,0.185,0.003,-0.003,-0.002,-4.511,-4.913,-5.846,-4.981,-3.932,0.246,0.076,-0.177,-0.270,-0.406,0.000,3.045,5.112,5.226,-0.343,-2.398,-45.889,-18.432,-17.280,-1.153,-15.161,0.489,-0.093,-0.074,-0.019,0.130,2.294,-65,-95.000,-7.414,-21.280,2.188,0.000,0.000,-0.210,0.082,0.155,0.750,0.083,-1.562
2021-01-21,936015,14:51:42.285000,21708,7.879,231,7.057,39,0.439,0.049,0.172,-0.004,4.875,5.303,-4.765,-3.751,-0.146,-0.104,-6.594,10,7.834,6.382,-1.000,0.222,1.000,2.000,0.667,-5.850,-5.628,-0.000,2.990,-0.023,0.488,-1.149,5.673,25,4.796,5,0.414,0.103,0.167,0.040,3.434,3.045,3.647,2.801,0.218,0.219,1.792,3,5381.695,-11.903,0.276,0.574,...,0.982,0.099,0.030,-0.697,-2.032,0.000,-0.683,-0.003,-0.000,-0.013,4.511,3.714,-4.796,-5.239,3.932,0.108,-0.026,0.075,-0.273,-0.121,0.000,3.045,3.434,1.792,1.000,-3.258,-18.742,-5.643,-7.405,1.761,-6.260,0.401,-0.126,-0.169,0.043,0.039,2.057,10,55.000,-9.126,-5.952,2.123,0.000,1.000,-0.210,0.082,0.155,1.453,0.000,0.000
2021-01-21,943637,14:55:13.748000,21898,8.022,215,7.474,47,0.578,-0.248,-0.381,-0.051,-6.628,-6.509,-5.766,-5.526,-0.428,-0.517,-6.686,7,7.462,-4.615,-1.000,0.333,-1.000,1.071,0.638,4.586,-5.706,-0.000,3.549,-0.704,2.631,-1.498,5.690,17,5.303,3,0.678,-0.322,-0.250,-0.304,-4.564,-3.932,3.124,3.596,0.142,0.309,1.792,1,5374.383,18.540,0.276,0.211,...,0.619,-0.427,-0.249,1.233,0.021,0.000,-1.893,0.009,-0.007,0.015,-4.190,-5.375,-6.509,-6.437,-3.932,-0.119,-0.038,0.052,-0.606,-2.006,4.331,5.916,-4.984,-4.984,1.000,-3.258,10.026,-0.088,2.154,-2.242,18.628,0.167,-0.285,-0.203,-0.082,0.134,1.707,30,-20.000,1.479,2.410,1.720,0.000,1.000,0.020,0.020,0.037,0.000,0.000,0.000
2021-01-21,948326,14:56:20.909000,22061,7.669,180,6.899,27,0.463,-0.210,-0.455,-0.050,-6.111,-6.111,-5.149,-5.052,-0.207,-0.445,-5.787,9,7.234,-6.217,-1.000,0.222,-1.000,3.030,3.030,5.742,4.736,-0.000,2.147,-0.338,1.842,-2.973,5.442,24,4.663,4,0.457,-0.217,-0.524,-0.083,-3.932,-4.025,-1.614,-3.215,-0.038,-0.531,3.434,1,5371.372,15.850,0.330,0.415,...,0.886,0.242,-0.445,-4.482,-2.468,0.000,-0.468,0.030,0.005,-0.020,-3.258,-5.112,-5.740,5.968,-4.111,-0.486,-0.237,-0.261,-0.756,-4.987,-3.714,4.394,-5.832,-5.690,0.832,3.258,-10.933,16.306,5.557,10.749,-0.455,-0.348,0.127,-0.140,0.267,-0.262,2.175,85,5.000,14.081,-2.295,2.117,-3.714,1.000,-0.049,-0.014,0.000,-0.250,0.000,-0.595


In [16]:
# Check the column dtypes of the prepared training features.
X_train.dtypes 

vol_trd               float64
n_trd                   int64
vol_big               float64
n_big                   int64
vol_big_ratio         float64
                       ...   
escora_3.5_1.2_DIF    float64
escora_4.5_0.8_DIF    float64
movesc_2.5_0.7_DIF    float64
movesc_3.5_0.7_DIF    float64
movesc_4.5_0.7_DIF    float64
Length: 116, dtype: object