# Data Preparation

## Packages

In [1]:
import os
import winsound
import numpy as np
import pandas as pd
import datetime as dt
from tqdm import tqdm
tqdm.pandas()
from multiprocessing import  Pool
import time
import imblearn

import math, collections
from scipy.stats import linregress

from matplotlib import pyplot as plt
from matplotlib.pyplot import figure

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_slope(y_axis):
    """Return the angle, in degrees, of a least-squares line fitted to ``y_axis``.

    The samples are regressed against a synthetic x-axis whose k-th point
    (k = 1 .. len(y_axis)) is ``1 + k * 0.0001 * 0.1``. The fitted slope is
    then converted to an angle with ``atan``.

    Parameters
    ----------
    y_axis : sequence of float
        Values to fit, in order.

    Returns
    -------
    float
        Inclination of the fitted line in degrees.
    """
    # Evenly spaced abscissae; same expression and evaluation order for every point.
    x_points = [1 + (position * 0.0001 * 0.1) for position in range(1, len(y_axis) + 1)]

    # Only the slope of the fit is needed; the remaining fit statistics are discarded.
    fitted_slope, *_ = linregress(x_points, y_axis)

    return math.degrees(math.atan(fitted_slope))

In [3]:
def chunk_ticks(df, number_of_ticks):
    """Aggregate raw quotes into consecutive chunks of ``number_of_ticks`` rows.

    The mid price ``(Bid + Ask) / 2`` and the spread ``Ask - Bid`` are derived
    for every row, then summarised per chunk. The last chunk may contain fewer
    than ``number_of_ticks`` rows.

    Unlike the previous implementation, the caller's frame is left untouched:
    no ``tick`` / ``spread`` columns are added to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Quotes with ``Bid`` and ``Ask`` columns.
    number_of_ticks : int
        Number of consecutive rows per chunk (must be non-zero, as it is the
        step of the chunking ``range``).

    Returns
    -------
    pandas.DataFrame
        One row per chunk with the columns, in this order:
        ``tick_act``   - mid price of the last row in the chunk,
        ``tick_avg``   - exponentially weighted mean of the chunk's mid prices
                         (span = chunk length), taken at the last row,
        ``spread_avg`` - arithmetic mean of the chunk's spreads,
        ``tick_sd``    - population standard deviation (``np.std``) of the
                         chunk's mid prices.
    """
    # Derived values live in local arrays so the input frame is not mutated.
    mid_prices = ((df['Bid'] + df['Ask']) / 2).to_numpy()
    spreads = (df['Ask'] - df['Bid']).to_numpy()

    tick_act = []
    tick_avg = []
    spread_avg = []
    tick_sd = []

    for start in tqdm(range(0, len(mid_prices), number_of_ticks)):
        stop = start + number_of_ticks
        chunk_mid = mid_prices[start:stop]
        chunk_spread = spreads[start:stop]

        tick_act.append(chunk_mid[-1])
        # EWM over the whole chunk; only the value at the chunk's last row is kept.
        tick_avg.append(pd.Series(chunk_mid).ewm(span=len(chunk_mid)).mean().iloc[-1])
        spread_avg.append(np.mean(chunk_spread))
        tick_sd.append(np.std(chunk_mid))

    return pd.DataFrame({
        'tick_act': tick_act,
        'tick_avg': tick_avg,
        'spread_avg': spread_avg,
        'tick_sd': tick_sd,
    })

In [4]:
def before_sma():
    """Append the module-level ``val`` to the deque stored at ``data['ssma_list']``.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    # NOTE(review): `val` is read from module globals and is not assigned anywhere in the
    # visible cells of this notebook - confirm where it is set before calling.
    buffer = data['ssma_list']
    buffer.append(val)
    return ()

def after_sma():
    """Slide ``data['ssma_list']`` forward by one value and refresh the ``sema`` state.

    Steps, all operating on the module-level ``data`` dict:

    1. Drop the oldest entry of ``data['ssma_list']`` and append the global ``val``.
    2. Store in ``data['sema']`` the exponentially weighted mean of the buffer
       (span ``data['sma_len']``) at position ``data['sma_len'] - 1``.
    3. Keep at most the two latest ``sema`` values in ``data['sema_ready']`` and
       set ``data['sema_diff']`` to latest minus previous. While the history held
       fewer than two values on entry, ``sema_diff`` is NaN.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    # NOTE(review): `val` is read from module globals and is not assigned anywhere in the
    # visible cells of this notebook - confirm where it is set before calling.
    window = data['ssma_list']
    window.popleft()
    window.append(val)

    span = data['sma_len']
    smoothed = pd.Series(list(window)).ewm(span=span).mean().tolist()
    data['sema'] = smoothed[span - 1]

    history = data['sema_ready']
    if len(history) >= 2:
        # History is full: rotate it and compare the two most recent values.
        history.popleft()
        history.append(data['sema'])
        data['sema_diff'] = history[-1] - history[-2]
    else:
        # Still warming up: not enough history for a difference yet.
        history.append(data['sema'])
        data['sema_diff'] = np.nan

    return ()

def before_lma():
    """Append the module-level ``val`` to the deque stored at ``data['lsma_list']``.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    # NOTE(review): `val` is read from module globals and is not assigned anywhere in the
    # visible cells of this notebook - confirm where it is set before calling.
    buffer = data['lsma_list']
    buffer.append(val)
    return ()

def after_lma():
    """Slide ``data['lsma_list']`` forward by one value and refresh the ``lema`` state.

    Steps, all operating on the module-level ``data`` dict:

    1. Drop the oldest entry of ``data['lsma_list']`` and append the global ``val``.
    2. Store in ``data['lema']`` the exponentially weighted mean of the buffer
       (span ``data['lma_len']``) at position ``data['lma_len'] - 1``.
    3. Keep at most the two latest ``lema`` values in ``data['lema_ready']`` and
       set ``data['lema_diff']`` to latest minus previous. While the history held
       fewer than two values on entry, ``lema_diff`` is NaN.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    # NOTE(review): `val` is read from module globals and is not assigned anywhere in the
    # visible cells of this notebook - confirm where it is set before calling.
    window = data['lsma_list']
    window.popleft()
    window.append(val)

    span = data['lma_len']
    smoothed = pd.Series(list(window)).ewm(span=span).mean().tolist()
    data['lema'] = smoothed[span - 1]

    history = data['lema_ready']
    if len(history) >= 2:
        # History is full: rotate it and compare the two most recent values.
        history.popleft()
        history.append(data['lema'])
        data['lema_diff'] = history[-1] - history[-2]
    else:
        # Still warming up: not enough history for a difference yet.
        history.append(data['lema'])
        data['lema_diff'] = np.nan

    return ()

In [5]:
def roll_ma(ma_list):
    """Return the exponentially weighted mean of ``ma_list`` at its last element.

    The EWM uses ``span = len(ma_list)`` and pandas' default settings. The
    function is used as the callback of ``Series.rolling(...).apply`` in this
    notebook, so it receives one window of values per call.

    Parameters
    ----------
    ma_list : sequence of float, numpy.ndarray or pandas.Series
        One window of values, oldest first. Any index or name carried by a
        Series is ignored. The previous implementation looked up column ``0``
        of a DataFrame built from the input, which raised ``KeyError`` for a
        named Series.

    Returns
    -------
    float
        EWM value at the last position of the window.
    """
    # Strip any index/name so the lookup below is purely positional.
    window = np.asarray(ma_list)
    return float(pd.Series(window).ewm(span=len(window)).mean().iloc[-1])

In [6]:
def print_custom_value_counts(df, target_column, filter_column = None, filter_value = None):
    """Print absolute and percentage value counts of ``target_column``.

    When neither ``filter_column`` nor ``filter_value`` is supplied, the whole
    column is summarised and the heading names the target column. Otherwise
    only rows where ``df[filter_column] == filter_value`` are summarised and the
    heading shows the filter. A separator line is printed at the end.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns.
    target_column : str
        Column whose values are counted.
    filter_column : str, optional
        Column used to restrict the rows.
    filter_value : object, optional
        Value ``filter_column`` must equal for a row to be counted.
    """
    no_filter = filter_column is None and filter_value is None

    # Heading is printed before the selection, so it appears even if the lookup fails.
    if no_filter:
        print(f'target_column : {target_column}')
        selected = df[target_column]
    else:
        print(f'{filter_column} : {filter_value}')
        selected = df.loc[df[filter_column] == filter_value, target_column]

    absolute = selected.value_counts()
    relative = selected.value_counts(normalize=True).mul(100)
    print(pd.concat([absolute, relative], axis=1, keys=('counts', 'percentage')))
    print('=======================')

In [7]:
def _label_direction(values, threshold):
    """Label each row by the move from its value to the next row's value.

    Returns a Series aligned with ``values`` holding ``'increase'`` where the
    next value is at least ``threshold`` higher, ``'decrease'`` where it is at
    least ``threshold`` lower and ``'same'`` otherwise. The final row has no
    successor (its difference is NaN), so it is always labelled ``'same'``.
    """
    step = values.shift(-1) - values
    labels = pd.Series('same', index=values.index)
    # Applied in this order so 'decrease' wins if both conditions ever hold.
    labels[step >= threshold] = 'increase'
    labels[step <= -threshold] = 'decrease'
    return labels


def _print_act_direction_breakdown(df):
    """Print the ``act_direction`` distribution for each value of ``direction``."""
    for label in ('same', 'increase', 'decrease'):
        print_custom_value_counts(df = df, target_column = 'act_direction',
                                  filter_column = 'direction', filter_value = label)


def run_data_prep(year):
    """Build the feature table for one yearly tick file and print label statistics.

    Reads ``data/yearly_tick_data/<year>.csv``, aggregates it with
    ``chunk_ticks`` (written to ``chunk_<year>.csv`` in the same folder), then
    derives RSI, simple/exponential moving averages, slopes and a ``direction``
    label, and writes the result to ``tab_<year>.csv`` in the same folder.
    Afterwards it prints how the ``sema``-based ``direction`` label compares
    with labels derived from ``tick_avg`` and from ``tick_act``.

    Settings are read from the module-level ``data`` dict (``input_rows``,
    ``number_of_ticks``, ``rsi_window``, ``sma_len``, ``lma_len``,
    ``pip_diff``). The function also writes to it: ``rs_max`` is set to ``1e6``
    and ``ssma_list``, ``lsma_list``, ``sema_ready`` and ``lema_ready`` are
    reset to empty deques.

    Parameters
    ----------
    year : int or str
        Name (without extension) of the yearly CSV file to process.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    print(f'-----------------------------------{year}--------------------------------------')

    diff_col = 'sema'

    source_file_path = f'data/yearly_tick_data/{year}.csv'
    path, file_name = os.path.split(source_file_path)
    target_file_path = os.path.join(path, 'tab_' + file_name)
    chunk_file_path = os.path.join(path, 'chunk_' + file_name)

    print(f'source_file_path : {source_file_path}')
    print(f'chunk_file_path : {chunk_file_path}')
    print(f'target_file_path : {target_file_path}')

    # nrows=None (the read_csv default) reads the whole file.
    df = pd.read_csv(source_file_path, nrows=data['input_rows'])
    print(f'Total input recs : {len(df)}')
    print("Data manipulation...")
    df = chunk_ticks(df, data['number_of_ticks'])
    df.to_csv(chunk_file_path, index = False)
    print(f'Records : {len(df)}')

    # Continue from the persisted chunk file, exactly as it was written to disk.
    df = pd.read_csv(chunk_file_path)

    data['rs_max'] = 1e6

    data['ssma_list'] = collections.deque([])
    data['lsma_list'] = collections.deque([])
    data['sema_ready'] = collections.deque([])
    data['lema_ready'] = collections.deque([])

    # Placeholders: creating these columns here fixes their position in the output
    # file (right after the chunk columns); the real values are assigned below.
    df['sema'] = ''
    df['lema'] = ''
    df['sema_diff'] = ''
    df['lema_diff'] = ''

    # RSI -----------------------------
    df['diff'] = df['tick_avg'].diff()
    # Vectorised instead of chained `df[col].loc[mask] = ...`, which depends on view
    # semantics and does not update `df` under pandas Copy-on-Write.
    df['gain'] = df['diff'].where(df['diff'] > 0, 0.0)
    df['loss'] = df['diff'].abs().where(df['diff'] < 0, 0.0)
    df['avg_gain'] = df['gain'].rolling(window=data['rsi_window']).mean()
    df['avg_loss'] = df['loss'].rolling(window=data['rsi_window']).mean()
    df['rs'] = df['avg_gain'] / df['avg_loss']
    # Anything not <= rs_max (including inf and NaN) is replaced by rs_max.
    df['rs'] = df['rs'].where(df['rs'] <= data['rs_max'], data['rs_max'])
    df['rsi'] = 100 - (100 / (df['rs'] + 1))

    # Simple Moving Averages ------------------
    df['ssma'] = df['tick_avg'].rolling(window=data['sma_len']).mean()
    df['ssma_diff'] = df['ssma'].diff()
    df['lsma'] = df['tick_avg'].rolling(window=data['lma_len']).mean()
    df['lsma_diff'] = df['lsma'].diff()
    df['sma_diff'] = df['ssma'] - df['lsma']

    df['max_tick'] = df['tick_avg'].rolling(window=data['sma_len']).max()
    df['min_tick'] = df['tick_avg'].rolling(window=data['sma_len']).min()

    df['max_gap'] = df['max_tick'] - df['tick_avg']
    df['min_gap'] = df['min_tick'] - df['tick_avg']

    print("Emas creation...")
    # Emas ----------------
    # progress_apply is the tqdm-instrumented apply registered by tqdm.pandas().
    df['sema'] = df['tick_avg'].rolling(window=data['sma_len']).progress_apply(roll_ma)
    df['lema'] = df['tick_avg'].rolling(window=data['lma_len']).progress_apply(roll_ma)

    df['sema_diff'] = df['sema'].diff()
    df['lema_diff'] = df['lema'].diff()

    df['ema_diff'] = df['sema'] - df['lema']

    print("slope creation...")
    # Slopes -----------------------------
    df['small_sema_slope'] = df['sema'].rolling(window=data['sma_len']).progress_apply(get_slope)
    df['long_sema_slope'] = df['sema'].rolling(window=data['lma_len']).progress_apply(get_slope)

    df['slope_diff'] = df['small_sema_slope'] - df['long_sema_slope']

    print('Direction identification...')
    df = df.round(5)

    # Direction -------------------------
    # NOTE(review): labels compare values already rounded to 5 decimals against pip_diff;
    # a move that equals the threshold on paper can land on either side in floating point.
    df['direction'] = _label_direction(df[diff_col], data['pip_diff'])

    # Remove helper columns and NaNs ------------------------
    df = df.drop(columns=['gain', 'loss']).dropna().reset_index(drop=True)
    print(f'Total records : {len(df)}')

    df.to_csv(target_file_path, index = False)

    print_custom_value_counts(df = df, target_column = 'direction')

    # Everything below is diagnostic only: the target file has already been written.
    print('Avg Direction -------------------------')
    df['act_direction'] = _label_direction(df['tick_avg'], data['pip_diff'])
    _print_act_direction_breakdown(df)

    print('\n')
    df = df.rename(columns={'act_direction': 'tick_act_direction'})

    print('Act Direction -------------------------')
    df['act_direction'] = _label_direction(df['tick_act'], data['pip_diff'])
    _print_act_direction_breakdown(df)

    print('\n')
    print(f'-----------------------------------{year}--------------------------------------')
    return ()

In [8]:
# Settings read by the functions above through the module-level name `data`.
data = {
    'number_of_ticks': 300,   # rows per chunk passed to chunk_ticks
    'rsi_window': 14,         # rolling window for the average gain/loss
    'sma_len': 5,             # window of the ssma/sema columns
    'lma_len': 10,            # window of the lsma/lema columns
    'pip_diff': 0.00012,      # minimum next-row move labelled increase/decrease
    # None reads every row of a yearly file; use an int (e.g. 5_000_000) to cap the rows read.
    'input_rows': None,
}

# Yearly tick files to process, 2003 through 2020 (use e.g. [2019] for a single-year run).
train_files = list(range(2003, 2021))

In [9]:
%%time
# Process every configured year in order; each call writes chunk_<year>.csv and tab_<year>.csv.
for year in train_files:
    # run_data_prep returns an empty tuple, so there is no result to capture here.
    run_data_prep(year)

-----------------------------------2003--------------------------------------
source_file_path : data/yearly_tick_data/2003.csv
chunk_file_path : data/yearly_tick_data\chunk_2003.csv
target_file_path : data/yearly_tick_data\tab_2003.csv
Total input recs : 6993511
Data manipulation...


100%|██████████████████████████████████████████████████████████████████████████████████████████| 23312/23312 [00:30<00:00, 771.11it/s]
216it [00:00, 1059.78it/s]

Records : 23312
Emas creation...


23308it [00:22, 1035.70it/s]
23303it [00:22, 1037.10it/s]
339it [00:00, 3364.37it/s]

slope creation...


23304it [00:06, 3375.48it/s]
23299it [00:06, 3350.63it/s]


Direction identification...
Total records : 23299
target_column : direction
          counts  percentage
same       12810   54.980900
increase    5310   22.790678
decrease    5179   22.228422
Avg Direction -------------------------
direction : same
          counts  percentage
same        7365   57.494145
decrease    2748   21.451991
increase    2697   21.053864
direction : increase
          counts  percentage
increase    3790   71.374765
same        1098   20.677966
decrease     422    7.947269
direction : decrease
          counts  percentage
decrease    3716   71.751303
same        1060   20.467272
increase     403    7.781425


Act Direction -------------------------
direction : same
          counts  percentage
same        4542   35.456674
increase    4166   32.521468
decrease    4102   32.021858
direction : increase
          counts  percentage
increase    3147   59.265537
same        1085   20.433145
decrease    1078   20.301318
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 36207/36207 [00:48<00:00, 744.83it/s]
104it [00:00, 1032.55it/s]

Records : 36207
Emas creation...


36203it [00:35, 1018.93it/s]
36198it [00:34, 1043.54it/s]
656it [00:00, 3263.02it/s]

slope creation...


36199it [00:10, 3293.60it/s]
36194it [00:10, 3313.77it/s]


Direction identification...
Total records : 36194
target_column : direction
          counts  percentage
same       18742   51.782063
increase    8845   24.437752
decrease    8607   23.780185
Avg Direction -------------------------
direction : same
          counts  percentage
same        9460   50.474869
decrease    4679   24.965319
increase    4603   24.559812
direction : increase
          counts  percentage
increase    6635   75.014132
same        1483   16.766535
decrease     727    8.219333
direction : decrease
          counts  percentage
decrease    6491   75.415360
same        1405   16.323922
increase     711    8.260718


Act Direction -------------------------
direction : same
          counts  percentage
same        6463   34.484047
decrease    6161   32.872692
increase    6118   32.643261
direction : increase
          counts  percentage
increase    5490   62.068966
decrease    1802   20.373092
same        1553   17.557942
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 35495/35495 [00:47<00:00, 752.66it/s]
97it [00:00, 962.95it/s]

Records : 35495
Emas creation...


35491it [00:34, 1031.21it/s]
35486it [00:34, 1027.72it/s]
296it [00:00, 2939.43it/s]

slope creation...


35487it [00:10, 3267.82it/s]
35482it [00:10, 3305.38it/s]


Direction identification...
Total records : 35482
target_column : direction
          counts  percentage
same       19623   55.304098
decrease    8071   22.746745
increase    7788   21.949157
Avg Direction -------------------------
direction : same
          counts  percentage
same       10165   51.801457
decrease    4757   24.241961
increase    4701   23.956582
direction : increase
          counts  percentage
increase    5887   75.590652
same        1290   16.563945
decrease     611    7.845403
direction : decrease
          counts  percentage
decrease    6105   75.641184
same        1393   17.259324
increase     573    7.099492


Act Direction -------------------------
direction : same
          counts  percentage
same        6999   35.667329
decrease    6389   32.558732
increase    6235   31.773939
direction : increase
          counts  percentage
increase    4868   62.506420
decrease    1538   19.748331
same        1382   17.745249
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 46603/46603 [01:02<00:00, 749.65it/s]
102it [00:00, 1014.87it/s]

Records : 46603
Emas creation...


46599it [00:43, 1074.19it/s]
46594it [00:43, 1083.46it/s]
334it [00:00, 3314.71it/s]

slope creation...


46595it [00:13, 3350.23it/s]
46590it [00:14, 3326.57it/s]


Direction identification...
Total records : 46590
target_column : direction
          counts  percentage
same       31389   67.372827
increase    7763   16.662374
decrease    7438   15.964799
Avg Direction -------------------------
direction : same
          counts  percentage
same       19912   63.436236
decrease    5815   18.525598
increase    5662   18.038166
direction : increase
          counts  percentage
increase    5791   74.597449
same        1449   18.665464
decrease     523    6.737086
direction : decrease
          counts  percentage
decrease    5526   74.294165
same        1403   18.862597
increase     509    6.843237


Act Direction -------------------------
direction : same
          counts  percentage
same       14929   47.561248
decrease    8247   26.273535
increase    8213   26.165217
direction : increase
          counts  percentage
increase    4739   61.045987
same        1748   22.517068
decrease    1276   16.436944
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 63889/63889 [01:24<00:00, 757.34it/s]
0it [00:00, ?it/s]

Records : 63889
Emas creation...


63885it [01:02, 1024.26it/s]
63880it [01:02, 1028.30it/s]
324it [00:00, 3234.15it/s]

slope creation...


63881it [00:19, 3283.99it/s]
63876it [00:19, 3307.11it/s]


Direction identification...
Total records : 63876
target_column : direction
          counts  percentage
same       44769   70.087357
increase    9852   15.423633
decrease    9255   14.489010
Avg Direction -------------------------
direction : same
          counts  percentage
same       27635   61.727981
decrease    8590   19.187384
increase    8544   19.084634
direction : increase
          counts  percentage
increase    7313   74.228583
same        2108   21.396671
decrease     431    4.374746
direction : decrease
          counts  percentage
decrease    7008   75.721232
same        1827   19.740681
increase     420    4.538088


Act Direction -------------------------
direction : same
          counts  percentage
same       19143   42.759499
increase   12874   28.756506
decrease   12752   28.483996
direction : increase
          counts  percentage
increase    5999   60.891190
same        2291   23.254162
decrease    1562   15.854649
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 95697/95697 [02:07<00:00, 748.40it/s]
0it [00:00, ?it/s]

Records : 95697
Emas creation...


95693it [01:30, 1052.69it/s]
95688it [01:32, 1031.21it/s]
657it [00:00, 3281.88it/s]

slope creation...


95689it [00:29, 3268.80it/s]
95684it [00:29, 3276.82it/s]


Direction identification...
Total records : 95684
target_column : direction
          counts  percentage
same       45432   47.481293
increase   25311   26.452698
decrease   24941   26.066009
Avg Direction -------------------------
direction : same
          counts  percentage
same       21880   48.159887
increase   11838   26.056524
decrease   11714   25.783589
direction : increase
          counts  percentage
increase   19027   75.172850
same        4113   16.249852
decrease    2171    8.577298
direction : decrease
          counts  percentage
decrease   18771   75.261617
same        4028   16.150114
increase    2142    8.588268


Act Direction -------------------------
direction : same
          counts  percentage
increase   15400   33.896813
decrease   15203   33.463198
same       14829   32.639989
direction : increase
          counts  percentage
increase   15777   62.332583
decrease    5243   20.714314
same        4291   16.953103
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 47199/47199 [01:03<00:00, 739.54it/s]
106it [00:00, 1052.30it/s]

Records : 47199
Emas creation...


47195it [00:45, 1041.99it/s]
47190it [00:45, 1045.57it/s]
339it [00:00, 3364.34it/s]

slope creation...


47191it [00:14, 3318.01it/s]
47186it [00:14, 3303.99it/s]


Direction identification...
Total records : 47186
target_column : direction
          counts  percentage
increase   16951   35.923791
decrease   16374   34.700971
same       13861   29.375238
Avg Direction -------------------------
direction : same
          counts  percentage
increase    5007   36.122935
decrease    4937   35.617921
same        3917   28.259144
direction : increase
          counts  percentage
increase   12859   75.859831
same        2247   13.255855
decrease    1845   10.884314
direction : decrease
          counts  percentage
decrease   12397   75.711494
same        2134   13.032857
increase    1843   11.255649


Act Direction -------------------------
direction : same
          counts  percentage
increase    5598   40.386696
decrease    5405   38.994301
same        2858   20.619003
direction : increase
          counts  percentage
increase   10902   64.314790
decrease    3977   23.461743
same        2072   12.223468
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 27569/27569 [00:35<00:00, 778.38it/s]
222it [00:00, 1082.88it/s]

Records : 27569
Emas creation...


27565it [00:25, 1061.19it/s]
27560it [00:26, 1027.26it/s]
338it [00:00, 3372.45it/s]

slope creation...


27561it [00:08, 3288.71it/s]
27556it [00:08, 3322.58it/s]


Direction identification...
Total records : 27556
target_column : direction
          counts  percentage
decrease    9780   35.491363
increase    9556   34.678473
same        8220   29.830164
Avg Direction -------------------------
direction : same
          counts  percentage
increase    2986   36.326034
decrease    2920   35.523114
same        2314   28.150852
direction : increase
          counts  percentage
increase    7174   75.073252
same        1301   13.614483
decrease    1081   11.312265
direction : decrease
          counts  percentage
decrease    7451   76.186094
same        1184   12.106339
increase    1145   11.707566


Act Direction -------------------------
direction : same
          counts  percentage
increase    3443   41.885645
decrease    3327   40.474453
same        1450   17.639903
direction : increase
          counts  percentage
increase    6028   63.080787
decrease    2370   24.801172
same        1158   12.118041
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 85980/85980 [01:58<00:00, 726.28it/s]
0it [00:00, ?it/s]

Records : 85980
Emas creation...


85976it [01:25, 999.98it/s] 
85971it [01:23, 1031.86it/s]
309it [00:00, 3066.86it/s]

slope creation...


85972it [00:25, 3331.10it/s]
85967it [00:25, 3392.06it/s]


Direction identification...
Total records : 85967
target_column : direction
          counts  percentage
same       33754   39.263904
decrease   26282   30.572196
increase   25931   30.163900
Avg Direction -------------------------
direction : same
          counts  percentage
same       11374   33.696747
decrease   11197   33.172365
increase   11183   33.130888
direction : increase
          counts  percentage
increase   19726   76.071112
same        4207   16.223825
decrease    1998    7.705063
direction : decrease
          counts  percentage
decrease   20146   76.653223
same        4100   15.600030
increase    2036    7.746747


Act Direction -------------------------
direction : same
          counts  percentage
increase   12902   38.223618
decrease   12737   37.734787
same        8115   24.041595
direction : increase
          counts  percentage
increase   16629   64.127878
decrease    4983   19.216382
same        4319   16.655740
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 78052/78052 [01:38<00:00, 792.70it/s]
0it [00:00, ?it/s]

Records : 78052
Emas creation...


78048it [01:12, 1071.02it/s]
78043it [01:14, 1042.84it/s]
327it [00:00, 3245.39it/s]

slope creation...


78044it [00:23, 3281.39it/s]
78039it [00:23, 3351.42it/s]


Direction identification...
Total records : 78039
target_column : direction
          counts  percentage
same       41932   53.732108
decrease   18102   23.196094
increase   18005   23.071797
Avg Direction -------------------------
direction : same
          counts  percentage
same       18539   44.212058
decrease   11698   27.897548
increase   11695   27.890394
direction : increase
          counts  percentage
increase   13711   76.151069
same        3481   19.333518
decrease     813    4.515412
direction : decrease
          counts  percentage
decrease   13908   76.831289
same        3404   18.804552
increase     790    4.364159


Act Direction -------------------------
direction : same
          counts  percentage
decrease   14086   33.592483
increase   14071   33.556711
same       13775   32.850806
direction : increase
          counts  percentage
increase   11308   62.804776
same        3930   21.827270
decrease    2767   15.367953
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 62080/62080 [01:20<00:00, 772.09it/s]
106it [00:00, 1059.78it/s]

Records : 62080
Emas creation...


62076it [00:59, 1048.12it/s]
62071it [00:58, 1059.16it/s]
650it [00:00, 3249.31it/s]

slope creation...


62072it [00:18, 3284.76it/s]
62067it [00:18, 3320.36it/s]


Direction identification...
Total records : 62067
target_column : direction
          counts  percentage
same       33883   54.591006
increase   14115   22.741553
decrease   14069   22.667440
Avg Direction -------------------------
direction : same
          counts  percentage
same       15162   44.748104
decrease    9390   27.713012
increase    9331   27.538884
direction : increase
          counts  percentage
increase   10793   76.464754
same        2585   18.313851
decrease     737    5.221396
direction : decrease
          counts  percentage
decrease   10819   76.899566
same        2602   18.494563
increase     648    4.605871


Act Direction -------------------------
direction : same
          counts  percentage
decrease   11335   33.453354
increase   11279   33.288080
same       11269   33.258566
direction : increase
          counts  percentage
increase    8941   63.343960
same        3003   21.275239
decrease    2171   15.380801
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 56682/56682 [01:14<00:00, 760.00it/s]
106it [00:00, 1052.38it/s]

Records : 56682
Emas creation...


56678it [00:54, 1040.28it/s]
56673it [00:54, 1046.13it/s]
339it [00:00, 3365.26it/s]

slope creation...


56674it [00:17, 3283.13it/s]
56669it [00:16, 3338.62it/s]


Direction identification...
Total records : 56669
target_column : direction
          counts  percentage
same       36401   64.234414
decrease   10416   18.380420
increase    9852   17.385166
Avg Direction -------------------------
direction : same
          counts  percentage
same       19811   54.424329
decrease    8323   22.864756
increase    8267   22.710915
direction : increase
          counts  percentage
increase    7585   76.989444
same        1844   18.717012
decrease     423    4.293544
direction : decrease
          counts  percentage
decrease    8043   77.217742
same        2007   19.268433
increase     366    3.513825


Act Direction -------------------------
direction : same
          counts  percentage
same       15469   42.496085
increase   10524   28.911294
decrease   10408   28.592621
direction : increase
          counts  percentage
increase    6162   62.545676
same        2318   23.528218
decrease    1372   13.926106
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 80774/80774 [01:49<00:00, 739.40it/s]
0it [00:00, ?it/s]

Records : 80774
Emas creation...


80770it [01:17, 1047.53it/s]
80765it [01:18, 1031.19it/s]
652it [00:00, 3246.27it/s]

slope creation...


80766it [00:24, 3298.48it/s]
80761it [00:24, 3337.27it/s]


Direction identification...
Total records : 80761
target_column : direction
          counts  percentage
same       39251   48.601429
decrease   21029   26.038558
increase   20481   25.360013
Avg Direction -------------------------
direction : same
          counts  percentage
same       16232   41.354360
decrease   11539   29.397977
increase   11480   29.247662
direction : increase
          counts  percentage
increase   15784   77.066549
same        3557   17.367316
decrease    1140    5.566134
direction : decrease
          counts  percentage
decrease   16146   76.779685
same        3844   18.279519
increase    1039    4.940796


Act Direction -------------------------
direction : same
          counts  percentage
decrease   13893   35.395277
increase   13719   34.951976
same       11639   29.652748
direction : increase
          counts  percentage
increase   12926   63.112153
same        4149   20.257800
decrease    3406   16.630047
direction : decrease
          counts  percentage

100%|████████████████████████████████████████████████████████████████████████████████████████| 149426/149426 [03:16<00:00, 761.56it/s]


Records : 149426


199it [00:00, 995.66it/s]

Emas creation...


149422it [02:22, 1046.79it/s]
149417it [02:23, 1042.64it/s]
637it [00:00, 3178.51it/s]

slope creation...


149418it [00:44, 3368.71it/s]
149413it [00:44, 3343.92it/s]


Direction identification...
Total records : 149413
target_column : direction
          counts  percentage
same      119134   79.734695
decrease   15170   10.153066
increase   15109   10.112239
Avg Direction -------------------------
direction : same
          counts  percentage
same       85759   71.985327
decrease   16869   14.159686
increase   16506   13.854987
direction : increase
          counts  percentage
increase   11698   77.424052
same        2888   19.114435
decrease     523    3.461513
direction : decrease
          counts  percentage
decrease   11727   77.303889
same        2974   19.604483
increase     469    3.091628


Act Direction -------------------------
direction : same
          counts  percentage
same       74899   62.869542
decrease   22192   18.627764
increase   22043   18.502694
direction : increase
          counts  percentage
increase    9520   63.008803
same        3767   24.932160
decrease    1822   12.059038
direction : decrease
          counts  percentag

100%|██████████████████████████████████████████████████████████████████████████████████████████| 72347/72347 [01:34<00:00, 763.74it/s]
0it [00:00, ?it/s]

Records : 72347
Emas creation...


72343it [01:08, 1052.05it/s]
72338it [01:08, 1049.49it/s]
335it [00:00, 3325.72it/s]

slope creation...


72339it [00:21, 3382.40it/s]
72334it [00:21, 3367.46it/s]


Direction identification...
Total records : 72334
target_column : direction
          counts  percentage
same       48100   66.497083
increase   12376   17.109520
decrease   11858   16.393397
Avg Direction -------------------------
direction : same
          counts  percentage
same       26359   54.800416
decrease   10907   22.675676
increase   10834   22.523909
direction : increase
          counts  percentage
increase    9554   77.197802
same        2484   20.071105
decrease     338    2.731092
direction : decrease
          counts  percentage
decrease    9192   77.517288
same        2372   20.003373
increase     294    2.479339


Act Direction -------------------------
direction : same
          counts  percentage
same       20156   41.904366
increase   14024   29.155925
decrease   13920   28.939709
direction : increase
          counts  percentage
increase    7727   62.435359
same        3261   26.349386
decrease    1388   11.215255
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 86830/86830 [01:54<00:00, 755.84it/s]
0it [00:00, ?it/s]

Records : 86830
Emas creation...


86826it [01:23, 1039.04it/s]
86821it [01:22, 1048.95it/s]
330it [00:00, 3275.06it/s]

slope creation...


86822it [00:25, 3353.98it/s]
86817it [00:25, 3348.87it/s]


Direction identification...
Total records : 86817
target_column : direction
          counts  percentage
same       57241   65.932939
decrease   14966   17.238559
increase   14610   16.828501
Avg Direction -------------------------
direction : same
          counts  percentage
same       30852   53.898429
decrease   13222   23.098828
increase   13167   23.002743
direction : increase
          counts  percentage
increase   11377   77.871321
same        2849   19.500342
decrease     384    2.628337
direction : decrease
          counts  percentage
decrease   11662   77.923293
same        2939   19.637846
increase     365    2.438861


Act Direction -------------------------
direction : same
          counts  percentage
same       23884   41.725337
decrease   16703   29.180133
increase   16654   29.094530
direction : increase
          counts  percentage
increase    9220   63.107461
same        3782   25.886379
decrease    1608   11.006160
direction : decrease
          counts  percentage

100%|██████████████████████████████████████████████████████████████████████████████████████████| 97288/97288 [02:05<00:00, 772.70it/s]
0it [00:00, ?it/s]

Records : 97288
Emas creation...


97284it [01:32, 1054.37it/s]
97279it [01:32, 1052.10it/s]
322it [00:00, 3195.71it/s]

slope creation...


97280it [00:28, 3364.35it/s]
97275it [00:29, 3325.16it/s]


Direction identification...
Total records : 97275
target_column : direction
          counts  percentage
same       83200   85.530712
decrease    7307    7.511694
increase    6768    6.957594
Avg Direction -------------------------
direction : same
          counts  percentage
same       57299   68.868990
increase   13103   15.748798
decrease   12798   15.382212
direction : increase
          counts  percentage
increase    5362   79.225768
same        1330   19.651300
decrease      76    1.122931
direction : decrease
          counts  percentage
decrease    5761   78.842206
same        1449   19.830300
increase      97    1.327494


Act Direction -------------------------
direction : same
          counts  percentage
same       48155   57.878606
increase   17737   21.318510
decrease   17308   20.802885
direction : increase
          counts  percentage
increase    4156   61.406619
same        2172   32.092199
decrease     440    6.501182
direction : decrease
          counts  percentage

100%|████████████████████████████████████████████████████████████████████████████████████████| 109213/109213 [02:22<00:00, 766.94it/s]
0it [00:00, ?it/s]

Records : 109213
Emas creation...


109209it [01:45, 1039.89it/s]
109204it [01:44, 1049.34it/s]
322it [00:00, 3191.51it/s]

slope creation...


109205it [00:32, 3337.74it/s]
109200it [00:32, 3337.79it/s]


Direction identification...
Total records : 109200
target_column : direction
          counts  percentage
same       73270   67.097070
increase   18039   16.519231
decrease   17891   16.383700
Avg Direction -------------------------
direction : same
          counts  percentage
same       40734   55.594377
increase   16475   22.485328
decrease   16061   21.920295
direction : increase
          counts  percentage
increase   14058   77.931149
same        3556   19.712844
decrease     425    2.356006
direction : decrease
          counts  percentage
decrease   13929   77.854787
same        3548   19.831200
increase     414    2.314013


Act Direction -------------------------
direction : same
          counts  percentage
same       31967   43.629043
increase   20866   28.478231
decrease   20437   27.892726
direction : increase
          counts  percentage
increase   11467   63.567825
same        4832   26.786407
decrease    1740    9.645768
direction : decrease
          counts  percentag

# NOTE(review): no module-level `df` is assigned in the visible cells (run_data_prep keeps its
# frame local and returns an empty tuple), so on a fresh kernel this presumably raises
# NameError - confirm which frame is meant or drop this cell.
df.to_csv('temp.csv')