In [33]:
%reset -f
import os
import sys

# Project root = parent of the current working directory. It is appended to
# sys.path, presumably so project-local modules (e.g. helper_functions,
# imported further down) can be resolved -- confirm the module location.
PROJ_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(PROJ_PATH)

import warnings

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import itertools
from pandas.api.types import CategoricalDtype

from datetime import date
from datetime import datetime, timedelta
import datetime as DT
from dateutil.relativedelta import relativedelta

import helper_functions as hfs
from scipy.stats import linregress
import math
import numba as nb

from numba import jit, njit, prange
from numba.typed import List as numbatypedList

import xgboost as xgb 
from boruta import BorutaPy

# Silence warnings for the whole session. The second call installs an "ignore"
# filter for every warning category, so it also covers the UserWarning filter
# installed by the first call.
warnings.simplefilter("ignore", UserWarning)
warnings.filterwarnings("ignore")

# Show DataFrames without truncation: no cap on columns, rows, or cell width.
for display_option in ("display.max_columns", "display.max_rows", "display.max_colwidth"):
    pd.set_option(display_option, None)

%precision %.3f 

'%.3f'

In [35]:
import configparser

# Load environment-specific settings from the project's ini file.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check that list instead of failing later with
# an opaque KeyError on a missing section.
env_config_path = f'{PROJ_PATH}/kye/env_config.ini'
env_config = configparser.ConfigParser()
if not env_config.read(env_config_path):
    raise FileNotFoundError(f"Could not read config file: {env_config_path}")

SPLIT_DATE = pd.to_datetime(env_config['ENV_VARS']['SPLIT_DATE'])
ML_DATA_FOLDER_PATH = env_config['LOCAL_PATHS']['ML_DATA_FOLDER_PATH']
PROCESSING_TEMP_FOLDER_PATH = env_config['LOCAL_PATHS']['PROCESSING_TEMP_FOLDER_PATH']

print("SPLIT_DATE: ", SPLIT_DATE)

# Read the clock once so TODAY, TO_DATE and FROM_DATE always describe the same
# run date (separate date.today() calls could straddle midnight).
run_date = date.today()
TODAY = str(run_date)
TO_DATE = TODAY

# Look-back start: four 365-day years plus a 60-day margin before the run date.
FROM_DATE = pd.to_datetime(run_date) - DT.timedelta(days=((365*4) + 60))
TWO_YEARS_BEFORE_FROM_DATE = FROM_DATE - DT.timedelta(days=730)
FROM_DATE

SPLIT_DATE:  2023-08-09 00:00:00


Timestamp('2019-06-11 00:00:00')

In [36]:
# List every entry in the ML data folder. os.listdir returns names in arbitrary
# order and does not filter anything, so later cells select files by name.
files_in_ML_DATA = os.listdir(f"{PROJ_PATH}/{ML_DATA_FOLDER_PATH}")
files_in_ML_DATA 

['indices_daily_ohlcv_2023-08-09.pkl', '.ipynb_checkpoints']

# Indices History 

In [37]:
# Choose which snapshot to load. os.listdir gives no ordering guarantee, so
# taking "the first match" could load a stale file once several dated snapshots
# exist. Assuming the `indices_daily_ohlcv_<YYYY-MM-DD>.pkl` naming used in this
# folder, the lexicographic maximum is the most recent snapshot.
ohlcv_snapshots = [x for x in files_in_ML_DATA if 'indices_daily_ohlcv' in x]
if not ohlcv_snapshots:
    raise FileNotFoundError(
        f"No 'indices_daily_ohlcv' file found in {PROJ_PATH}/{ML_DATA_FOLDER_PATH}"
    )
latest_ohlcv_snapshot = max(ohlcv_snapshots)

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
indices_daily_ohlcv = pd.read_pickle(f"{PROJ_PATH}/{ML_DATA_FOLDER_PATH}/{latest_ohlcv_snapshot}")

# Normalise the Date column via the project helper, then order rows by ticker
# and date, shorten the volume column name and drop the unused ratio column.
indices_daily_ohlcv["Date"] = hfs.str_to_date_col(indices_daily_ohlcv[["Date"]])
indices_daily_ohlcv = (
    indices_daily_ohlcv
    .sort_values(by=['Ticker', 'Date'])
    .reset_index(drop=True)
    .rename(columns={"Adj_Volume": "Adj_Vol"})
    .drop(columns=['close_to_adj_close_ratio'])
)
print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.head(2)

(1076, 7)


Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Vol
0,GSPC,2019-05-02,2922.1599,2931.6799,2900.5,2917.52,3778890000.0
1,GSPC,2019-05-03,2929.21,2947.8501,2929.21,2945.6399,3338120000.0


In [38]:
# Keep only the rows whose Ticker is 'GSPC' and renumber the index from 0.
is_gspc_row = indices_daily_ohlcv['Ticker'].isin(['GSPC'])
indices_daily_ohlcv = indices_daily_ohlcv.loc[is_gspc_row].reset_index(drop=True)
indices_daily_ohlcv.shape

(1076, 7)

In [39]:
# Percentage change of the adjusted close versus the close N rows earlier.
# The shift is done per ticker so a look-back never reaches into another
# ticker's rows; the shifted series is kept in a local instead of a temporary
# column that has to be dropped afterwards.
adj_close = indices_daily_ohlcv['Adj_Close']
adj_close_by_ticker = indices_daily_ohlcv.groupby("Ticker")['Adj_Close']

for lag in (1, 2, 5, 10):
    prev_close = adj_close_by_ticker.shift(lag)
    indices_daily_ohlcv[f'change_last_{lag}d'] = 100 * (adj_close - prev_close) / prev_close

# Absolute change of the close versus the previous row.
indices_daily_ohlcv['points_change'] = adj_close - adj_close_by_ticker.shift(1)

# 10-row rolling mean of the 1-row % change, per ticker.
# transform() returns a Series aligned to the frame's own row index. The
# previous groupby().apply() returned a DataFrame whose index layout depends on
# the pandas version and on the number of groups, so assigning it to a single
# column could misalign or raise.
indices_daily_ohlcv['change_last_1d_mean10'] = (
    indices_daily_ohlcv
    .groupby("Ticker")['change_last_1d']
    .transform(lambda s: s.rolling(window=10).mean())
)

In [40]:
# Previous row's volume within each ticker, so the averages below are built
# from rows strictly before the current one.
indices_daily_ohlcv['Adj_Vol_1d'] = indices_daily_ohlcv.groupby("Ticker")['Adj_Vol'].shift(1)

# NOTE(review): despite "EMA" in the column names these are plain rolling
# means, and the window is not grouped by Ticker -- that is only harmless while
# the frame holds a single ticker.
for window in (5, 20):
    prev_vol_mean = indices_daily_ohlcv['Adj_Vol_1d'].rolling(window).mean()
    indices_daily_ohlcv[f'Adj_Vol_EMA_prev_{window}d'] = prev_vol_mean.round(decimals=3)

# Trailing average volume divided by the current row's volume.
for window in (20, 5):
    indices_daily_ohlcv[f'EMA_prev_{window}d_DIV_Adj_Vol'] = (
        indices_daily_ohlcv[f'Adj_Vol_EMA_prev_{window}d'] / indices_daily_ohlcv['Adj_Vol']
    )


In [41]:
# Preview the first two rows; shifted/rolling features have no history yet
# there, so they are expected to be NaN.
indices_daily_ohlcv.head(2)

Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Vol,change_last_1d,change_last_2d,change_last_5d,change_last_10d,points_change,change_last_1d_mean10,Adj_Vol_1d,Adj_Vol_EMA_prev_5d,Adj_Vol_EMA_prev_20d,EMA_prev_20d_DIV_Adj_Vol,EMA_prev_5d_DIV_Adj_Vol
0,GSPC,2019-05-02,2922.1599,2931.6799,2900.5,2917.52,3778890000.0,,,,,,,,,,,
1,GSPC,2019-05-03,2929.21,2947.8501,2929.21,2945.6399,3338120000.0,0.963829,,,,28.1199,,3778890000.0,,,,


In [42]:
# Add Adj_Close_MA50 / MA150 / MA200 using the project helper hfs.get_MA
# (presumably a per-ticker moving average over the given window -- confirm).
for ma_window in (50, 150, 200):
    indices_daily_ohlcv[f'Adj_Close_MA{ma_window}'] = hfs.get_MA(
        indices_daily_ohlcv, "Ticker", "Adj_Close", ma_window
    )


In [43]:
# Bollinger-band columns for 10- and 20-row windows (helper returns the frame
# with the new columns attached).
for bb_window in (10, 20):
    indices_daily_ohlcv = hfs.get_BollBnd(
        indices_daily_ohlcv, window_size=bb_window, close_col="Adj_Close"
    )

# RSI columns for 9- and 14-row windows.
for rsi_window in (9, 14):
    indices_daily_ohlcv = hfs.get_RSI(
        indices_daily_ohlcv, window_size=rsi_window, close_col="Adj_Close"
    )

indices_daily_ohlcv.tail(2)

Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Vol,change_last_1d,change_last_2d,change_last_5d,change_last_10d,points_change,change_last_1d_mean10,Adj_Vol_1d,Adj_Vol_EMA_prev_5d,Adj_Vol_EMA_prev_20d,EMA_prev_20d_DIV_Adj_Vol,EMA_prev_5d_DIV_Adj_Vol,Adj_Close_MA50,Adj_Close_MA150,Adj_Close_MA200,BB_2_up_10_Adj_Close,BB_2_dn_10_Adj_Close,BB_2_up_20_Adj_Close,BB_2_dn_20_Adj_Close,RSI_9,RSI_14
1074,GSPC,2023-08-08,4498.03,4503.31,4464.39,4499.38,3884910000.0,-0.422,0.477,-1.69,-1.491,-19.06,-0.148,3493920000.0,4019886000.0,3945182000.0,1.016,1.035,4421.86,4173.533,4104.715,4611.772,4460.87,4601.645,4467.118,46.836,51.905
1075,GSPC,2023-08-09,4501.57,4502.44,4461.33,4467.71,2018497000.0,-0.704,-1.123,-1.012,-2.169,-31.67,-0.217,3884910000.0,3988394000.0,3958216000.0,1.961,1.976,4427.105,4177.823,4108.725,4608.958,4443.876,4602.268,4466.05,40.828,47.346


In [44]:
# ADR_percent_5d / ADR_percent_20d via the project helper, built from the
# adjusted high, low and close columns.
for adr_window in (5, 20):
    indices_daily_ohlcv = hfs.get_QM_ADR(
        indices_daily_ohlcv,
        window_size=adr_window,
        high_col="Adj_High",
        low_col="Adj_Low",
        close_col="Adj_Close",
    )


In [45]:
# Rolling highest high / lowest low over several look-backs, counting 5 rows
# per week. Dict order fixes the column order (52w, 26w, 4w, 1w).
# NOTE(review): the windows are not grouped by Ticker; fine only while the
# frame contains a single ticker.
lookback_rows = {
    "52_weeks": 52*5,
    "26_weeks": 26*5,
    "4_weeks": 4*5,
    "1_week": 5,
}
for lookback_label, n_rows in lookback_rows.items():
    indices_daily_ohlcv[f'High_{lookback_label}'] = indices_daily_ohlcv["Adj_High"].rolling(n_rows).max()
    indices_daily_ohlcv[f'Low_{lookback_label}'] = indices_daily_ohlcv["Adj_Low"].rolling(n_rows).min()

In [46]:
# Add EMA_5 / EMA_10 / EMA_20 of the adjusted close via the project helper.
base_feature = "Adj_Close"
indices_daily_ohlcv = hfs.get_EMA(
    indices_daily_ohlcv,
    base_feature,
    window_size_list=[5, 10, 20],
)
print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.tail(2)

(1076, 40)


Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Vol,change_last_1d,change_last_2d,change_last_5d,change_last_10d,points_change,change_last_1d_mean10,Adj_Vol_1d,Adj_Vol_EMA_prev_5d,Adj_Vol_EMA_prev_20d,EMA_prev_20d_DIV_Adj_Vol,EMA_prev_5d_DIV_Adj_Vol,Adj_Close_MA50,Adj_Close_MA150,Adj_Close_MA200,BB_2_up_10_Adj_Close,BB_2_dn_10_Adj_Close,BB_2_up_20_Adj_Close,BB_2_dn_20_Adj_Close,RSI_9,RSI_14,ADR_percent_5d,ADR_percent_20d,High_52_weeks,Low_52_weeks,High_26_weeks,Low_26_weeks,High_4_weeks,Low_4_weeks,High_1_week,Low_1_week,EMA_5,EMA_10,EMA_20
1074,GSPC,2023-08-08,4498.03,4503.31,4464.39,4499.38,3884910000.0,-0.422,0.477,-1.69,-1.491,-19.06,-0.148,3493920000.0,4019886000.0,3945182000.0,1.016,1.035,4421.86,4173.533,4104.715,4611.772,4460.87,4601.645,4467.118,46.836,51.905,0.948,0.748,4607.07,3491.58,4607.07,3808.86,4607.07,4463.23,4550.93,4464.39,4511.204092,4521.787741,4510.833657
1075,GSPC,2023-08-09,4501.57,4502.44,4461.33,4467.71,2018497000.0,-0.704,-1.123,-1.012,-2.169,-31.67,-0.217,3884910000.0,3988394000.0,3958216000.0,1.961,1.976,4427.105,4177.823,4108.725,4608.958,4443.876,4602.268,4466.05,40.828,47.346,0.932,0.765,4607.07,3491.58,4607.07,3808.86,4607.07,4461.33,4540.34,4461.33,4496.706062,4511.955424,4506.726642


In [47]:

# Ratio features, listed as (new column, numerator column, denominator column)
# in the order the columns are created.
# NOTE(review): 'Adj_Close_DIV_EMA_10' and 'Close_DIV_EMA_10' are the same
# ratio under two names; both are kept so the output schema is unchanged.
ratio_specs = [
    ('Adj_Close_DIV_EMA_10', 'Adj_Close', 'EMA_10'),
    ('EMA_10_DIV_MA50', 'EMA_10', 'Adj_Close_MA50'),
    ('Close_DIV_High_52_weeks', 'Adj_Close', 'High_52_weeks'),
    ('Close_DIV_Low_52_weeks', 'Adj_Close', 'Low_52_weeks'),
    ('Close_DIV_High_4_weeks', 'Adj_Close', 'High_4_weeks'),
    ('Close_DIV_Low_4_weeks', 'Adj_Close', 'Low_4_weeks'),
    ('Close_DIV_EMA_10', 'Adj_Close', 'EMA_10'),
    ('Close_DIV_EMA_20', 'Adj_Close', 'EMA_20'),
    ('Close_DIV_MA50', 'Adj_Close', 'Adj_Close_MA50'),
    ('Close_DIV_MA200', 'Adj_Close', 'Adj_Close_MA200'),
]
for ratio_col, numerator_col, denominator_col in ratio_specs:
    indices_daily_ohlcv[ratio_col] = (
        indices_daily_ohlcv[numerator_col] / indices_daily_ohlcv[denominator_col]
    )


In [48]:

indices_daily_ohlcv['EMA_5_DIV_EMA_20'] = indices_daily_ohlcv['EMA_5'] / indices_daily_ohlcv['EMA_20']

# Volume multiplied by the adjusted close.
indices_daily_ohlcv['Vol_Price'] = indices_daily_ohlcv['Adj_Vol'] * indices_daily_ohlcv['Adj_Close']

# Convert the ADR percentages into price units (close * pct / 100), plus 70%
# and 95% fractions of that range. The arithmetic order is kept as
# `fraction * close * pct / 100` so results are bit-identical.
for horizon in ('5', '20'):
    adr_pct = indices_daily_ohlcv[f'ADR_percent_{horizon}d']
    indices_daily_ohlcv[f'ADR_{horizon}D'] = indices_daily_ohlcv['Adj_Close'] * adr_pct / 100
    for suffix, fraction in (('70per', 0.7), ('95per', 0.95)):
        indices_daily_ohlcv[f'ADR_{horizon}D_{suffix}'] = (
            fraction * indices_daily_ohlcv['Adj_Close'] * adr_pct / 100
        )

# Midpoint between the row's high and its close.
indices_daily_ohlcv['close_high_avg'] = (indices_daily_ohlcv['Adj_High'] + indices_daily_ohlcv['Adj_Close']) / 2

# Stop level 0.5% below EMA_20, and that stop plus 95% of the 20-row ADR.
ema20_stop = .995 * indices_daily_ohlcv['EMA_20']
indices_daily_ohlcv['stop_EMA20'] = ema20_stop
max_entry_level = ema20_stop + indices_daily_ohlcv['ADR_20D_95per']
indices_daily_ohlcv['max_entry_stop_EMA20_ADR_20D_95per'] = max_entry_level

# True where the high/close midpoint is still below that level.
indices_daily_ohlcv['close_high_avg_inline'] = indices_daily_ohlcv['close_high_avg'] < max_entry_level

In [49]:
# Keep only the most recent rows and renumber the index from 0.
# NOTE(review): 1027 is a hard-coded row count tied to the current snapshot
# size (presumably chosen to cut off early warm-up rows) -- confirm, and
# consider deriving it from the data instead.
N_RECENT_ROWS = 1027
indices_daily_ohlcv = indices_daily_ohlcv.iloc[-N_RECENT_ROWS:].reset_index(drop=True)

In [50]:
# Per-column missing-value report before any filling. The frame has already
# been trimmed to its final rows by the preceding cell, so it is passed as-is
# (the extra `.tail(1027)` was a no-op that duplicated the magic row count).
missing_df = hfs.missing_values_info(indices_daily_ohlcv)
print(missing_df.shape)
missing_df

(9, 3)


Unnamed: 0,Feature,#missing_values,%missing_values
0,Low_52_weeks,210,20.447907
1,High_52_weeks,210,20.447907
2,Close_DIV_High_52_weeks,210,20.447907
3,Close_DIV_Low_52_weeks,210,20.447907
4,Close_DIV_MA200,150,14.605648
5,Adj_Close_MA200,150,14.605648
6,Adj_Close_MA150,100,9.737098
7,High_26_weeks,80,7.789679
8,Low_26_weeks,80,7.789679


In [51]:
# Columns reported as having missing values, excluding 'Date'.
missing_cols_indices_daily_ohlcv = [
    feature for feature in missing_df['Feature'] if feature != 'Date'
]
print(len(missing_cols_indices_daily_ohlcv))

9


In [52]:
# Fill gaps in every column that reported missing values: forward-fill first,
# then back-fill whatever is still empty at the start of the series.
# NOTE(review): bfill copies later values into earlier rows, i.e. early rows
# receive information from the future -- confirm this is acceptable for the
# downstream model.
for col in missing_cols_indices_daily_ohlcv:
    filled = indices_daily_ohlcv[col].ffill().bfill()
    if col.endswith("_NA"):
        # Columns named *_NA fall back to 1 when nothing could be propagated.
        filled = filled.fillna(1)
    indices_daily_ohlcv[col] = filled

In [53]:
# Re-run the missing-value report after filling; an empty report means no
# column has gaps left.
missing_df = hfs.missing_values_info(indices_daily_ohlcv) 
print(missing_df.shape) 
missing_df 

(0, 3)


Unnamed: 0,Feature,#missing_values,%missing_values


In [54]:
# Shape and first two rows of the filled feature frame.
print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.head(2) 

(1027, 62)


Unnamed: 0,Ticker,Date,Adj_Open,Adj_High,Adj_Low,Adj_Close,Adj_Vol,change_last_1d,change_last_2d,change_last_5d,change_last_10d,points_change,change_last_1d_mean10,Adj_Vol_1d,Adj_Vol_EMA_prev_5d,Adj_Vol_EMA_prev_20d,EMA_prev_20d_DIV_Adj_Vol,EMA_prev_5d_DIV_Adj_Vol,Adj_Close_MA50,Adj_Close_MA150,Adj_Close_MA200,BB_2_up_10_Adj_Close,BB_2_dn_10_Adj_Close,BB_2_up_20_Adj_Close,BB_2_dn_20_Adj_Close,RSI_9,RSI_14,ADR_percent_5d,ADR_percent_20d,High_52_weeks,Low_52_weeks,High_26_weeks,Low_26_weeks,High_4_weeks,Low_4_weeks,High_1_week,Low_1_week,EMA_5,EMA_10,EMA_20,Adj_Close_DIV_EMA_10,EMA_10_DIV_MA50,Close_DIV_High_52_weeks,Close_DIV_Low_52_weeks,Close_DIV_High_4_weeks,Close_DIV_Low_4_weeks,Close_DIV_EMA_10,Close_DIV_EMA_20,Close_DIV_MA50,Close_DIV_MA200,EMA_5_DIV_EMA_20,Vol_Price,ADR_5D,ADR_5D_70per,ADR_5D_95per,ADR_20D,ADR_20D_70per,ADR_20D_95per,close_high_avg,stop_EMA20,max_entry_stop_EMA20_ADR_20D_95per,close_high_avg_inline
0,GSPC,2019-07-12,3003.36,3013.92,3001.87,3013.77,2974960000.0,0.462,0.692,0.781,3.038,13.86,0.3,3154620000.0,2935166000.0,3326478000.0,1.118,0.987,2891.38,2958.509,3032.607,3021.563,2943.969,3025.668,2879.791,75.395,72.717,0.49,0.614,3393.52,2191.86,3085.2,2728.81,3013.92,2879.62,3013.92,2963.44,2996.601321,2981.351935,2954.525837,1.010874,1.031117,0.845765,1.309445,0.99995,1.046586,1.010874,1.020052,1.042329,1.114605,1.014241,8965845000000.0,14.767473,10.337231,14.029099,18.504548,12.953183,17.57932,3013.845,2939.753208,2957.332529,False
1,GSPC,2019-07-15,3017.8,3017.8,3008.77,3014.3,2874970000.0,0.018,0.48,1.289,2.466,0.53,0.245,2974960000.0,3043316000.0,3321736000.0,1.155,1.059,2893.316,2958.509,3032.607,3021.956,2958.084,3030.169,2888.022,75.509,72.801,0.478,0.603,3393.52,2191.86,3085.2,2728.81,3017.8,2887.3,3017.8,2963.44,3002.500881,2987.342708,2960.253386,1.009024,1.032498,0.845765,1.309445,0.99884,1.043986,1.009024,1.018257,1.041815,1.114605,1.014272,8666022000000.0,14.408354,10.085848,13.687936,18.176229,12.72336,17.267418,3016.05,2945.452119,2962.719537,False


In [55]:
# Drop helper columns and tag every feature column with the ticker suffix.
# - reset_index(drop=True) replaces the "materialise an 'index' column, then
#   drop it" round trip, which raised KeyError whenever the index had a name.
# - Columns are renamed by name rather than by position: the old
#   `['Date'] + [...]` assignment silently relabelled whatever column happened
#   to be first as 'Date'.
threshold_cols = [x for x in indices_daily_ohlcv.columns if "threshold" in x]
indices_daily_ohlcv = (
    indices_daily_ohlcv
    .reset_index(drop=True)
    .drop(columns=threshold_cols + ['Ticker'])
    .rename(columns=lambda c: c if c == 'Date' else f"{c}_GSPC")
)
print(indices_daily_ohlcv.shape)
indices_daily_ohlcv.tail(2)

(1027, 61)


Unnamed: 0,Date,Adj_Open_GSPC,Adj_High_GSPC,Adj_Low_GSPC,Adj_Close_GSPC,Adj_Vol_GSPC,change_last_1d_GSPC,change_last_2d_GSPC,change_last_5d_GSPC,change_last_10d_GSPC,points_change_GSPC,change_last_1d_mean10_GSPC,Adj_Vol_1d_GSPC,Adj_Vol_EMA_prev_5d_GSPC,Adj_Vol_EMA_prev_20d_GSPC,EMA_prev_20d_DIV_Adj_Vol_GSPC,EMA_prev_5d_DIV_Adj_Vol_GSPC,Adj_Close_MA50_GSPC,Adj_Close_MA150_GSPC,Adj_Close_MA200_GSPC,BB_2_up_10_Adj_Close_GSPC,BB_2_dn_10_Adj_Close_GSPC,BB_2_up_20_Adj_Close_GSPC,BB_2_dn_20_Adj_Close_GSPC,RSI_9_GSPC,RSI_14_GSPC,ADR_percent_5d_GSPC,ADR_percent_20d_GSPC,High_52_weeks_GSPC,Low_52_weeks_GSPC,High_26_weeks_GSPC,Low_26_weeks_GSPC,High_4_weeks_GSPC,Low_4_weeks_GSPC,High_1_week_GSPC,Low_1_week_GSPC,EMA_5_GSPC,EMA_10_GSPC,EMA_20_GSPC,Adj_Close_DIV_EMA_10_GSPC,EMA_10_DIV_MA50_GSPC,Close_DIV_High_52_weeks_GSPC,Close_DIV_Low_52_weeks_GSPC,Close_DIV_High_4_weeks_GSPC,Close_DIV_Low_4_weeks_GSPC,Close_DIV_EMA_10_GSPC,Close_DIV_EMA_20_GSPC,Close_DIV_MA50_GSPC,Close_DIV_MA200_GSPC,EMA_5_DIV_EMA_20_GSPC,Vol_Price_GSPC,ADR_5D_GSPC,ADR_5D_70per_GSPC,ADR_5D_95per_GSPC,ADR_20D_GSPC,ADR_20D_70per_GSPC,ADR_20D_95per_GSPC,close_high_avg_GSPC,stop_EMA20_GSPC,max_entry_stop_EMA20_ADR_20D_95per_GSPC,close_high_avg_inline_GSPC
1025,2023-08-08,4498.03,4503.31,4464.39,4499.38,3884910000.0,-0.422,0.477,-1.69,-1.491,-19.06,-0.148,3493920000.0,4019886000.0,3945182000.0,1.016,1.035,4421.86,4173.533,4104.715,4611.772,4460.87,4601.645,4467.118,46.836,51.905,0.948,0.748,4607.07,3491.58,4607.07,3808.86,4607.07,4463.23,4550.93,4464.39,4511.204092,4521.787741,4510.833657,0.995044,1.022599,0.976625,1.288637,0.976625,1.0081,0.995044,0.997461,1.017531,1.096149,1.000082,17479690000000.0,42.654122,29.857886,40.521416,33.655362,23.558754,31.972594,4501.345,4488.279488,4520.252083,True
1026,2023-08-09,4501.57,4502.44,4461.33,4467.71,2018497000.0,-0.704,-1.123,-1.012,-2.169,-31.67,-0.217,3884910000.0,3988394000.0,3958216000.0,1.961,1.976,4427.105,4177.823,4108.725,4608.958,4443.876,4602.268,4466.05,40.828,47.346,0.932,0.765,4607.07,3491.58,4607.07,3808.86,4607.07,4461.33,4540.34,4461.33,4496.706062,4511.955424,4506.726642,0.990194,1.019166,0.969751,1.279567,0.969751,1.00143,0.990194,0.991343,1.009172,1.087371,0.997777,9018059000000.0,41.639057,29.14734,39.557104,34.177982,23.924587,32.469082,4485.075,4484.193008,4516.662091,True


In [56]:
# Cast every column whose only two distinct values are 0 and 1 to bool.
# False/True compare equal to 0/1, so columns that are already boolean are
# matched as well and recorded in the list.
bool_cols_01_vals = []
for col in indices_daily_ohlcv.columns:
    distinct_vals = indices_daily_ohlcv[col].unique()
    if len(distinct_vals) != 2 or sorted(distinct_vals) != [0, 1]:
        continue
    bool_cols_01_vals.append(col)
    indices_daily_ohlcv[col] = indices_daily_ohlcv[col].astype(bool)

print("Bool cols: ", len(bool_cols_01_vals))
indices_daily_ohlcv[bool_cols_01_vals].tail(2)

Bool cols:  1


Unnamed: 0,close_high_avg_inline_GSPC
1025,True
1026,True


In [57]:
# First two rows of the final, suffixed feature frame.
indices_daily_ohlcv.head(2)

Unnamed: 0,Date,Adj_Open_GSPC,Adj_High_GSPC,Adj_Low_GSPC,Adj_Close_GSPC,Adj_Vol_GSPC,change_last_1d_GSPC,change_last_2d_GSPC,change_last_5d_GSPC,change_last_10d_GSPC,points_change_GSPC,change_last_1d_mean10_GSPC,Adj_Vol_1d_GSPC,Adj_Vol_EMA_prev_5d_GSPC,Adj_Vol_EMA_prev_20d_GSPC,EMA_prev_20d_DIV_Adj_Vol_GSPC,EMA_prev_5d_DIV_Adj_Vol_GSPC,Adj_Close_MA50_GSPC,Adj_Close_MA150_GSPC,Adj_Close_MA200_GSPC,BB_2_up_10_Adj_Close_GSPC,BB_2_dn_10_Adj_Close_GSPC,BB_2_up_20_Adj_Close_GSPC,BB_2_dn_20_Adj_Close_GSPC,RSI_9_GSPC,RSI_14_GSPC,ADR_percent_5d_GSPC,ADR_percent_20d_GSPC,High_52_weeks_GSPC,Low_52_weeks_GSPC,High_26_weeks_GSPC,Low_26_weeks_GSPC,High_4_weeks_GSPC,Low_4_weeks_GSPC,High_1_week_GSPC,Low_1_week_GSPC,EMA_5_GSPC,EMA_10_GSPC,EMA_20_GSPC,Adj_Close_DIV_EMA_10_GSPC,EMA_10_DIV_MA50_GSPC,Close_DIV_High_52_weeks_GSPC,Close_DIV_Low_52_weeks_GSPC,Close_DIV_High_4_weeks_GSPC,Close_DIV_Low_4_weeks_GSPC,Close_DIV_EMA_10_GSPC,Close_DIV_EMA_20_GSPC,Close_DIV_MA50_GSPC,Close_DIV_MA200_GSPC,EMA_5_DIV_EMA_20_GSPC,Vol_Price_GSPC,ADR_5D_GSPC,ADR_5D_70per_GSPC,ADR_5D_95per_GSPC,ADR_20D_GSPC,ADR_20D_70per_GSPC,ADR_20D_95per_GSPC,close_high_avg_GSPC,stop_EMA20_GSPC,max_entry_stop_EMA20_ADR_20D_95per_GSPC,close_high_avg_inline_GSPC
0,2019-07-12,3003.36,3013.92,3001.87,3013.77,2974960000.0,0.462,0.692,0.781,3.038,13.86,0.3,3154620000.0,2935166000.0,3326478000.0,1.118,0.987,2891.38,2958.509,3032.607,3021.563,2943.969,3025.668,2879.791,75.395,72.717,0.49,0.614,3393.52,2191.86,3085.2,2728.81,3013.92,2879.62,3013.92,2963.44,2996.601321,2981.351935,2954.525837,1.010874,1.031117,0.845765,1.309445,0.99995,1.046586,1.010874,1.020052,1.042329,1.114605,1.014241,8965845000000.0,14.767473,10.337231,14.029099,18.504548,12.953183,17.57932,3013.845,2939.753208,2957.332529,False
1,2019-07-15,3017.8,3017.8,3008.77,3014.3,2874970000.0,0.018,0.48,1.289,2.466,0.53,0.245,2974960000.0,3043316000.0,3321736000.0,1.155,1.059,2893.316,2958.509,3032.607,3021.956,2958.084,3030.169,2888.022,75.509,72.801,0.478,0.603,3393.52,2191.86,3085.2,2728.81,3017.8,2887.3,3017.8,2963.44,3002.500881,2987.342708,2960.253386,1.009024,1.032498,0.845765,1.309445,0.99884,1.043986,1.009024,1.018257,1.041815,1.114605,1.014272,8666022000000.0,14.408354,10.085848,13.687936,18.176229,12.72336,17.267418,3016.05,2945.452119,2962.719537,False


In [58]:
# Final check before export: the report should list no columns with gaps.
hfs.missing_values_info(indices_daily_ohlcv) 

Unnamed: 0,Feature,#missing_values,%missing_values


In [59]:
# Persist the feature frame to the processing temp folder; the file name
# carries the run date held in TODAY.
indices_daily_ohlcv.to_pickle(f'{PROJ_PATH}/{PROCESSING_TEMP_FOLDER_PATH}/indices_daily_ohlcv_{TODAY}.pkl')

In [60]:
%reset -f