In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
import pandas_ta as ta
import datetime as dt
from tqdm import tqdm
import warnings
import gc, time

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

api_key = os.environ['av_key']

In [2]:
warnings.filterwarnings('ignore')

In [3]:
ts = TimeSeries(key=api_key, output_format='pandas')

In [4]:
active_tickers = pd.read_csv("datas//us_volume_leaders.csv")

In [5]:
tickers = active_tickers.Symbol.tolist()[:-1]

In [6]:
for ticker in tqdm(tickers):
    try:
        df, meta_data = ts.get_daily(ticker, outputsize='full')
        df = df.rename(columns={"4. close": "close",
                           "1. open": "open",
                           "2. high": "high",
                           "3. low": "low",
                           "5. volume": "volume"})

        df = df[['close', 'open', 'high', 'low', 'volume']]
        df.to_parquet(f"datas//daily_ticks//daily_ticks_{ticker}.parquet")
        time.sleep(1)
    except:
        print(f"{ticker} is skipped")

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [10:03<00:00,  3.02s/it]


In [7]:
for ticker in tqdm(tickers):
    try:
        df = pd.read_parquet(f"datas//daily_ticks//daily_ticks_{ticker}.parquet")
    
        # Constructing features
        df.sort_index(inplace=True)
        df = df.reset_index()
        
        df.ta.rsi(length=3, append=True)
        df.ta.rsi(length=5, append=True)
        df.ta.rsi(length=10, append=True)
        df.ta.rsi(length=15, append=True)
        df.ta.rsi(length=20, append=True)
        df.ta.rsi(length=60, append=True)
        df.ta.rsi(length=120, append=True)
    
        df.ta.ema(length=3, append=True)
        df.ta.ema(length=5, append=True)
        df.ta.ema(length=10, append=True)
        df.ta.ema(length=15, append=True)
        df.ta.ema(length=20, append=True)
        df.ta.ema(length=60, append=True)
        df.ta.ema(length=120, append=True)
    
        df.ta.sma(length=3, append=True)
        df.ta.sma(length=5, append=True)
        df.ta.sma(length=10, append=True)
        df.ta.sma(length=15, append=True)
        df.ta.sma(length=20, append=True)
        df.ta.sma(length=60, append=True)
        df.ta.sma(length=120, append=True)
    
        df.ta.bbands(length=3, append=True)
        df.ta.bbands(length=5, append=True)
        df.ta.bbands(length=10, append=True)
        df.ta.bbands(length=15, append=True)
        df.ta.bbands(length=20, append=True)
        df.ta.bbands(length=60, append=True)
        df.ta.bbands(length=120, append=True)
    
        df.ta.adx(length=3, append=True)
        df.ta.adx(length=5, append=True)
        df.ta.adx(length=10, append=True)
        df.ta.adx(length=15, append=True)
        df.ta.adx(length=20, append=True)
        df.ta.adx(length=60, append=True)
        df.ta.adx(length=120, append=True)

        df.ta.mfi(length=3, append=True)
        df.ta.mfi(length=5, append=True)
        df.ta.mfi(length=10, append=True)
        df.ta.mfi(length=15, append=True)
        df.ta.mfi(length=20, append=True)
        df.ta.mfi(length=60, append=True)
        df.ta.mfi(length=120, append=True)

        df.ta.cci(length=3, append=True)
        df.ta.cci(length=5, append=True)
        df.ta.cci(length=10, append=True)
        df.ta.cci(length=15, append=True)
        df.ta.cci(length=20, append=True)
        df.ta.cci(length=60, append=True)
        df.ta.cci(length=120, append=True)

        df.ta.willr(length=3, append=True)
        df.ta.willr(length=5, append=True)
        df.ta.willr(length=10, append=True)
        df.ta.willr(length=15, append=True)
        df.ta.willr(length=20, append=True)
        df.ta.willr(length=60, append=True)
        df.ta.willr(length=120, append=True)

        df.ta.aroon(length=3, append=True)
        df.ta.aroon(length=5, append=True)
        df.ta.aroon(length=10, append=True)
        df.ta.aroon(length=15, append=True)
        df.ta.aroon(length=20, append=True)
        df.ta.aroon(length=60, append=True)
        df.ta.aroon(length=120, append=True)

        df.ta.cmf(length=3, append=True)
        df.ta.cmf(length=5, append=True)
        df.ta.cmf(length=10, append=True)
        df.ta.cmf(length=15, append=True)
        df.ta.cmf(length=20, append=True)
        df.ta.cmf(length=60, append=True)
        df.ta.cmf(length=120, append=True)

        df.ta.cmo(length=3, append=True)
        df.ta.cmo(length=5, append=True)
        df.ta.cmo(length=10, append=True)
        df.ta.cmo(length=15, append=True)
        df.ta.cmo(length=20, append=True)
        df.ta.cmo(length=60, append=True)
        df.ta.cmo(length=120, append=True)

        df.ta.efi(length=3, append=True)
        df.ta.efi(length=5, append=True)
        df.ta.efi(length=10, append=True)
        df.ta.efi(length=15, append=True)
        df.ta.efi(length=20, append=True)
        df.ta.efi(length=60, append=True)
        df.ta.efi(length=120, append=True)
    
        df.ta.macd(fast=5, slow=10, append=True)
        df.ta.macd(fast=10, slow=20, append=True)
        df.ta.macd(fast=20, slow=60, append=True)
        df.ta.macd(fast=60, slow=120, append=True)

        df.ta.obv(append=True)
        df.ta.ppo(append=True)
        df.ta.uo(append=True)
        df.ta.stoch(append=True)
        df.ta.ichimoku(lookahead=False, append=True)
        df.ta.rvi(lookahead=False, append=True)
    
        df_feat_main = pd.DataFrame(index=df.index)

        # MACD
        df_feat_main[f'grad_MACD_5_10_9'] = [np.nan] + list(np.gradient(df[f'MACD_5_10_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACD_5_10_9'] =  [np.nan] + list(np.diff(df[f'MACD_5_10_9']))
        df_feat_main[f'grad_MACDh_5_10_9'] = [np.nan] + list(np.gradient(df[f'MACDh_5_10_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDh_5_10_9'] = [np.nan] + list(np.diff(df[f'MACDh_5_10_9']))
        df_feat_main[f'grad_MACDs_5_10_9'] = [np.nan] + list(np.gradient(df[f'MACDs_5_10_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDs_5_10_9'] = [np.nan] + list(np.diff(df[f'MACDs_5_10_9']))

        df_feat_main[f'grad_MACD_10_20_9'] = [np.nan] + list(np.gradient(df[f'MACD_10_20_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACD_10_20_9'] =  [np.nan] + list(np.diff(df[f'MACD_10_20_9']))
        df_feat_main[f'grad_MACDh_10_20_9'] = [np.nan] + list(np.gradient(df[f'MACDh_10_20_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDh_10_20_9'] = [np.nan] + list(np.diff(df[f'MACDh_10_20_9']))
        df_feat_main[f'grad_MACDs_10_20_9'] = [np.nan] + list(np.gradient(df[f'MACDs_10_20_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDs_10_20_9'] = [np.nan] + list(np.diff(df[f'MACDs_10_20_9']))

        df_feat_main[f'grad_MACD_20_60_9'] = [np.nan] + list(np.gradient(df[f'MACD_20_60_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACD_20_60_9'] =  [np.nan] + list(np.diff(df[f'MACD_20_60_9']))
        df_feat_main[f'grad_MACDh_20_60_9'] = [np.nan] + list(np.gradient(df[f'MACDh_20_60_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDh_20_60_9'] = [np.nan] + list(np.diff(df[f'MACDh_20_60_9']))
        df_feat_main[f'grad_MACDs_20_60_9'] = [np.nan] + list(np.gradient(df[f'MACDs_20_60_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDs_20_60_9'] = [np.nan] + list(np.diff(df[f'MACDs_20_60_9']))

        df_feat_main[f'grad_MACD_60_120_9'] = [np.nan] + list(np.gradient(df[f'MACD_60_120_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACD_60_120_9'] =  [np.nan] + list(np.diff(df[f'MACD_60_120_9']))
        df_feat_main[f'grad_MACDh_60_120_9'] = [np.nan] + list(np.gradient(df[f'MACDh_60_120_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDh_60_120_9'] = [np.nan] + list(np.diff(df[f'MACDh_60_120_9']))
        df_feat_main[f'grad_MACDs_60_120_9'] = [np.nan] + list(np.gradient(df[f'MACDs_60_120_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_MACDs_60_120_9'] = [np.nan] + list(np.diff(df[f'MACDs_60_120_9']))

        df_feat_main[f'grad_PPO_12_26_9'] = [np.nan] + list(np.gradient(df[f'PPO_12_26_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_PPO_12_26_9'] =  [np.nan] + list(np.diff(df[f'PPO_12_26_9']))
        df_feat_main[f'grad_PPOh_12_26_9'] = [np.nan] + list(np.gradient(df[f'PPOh_12_26_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_PPOh_12_26_9'] =  [np.nan] + list(np.diff(df[f'PPOh_12_26_9']))
        df_feat_main[f'grad_PPOs_12_26_9'] = [np.nan] + list(np.gradient(df[f'PPOs_12_26_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_PPOs_12_26_9'] =  [np.nan] + list(np.diff(df[f'PPOs_12_26_9']))
        
        df_feat_main[f'grad_OBV'] = [np.nan] + list(np.gradient(df[f'OBV'], edge_order=1))[:-1]
        df_feat_main[f'diff_OBV'] =  [np.nan] + list(np.diff(df[f'OBV']))
        df_feat_main[f'grad_UO_7_14_28'] = [np.nan] + list(np.gradient(df[f'UO_7_14_28'], edge_order=1))[:-1]
        df_feat_main[f'diff_UO_7_14_28'] =  [np.nan] + list(np.diff(df[f'UO_7_14_28']))
        df_feat_main[f'grad_RVI_14'] = [np.nan] + list(np.gradient(df[f'RVI_14'], edge_order=1))[:-1]
        df_feat_main[f'diff_RVI_14'] =  [np.nan] + list(np.diff(df[f'RVI_14']))

        df_feat_main[f'grad_ISA_9'] = [np.nan] + list(np.gradient(df[f'ISA_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_ISA_9'] =  [np.nan] + list(np.diff(df[f'ISA_9']))
        df_feat_main[f'grad_ISB_26'] = [np.nan] + list(np.gradient(df[f'ISB_26'], edge_order=1))[:-1]
        df_feat_main[f'diff_ISB_26'] =  [np.nan] + list(np.diff(df[f'ISB_26']))
        df_feat_main[f'grad_ITS_9'] = [np.nan] + list(np.gradient(df[f'ITS_9'], edge_order=1))[:-1]
        df_feat_main[f'diff_ITS_9'] =  [np.nan] + list(np.diff(df[f'ITS_9']))
        df_feat_main[f'grad_IKS_26'] = [np.nan] + list(np.gradient(df[f'IKS_26'], edge_order=1))[:-1]
        df_feat_main[f'diff_IKS_26'] =  [np.nan] + list(np.diff(df[f'IKS_26']))

        df_feat_main[f'grad_STOCHk_14_3_3'] = [np.nan] + list(np.gradient(df[f'STOCHk_14_3_3'], edge_order=1))[:-1]
        df_feat_main[f'diff_STOCHk_14_3_3'] =  [np.nan] + list(np.diff(df[f'STOCHk_14_3_3']))
        df_feat_main[f'grad_STOCHd_14_3_3'] = [np.nan] + list(np.gradient(df[f'STOCHd_14_3_3'], edge_order=1))[:-1]
        df_feat_main[f'diff_STOCHd_14_3_3'] =  [np.nan] + list(np.diff(df[f'STOCHd_14_3_3']))
        
    
        for icount in [3,5,10,15,20, 60, 120]:
            df_feat_main[f'grad_DMP_{icount}'] = [np.nan] + list(np.gradient(df[f'DMP_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'grad_DMN_{icount}'] = [np.nan] + list(np.gradient(df[f'DMN_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'grad_RSI_{icount}'] = [np.nan] + list(np.gradient(df[f'RSI_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_RSI_{icount}'] = [np.nan] + list(np.diff(df[f'RSI_{icount}']))
            df_feat_main[f'grad_EMA_{icount}'] = [np.nan] + list(np.gradient(df[f'EMA_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_EMA_{icount}'] = [np.nan] + list(np.diff(df[f'EMA_{icount}']))
            df_feat_main[f'grad_MFI_{icount}'] = [np.nan] + list(np.gradient(df[f'MFI_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_MFI_{icount}'] = [np.nan] + list(np.diff(df[f'MFI_{icount}']))
            df_feat_main[f'grad_SMA_{icount}'] = [np.nan] + list(np.gradient(df[f'SMA_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_SMA_{icount}'] = [np.nan] + list(np.diff(df[f'SMA_{icount}']))

            df_feat_main[f'grad_CCI_{icount}_0.015'] = [np.nan] + list(np.gradient(df[f'CCI_{icount}_0.015'], edge_order=1))[:-1]
            df_feat_main[f'diff_CCI_{icount}_0.015'] = [np.nan] + list(np.diff(df[f'CCI_{icount}_0.015']))
            df_feat_main[f'grad_WILLR_{icount}'] = [np.nan] + list(np.gradient(df[f'WILLR_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_WILLR_{icount}'] = [np.nan] + list(np.diff(df[f'WILLR_{icount}']))
            df_feat_main[f'grad_CMF_{icount}'] = [np.nan] + list(np.gradient(df[f'CMF_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_CMF_{icount}'] = [np.nan] + list(np.diff(df[f'CMF_{icount}']))
            df_feat_main[f'grad_CMO_{icount}'] = [np.nan] + list(np.gradient(df[f'CMO_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_CMO_{icount}'] = [np.nan] + list(np.diff(df[f'CMO_{icount}']))
            df_feat_main[f'grad_EFI_{icount}'] = [np.nan] + list(np.gradient(df[f'EFI_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_EFI_{icount}'] = [np.nan] + list(np.diff(df[f'EFI_{icount}']))

            df_feat_main[f'grad_AROOND_{icount}'] = [np.nan] + list(np.gradient(df[f'AROOND_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_AROOND_{icount}'] = [np.nan] + list(np.diff(df[f'AROOND_{icount}']))
            df_feat_main[f'grad_AROONU_{icount}'] = [np.nan] + list(np.gradient(df[f'AROONU_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_AROONU_{icount}'] = [np.nan] + list(np.diff(df[f'AROONU_{icount}']))
            df_feat_main[f'grad_AROONOSC_{icount}'] = [np.nan] + list(np.gradient(df[f'AROONOSC_{icount}'], edge_order=1))[:-1]
            df_feat_main[f'diff_AROONOSC_{icount}'] = [np.nan] + list(np.diff(df[f'AROONOSC_{icount}']))
            
            df_feat_main[f'grad_BBL_{icount}'] = [np.nan] + list(np.gradient(df[f'BBL_{icount}_2.0'], edge_order=1))[:-1]
            df_feat_main[f'diff_BBL_{icount}'] = [np.nan] + list(np.diff(df[f'BBL_{icount}_2.0']))
            df_feat_main[f'grad_BBM_{icount}'] = [np.nan] + list(np.gradient(df[f'BBM_{icount}_2.0'], edge_order=1))[:-1]
            df_feat_main[f'diff_BBM_{icount}'] = [np.nan] + list(np.diff(df[f'BBM_{icount}_2.0']))
            df_feat_main[f'grad_BBU_{icount}'] = [np.nan] + list(np.gradient(df[f'BBU_{icount}_2.0'], edge_order=1))[:-1]
            df_feat_main[f'diff_BBU_{icount}'] = [np.nan] + list(np.diff(df[f'BBU_{icount}_2.0']))
            df_feat_main[f'grad_BBB_{icount}'] = [np.nan] + list(np.gradient(df[f'BBB_{icount}_2.0'], edge_order=1))[:-1]
            df_feat_main[f'diff_BBB_{icount}'] = [np.nan] + list(np.diff(df[f'BBB_{icount}_2.0']))
            df_feat_main[f'grad_BBP_{icount}'] = [np.nan] + list(np.gradient(df[f'BBP_{icount}_2.0'], edge_order=1))[:-1]
            df_feat_main[f'diff_BBP_{icount}'] = [np.nan] + list(np.diff(df[f'BBP_{icount}_2.0']))
    
        td5 = 5
        td10 = 10
        td20 = 20
        td60 = 60
        td120 = 120
    
        df_perc_price = pd.DataFrame(index=df.index)
        
        for idate in df_perc_price.index:
            p_close = df.loc[idate, 'close']
        
            df_temp = df.loc[(df.index>=idate-td5)&(df.index<=idate)]
            df_perc_price.loc[idate, 'up5'] = df_temp.loc[df_temp['close']>p_close, 'close'].count()
            df_perc_price.loc[idate, 'down5'] = df_temp.loc[df_temp['close']<p_close, 'close'].count()
            df_perc_price.loc[idate, 'avg_vol5'] = df_temp['volume'].mean()
        
            df_temp = df.loc[(df.index>=idate-td10)&(df.index<=idate)]
            df_perc_price.loc[idate, 'up10'] = df_temp.loc[df_temp['close']>p_close, 'close'].count()
            df_perc_price.loc[idate, 'down10'] = df_temp.loc[df_temp['close']<p_close, 'close'].count()
            df_perc_price.loc[idate, 'avg_vol10'] = df_temp['volume'].mean()
            
            df_temp = df.loc[(df.index>=idate-td20)&(df.index<=idate)]
            df_perc_price.loc[idate, 'up20'] = df_temp.loc[df_temp['close']>p_close, 'close'].count()
            df_perc_price.loc[idate, 'down20'] = df_temp.loc[df_temp['close']<p_close, 'close'].count()
            df_perc_price.loc[idate, 'avg_vol20'] = df_temp['volume'].mean()
        
            df_temp = df.loc[(df.index>=idate-td60)&(df.index<=idate)]
            df_perc_price.loc[idate, 'up60'] = df_temp.loc[df_temp['close']>p_close, 'close'].count()
            df_perc_price.loc[idate, 'down60'] = df_temp.loc[df_temp['close']<p_close, 'close'].count()
            df_perc_price.loc[idate, 'avg_vol60'] = df_temp['volume'].mean()
        
            df_temp = df.loc[(df.index>=idate-td120)&(df.index<=idate)]
            df_perc_price.loc[idate, 'up120'] = df_temp.loc[df_temp['close']>p_close, 'close'].count()
            df_perc_price.loc[idate, 'down120'] = df_temp.loc[df_temp['close']<p_close, 'close'].count()
            df_perc_price.loc[idate, 'avg_vol120'] = df_temp['volume'].mean()
        
        df_perc_price['r_up_down5'] = (df_perc_price['up5'] + 0.0001) / (df_perc_price['down5'] + 0.0001)
        df_perc_price['r_up_down10'] = (df_perc_price['up10'] + 0.0001) / (df_perc_price['down10'] + 0.0001)
        df_perc_price['r_up_down20'] = (df_perc_price['up20'] + 0.0001) / (df_perc_price['down20'] + 0.0001)
        df_perc_price['r_up_down60'] = (df_perc_price['up60'] + 0.0001) / (df_perc_price['down60'] + 0.0001)
        df_perc_price['r_up_down120'] = (df_perc_price['up120'] + 0.0001) / (df_perc_price['down120'] + 0.0001)
        
        df_perc_price['r_curr_vol_5'] = df['volume'] / df_perc_price['avg_vol5']
        df_perc_price['r_curr_vol_10'] = df['volume'] / df_perc_price['avg_vol10']
        df_perc_price['r_curr_vol_20'] = df['volume'] / df_perc_price['avg_vol20']
        df_perc_price['r_curr_vol_60'] = df['volume'] / df_perc_price['avg_vol60']
        df_perc_price['r_curr_vol_120'] = df['volume'] / df_perc_price['avg_vol120']
        
        df_perc_price['intraday_direction'] = (df['close']-df['open']) / df['open']
        df_perc_price['perc_max_spread_to_close'] = (df['high']-df['low']) / df['close']
        df_perc_price['perc_max_spread_to_open'] = (df['high']-df['low']) / df['open']
        
        df_perc_price['r_close_to_sma3'] = df['close'] / df['SMA_3']
        df_perc_price['r_close_to_sma5'] = df['close'] / df['SMA_5']
        df_perc_price['r_close_to_sma10'] = df['close'] / df['SMA_10']
        df_perc_price['r_close_to_sma20'] = df['close'] / df['SMA_20']
        df_perc_price['r_close_to_sma60'] = df['close'] / df['SMA_60']
        df_perc_price['r_close_to_sma120'] = df['close'] / df['SMA_120']

        df_perc_price['r_close_to_EMA3'] = df['close'] / df['EMA_3']
        df_perc_price['r_close_to_EMA5'] = df['close'] / df['EMA_5']
        df_perc_price['r_close_to_EMA10'] = df['close'] / df['EMA_10']
        df_perc_price['r_close_to_EMA20'] = df['close'] / df['EMA_20']
        df_perc_price['r_close_to_EMA60'] = df['close'] / df['EMA_60']
        df_perc_price['r_close_to_EMA120'] = df['close'] / df['EMA_120']

        df_perc_price['perc_max_spread_to_sma3'] = (df['close']-df['open']) / df['SMA_3']
        df_perc_price['perc_max_spread_to_sma5'] = (df['close']-df['open']) / df['SMA_5']
        df_perc_price['perc_max_spread_to_sma10'] = (df['close']-df['open']) / df['SMA_10']
        df_perc_price['perc_max_spread_to_sma20'] = (df['close']-df['open']) / df['SMA_20']
        df_perc_price['perc_max_spread_to_sma60'] = (df['close']-df['open']) / df['SMA_60']
        df_perc_price['perc_max_spread_to_sma120'] = (df['close']-df['open']) / df['SMA_120']

        df_perc_price['perc_max_spread_to_EMA3'] = (df['close']-df['open']) / df['EMA_3']
        df_perc_price['perc_max_spread_to_EMA5'] = (df['close']-df['open']) / df['EMA_5']
        df_perc_price['perc_max_spread_to_EMA10'] = (df['close']-df['open']) / df['EMA_10']
        df_perc_price['perc_max_spread_to_EMA20'] = (df['close']-df['open']) / df['EMA_20']
        df_perc_price['perc_max_spread_to_EMA60'] = (df['close']-df['open']) / df['EMA_60']
        df_perc_price['perc_max_spread_to_EMA120'] = (df['close']-df['open']) / df['EMA_120']

        #Additional features
        
        df_perc_price[f'r_av_v5_10'] = (df_perc_price[f'avg_vol5'] / df_perc_price[f'avg_vol10']).copy()
        df_perc_price[f'r_av_v5_20'] = (df_perc_price[f'avg_vol5'] / df_perc_price[f'avg_vol20']).copy()
        df_perc_price[f'r_av_v5_60'] = (df_perc_price[f'avg_vol5'] / df_perc_price[f'avg_vol60']).copy()
        df_perc_price[f'r_av_v10_20'] = (df_perc_price[f'avg_vol10'] / df_perc_price[f'avg_vol20']).copy()
        df_perc_price[f'r_av_v10_60'] = (df_perc_price[f'avg_vol10'] / df_perc_price[f'avg_vol60']).copy()
        df_perc_price[f'r_av_v10_120'] = (df_perc_price[f'avg_vol10'] / df_perc_price[f'avg_vol120']).copy()
        df_perc_price[f'r_av_v20_60'] = (df_perc_price[f'avg_vol20'] / df_perc_price[f'avg_vol60']).copy()
        df_perc_price[f'r_av_v20_120'] = (df_perc_price[f'avg_vol20'] / df_perc_price[f'avg_vol120']).copy()
        df_perc_price[f'r_av_v60_120'] = (df_perc_price[f'avg_vol60'] / df_perc_price[f'avg_vol120']).copy()
    
        # Constructing label
        df_label_main = pd.DataFrame(index=df.index)
        data = df.copy()
    
        td2 = 2
        td3 = 3
        td7 = 5
        td14 = 10
        td21 = 15
        td30 = 20
        
        for idate in data.index:
            df_label_main.loc[idate, 'label_close2max'] = data.loc[(data.index > idate) & (data.index <= idate + td2), 'close'].max()
            df_label_main.loc[idate, 'label_close3max'] = data.loc[(data.index > idate) & (data.index <= idate + td3), 'close'].max()
            df_label_main.loc[idate, 'label_close5max'] = data.loc[(data.index > idate) & (data.index <= idate + td7), 'close'].max()
            df_label_main.loc[idate, 'label_close10max'] = data.loc[(data.index > idate) & (data.index <= idate + td14), 'close'].max()
            df_label_main.loc[idate, 'label_close15max'] = data.loc[(data.index > idate) & (data.index <= idate + td21), 'close'].max()
            df_label_main.loc[idate, 'label_close20max'] = data.loc[(data.index > idate) & (data.index <= idate + td30), 'close'].max()
        
            df_label_main.loc[idate, 'label_close2min'] = data.loc[(data.index > idate) & (data.index <= idate + td2), 'close'].min()
            df_label_main.loc[idate, 'label_close3min'] = data.loc[(data.index > idate) & (data.index <= idate + td3), 'close'].min()
            df_label_main.loc[idate, 'label_close5min'] = data.loc[(data.index > idate) & (data.index <= idate + td7), 'close'].min()
            df_label_main.loc[idate, 'label_close10min'] = data.loc[(data.index > idate) & (data.index <= idate + td14), 'close'].min()
            df_label_main.loc[idate, 'label_close15min'] = data.loc[(data.index > idate) & (data.index <= idate + td21), 'close'].min()
            df_label_main.loc[idate, 'label_close20min'] = data.loc[(data.index > idate) & (data.index <= idate + td30), 'close'].min()
        
            df_label_main.loc[idate, 'label_close2mean'] = data.loc[(data.index > idate) & (data.index <= idate + td2), 'close'].mean()
            df_label_main.loc[idate, 'label_close3mean'] = data.loc[(data.index > idate) & (data.index <= idate + td3), 'close'].mean()
            df_label_main.loc[idate, 'label_close5mean'] = data.loc[(data.index > idate) & (data.index <= idate + td7), 'close'].mean()
            df_label_main.loc[idate, 'label_close10mean'] = data.loc[(data.index > idate) & (data.index <= idate + td14), 'close'].mean()
            df_label_main.loc[idate, 'label_close15mean'] = data.loc[(data.index > idate) & (data.index <= idate + td21), 'close'].mean()
            df_label_main.loc[idate, 'label_close20mean'] = data.loc[(data.index > idate) & (data.index <= idate + td30), 'close'].mean()
        
        df_label_main['label_close2max'] = (df_label_main.label_close2max - data['close'])/ data['close']
        df_label_main['label_close3max'] = (df_label_main.label_close3max - data['close'])/ data['close']
        df_label_main['label_close5max'] = (df_label_main.label_close5max - data['close'])/ data['close']
        df_label_main['label_close10max'] = (df_label_main.label_close10max - data['close'])/ data['close']
        df_label_main['label_close15max'] = (df_label_main.label_close15max - data['close'])/ data['close']
        df_label_main['label_close20max'] = (df_label_main.label_close20max - data['close'])/ data['close']
        
        df_label_main['label_close2min'] = (df_label_main.label_close2min - data['close'])/ data['close']
        df_label_main['label_close3min'] = (df_label_main.label_close3min - data['close'])/ data['close']
        df_label_main['label_close5min'] = (df_label_main.label_close5min - data['close'])/ data['close']
        df_label_main['label_close10min'] = (df_label_main.label_close10min - data['close'])/ data['close']
        df_label_main['label_close15min'] = (df_label_main.label_close15min - data['close'])/ data['close']
        df_label_main['label_close20min'] = (df_label_main.label_close20min - data['close'])/ data['close']
        
        df_label_main['label_close2mean'] = (df_label_main.label_close2mean - data['close'])/ data['close']
        df_label_main['label_close3mean'] = (df_label_main.label_close3mean - data['close'])/ data['close']
        df_label_main['label_close5mean'] = (df_label_main.label_close5mean - data['close'])/ data['close']
        df_label_main['label_close10mean'] = (df_label_main.label_close10mean - data['close'])/ data['close']
        df_label_main['label_close15mean'] = (df_label_main.label_close15mean - data['close'])/ data['close']
        df_label_main['label_close20mean'] = (df_label_main.label_close20mean - data['close'])/ data['close']
        
        ## Increasing price buy
        df_label_main['inc_close2mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] >= 0.05, 'inc_close2mean_perc5'] = 1
        df_label_main['inc_close2mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] >= 0.10, 'inc_close2mean_perc10'] = 1
        df_label_main['inc_close2mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] >= 0.07, 'inc_close2mean_perc7'] = 1
        df_label_main['inc_close2mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] >= 0.03, 'inc_close2mean_perc3'] = 1
        
        df_label_main['inc_close2max_perc5'] = 0
        df_label_main.loc[df_label_main["label_close2max"] >= 0.05, 'inc_close2max_perc5'] = 1
        df_label_main['inc_close2max_perc10'] = 0
        df_label_main.loc[df_label_main["label_close2max"] >= 0.10, 'inc_close2max_perc10'] = 1
        df_label_main['inc_close2max_perc7'] = 0
        df_label_main.loc[df_label_main["label_close2max"] >= 0.07, 'inc_close2max_perc7'] = 1
        df_label_main['inc_close2max_perc3'] = 0
        df_label_main.loc[df_label_main["label_close2max"] >= 0.03, 'inc_close2max_perc3'] = 1
        
        df_label_main['inc_close3mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] >= 0.05, 'inc_close3mean_perc5'] = 1
        df_label_main['inc_close3mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] >= 0.10, 'inc_close3mean_perc10'] = 1
        df_label_main['inc_close3mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] >= 0.03, 'inc_close3mean_perc3'] = 1
        df_label_main['inc_close3mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] >= 0.07, 'inc_close3mean_perc7'] = 1
        
        df_label_main['inc_close3max_perc5'] = 0
        df_label_main.loc[df_label_main["label_close3max"] >= 0.05, 'inc_close3max_perc5'] = 1
        df_label_main['inc_close3max_perc10'] = 0
        df_label_main.loc[df_label_main["label_close3max"] >= 0.10, 'inc_close3max_perc10'] = 1
        df_label_main['inc_close3max_perc3'] = 0
        df_label_main.loc[df_label_main["label_close3max"] >= 0.03, 'inc_close3max_perc3'] = 1
        df_label_main['inc_close3max_perc7'] = 0
        df_label_main.loc[df_label_main["label_close3max"] >= 0.07, 'inc_close3max_perc7'] = 1
        
        df_label_main['inc_close5mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] >= 0.05, 'inc_close5mean_perc5'] = 1
        df_label_main['inc_close5mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] >= 0.10, 'inc_close5mean_perc10'] = 1
        df_label_main['inc_close5mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] >= 0.03, 'inc_close5mean_perc3'] = 1
        df_label_main['inc_close5mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] >= 0.07, 'inc_close5mean_perc7'] = 1
        
        df_label_main['inc_close5max_perc5'] = 0
        df_label_main.loc[df_label_main["label_close5max"] >= 0.05, 'inc_close5max_perc5'] = 1
        df_label_main['inc_close5max_perc10'] = 0
        df_label_main.loc[df_label_main["label_close5max"] >= 0.10, 'inc_close5max_perc10'] = 1
        df_label_main['inc_close5max_perc3'] = 0
        df_label_main.loc[df_label_main["label_close5max"] >= 0.03, 'inc_close5max_perc3'] = 1
        df_label_main['inc_close5max_perc7'] = 0
        df_label_main.loc[df_label_main["label_close5max"] >= 0.07, 'inc_close5max_perc7'] = 1
        
        df_label_main['inc_close10mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] >= 0.05, 'inc_close10mean_perc5'] = 1
        df_label_main['inc_close10mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] >= 0.10, 'inc_close10mean_perc10'] = 1
        df_label_main['inc_close10mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] >= 0.03, 'inc_close10mean_perc3'] = 1
        df_label_main['inc_close10mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] >= 0.07, 'inc_close10mean_perc7'] = 1
        
        df_label_main['inc_close10max_perc5'] = 0
        df_label_main.loc[df_label_main["label_close10max"] >= 0.05, 'inc_close10max_perc5'] = 1
        df_label_main['inc_close10max_perc10'] = 0
        df_label_main.loc[df_label_main["label_close10max"] >= 0.10, 'inc_close10max_perc10'] = 1
        df_label_main['inc_close10max_perc3'] = 0
        df_label_main.loc[df_label_main["label_close10max"] >= 0.03, 'inc_close10max_perc3'] = 1
        df_label_main['inc_close10max_perc7'] = 0
        df_label_main.loc[df_label_main["label_close10max"] >= 0.07, 'inc_close10max_perc7'] = 1
        
        ## Decreasing price --sell
        df_label_main['dec_close2mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] <= -0.05, 'dec_close2mean_perc5'] = 1
        df_label_main['dec_close2mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] <= -0.10, 'dec_close2mean_perc10'] = 1
        df_label_main['dec_close2mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] <= -0.03, 'dec_close2mean_perc3'] = 1
        df_label_main['dec_close2mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close2mean"] <= -0.07, 'dec_close2mean_perc7'] = 1
        
        df_label_main['dec_close2min_perc5'] = 0
        df_label_main.loc[df_label_main["label_close2min"] <= -0.05, 'dec_close2min_perc5'] = 1
        df_label_main['dec_close2min_perc10'] = 0
        df_label_main.loc[df_label_main["label_close2min"] <= -0.10, 'dec_close2min_perc10'] = 1
        df_label_main['dec_close2min_perc3'] = 0
        df_label_main.loc[df_label_main["label_close2min"] <= -0.03, 'dec_close2min_perc3'] = 1
        df_label_main['dec_close2min_perc7'] = 0
        df_label_main.loc[df_label_main["label_close2min"] <= -0.07, 'dec_close2min_perc7'] = 1
        
        df_label_main['dec_close3mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] <= -0.05, 'dec_close3mean_perc5'] = 1
        df_label_main['dec_close3mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] <= -0.10, 'dec_close3mean_perc10'] = 1
        df_label_main['dec_close3mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] <= -0.03, 'dec_close3mean_perc3'] = 1
        df_label_main['dec_close3mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close3mean"] <= -0.07, 'dec_close3mean_perc7'] = 1
        
        df_label_main['dec_close3min_perc5'] = 0
        df_label_main.loc[df_label_main["label_close3min"] <= -0.05, 'dec_close3min_perc5'] = 1
        df_label_main['dec_close3min_perc10'] = 0
        df_label_main.loc[df_label_main["label_close3min"] <= -0.10, 'dec_close3min_perc10'] = 1
        df_label_main['dec_close3min_perc3'] = 0
        df_label_main.loc[df_label_main["label_close3min"] <= -0.03, 'dec_close3min_perc3'] = 1
        df_label_main['dec_close3min_perc7'] = 0
        df_label_main.loc[df_label_main["label_close3min"] <= -0.07, 'dec_close3min_perc7'] = 1
        
        df_label_main['dec_close5mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] <= -0.05, 'dec_close5mean_perc5'] = 1
        df_label_main['dec_close5mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] <= -0.10, 'dec_close5mean_perc10'] = 1
        df_label_main['dec_close5mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] <= -0.03, 'dec_close5mean_perc3'] = 1
        df_label_main['dec_close5mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close5mean"] <= -0.07, 'dec_close5mean_perc7'] = 1
        
        df_label_main['dec_close5min_perc5'] = 0
        df_label_main.loc[df_label_main["label_close5min"] <= -0.05, 'dec_close5min_perc5'] = 1
        df_label_main['dec_close5min_perc10'] = 0
        df_label_main.loc[df_label_main["label_close5min"] <= -0.10, 'dec_close5min_perc10'] = 1
        df_label_main['dec_close5min_perc3'] = 0
        df_label_main.loc[df_label_main["label_close5min"] <= -0.03, 'dec_close5min_perc3'] = 1
        df_label_main['dec_close5min_perc7'] = 0
        df_label_main.loc[df_label_main["label_close5min"] <= -0.07, 'dec_close5min_perc7'] = 1
        
        df_label_main['dec_close10mean_perc5'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] <= -0.05, 'dec_close10mean_perc5'] = 1
        df_label_main['dec_close10mean_perc10'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] <= -0.10, 'dec_close10mean_perc10'] = 1
        df_label_main['dec_close10mean_perc3'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] <= -0.03, 'dec_close10mean_perc3'] = 1
        df_label_main['dec_close10mean_perc7'] = 0
        df_label_main.loc[df_label_main["label_close10mean"] <= -0.07, 'dec_close10mean_perc7'] = 1
        
        df_label_main['dec_close10min_perc5'] = 0
        df_label_main.loc[df_label_main["label_close10min"] <= -0.05, 'dec_close10min_perc5'] = 1
        df_label_main['dec_close10min_perc10'] = 0
        df_label_main.loc[df_label_main["label_close10min"] <= -0.10, 'dec_close10min_perc10'] = 1
        df_label_main['dec_close10min_perc3'] = 0
        df_label_main.loc[df_label_main["label_close10min"] <= -0.03, 'dec_close10min_perc3'] = 1
        df_label_main['dec_close10min_perc7'] = 0
        df_label_main.loc[df_label_main["label_close10min"] <= -0.07, 'dec_close10min_perc7'] = 1
    
        df = df.merge(df_feat_main, left_index=True, right_index=True, how='left')
        df = df.merge(df_perc_price, left_index=True, right_index=True, how='left')
        df = df.merge(df_label_main, left_index=True, right_index=True, how='left')
    
        df.to_parquet(f"datas//ml_data_xgb//ml_feature_label_{ticker}.parquet")
    
        del df_feat_main, df_perc_price, df_label_main, df
        gc.collect()
    except:
        print(f"{ticker} Failed to load")

  2%|█▌                                                                              | 4/200 [04:54<2:40:47, 49.22s/it]

JDZG Failed to load


 73%|███████████████████████████████████████████████████████▍                    | 146/200 [4:58:45<1:23:47, 93.09s/it]

NNE Failed to load


 83%|████████████████████████████████████████████████████████████████▋             | 166/200 [5:39:55<56:29, 99.69s/it]

ORKT Failed to load


100%|█████████████████████████████████████████████████████████████████████████████| 200/200 [6:51:38<00:00, 123.49s/it]


In [None]:
not_in = ['CANO', 'IRNT', 'SPWR', 'SFT', 'UNITY', 'OSTK', 'RIDE', 'SRAC']

In [None]:
tickers = list(set(tickers))

In [None]:
tickers = [i for i in tickers if i not in not_in]

In [None]:
tickers