# Add risk and technical indicators

In [1]:
import pandas as pd
import talib as tb
import datetime
import os

In [2]:
def create_technical_indicators_df(df_price_ohcl):
    """Return a copy of ``df_price_ohcl`` extended with TA-Lib indicator columns.

    Parameters
    ----------
    df_price_ohcl : pandas.DataFrame
        Candlestick data. Must contain the columns 'open', 'close', 'high',
        'low' and 'volume'; any other columns are carried over unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with one extra column per indicator (moving
        averages of close and volume, WILLR, NATR, PPO, CCI, ADX, ADXR, MACD,
        RSI, Bollinger bands, AD, ATR, HT_DCPERIOD, SAR) plus three
        intrabar price ratios. The input frame is not modified.

    Raises
    ------
    KeyError
        If one of the required columns is missing.

    Any error raised by pandas or TA-Lib propagates to the caller; it is no
    longer printed and turned into an implicit ``None`` return, which used to
    surface later as an unrelated failure on the ``None`` result.
    """
    required_columns = ('open', 'close', 'high', 'low', 'volume')
    missing = [col for col in required_columns if col not in df_price_ohcl.columns]
    if missing:
        raise KeyError(
            "missing required column(s): {}".format(", ".join(missing))
        )

    # Every series is cast to float: the TA-Lib wrapper is expected to reject
    # non-double input arrays (e.g. integer prices read from a CSV file).
    o = df_price_ohcl['open'].astype(float).values
    c = df_price_ohcl['close'].astype(float).values
    h = df_price_ohcl['high'].astype(float).values
    l = df_price_ohcl['low'].astype(float).values
    v = df_price_ohcl['volume'].astype(float).values

    # Work on a copy so the caller's frame is left untouched.
    ta = df_price_ohcl.copy()

    # Moving averages of the close (5 periods) and of the volume (3 periods).
    # The iteration order reproduces the original column order:
    # all close columns of a family first, then its volume columns.
    # NOTE(review): tb.MA is called without `matype`; if its default is the
    # simple moving average, the MA* columns duplicate the SMA* ones - confirm.
    moving_averages = (
        ('MA', tb.MA),
        ('SMA', tb.SMA),
        ('WMA', tb.WMA),
        ('EMA', tb.EMA),
    )
    for label, ma_func in moving_averages:
        for period in (5, 10, 20, 60, 120):
            ta['{}{}'.format(label, period)] = ma_func(c, timeperiod=period)
        for period in (5, 10, 20):
            ta['volume_{}{}'.format(label, period)] = ma_func(v, timeperiod=period)

    # William'%R (WILLR)
    ta['WILLR_14'] = tb.WILLR(h, l, c, timeperiod=14)

    # Normalized Average True Range (NATR)
    ta['NATR_14'] = tb.NATR(h, l, c, timeperiod=14)

    # Percentage Price Oscillator (PPO)
    ta['PPO_12_26'] = tb.PPO(c, fastperiod=12, slowperiod=26, matype=0)

    # Commodity Channel Index (CCI)
    ta['CCI_14'] = tb.CCI(h, l, c, timeperiod=14)

    # Average Directional Movement Index
    ta['ADX_14'] = tb.ADX(h, l, c, timeperiod=14)

    # Average Directional Movement Index Rating
    ta['ADXR_14'] = tb.ADXR(h, l, c, timeperiod=14)

    # Moving Average Convergence/Divergence: only the first returned series
    # is kept.
    ta['MACD_12_26_9'] = tb.MACD(c, fastperiod=12, slowperiod=26, signalperiod=9)[0]

    # Relative Strength Index
    ta['RSI_14'] = tb.RSI(c, timeperiod=14)

    # Bollinger Bands: computed once and unpacked instead of three identical
    # calls that each kept a single band.
    bb_upper, bb_middle, bb_lower = tb.BBANDS(
        c, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
    )
    ta['BBANDS_U'] = bb_upper
    ta['BBANDS_M'] = bb_middle
    ta['BBANDS_L'] = bb_lower

    # Chaikin A/D Line
    ta['AD'] = tb.AD(h, l, c, v)

    # Average True Range
    ta['ATR'] = tb.ATR(h, l, c, timeperiod=14)

    # Hilbert Transform - Dominant Cycle Period
    ta['HT_DC'] = tb.HT_DCPERIOD(c)

    # Parabolic SAR
    ta['SAR'] = tb.SAR(h, l, acceleration=0.02, maximum=0.2)

    # Intrabar price ratios relative to the open.
    ta["ratio_high_open"] = h / o
    ta["ratio_low_open"] = l / o
    ta["ratio_close_open"] = c / o

    return ta

## Get and process the data

In [3]:
def run():
    """Add technical indicators to every configured candlestick CSV file.

    For each asset/timeframe pair, the processed candlestick file is read
    from ``../../data/20_candlesticks_signals_processed``, enriched with
    ``create_technical_indicators_df``, stripped of every row that contains
    a NaN, and written to ``../../data/30_technical_indicators`` without the
    index column.

    A failure on one dataset is printed together with the dataset it belongs
    to, and the remaining datasets are still processed. Returns ``None``.
    """
    # Parameters
    list_asset_ticket = ["BTCUSDT", "ETHUSDT", "BNBUSDT"]
    list_timestamp = ["1d", "1h", "30m", "15m", "5m"]
    start_date = "1 Jan, 2017"
    end_date = "31 Dec, 2022"

    input_data_path = '../../data/20_candlesticks_signals_processed'
    export_path = "../../data/30_technical_indicators"
    file_extension = ".csv"

    # The date part of the file names is the same for every dataset, so it
    # is formatted once instead of on every loop iteration.
    date_range = (
        "_from_" + datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d')
        + "_to_" + datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d')
    )

    for asset_ticket in list_asset_ticket:
        for timestamp in list_timestamp:
            # Input and export names share this stem; the export only adds
            # a suffix.
            input_data_filename = (
                "binance"
                + "_" + asset_ticket
                + "_" + timestamp
                + date_range
                + "_candlesticks_signals_processed"
            )
            export_filename = input_data_filename + "_and_technical_indicators"

            full_path_input_data = os.path.join(
                input_data_path, input_data_filename + file_extension
            )
            full_export_path = os.path.join(
                export_path, export_filename + file_extension
            )

            # Batch boundary: errors are handled per dataset so that one
            # missing or malformed file does not abort all the others.
            try:
                df = pd.read_csv(full_path_input_data)

                # Set the index on the dataframe (the 'date' column is kept).
                df['date_index'] = df['date']
                df.set_index('date_index', inplace=True)

                df_ta = create_technical_indicators_df(df)
                if df_ta is None:
                    # The helper may report its own error and return nothing.
                    print("{} {}: no technical indicators produced, skipped".format(
                        asset_ticket, timestamp))
                    continue
                df_ta = df_ta.dropna()

                # Export the data; make sure the target directory exists.
                os.makedirs(export_path, exist_ok=True)
                df_ta.to_csv(full_export_path, index=False)

            except Exception as e:
                print("{} {}: {}".format(asset_ticket, timestamp, e))

In [4]:
# Execute the processing for every asset/timeframe pair configured in run().
run()