# Add risk and technical indicators

In [48]:
import pandas as pd
import talib as tb
import datetime
import os

In [49]:
def create_technical_indicators_df(df_price_ohcl):
    """Return a copy of the price frame with technical-indicator columns added.

    Parameters
    ----------
    df_price_ohcl : pandas.DataFrame
        Candlestick data with numeric ``open``, ``high``, ``low``, ``close``
        and ``volume`` columns. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with these columns appended, in this order:
        ``MA*``/``SMA*``/``WMA*``/``EMA*`` of close (5, 10, 20, 60, 120) and
        of volume (5, 10, 20, prefixed ``volume_``), ``WILLR_14``,
        ``NATR_14``, ``PPO_12_26``, ``CCI_14``, ``ADX_14``, ``ADXR_14``,
        ``MACD_12_26_9``, ``RSI_14``, ``BBANDS_U``/``BBANDS_M``/``BBANDS_L``,
        ``AD``, ``ATR``, ``HT_DC``, ``SAR`` and the intraday ratios
        ``ratio_high_open``, ``ratio_low_open``, ``ratio_close_open``.
        Values are raw indicator outputs (no normalization is applied).
        The leading rows of windowed indicators are NaN until the window
        is filled, so callers typically ``dropna()`` the result.

    Raises
    ------
    KeyError
        If any of the required OHLCV columns is missing. Errors raised by
        TA-Lib itself propagate unchanged instead of being printed and
        turned into a ``None`` return value.
    """
    required_columns = ('open', 'close', 'high', 'low', 'volume')
    missing = [name for name in required_columns if name not in df_price_ohcl]
    if missing:
        raise KeyError(
            "create_technical_indicators_df: missing required column(s): "
            + ", ".join(missing)
        )

    # TA-Lib only accepts float64 arrays, so cast every input series.
    o = df_price_ohcl['open'].astype(float).values
    c = df_price_ohcl['close'].astype(float).values
    h = df_price_ohcl['high'].astype(float).values
    l = df_price_ohcl['low'].astype(float).values
    v = df_price_ohcl['volume'].astype(float).values

    ta = df_price_ohcl.copy()

    # Moving-average families, each applied to close (five windows) and then
    # to volume (three windows).
    # NOTE(review): tb.MA defaults to matype=0, which appears to be the simple
    # moving average, so MA* likely duplicates SMA*; both are kept so the
    # output schema is unchanged.
    moving_averages = (
        ('MA', tb.MA),
        ('SMA', tb.SMA),
        ('WMA', tb.WMA),
        ('EMA', tb.EMA),
    )
    for label, average in moving_averages:
        for period in (5, 10, 20, 60, 120):
            ta['{}{}'.format(label, period)] = average(c, timeperiod=period)
        for period in (5, 10, 20):
            ta['volume_{}{}'.format(label, period)] = average(v, timeperiod=period)

    # Williams' %R (WILLR)
    ta['WILLR_14'] = tb.WILLR(h, l, c, timeperiod=14)

    # Normalized Average True Range (NATR)
    ta['NATR_14'] = tb.NATR(h, l, c, timeperiod=14)

    # Percentage Price Oscillator (PPO)
    ta['PPO_12_26'] = tb.PPO(c, fastperiod=12, slowperiod=26, matype=0)

    # Commodity Channel Index (CCI)
    ta['CCI_14'] = tb.CCI(h, l, c, timeperiod=14)

    # Average Directional Movement Index
    ta['ADX_14'] = tb.ADX(h, l, c, timeperiod=14)

    # Average Directional Movement Index Rating
    ta['ADXR_14'] = tb.ADXR(h, l, c, timeperiod=14)

    # Moving Average Convergence/Divergence: only the first returned series
    # (the MACD line) is stored; the other outputs are discarded.
    ta['MACD_12_26_9'] = tb.MACD(c, fastperiod=12, slowperiod=26, signalperiod=9)[0]

    # Relative Strength Index
    ta['RSI_14'] = tb.RSI(c, timeperiod=14)

    # Bollinger Bands: computed once and unpacked into upper/middle/lower.
    upper_band, middle_band, lower_band = tb.BBANDS(
        c, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
    )
    ta['BBANDS_U'] = upper_band
    ta['BBANDS_M'] = middle_band
    ta['BBANDS_L'] = lower_band

    # Chaikin A/D Line
    ta['AD'] = tb.AD(h, l, c, v)

    # Average True Range
    ta['ATR'] = tb.ATR(h, l, c, timeperiod=14)

    # Hilbert Transform - Dominant Cycle Period
    ta['HT_DC'] = tb.HT_DCPERIOD(c)

    # Parabolic SAR
    ta['SAR'] = tb.SAR(h, l, acceleration=0.02, maximum=0.2)

    # Intraday price ratios relative to the open
    ta["ratio_high_open"] = h / o
    ta["ratio_low_open"] = l / o
    ta["ratio_close_open"] = c / o

    return ta

### Parameters

In [50]:
# Symbol embedded in the input/export file names (presumably the Binance trading pair).
asset_ticket = "BTCUSDT"
# Interval label embedded in the file names (presumably the candle width, here daily).
timestamp = "1d"
# Date range of the dataset; written in the '%d %b, %Y' form that the
# file-name cells parse with strptime.
start_date = "1 Jan, 2017"
end_date = "31 Dec, 2021"

In [51]:
input_data_path = '../data'
input_data_extension = ".csv"
# File stem layout:
#   binance_<ticket>_<timestamp>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>_candlesticks_signals_processed
input_data_filename = "_".join([
    "binance",
    asset_ticket,
    timestamp,
    "from",
    datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "to",
    datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "candlesticks_signals_processed",
])
full_path_input_data = os.path.join(
    input_data_path,
    input_data_filename + input_data_extension,
)

In [52]:
# Load the processed candlestick CSV located at full_path_input_data.
df = pd.read_csv(full_path_input_data)

In [53]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,CDLINVERTEDHAMMER_NEW,CDLHAMMER_NEW,CDLPIERCING_NEW,CDLMORNINGSTAR_NEW,CDLENGULFINGBULLISH_NEW,CDLSHOOTINGSTAR_NEW,CDLHANGINGMAN_NEW,CDLDARKCLOUDCOVER_NEW,CDLEVENINGSTAR_NEW,CDLENGULFINGBEARISH_NEW
0,1502928000000,4261.48,4485.39,4200.74,4285.08,795.150377,1503014399999,3454770.0,3427,616.248541,...,0,0,0,0,0,0,0,0,0,0
1,1503014400000,4285.08,4371.52,3938.77,4108.37,1199.888264,1503100799999,5086958.0,5233,972.86871,...,0,0,0,0,0,0,0,0,0,0
2,1503100800000,4108.37,4184.69,3850.0,4139.98,381.309763,1503187199999,1549484.0,2153,274.336042,...,0,0,0,0,0,0,0,0,0,0
3,1503187200000,4120.98,4211.08,4032.62,4086.29,467.083022,1503273599999,1930364.0,2321,376.795947,...,0,0,0,0,0,0,0,0,0,0
4,1503273600000,4069.13,4119.62,3911.79,4016.0,691.74306,1503359999999,2797232.0,3972,557.356107,...,0,0,0,0,0,0,0,0,0,0


## Set the index on the dataframe

In [54]:
# Index the rows by their open time as a DatetimeIndex; the
# 'formatted_open_time' column itself is left in the frame.
df.index = pd.DatetimeIndex(df['formatted_open_time'])

In [55]:
# Append the technical-indicator columns to a copy of the price frame.
df_ta = create_technical_indicators_df(df)

In [56]:
# Drop every row containing a NaN in any column. This removes the indicator
# warm-up rows (the longest window used is 120 periods), and would also drop
# rows with NaNs in the original input columns.
df_ta = df_ta.dropna()

In [57]:
# Preview the first rows that survive the NaN filter.
df_ta.head()

Unnamed: 0_level_0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,BBANDS_U,BBANDS_M,BBANDS_L,AD,ATR,HT_DC,SAR,ratio_high_open,ratio_low_open,ratio_close_open
formatted_open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-14,1513209600000,16030.34,16445.0,15450.0,16334.98,11616.867151,1513295999999,186092500.0,86150,5999.075173,...,18391.237211,12695.52,6999.802789,85678.916045,1570.046351,27.522205,12570.0392,1.025867,0.963797,1.019004
2017-12-15,1513296000000,16334.98,17991.0,16298.45,17539.83,9181.273947,1513382399999,159809700.0,85478,4712.5784,...,18896.019332,13137.511,7379.002668,89965.427218,1578.796612,27.561481,12668.038416,1.101379,0.997764,1.073759
2017-12-16,1513382400000,17516.81,19539.0,17190.01,19102.66,4202.628709,1513468799999,76293720.0,51777,2308.847163,...,19641.813804,13636.243,7630.672196,92606.725304,1633.810425,27.275392,12880.956879,1.115443,0.981344,1.090533
2017-12-17,1513468800000,19120.19,19798.68,18510.0,18860.02,9177.183434,1513555199999,175867400.0,84581,4197.960835,...,20220.319637,14096.744,7973.168363,88414.794602,1609.158252,27.027934,13280.439467,1.035486,0.968087,0.986393
2017-12-18,1513555200000,18860.04,19300.0,17029.98,18856.25,10624.633071,1513641599999,196339200.0,91956,4690.302146,...,20684.597174,14544.7165,8404.835826,94885.560372,1656.362662,26.783773,13801.898709,1.023328,0.902966,0.999799


## Export the Data

### Export parameters

In [58]:
export_path = "../data"
export_extension = ".csv"
# File stem layout:
#   binance_<ticket>_<timestamp>_from_<YYYY_MM_DD>_to_<YYYY_MM_DD>_candlesticks_signals_processed_and_technical_indicators
export_filename = "_".join([
    "binance",
    asset_ticket,
    timestamp,
    "from",
    datetime.datetime.strptime(start_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "to",
    datetime.datetime.strptime(end_date, '%d %b, %Y').strftime('%Y_%m_%d'),
    "candlesticks_signals_processed_and_technical_indicators",
])
full_export_path = os.path.join(
    export_path,
    export_filename + export_extension,
)

### Export

In [59]:
# Write the enriched frame to CSV. index=False omits the DatetimeIndex from
# the file; the open time presumably remains available through the
# 'formatted_open_time' column — verify before relying on it downstream.
df_ta.to_csv(full_export_path, index=False)