In [1]:
import talib.abstract as ta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from technical import qtpylib
impor

In [2]:
df = pd.read_json('data/ETH_USDT-5m.json')
df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
# convert timestamp to datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
# set timestamp as index
df.set_index('timestamp', inplace=True)

df.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17 04:00:00,301.13,301.13,300.0,301.13,3.82951
2017-08-17 04:05:00,300.0,301.13,298.0,298.0,1.97216
2017-08-17 04:10:00,298.0,298.0,298.0,298.0,0.0
2017-08-17 04:15:00,298.0,299.05,298.0,299.05,12.88486
2017-08-17 04:20:00,299.05,300.1,299.05,300.1,6.58304


In [3]:
informative = df
pair = 'BTCUSDT'

corr_pairs = ['ETHUSDT', "BNBUSDT"]
timeframes = ['5m', '15m']
timeperiods = [
    5,
    10,
    20
]

info = {
    'pair': pair,
    'corr_pairs': corr_pairs,
    'timeframes': timeframes,
    'timeperiods': timeperiods,
    'shift': 5
}

In [4]:
def timeperiod_related_indicators(informative, pair, t):
    # ADX
    informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, window=t)
    # Minus Directional Indicator / Movement
    informative[f"%-{pair}minus_di-period_{t}"] = ta.MINUS_DI(informative, timeperiod=t)
    informative[f"%-{pair}minus_dm-period_{t}"] = ta.MINUS_DM(informative, timeperiod=t)
    # Plus Directional Indicator / Movement
    informative[f"%-{pair}plus_di-period_{t}"] = ta.PLUS_DI(informative, timeperiod=t)
    informative[f"%-{pair}plus_dm-period_{t}"] = ta.PLUS_DM(informative, timeperiod=t)
    # Commodity Channel Index
    informative[f"%-{pair}cci-period_{t}"] = ta.CCI(informative, timeperiod=t)
    # RSI
    informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
    # MIN - Lowest value over a specified period
    informative[f"%-{pair}min-period_{t}"] = ta.MIN(informative, timeperiod=t)
    # MAX - Highest value over a specified period
    informative[f"%-{pair}max-period_{t}"] = ta.MAX(informative, timeperiod=t)
    # DEMA - Double Exponential Moving Average
    informative[f"%-{pair}dema-period_{t}"] = ta.DEMA(informative, timeperiod=t)
    # Linear Regression
    informative[f"%-{pair}linearreg-period_{t}"] = ta.LINEARREG(informative, timeperiod=t)
    informative[f"%-{pair}linearreg_angle-period_{t}"] = ta.LINEARREG_ANGLE(informative, timeperiod=t)
    informative[f"%-{pair}linearreg_intercept-period_{t}"] = ta.LINEARREG_INTERCEPT(informative, timeperiod=t)
    informative[f"%-{pair}linearreg_slope-period_{t}"] = ta.LINEARREG_SLOPE(informative, timeperiod=t)
    # CORREL - Pearson's Correlation Coefficient (r)
    informative[f"%-{pair}correl-period_{t}"] = ta.CORREL(informative, timeperiod=t)
    # STDDEV - Standard Deviation
    informative[f"%-{pair}stddev-period_{t}"] = ta.STDDEV(informative, timeperiod=t)
    # TSF - Time Series Forecast
    informative[f"%-{pair}tsf-period_{t}"] = ta.TSF(informative, timeperiod=t)
    # VAR - Variance
    informative[f"%-{pair}var-period_{t}"] = ta.VAR(informative, timeperiod=t)
    # Momentum
    informative[f"%-{pair}mom-period_{t}"] = ta.MOM(informative, timeperiod=t)
    # MFI
    informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
    # TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
    informative[f"%-{pair}trix-period_{t}"] = ta.TRIX(informative, timeperiod=t)
    # WILLR - Williams' %R
    informative[f"%-{pair}willr-period_{t}"] = ta.WILLR(informative, timeperiod=t)
    # Kaufman's Adaptive Moving Average (KAMA)
    informative[f"%-{pair}kama-period_{t}"] = ta.KAMA(informative, window=t)
    # ATR - Average True Range
    informative[f"%-{pair}atr-period_{t}"] = ta.ATR(informative, timeperiod=t)
    # Volume Weighted Average Price
    vwap = qtpylib.rolling_vwap(informative)
    # OBV - On Balance Volume
    informative[f"%-{pair}obv"] = ta.OBV(informative)
    # Stoch
    stoch = ta.STOCH(informative)
    informative[f"%-{pair}slowd"] = stoch["slowd"]
    informative[f"%-{pair}slowk"] = stoch["slowk"]
    # Stoch Fast
    stochf = ta.STOCHF(informative)
    informative[f"%-{pair}fastd"] = stochf["fastd"]
    informative[f"%-{pair}fastk"] = stochf["fastk"]
    # Stoch RSI
    stoch_rsi = ta.STOCHRSI(informative)
    informative[f"%-{pair}fastd"] = stoch_rsi["fastd"]
    informative[f"%-{pair}fastk"] = stoch_rsi["fastk"]
    #ROC
    informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative)
    #RV
    informative[f"%-{pair}relative_volume-period_{t}"] = (
        informative["volume"] / informative["volume"].rolling(t).mean()
    )
    # Bollinger bands
    bollinger = qtpylib.bollinger_bands(
        qtpylib.typical_price(informative), window=t, stds=2.2
    )
    informative[f"%-{pair}bb_lowerband-period_{t}"] = bollinger["lower"]
    informative[f"%-{pair}bb_middleband-period_{t}"] = bollinger["mid"]
    informative[f"%-{pair}bb_upperband-period_{t}"] = bollinger["upper"]
    informative[f"%-{pair}bb_width-period_{t}"] = (
        informative[f"%-{pair}bb_upperband-period_{t}"]
        - informative[f"%-{pair}bb_lowerband-period_{t}"]
    ) / informative[f"%-{pair}bb_middleband-period_{t}"]
    informative[f"%-{pair}close-bb_lower-period_{t}"] = (
        informative["close"] / informative[f"%-{pair}bb_lowerband-period_{t}"]
    )
    # Bollinger Bands - Weighted (EMA based instead of SMA)
    weighted_bollinger = qtpylib.weighted_bollinger_bands(
        qtpylib.typical_price(informative), window=t, stds=2
    )
    informative[f"%-{pair}wbb_upperband_{t}"] = weighted_bollinger["upper"]
    informative[f"%-{pair}wbb_lowerband_{t}"] = weighted_bollinger["lower"]
    informative[f"%-{pair}wbb_middleband_{t}"] = weighted_bollinger["mid"]
    informative[f"%-{pair}wbb_percent_{t}"] = (
        (informative["close"] - informative[f"%-{pair}wbb_lowerband_{t}"]) /
        (informative[f"%-{pair}wbb_upperband_{t}"] - informative[f"%-{pair}wbb_lowerband_{t}"])
    )
    informative[f"%-{pair}wbb_width_{t}"] = (
        (informative[f"%-{pair}wbb_upperband_{t}"] - informative[f"%-{pair}wbb_lowerband_{t}"]) /
        informative[f"%-{pair}wbb_middleband_{t}"]
    )
    # Keltner Channel
    keltner = qtpylib.keltner_channel(informative, window=t)
    informative[f"%-{pair}kc_upperband"] = keltner["upper"]
    informative[f"%-{pair}kc_lowerband"] = keltner["lower"]
    informative[f"%-{pair}kc_middleband"] = keltner["mid"]
    informative[f"%-{pair}kc_percent"] = (
        (informative["close"] - informative[f"%-{pair}kc_lowerband"]) /
        (informative[f"%-{pair}kc_upperband"] - informative[f"%-{pair}kc_lowerband"])
    )
    informative[f"%-{pair}kc_width"] = (
        (informative[f"%-{pair}kc_upperband"] - informative[f"%-{pair}kc_lowerband"]) / informative[f"%-{pair}kc_middleband"]
    )
    # EMA
    informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t)
    # SMA
    informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
    # TEMA
    informative[f"%-{pair}tema-period_{t}"] = ta.TEMA(informative, timeperiod=t)
    
    return informative

In [5]:
def without_timeperiod_indicators(informative, pair):
    # MACD
    macd = ta.MACD(informative)
    informative[f"%-{pair}macd"] = macd['macd']
    informative[f"%-{pair}macdsignal"] = macd['macdsignal']
    informative[f"%-{pair}macdhist"] = macd['macdhist']
    # Awesome Oscillator
    informative[f"%-{pair}ao"] = qtpylib.awesome_oscillator(informative)
    # Ultimate Oscillator
    informative[f"%-{pair}uo"] = ta.ULTOSC(informative)
    # Aroon, Aroon Oscillator
    aroon = ta.AROON(informative)
    informative[f"%-{pair}aroonup"] = aroon['aroonup']
    informative[f"%-{pair}aroondown"] = aroon['aroondown']
    informative[f"%-{pair}aroonosc"] = ta.AROONOSC(informative)    
    # Parabolic SAR
    informative[f'%-{pair}sar'] = ta.SAR(informative)
    # Hilbert
    hilbert = ta.HT_SINE(informative)
    informative[f"%-{pair}htsine"] = hilbert["sine"]
    informative[f"%-{pair}htleadsine"] = hilbert["leadsine"]
    
    return informative

In [6]:
def bars(informative):
#     # Heikin Ashi Strategy
#     heikinashi = qtpylib.heikinashi(informative)
#     informative[f'%-{pair}ha_open'] = heikinashi['open']
#     informative[f'%-{pair}ha_close'] = heikinashi['close']
#     informative[f'%-{pair}ha_high'] = heikinashi['high']
#     informative[f'%-{pair}ha_low'] = heikinashi['low']
    
    # input
    # ------------------------------------
    informative[f"%-{pair}pct-change"] = informative["close"].pct_change()
    informative[f"%-{pair}raw_volume"] = informative["volume"]

    informative[f"%-{pair}raw_close"] = informative["close"]
    informative[f"%-{pair}raw_open"] = informative["open"]
    informative[f"%-{pair}raw_high"] = informative["high"]
    informative[f"%-{pair}raw_low"] = informative["low"]
    
    return informative

In [7]:
def bar_patterns(informative):
    return informative

In [8]:
def shifted_candles(informative, shift_count):
    indicators = [col for col in informative if col.startswith("%")]
    # This loop duplicates and shifts all indicators to add a sense of recency to data
    for n in range(shift_count + 1):
        if n == 0:
            continue
        informative_shift = informative[indicators].shift(n)
        informative_shift = informative_shift.add_suffix("_shift-" + str(n))
        informative = pd.concat((informative, informative_shift), axis=1)

    return informative

In [9]:
def populate_indicators(informative, info):
    # bars
    informative = bars(informative)
    
    # pattern
    bar_patterns(informative)
    
    # timeperiod_related_indicators
    for t in info["timeperiods"]:
        t = int(t)
        informative = timeperiod_related_indicators(informative, pair, t)
        
    # without_timeperiod_indicators
    informative = without_timeperiod_indicators(informative, pair)

    # shifted candles
    informative = shifted_candles(informative, info['shift'])
    
    return informative

In [10]:
d = populate_indicators(df, info)

  informative[f"%-{pair}cci-period_{t}"] = ta.CCI(informative, timeperiod=t)
  informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
  informative[f"%-{pair}min-period_{t}"] = ta.MIN(informative, timeperiod=t)
  informative[f"%-{pair}max-period_{t}"] = ta.MAX(informative, timeperiod=t)
  informative[f"%-{pair}dema-period_{t}"] = ta.DEMA(informative, timeperiod=t)
  informative[f"%-{pair}linearreg-period_{t}"] = ta.LINEARREG(informative, timeperiod=t)
  informative[f"%-{pair}linearreg_angle-period_{t}"] = ta.LINEARREG_ANGLE(informative, timeperiod=t)
  informative[f"%-{pair}linearreg_intercept-period_{t}"] = ta.LINEARREG_INTERCEPT(informative, timeperiod=t)
  informative[f"%-{pair}linearreg_slope-period_{t}"] = ta.LINEARREG_SLOPE(informative, timeperiod=t)
  informative[f"%-{pair}correl-period_{t}"] = ta.CORREL(informative, timeperiod=t)
  informative[f"%-{pair}stddev-period_{t}"] = ta.STDDEV(informative, timeperiod=t)
  informative[f"%-{pair}tsf-period_{t}"] = ta.T

In [11]:
d.columns

Index(['open', 'high', 'low', 'close', 'volume', '%-BTCUSDTpct-change',
       '%-BTCUSDTraw_volume', '%-BTCUSDTraw_close', '%-BTCUSDTraw_open',
       '%-BTCUSDTraw_high',
       ...
       '%-BTCUSDTmacdsignal_shift-5', '%-BTCUSDTmacdhist_shift-5',
       '%-BTCUSDTao_shift-5', '%-BTCUSDTuo_shift-5',
       '%-BTCUSDTaroonup_shift-5', '%-BTCUSDTaroondown_shift-5',
       '%-BTCUSDTaroonosc_shift-5', '%-BTCUSDTsar_shift-5',
       '%-BTCUSDThtsine_shift-5', '%-BTCUSDThtleadsine_shift-5'],
      dtype='object', length=869)

In [13]:
print(d.shape)
d.tail()

(585075, 869)


Unnamed: 0_level_0,open,high,low,close,volume,%-BTCUSDTpct-change,%-BTCUSDTraw_volume,%-BTCUSDTraw_close,%-BTCUSDTraw_open,%-BTCUSDTraw_high,...,%-BTCUSDTmacdsignal_shift-5,%-BTCUSDTmacdhist_shift-5,%-BTCUSDTao_shift-5,%-BTCUSDTuo_shift-5,%-BTCUSDTaroonup_shift-5,%-BTCUSDTaroondown_shift-5,%-BTCUSDTaroonosc_shift-5,%-BTCUSDTsar_shift-5,%-BTCUSDThtsine_shift-5,%-BTCUSDThtleadsine_shift-5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-16 13:25:00,1644.4,1645.28,1641.57,1643.74,2229.7682,-0.000395,2229.7682,1643.74,1644.4,1645.28,...,-1.052263,-1.142108,-8.104353,44.207542,0.0,78.571429,-78.571429,1657.143197,-0.181741,0.566821
2023-03-16 13:30:00,1643.75,1645.87,1640.54,1644.05,2575.6837,0.000189,2575.6837,1644.05,1643.75,1645.87,...,-1.226869,-0.698426,-8.160353,51.793894,42.857143,71.428571,-28.571429,1655.788014,-0.120301,0.616906
2023-03-16 13:35:00,1644.04,1650.45,1643.27,1649.91,4278.2257,0.003564,4278.2257,1649.91,1644.04,1650.45,...,-1.326226,-0.397427,-6.847088,49.587966,35.714286,64.285714,-28.571429,1645.85,-0.008817,0.700844
2023-03-16 13:40:00,1649.91,1652.37,1649.48,1651.05,2244.6877,0.000691,2244.6877,1651.05,1649.91,1652.37,...,-1.57887,-1.010576,-7.242029,44.262389,28.571429,100.0,-71.428571,1656.69,-0.004778,0.70372
2023-03-16 13:45:00,1651.06,1652.28,1646.77,1648.02,2583.7367,-0.001835,2583.7367,1648.02,1651.06,1652.28,...,-1.853632,-1.099049,-8.334882,52.898508,21.428571,100.0,-78.571429,1656.69,0.00205,0.708555
