Implementation of LSTM Model on AUD_USD(H1) prediction of the close price 1 hour ahead based on data of previous 10 hours

## 1. Data Preparation

### 1.1 Read in data file

In [1]:
import talib as ta

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

from utils import series_to_supervised

In [2]:
data = pd.read_csv('../data/AUD_USD_H1.csv')
data

Unnamed: 0,datetime,open,high,low,close
0,2005-01-02T18:00:00.000000000Z,0.78230,0.78230,0.78230,0.78230
1,2005-01-02T19:00:00.000000000Z,0.78200,0.78280,0.78180,0.78180
2,2005-01-02T20:00:00.000000000Z,0.78140,0.78160,0.78060,0.78160
3,2005-01-02T21:00:00.000000000Z,0.78110,0.78150,0.78060,0.78060
4,2005-01-02T22:00:00.000000000Z,0.78070,0.78240,0.78070,0.78240
...,...,...,...,...,...
102983,2021-02-26T18:00:00.000000000Z,0.77218,0.77353,0.77122,0.77186
102984,2021-02-26T19:00:00.000000000Z,0.77190,0.77235,0.77064,0.77088
102985,2021-02-26T20:00:00.000000000Z,0.77092,0.77152,0.76924,0.76947
102986,2021-02-26T21:00:00.000000000Z,0.76944,0.77106,0.76926,0.77038


### 1.2 Datetime formatting

In [3]:
data['datetime'] = pd.to_datetime(data['datetime'])
data['datetime'] = data['datetime'].dt.date
# data['datetime'] = data['datetime'].dt.tz_localize(None)
data.set_index('datetime', inplace=True)
data

Unnamed: 0_level_0,open,high,low,close
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2005-01-02,0.78230,0.78230,0.78230,0.78230
2005-01-02,0.78200,0.78280,0.78180,0.78180
2005-01-02,0.78140,0.78160,0.78060,0.78160
2005-01-02,0.78110,0.78150,0.78060,0.78060
2005-01-02,0.78070,0.78240,0.78070,0.78240
...,...,...,...,...
2021-02-26,0.77218,0.77353,0.77122,0.77186
2021-02-26,0.77190,0.77235,0.77064,0.77088
2021-02-26,0.77092,0.77152,0.76924,0.76947
2021-02-26,0.76944,0.77106,0.76926,0.77038


### 1.3 TA indicators

In [4]:
open_ = data['open'].values
high_ = data['high'].values
low_ = data['low'].values
close_ = data['close'].values
dt = data.index

In [5]:
tp = 24 ## set time period window

## overlap studies indicators
# BBANDS - Bollinger Bands
data['upperband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[0]
data['middleband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[1]
data['lowerband'] = ta.BBANDS(close_, timeperiod=tp, nbdevup=2, nbdevdn=2, matype=0)[2]
# DEMA - Double Exponential Moving Average
data['dema'] = ta.DEMA(close_, timeperiod=tp)
# EMA - Exponential Moving Average
data['ema'] = ta.EMA(close_, timeperiod=tp)
# HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
data['ht'] = ta.HT_TRENDLINE(close_)
# KAMA - Kaufman Adaptive Moving Average
data['kama'] = ta.KAMA(close_, timeperiod=tp)
# MA - Moving average
data['ma'] = ta.MA(close_, timeperiod=tp, matype=0)
# MAMA - MESA Adaptive Moving Average
# data['mama'], data['fama'] = ta.MAMA(close_, fastlimit=5, slowlimit=10)
# MAVP - Moving average with variable period
# data['mavp'] = ta.MAVP(close_, periods, minperiod=2, maxperiod=30, matype=0)
# MIDPOINT - MidPoint over period
data['midpoint'] = ta.MIDPOINT(close_, timeperiod=tp)
# MIDPRICE - Midpoint Price over period
data['midprice'] = ta.MIDPRICE(high_, low_, timeperiod=tp)
# SAR - Parabolic SAR
data['sar'] = ta.SAR(high_, low_, acceleration=0, maximum=0)
# SAREXT - Parabolic SAR - Extended
data['sarext'] = ta.SAREXT(high_, low_, startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0)
# SMA - Simple Moving Average
data['sma5'] = ta.SMA(close_, timeperiod=tp)
data['sma10'] = ta.SMA(close_, timeperiod=tp*2)
data['sma30'] = ta.SMA(close_, timeperiod=tp*6)
# T3 - Triple Exponential Moving Average (T3)
data['t3'] = ta.T3(close_, timeperiod=tp, vfactor=0)
# TEMA - Triple Exponential Moving Average
data['tema'] = ta.TEMA(close_, timeperiod=tp*6)
# TRIMA - Triangular Moving Average
data['trima5'] = ta.TRIMA(close_, timeperiod=tp)
data['trima30'] = ta.TRIMA(close_, timeperiod=tp*6)
# WMA - Weighted Moving Average
data['wma5'] = ta.WMA(close_, timeperiod=tp)
data['wma30'] = ta.WMA(close_, timeperiod=tp*6)

In [6]:
## Momentum indicators
# ADX - Average Directional Movement Index
data['adx'] = ta.ADX(high_, low_, close_, timeperiod=tp)
# ADXR - Average Directional Movement Index Rating
data['adxr'] = ta.ADXR(high_, low_, close_, timeperiod=tp)
# APO - Absolute Price Oscillator
data['apo'] = ta.APO(close_, fastperiod=12, slowperiod=24, matype=0)
# AROON - Aroon
data['aroondown'], data['aroonup'] = ta.AROON(high_, low_, timeperiod=tp)
# AROONOSC - Aroon Oscillator
data['aroonosc'] = ta.AROONOSC(high_, low_, timeperiod=tp)
# BOP - Balance Of Power
data['bop'] = ta.BOP(open_, high_, low_, close_)
# CCI - Commodity Channel Index
data['cci'] = ta.CCI(high_, low_, close_, timeperiod=tp)
# CMO - Chande Momentum Oscillator
data['cmo'] = ta.CMO(close_, timeperiod=tp)
# DX - Directional Movement Index
data['dx'] = ta.DX(high_, low_, close_, timeperiod=tp)
# MACD - Moving Average Convergence/Divergence
data['macd'], data['macdsignal'], data['macdhist'] = ta.MACD(close_, fastperiod=12, slowperiod=24, signalperiod=9)
# MACDEXT - MACD with controllable MA type
data['macdext'], data['macdsignalext'], data['macdhistext'] = ta.MACDEXT(close_, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
# MACDFIX - Moving Average Convergence/Divergence Fix 12/26
data['macdfix'], data['macdsignalfix'], data['macdhistfix'] = ta.MACDFIX(close_, signalperiod=9)
# MINUS_DI - Minus Directional Indicator
data['minus_di'] = ta.MINUS_DI(high_, low_, close_, timeperiod=tp)
# MINUS_DM - Minus Directional Movement
data['minus_dm'] = ta.MINUS_DM(high_, low_, timeperiod=tp)
# MOM - Momentum
data['mom'] = ta.MOM(close_, timeperiod=tp)
# PLUS_DI - Plus Directional Indicator
data['plus_di'] = ta.PLUS_DI(high_, low_, close_, timeperiod=tp)
# PLUS_DM - Plus Directional Movement
data['plus_dm'] = ta.PLUS_DM(high_, low_, timeperiod=tp)
# PPO - Percentage Price Oscillator
data['ppo'] = ta.PPO(close_, fastperiod=12, slowperiod=24, matype=0)
# ROC - Rate of change : ((price/prevPrice)-1)*100
data['roc'] = ta.ROC(close_, timeperiod=tp)
# ROCP - Rate of change Percentage: (price-prevPrice)/prevPrice
data['rocp'] = ta.ROCP(close_, timeperiod=tp)
# ROCR - Rate of change ratio: (price/prevPrice)
data['rocr'] = ta.ROCR(close_, timeperiod=tp)
# ROCR100 - Rate of change ratio 100 scale: (price/prevPrice)*100
data['rocr100'] = ta.ROCR100(close_, timeperiod=tp)
# RSI - Relative Strength Index
data['rsi'] = ta.RSI(close_, timeperiod=tp)
# STOCH - Stochastic
data['slowk'], data['slowd'] = ta.STOCH(high_, low_, close_, fastk_period=24, slowk_period=12, slowk_matype=0, slowd_period=3, slowd_matype=0)
# STOCHF - Stochastic Fast
data['fastk'], data['fastd'] = ta.STOCHF(high_, low_, close_, fastk_period=24, fastd_period=12, fastd_matype=0)
# STOCHRSI - Stochastic Relative Strength Index
data['fastkrsi'], data['fastdrsi'] = ta.STOCHRSI(close_, timeperiod=tp, fastk_period=5, fastd_period=3, fastd_matype=0)
# TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
data['trix'] = ta.TRIX(close_, timeperiod=tp)
# ULTOSC - Ultimate Oscillator
data['ultosc'] = ta.ULTOSC(high_, low_, close_, timeperiod1=tp, timeperiod2=tp*2, timeperiod3=tp*3)
# WILLR - Williams' %R
data['willr'] = ta.WILLR(high_, low_, close_, timeperiod=tp)

In [7]:
## volatility indicators

# ATR - Average True Range
data['atr'] = ta.ATR(high_, low_, close_, timeperiod=tp)
# NATR - Normalized Average True Range
data['natr'] = ta.NATR(high_, low_, close_, timeperiod=tp)
# TRANGE - True Range
data['trange'] = ta.TRANGE(high_, low_, close_)


In [8]:
## price transform

# AVGPRICE - Average Price
data['avgprice'] = ta.AVGPRICE(open_, high_, low_, close_)
# MEDPRICE - Median Price
data['medprice'] = ta.MEDPRICE(high_, low_)
# TYPPRICE - Typical Price
data['typprice'] = ta.TYPPRICE(high_, low_, close_)
# WCLPRICE - Weighted Close Price
data['wclprice'] = ta.WCLPRICE(high_, low_, close_)


In [9]:
## cycle indicators

# HT_DCPERIOD - Hilbert Transform - Dominant Cycle Period
data['ht_dcperiod'] = ta.HT_DCPERIOD(close_)
# HT_DCPHASE - Hilbert Transform - Dominant Cycle Phase
data['ht_dcphase'] = ta.HT_DCPHASE(close_)
# HT_PHASOR - Hilbert Transform - Phasor Components
data['inphase'], data['quadrature'] = ta.HT_PHASOR(close_)
# HT_SINE - Hilbert Transform - SineWave
data['sine'], data['leadsine'] = ta.HT_SINE(close_)
# HT_TRENDMODE - Hilbert Transform - Trend vs Cycle Mode
data['ht_trendmode'] = ta.HT_TRENDMODE(close_)


In [10]:
## pattern recognition functions

# CDL2CROWS - Two Crows
data['cdl2crows'] = ta.CDL2CROWS(open_, high_, low_, close_)
# CDL3BLACKCROWS - Three Black Crows
data['cdl3blackcrows'] = ta.CDL3BLACKCROWS(open_, high_, low_, close_)
# CDL3INSIDE - Three Inside Up/Down
data['cdl3inside'] = ta.CDL3INSIDE(open_, high_, low_, close_)
# CDL3LINESTRIKE - Three-Line Strike
data['cdl3linestrike'] = ta.CDL3LINESTRIKE(open_, high_, low_, close_)
# CDL3OUTSIDE - Three Outside Up/Down
data['cdl3outside'] = ta.CDL3OUTSIDE(open_, high_, low_, close_)
# CDL3STARSINSOUTH - Three Stars In The South
data['cdl3starsinsouth'] = ta.CDL3STARSINSOUTH(open_, high_, low_, close_)
# CDL3WHITESOLDIERS - Three Advancing White Soldiers
data['cdl3whitesoldiers'] = ta.CDL3WHITESOLDIERS(open_, high_, low_, close_)
# CDLABANDONEDBABY - Abandoned Baby
data['cdlabandonedbaby'] = ta.CDLABANDONEDBABY(open_, high_, low_, close_, penetration=0)
# CDLADVANCEBLOCK - Advance Block
data['cdladvanceblock'] = ta.CDLADVANCEBLOCK(open_, high_, low_, close_)
# CDLBELTHOLD - Belt-hold
data['cdlbelthold'] = ta.CDLBELTHOLD(open_, high_, low_, close_)
# CDLBREAKAWAY - Breakaway
data['cdlbreakaway'] = ta.CDLBREAKAWAY(open_, high_, low_, close_)
# CDLCLOSINGMARUBOZU - Closing Marubozu
data['cdlclosingmarubozu'] = ta.CDLCLOSINGMARUBOZU(open_, high_, low_, close_)
# CDLCONCEALBABYSWALL - Concealing Baby Swallow
data['cdlconcealbabyswall'] = ta.CDLCONCEALBABYSWALL(open_, high_, low_, close_)
# CDLCOUNTERATTACK - Counterattack
data['cdlcounterattack'] = ta.CDLCOUNTERATTACK(open_, high_, low_, close_)
# CDLDARKCLOUDCOVER - Dark Cloud Cover
data['cdldarkcloudcover'] = ta.CDLDARKCLOUDCOVER(open_, high_, low_, close_, penetration=0)
# CDLDOJI - Doji
data['cdldoji'] = ta.CDLDOJI(open_, high_, low_, close_)
# CDLDOJISTAR - Doji Star
data['cdldojistar'] = ta.CDLDOJISTAR(open_, high_, low_, close_)
# CDLDRAGONFLYDOJI - Dragonfly Doji
data['cdldragonflydoji'] = ta.CDLDRAGONFLYDOJI(open_, high_, low_, close_)
# CDLENGULFING - Engulfing Pattern
data['cdlengulfing'] = ta.CDLENGULFING(open_, high_, low_, close_)
# CDLEVENINGDOJISTAR - Evening Doji Star
data['cdleveningdojistar'] = ta.CDLEVENINGDOJISTAR(open_, high_, low_, close_, penetration=0)
# CDLEVENINGSTAR - Evening Star
data['cdleveningstar'] = ta.CDLEVENINGSTAR(open_, high_, low_, close_, penetration=0)
# CDLGAPSIDESIDEWHITE - Up/Down-gap side-by-side white lines
data['cdlgapsidesidewhite'] = ta.CDLGAPSIDESIDEWHITE(open_, high_, low_, close_)
# CDLGRAVESTONEDOJI - Gravestone Doji
data['cdlgravestonedoji'] = ta.CDLGRAVESTONEDOJI(open_, high_, low_, close_)
# CDLHAMMER - Hammer
data['cdlhammer'] = ta.CDLHAMMER(open_, high_, low_, close_)
# CDLHANGINGMAN - Hanging Man
data['cdlhangingman'] = ta.CDLHANGINGMAN(open_, high_, low_, close_)
# CDLHARAMI - Harami Pattern
data['cdlharami'] = ta.CDLHARAMI(open_, high_, low_, close_)
# CDLHARAMICROSS - Harami Cross Pattern
data['cdlharamicross'] = ta.CDLHARAMICROSS(open_, high_, low_, close_)
# CDLHIGHWAVE - High-Wave Candle
data['cdlhighwave'] = ta.CDLHIGHWAVE(open_, high_, low_, close_)
# CDLHIKKAKE - Hikkake Pattern
data['cdlhikkake'] = ta.CDLHIKKAKE(open_, high_, low_, close_)
# CDLHIKKAKEMOD - Modified Hikkake Pattern
data['cdlhikkakemod'] = ta.CDLHIKKAKEMOD(open_, high_, low_, close_)
# CDLHOMINGPIGEON - Homing Pigeon
data['cdlhomingpigeon'] = ta.CDLHOMINGPIGEON(open_, high_, low_, close_)
# CDLIDENTICAL3CROWS - Identical Three Crows
data['cdlidentical3crows'] = ta.CDLIDENTICAL3CROWS(open_, high_, low_, close_)
# CDLINNECK - In-Neck Pattern
data['cdlinneck'] = ta.CDLINNECK(open_, high_, low_, close_)
# CDLINVERTEDHAMMER - Inverted Hammer
data['cdlinvertedhammer'] = ta.CDLINVERTEDHAMMER(open_, high_, low_, close_)
# CDLKICKING - Kicking
data['cdlkicking'] = ta.CDLKICKING(open_, high_, low_, close_)
# CDLKICKINGBYLENGTH - Kicking - bull/bear determined by the longer marubozu
data['cdlkickingbylength'] = ta.CDLKICKINGBYLENGTH(open_, high_, low_, close_)
# CDLLADDERBOTTOM - Ladder Bottom
data['cdlladderbottom'] = ta.CDLLADDERBOTTOM(open_, high_, low_, close_)
# CDLLONGLEGGEDDOJI - Long Legged Doji
data['cdllongleggeddoji'] = ta.CDLLONGLEGGEDDOJI(open_, high_, low_, close_)
# CDLLONGLINE - Long Line Candle
data['cdllongline'] = ta.CDLLONGLINE(open_, high_, low_, close_)
# CDLMARUBOZU - Marubozu
data['cdlmarubozu'] = ta.CDLMARUBOZU(open_, high_, low_, close_)
# CDLMATCHINGLOW - Matching Low
data['cdlmatchinglow'] = ta.CDLMATCHINGLOW(open_, high_, low_, close_)
# CDLMATHOLD - Mat Hold
data['cdlmathold'] = ta.CDLMATHOLD(open_, high_, low_, close_, penetration=0)
# CDLMORNINGDOJISTAR - Morning Doji Star
data['cdlmorningdojistar'] = ta.CDLMORNINGDOJISTAR(open_, high_, low_, close_, penetration=0)
# CDLMORNINGSTAR - Morning Star
data['cdlmorningstar'] = ta.CDLMORNINGSTAR(open_, high_, low_, close_, penetration=0)
# CDLONNECK - On-Neck Pattern
data['cdlonneck'] = ta.CDLONNECK(open_, high_, low_, close_)
# CDLPIERCING - Piercing Pattern
data['cdlpiercing'] = ta.CDLPIERCING(open_, high_, low_, close_)
# CDLRICKSHAWMAN - Rickshaw Man
data['cdlrickshawman'] = ta.CDLRICKSHAWMAN(open_, high_, low_, close_)
# CDLRISEFALL3METHODS - Rising/Falling Three Methods
data['cdlrisefall3methods'] = ta.CDLRISEFALL3METHODS(open_, high_, low_, close_)
# CDLSEPARATINGLINES - Separating Lines
data['cdlseparatinglines'] = ta.CDLSEPARATINGLINES(open_, high_, low_, close_)
# CDLSHOOTINGSTAR - Shooting Star
data['cdlshootingstar'] = ta.CDLSHOOTINGSTAR(open_, high_, low_, close_)
# CDLSHORTLINE - Short Line Candle
data['cdlshortline'] = ta.CDLSHORTLINE(open_, high_, low_, close_)
# CDLSPINNINGTOP - Spinning Top
data['cdlspinningtop'] = ta.CDLSPINNINGTOP(open_, high_, low_, close_)
# CDLSTALLEDPATTERN - Stalled Pattern
data['cdlstalledpattern'] = ta.CDLSTALLEDPATTERN(open_, high_, low_, close_)
# CDLSTICKSANDWICH - Stick Sandwich
data['cdlsticksandwich'] = ta.CDLSTICKSANDWICH(open_, high_, low_, close_)
# CDLTAKURI - Takuri (Dragonfly Doji with very long lower shadow)
data['cdltakuri'] = ta.CDLTAKURI(open_, high_, low_, close_)
# CDLTASUKIGAP - Tasuki Gap
data['cdltasukigap'] = ta.CDLTASUKIGAP(open_, high_, low_, close_)
# CDLTHRUSTING - Thrusting Pattern
data['cdlthrusting'] = ta.CDLTHRUSTING(open_, high_, low_, close_)
# CDLTRISTAR - Tristar Pattern
data['cdltristar'] = ta.CDLTRISTAR(open_, high_, low_, close_)
# CDLUNIQUE3RIVER - Unique 3 River
data['cdlunique3river'] = ta.CDLUNIQUE3RIVER(open_, high_, low_, close_)
# CDLUPSIDEGAP2CROWS - Upside Gap Two Crows
data['cdlupsidegap2crows'] = ta.CDLUPSIDEGAP2CROWS(open_, high_, low_, close_)
# CDLXSIDEGAP3METHODS - Upside/Downside Gap Three Methods
data['cdlxsidegap3methods'] = ta.CDLXSIDEGAP3METHODS(open_, high_, low_, close_)


In [11]:
data.shape

(102988, 139)

In [12]:
## drop rows with NANs
data.dropna(axis=0, inplace=True)
data = data.reset_index(drop=True)
print('shape of data: ', data.shape)
data

shape of data:  (102559, 139)


Unnamed: 0,open,high,low,close,upperband,middleband,lowerband,dema,ema,ht,...,cdlspinningtop,cdlstalledpattern,cdlsticksandwich,cdltakuri,cdltasukigap,cdlthrusting,cdltristar,cdlunique3river,cdlupsidegap2crows,cdlxsidegap3methods
0,0.76660,0.76740,0.76620,0.76690,0.772125,0.767171,0.762217,0.765635,0.766812,0.766752,...,100,0,0,0,0,0,0,0,0,0
1,0.76700,0.76740,0.76680,0.76720,0.771836,0.767046,0.762255,0.765789,0.766843,0.766669,...,0,0,0,0,0,0,0,0,0,0
2,0.76720,0.76720,0.76630,0.76710,0.771420,0.766892,0.762363,0.765913,0.766864,0.766620,...,0,0,0,100,0,0,0,0,0,0
3,0.76720,0.76750,0.76620,0.76660,0.771182,0.766775,0.762368,0.765948,0.766843,0.766482,...,0,0,0,0,0,0,0,0,0,0
4,0.76660,0.76740,0.76630,0.76700,0.770581,0.766596,0.762610,0.766044,0.766855,0.766334,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102554,0.77218,0.77353,0.77122,0.77186,0.792204,0.781209,0.770214,0.773963,0.780610,0.786190,...,-100,0,0,0,0,0,0,0,0,0
102555,0.77190,0.77235,0.77064,0.77088,0.791442,0.780397,0.769351,0.773000,0.779832,0.785098,...,0,0,0,0,0,0,0,0,0,0
102556,0.77092,0.77152,0.76924,0.76947,0.791248,0.779687,0.768125,0.771955,0.779003,0.784054,...,0,0,0,0,0,0,0,0,0,0
102557,0.76944,0.77106,0.76926,0.77038,0.790680,0.778986,0.767292,0.771195,0.778313,0.783281,...,0,0,0,0,0,0,0,0,0,0


In [13]:
data.to_csv('AUD_USD_H1_withindicators.csv', index=False)