# Introduction


In [1]:
# Append path for MyUtils libraries
import sys
sys.path.append('../..')

####################################################################################
# Data handling 
import pandas as pd
from pandas import concat
from pandas.plotting import scatter_matrix
import numpy as np

####################################################################################
# Machine learning
import sklearn
from sklearn import preprocessing
from sklearn import metrics as sk
from sklearn.externals import joblib
from sklearn.model_selection import StratifiedKFold, KFold
import keras
from keras.models import Model, Sequential
from keras.layers import Input, LSTM, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.optimizers import Adam
from keras.constraints import max_norm

####################################################################################
# Visualization
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import dates, ticker
from matplotlib.dates import (MONDAY, DateFormatter, MonthLocator, WeekdayLocator, date2num)
import matplotlib as mpl
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.graph_objs import *
from plotly.tools import FigureFactory as FF
import plotly.tools as tls

####################################################################################
# TA-Lib: 
import talib

####################################################################################
# Other utilities
import datetime
import time
import os
import sys
import math
from enum import Enum

Using TensorFlow backend.


In [2]:
# Import the project-local helper module; the first cell appends '../..' to sys.path so it can be found.
# Every `utils.*` call in the rest of the notebook resolves to this module.
import common.MyUtils as utils
print('Ready!')

Ready!


In [3]:
# Load the instrument's data feed (EURUSD, 15-minute bars, ';'-separated CSV) through the project helper.
# remove_weekends=True is forwarded to the helper, which reports "Deleting weekends..." while loading.
# NOTE(review): no rows are displayed by this cell; append `df.head()` if a preview is wanted.
df = utils.load_hist('../csv_data/EURUSD_M15.csv', freq='15min', sep=';', remove_weekends=True)

Deleting weekends...


In [4]:
# Apply the zigzag indicator to the last 2000 samples.
# NOTE(review): utils.ZIGZAG is project code; judging by the columns used below, the returned frame
# carries BOLLINGER_HI / BOLLINGER_MA / BOLLINGER_LO and ZIGZAG columns, with non-pivot rows
# presumably holding nan_value (0.0) in ZIGZAG — confirm against MyUtils.py.
zigzag_df, zigzag_x, zigzag_y = utils.ZIGZAG(df[-2000:].copy(), minbars=3, nan_value=0.0)

# Move the original index into a 'TIME' column so rows are addressed by integer position.
_df = zigzag_df.copy().reset_index()
_df.rename(columns={'index':'TIME'}, inplace=True)

# ----- Visualize Bollinger and Zigzag

# Keep only the zigzag points (ZIGZAG > 0); their positional index labels are preserved.
_dfz = _df[_df.ZIGZAG > 0].copy()

# One trace per Bollinger band, built from a (column, legend name) table instead of three
# copy-pasted constructors. Everything is reached through the explicit `go` alias so the
# cell does not depend on names injected by `from plotly.graph_objs import *`.
bollinger_bands = [
    ('BOLLINGER_HI', 'UpperBand'),
    ('BOLLINGER_MA', 'MiddleBand'),
    ('BOLLINGER_LO', 'LowerBand'),
]
line_bbup, line_bbmid, line_bbdn = [
    go.Scatter(
        x=_df.index.values,
        y=_df[band_column],
        name=band_name,
        line=go.scatter.Line(color='red', width=1)
    )
    for band_column, band_name in bollinger_bands
]

# Zigzag trace: x uses the pivots' positional labels so it lines up with the band traces.
line_zigzag = go.Scatter(
    x=_dfz.index.values,
    y=_dfz.ZIGZAG,
    name='zigzag',
    line=go.scatter.Line(color='blue', width=1))

# draw them
py.iplot([line_bbup, line_bbmid, line_bbdn, line_zigzag], filename='simple_ohlc')

In [21]:
# Work on a copy of the zigzag pivot rows so that the statistics columns added in the
# following cells (for example ZIGDIFF, the difference between consecutive zigzags)
# do not modify _dfz.

_dfz_stat = _dfz.copy()
_dfz_stat.head(20)


Unnamed: 0,TIME,BOLLINGER_HI,BOLLINGER_MA,BOLLINGER_LO,ZIGZAG,ACTION
0,2019-03-01 09:00:00,1.137683,1.137077,1.136472,1.13706,no-action
6,2019-03-01 10:30:00,1.137937,1.136949,1.135961,1.13533,low
32,2019-03-01 17:00:00,1.139596,1.138249,1.136902,1.14088,high
49,2019-03-01 21:15:00,1.139944,1.137537,1.135131,1.13544,low
64,2019-03-04 01:00:00,1.137952,1.136541,1.135131,1.13813,high
93,2019-03-04 08:15:00,1.137019,1.136533,1.136047,1.13606,low
96,2019-03-04 09:00:00,1.136888,1.136491,1.136095,1.13692,high
130,2019-03-04 17:30:00,1.134979,1.133361,1.131743,1.1309,low
152,2019-03-04 23:00:00,1.134186,1.133146,1.132107,1.13429,high
196,2019-03-05 10:00:00,1.133533,1.132785,1.132038,1.1316,low


In [22]:
# Size of each zigzag leg: this pivot's price minus the previous pivot's price.
# The first pivot has no predecessor, so its ZIGDIFF is NaN.
_dfz_stat['ZIGDIFF'] = _dfz_stat['ZIGZAG'].diff()
_dfz_stat.head(20)

Unnamed: 0,TIME,BOLLINGER_HI,BOLLINGER_MA,BOLLINGER_LO,ZIGZAG,ACTION,ZIGDIFF
0,2019-03-01 09:00:00,1.137683,1.137077,1.136472,1.13706,no-action,
6,2019-03-01 10:30:00,1.137937,1.136949,1.135961,1.13533,low,-0.00173
32,2019-03-01 17:00:00,1.139596,1.138249,1.136902,1.14088,high,0.00555
49,2019-03-01 21:15:00,1.139944,1.137537,1.135131,1.13544,low,-0.00544
64,2019-03-04 01:00:00,1.137952,1.136541,1.135131,1.13813,high,0.00269
93,2019-03-04 08:15:00,1.137019,1.136533,1.136047,1.13606,low,-0.00207
96,2019-03-04 09:00:00,1.136888,1.136491,1.136095,1.13692,high,0.00086
130,2019-03-04 17:30:00,1.134979,1.133361,1.131743,1.1309,low,-0.00602
152,2019-03-04 23:00:00,1.134186,1.133146,1.132107,1.13429,high,0.00339
196,2019-03-05 10:00:00,1.133533,1.132785,1.132038,1.1316,low,-0.00269


In [23]:
# Leg ratios: RETRACEMENT = this leg / previous leg, PROJECTION = this leg / the leg two pivots back.
# These are signed ratios, not values bounded to 0-1: in the rows displayed below RETRACEMENT is
# negative (consecutive legs point in opposite directions) and both ratios exceed 1 in magnitude.
# The leading rows are NaN because shift() has no earlier leg to divide by.
_dfz_stat['RETRACEMENT'] = _dfz_stat.ZIGDIFF / _dfz_stat.ZIGDIFF.shift(1)
_dfz_stat['PROJECTION'] = _dfz_stat.ZIGDIFF / _dfz_stat.ZIGDIFF.shift(2)
_dfz_stat.head(20)

Unnamed: 0,TIME,BOLLINGER_HI,BOLLINGER_MA,BOLLINGER_LO,ZIGZAG,ACTION,ZIGDIFF,RETRACEMENT,PROJECTION
0,2019-03-01 09:00:00,1.137683,1.137077,1.136472,1.13706,no-action,,,
6,2019-03-01 10:30:00,1.137937,1.136949,1.135961,1.13533,low,-0.00173,,
32,2019-03-01 17:00:00,1.139596,1.138249,1.136902,1.14088,high,0.00555,-3.208092,
49,2019-03-01 21:15:00,1.139944,1.137537,1.135131,1.13544,low,-0.00544,-0.98018,3.144509
64,2019-03-04 01:00:00,1.137952,1.136541,1.135131,1.13813,high,0.00269,-0.494485,0.484685
93,2019-03-04 08:15:00,1.137019,1.136533,1.136047,1.13606,low,-0.00207,-0.769517,0.380515
96,2019-03-04 09:00:00,1.136888,1.136491,1.136095,1.13692,high,0.00086,-0.415459,0.319703
130,2019-03-04 17:30:00,1.134979,1.133361,1.131743,1.1309,low,-0.00602,-7.0,2.908213
152,2019-03-04 23:00:00,1.134186,1.133146,1.132107,1.13429,high,0.00339,-0.563123,3.94186
196,2019-03-05 10:00:00,1.133533,1.132785,1.132038,1.1316,low,-0.00269,-0.79351,0.446844


In [24]:
# Flag Fibonacci retracements and extensions with vectorised comparisons instead of a
# row-by-row apply(axis=1), which runs a Python lambda once per row.
#   FIBO_RETR: |RETRACEMENT| lies inside [0.382, 0.618] (both bounds included)
#   FIBO_EXTN: PROJECTION is at least 0.9
# NaN ratios compare as False in both flags, exactly as in the row-wise version.
_dfz_stat['FIBO_RETR'] = _dfz_stat.RETRACEMENT.abs().between(0.382, 0.618)
_dfz_stat['FIBO_EXTN'] = _dfz_stat.PROJECTION >= 0.9
_dfz_stat.head(20)

Unnamed: 0,TIME,BOLLINGER_HI,BOLLINGER_MA,BOLLINGER_LO,ZIGZAG,ACTION,ZIGDIFF,RETRACEMENT,PROJECTION,FIBO_RETR,FIBO_EXTN
0,2019-03-01 09:00:00,1.137683,1.137077,1.136472,1.13706,no-action,,,,False,False
6,2019-03-01 10:30:00,1.137937,1.136949,1.135961,1.13533,low,-0.00173,,,False,False
32,2019-03-01 17:00:00,1.139596,1.138249,1.136902,1.14088,high,0.00555,-3.208092,,False,False
49,2019-03-01 21:15:00,1.139944,1.137537,1.135131,1.13544,low,-0.00544,-0.98018,3.144509,False,True
64,2019-03-04 01:00:00,1.137952,1.136541,1.135131,1.13813,high,0.00269,-0.494485,0.484685,True,False
93,2019-03-04 08:15:00,1.137019,1.136533,1.136047,1.13606,low,-0.00207,-0.769517,0.380515,False,False
96,2019-03-04 09:00:00,1.136888,1.136491,1.136095,1.13692,high,0.00086,-0.415459,0.319703,True,False
130,2019-03-04 17:30:00,1.134979,1.133361,1.131743,1.1309,low,-0.00602,-7.0,2.908213,False,True
152,2019-03-04 23:00:00,1.134186,1.133146,1.132107,1.13429,high,0.00339,-0.563123,3.94186,True,True
196,2019-03-05 10:00:00,1.133533,1.132785,1.132038,1.1316,low,-0.00269,-0.79351,0.446844,False,False


In [28]:
# add training watermarks
# RALLY_START is True on a pivot when the NEXT pivot is a Fibonacci retracement (FIBO_RETR one
# row ahead) and the pivot after that is an extension (FIBO_EXTN two rows ahead).
# Because it reads later rows, it can only serve as a training label, not as a live input.
# NOTE(review): shift(-n) leaves NaN in the last rows, so the operands are no longer plain bool
# columns; check that `&` still yields False there on the pandas version in use.
_dfz_stat['RALLY_START'] = (_dfz_stat.FIBO_RETR.shift(-1) & _dfz_stat.FIBO_EXTN.shift(-2))
_dfz_stat.head(20)

Unnamed: 0,TIME,BOLLINGER_HI,BOLLINGER_MA,BOLLINGER_LO,ZIGZAG,ACTION,ZIGDIFF,RETRACEMENT,PROJECTION,FIBO_RETR,FIBO_EXTN,RALLY_START
0,2019-03-01 09:00:00,1.137683,1.137077,1.136472,1.13706,no-action,,,,False,False,False
6,2019-03-01 10:30:00,1.137937,1.136949,1.135961,1.13533,low,-0.00173,,,False,False,False
32,2019-03-01 17:00:00,1.139596,1.138249,1.136902,1.14088,high,0.00555,-3.208092,,False,False,False
49,2019-03-01 21:15:00,1.139944,1.137537,1.135131,1.13544,low,-0.00544,-0.98018,3.144509,False,True,False
64,2019-03-04 01:00:00,1.137952,1.136541,1.135131,1.13813,high,0.00269,-0.494485,0.484685,True,False,False
93,2019-03-04 08:15:00,1.137019,1.136533,1.136047,1.13606,low,-0.00207,-0.769517,0.380515,False,False,True
96,2019-03-04 09:00:00,1.136888,1.136491,1.136095,1.13692,high,0.00086,-0.415459,0.319703,True,False,False
130,2019-03-04 17:30:00,1.134979,1.133361,1.131743,1.1309,low,-0.00602,-7.0,2.908213,False,True,False
152,2019-03-04 23:00:00,1.134186,1.133146,1.132107,1.13429,high,0.00339,-0.563123,3.94186,True,True,False
196,2019-03-05 10:00:00,1.133533,1.132785,1.132038,1.1316,low,-0.00269,-0.79351,0.446844,False,False,False


In [35]:
# Index labels of the pivots flagged as the start of a rally
rallies = _dfz_stat.loc[_dfz_stat['RALLY_START'] == True].index
rallies

Int64Index([93, 1344, 1499, 1764], dtype='int64')

In [None]:
# NOTE(review): `maxs` is not assigned in any cell visible in this notebook; on a fresh kernel this raises NameError unless it is defined elsewhere — confirm where it comes from.
maxs      

In [None]:
# builds decreasing max
# Keep the entries of `maxs` that form a non-increasing staircase: an entry is kept when it is
# at least as large as everything from its own position onwards and, for every entry after the
# first, no larger than everything before it.
# The tests compare the VALUE maxs[x]; the earlier version compared the loop index x itself
# against the prices and appended mins[x] in the first branch, so it never selected by price.
# NOTE(review): `maxs` is not defined in the visible cells; assumed to be a positionally
# indexable sequence (list / numpy array) — confirm.
# NOTE(review): range(len(maxs)-1) leaves the final entry out, as the earlier loop did — confirm
# that this is intended.
dec_max = []
for x in range(len(maxs)-1):
  current = maxs[x]
  tops_remainder = current >= max(maxs[x:])
  below_previous = x == 0 or current <= min(maxs[:x])
  if tops_remainder and below_previous:
    dec_max.append(current)

dec_max



As said, this model will predict an estimate of the High and Low prices for the next session. To achieve this, it predicts the price movement relative to the current close price. Knowing the current close price and the predicted movement, the absolute High and Low prices can then be recovered.

So, both outputs could be expressed with these equations:

PREDICTION_HIGH(t+1) = PRICE_HIGH(t+1) - CLOSE(t)

PREDICTION_LOW(t+1) = PRICE_LOW(t+1) - CLOSE(t)


In [None]:
# function to generate predictions as explained above
def define_outputs(df):
  """Add the two training targets to `df` in place and return that same frame.

  OUT_HIGH and OUT_LOW are the row's HIGH and LOW minus the previous row's CLOSE,
  so the first row (no previous close) receives NaN in both columns.

  Args:
    df: frame with HIGH, LOW and CLOSE columns; it is modified in place.

  Returns:
    The same `df` object, now carrying OUT_HIGH and OUT_LOW.
  """
  previous_close = df['CLOSE'].shift(1)
  for target, source in (('OUT_HIGH', 'HIGH'), ('OUT_LOW', 'LOW')):
    df[target] = df[source] - previous_close
  return df

# Add both outputs to df dataframe and check first rows (also remove NaNs)
# define_outputs() writes OUT_HIGH / OUT_LOW into df itself; dropna(inplace=True) then removes
# every row containing a NaN, which includes the first row (its shifted close is missing).
# NOTE(review): both steps modify df in place, so re-running this cell recomputes the targets on
# the already-trimmed frame and drops one more leading row each time.
df = define_outputs(df)
df.dropna(inplace=True)
df.head()

In [None]:
# Visualization of all columns: input features and target predictions
# One small plot per column, laid out on a grid 4 plots wide.
# The row count is an integer ceiling division: plt.subplot() needs integer grid dimensions,
# and `df.shape[1] / 4` is a float under Python 3.
n_cols = 4
rows = (df.shape[1] + n_cols - 1) // n_cols
plt.figure(figsize = (12,50))
# enumerate gives the 1-based subplot slot directly and also works with repeated column names
for i, c in enumerate(df.columns, start=1):
  plt.subplot(rows, n_cols, i)
  plt.plot(df[c])
  plt.legend([c])
  

### Data preprocessing

This model will predict price movement one timestep ahead, from a loopback window of the last 4 timesteps: 

(t-3), (t-2), (t-1), (t) ==> (t+1)

LSTM cells require input features and target outputs to be in a proper format:

- Input features shape: (num_samples, num_loopback_timesteps, num_input_features)
- Output target shape: (num_outputs * num_prediction_timesteps)

To adapt data contained in df dataframe into these sets, I use function 'series_to_supervised' contained in python file 'MyUtils.py'. You can check how it works there.


In [None]:
# Setup: parameters handed to the project helper utils.series_to_supervised

# Num outputs to predict (the OUT_HIGH and OUT_LOW columns added to df earlier)
num_outputs = 2

# Num input features: every df column except the output columns
num_inputs = len(df.columns) - num_outputs

# Num loopback timesteps: (t-3), (t-2), (t-1), (t)
n_in = 4

# Num timesteps to predict: (t+1)
n_out = 1

# Flag to remove NaN values from the df dataframe
dropnan = True

# Run the preprocessor and get a new dataframe 'df_nn'
# NOTE(review): later cells read columns such as 'CLOSE(t)' and 'OUT_HIGH(t+1)' from df_nn, so the
# helper presumably emits one column per feature and timestep — confirm in MyUtils.py.
df_nn = utils.series_to_supervised(df, num_inputs, num_outputs, n_in, n_out, dropnan)

# Check the result
print(df_nn.shape)
df_nn.head()

In [None]:
# DATA DEBUGGING:
# Use an auxiliary temporary dataframe '_df' (first 10 rows of df_nn) to check that the target
# outputs were calculated correctly: 'next_high' = CLOSE(t) + OUT_HIGH(t+1) should equal the
# 'HIGH(t)' value of the following row, i.e. HIGH shifted one timestep into the future.
# NOTE(review): the name '_df' was already used for the zigzag frame earlier in the notebook
# and is overwritten here.
_df = df_nn[['HIGH(t-1)','HIGH(t)','LOW(t-1)','LOW(t)','CLOSE(t-1)', 'CLOSE(t)','OUT_HIGH(t+1)']].head(10).copy()
_df['next_high'] = _df['CLOSE(t)']+ _df['OUT_HIGH(t+1)']
_df

### Data normalization

A common preprocessing step for neural networks is data normalization. In this case, as LSTM cells use a 'tanh' activation, I will normalize all features to the range [-1,+1].

I will save the scaler for further use when de-normalization process is required later on model validation. I use 'MinMaxScaler' from Scikit-learn library. For this purpose I use 'normalize_data' function from MyUtils.py file:

In [None]:
#### Data normalization. Returns a new normalized dataframe (dfnorm) and the fitted scaler.
# feat_range=(-1,1) requests the [-1,+1] range; csv_to_save=None passes no CSV path, while
# scaler_to_save gives the path where the helper is asked to persist the scaler for later
# de-normalization.
dfnorm, scaler = utils.normalize_data(df_nn, feat_range = (-1,1), csv_to_save=None, scaler_to_save='../scaler_data/scaler.data')
# check result
dfnorm.describe()

## 3 RNN model construction

First of all, I will use Keras as a front end over TensorFlow to build a stacked LSTM network.

I use function 'build_lstm_net' in MyUtils.py file (check it there if curious). It requires different arguments: num_inputs, num_outputs, loopback and predicted timesteps, number of LSTM layers, ...

First, I will execute a cross-validation phase, using 'KFold' from Scikit-learn, to get a first impression of the model. I'll generate 5 different models.


### Cross validation

dfnorm dataframe is used to get train-test in-out pairs. LSTM model is:

- 3 stacked LSTM layers with 64 neurons each.
- 1 Dense hidden layer with 128 neurons.
- 1 Dense output layer with 2 neurons (one per output)
- Dropout after each layer to improve generalization and avoid overfitting


In [None]:
# split into input (X) and output (Y) variables:
# the first num_inputs*n_in columns of dfnorm are inputs, the remaining columns are targets
X = dfnorm.values[:,:(num_inputs*n_in)]
Y = dfnorm.values[:,(num_inputs*n_in):]
# define 5-fold cross validation test harness
# fix random seed for reproducibility
# NOTE(review): this seeds NumPy (and, via random_state, the fold shuffling) only; the Keras /
# TensorFlow weight initialisation is not seeded here.
seed = 7
np.random.seed(seed)
# NOTE(review): shuffle=True mixes rows from all dates into every fold; for time-ordered data
# this lets a model train on rows that come after its test rows — confirm this is acceptable.
kfold = KFold(n_splits=5, shuffle=True, random_state=seed)
cvscores = []
i=0
for train, test in kfold.split(X, Y):
  # create model: a fresh network per fold, with a per-fold weights file name
  # NOTE(review): the positional 3,2,64,128 presumably select layer counts and layer sizes — confirm in MyUtils.py
  model,callbacks = utils.build_lstm_net(num_inputs, n_in, num_outputs, n_out, 3,2,64,128,'weights_0412_m'+str(i)+'.hd5')
  i += 1
  # Fit the model on this fold's training rows, reshaped to (samples, n_in, num_inputs)
  x = X[train]
  x = x.reshape(x.shape[0], n_in, num_inputs)
  utils.fit(model, x, Y[train], num_inputs, n_in, num_epochs=5, callbacks=callbacks, batchsize=32)  
  # evaluate the model on this fold's held-out rows
  x = X[test]
  # NOTE(review): this result is overwritten two lines below by model.evaluate(); unless
  # test_evaluation is wanted for a side effect such as printing, one of the two calls is redundant.
  scores = utils.test_evaluation(model, x, Y[test], num_inputs, n_in)
  x = x.reshape(x.shape[0], n_in, num_inputs)
  scores = model.evaluate(x, Y[test], verbose=0)
  # NOTE(review): both values are printed multiplied by 100 with a '%' sign; metrics_names[0] is
  # normally the loss, for which a percentage may not be meaningful — confirm.
  print("%s: %.2f%%" % (model.metrics_names[0], scores[0]*100))
  print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
  cvscores.append(scores[1] * 100)
# mean and standard deviation of the second metric across the 5 folds
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

As this model seems to yield good results, I will train it with the whole train-test dataset.

### Model training

In this case, I will use 'dfnorm' dataframe and get 80% (around 88000 samples) for training and 20% (around 22000 samples) for validation. I'll use 'prepare_training_data' and 'fit' functions from MyUtils.py file:

In [None]:
# Prepare data for train-test
# Arguments: the normalized frame, the number of input columns (num_inputs * n_in) and 0.8,
# the training share described in the text above (80% train / 20% validation).
x_train,y_train,x_test,y_test = utils.prepare_training_data(dfnorm, num_inputs * n_in, 0.8)
# Print the shapes of both splits as a sanity check
print('x_train:', x_train.shape, 'y_train', y_train.shape)
print('x_test:', x_test.shape, 'y_test', y_test.shape)


In [None]:
# Build the RNN model with the same positional configuration (3,2,64,128) as the cross-validation cell.
# The last argument is a weights file path.
# NOTE(review): this rebinds `model`, replacing the last model built in the cross-validation loop.
model,callbacks = utils.build_lstm_net(num_inputs, n_in, num_outputs, n_out, 3,2,64,128,'../hd5_data/weightsH4_3.2.64.128.hd5')

In [None]:
# Setup: training parameters handed to the project helper utils.fit

# Training epochs
num_epochs = 30
# Batch size
batchsize = 32
# Ratio validation-training (20%)
val_ratio = 0.2
# Enable data shuffling
shuffle = True
# Enable result plotting
plot_results = True
# Start training; the helper's return value is kept in `result`
# NOTE(review): the arguments after n_in are passed positionally, so their order must match the
# signature of utils.fit in MyUtils.py — confirm.
result = utils.fit(model, x_train, y_train, num_inputs, n_in, num_epochs, callbacks, batchsize, val_ratio, shuffle, plot_results, verbose=2)

In [None]:
# Save model weights on disk, as HDF5 file
# NOTE(review): this is the same path that was passed to utils.build_lstm_net when the model was
# built; if the callbacks returned there also write to it, this call replaces that file — confirm.
model.save_weights('../hd5_data/weightsH4_3.2.64.128.hd5')

### Model verification

Once trained, let's check its behaviour with new data. Function 'test_rmse' in MyUtils.py executes the test and returns the RMSE result. A callback 'out_builder' is needed to de-normalize data to calculate RMSE.

In [None]:
# Positions, inside a df_nn row, of the columns that the 'out_builder' callback reads
ih, il, ic = (
  df_nn.columns.get_loc(column_name)
  for column_name in ('OUT_HIGH(t+1)', 'OUT_LOW(t+1)', 'CLOSE(t)')
)

# Callback handed to utils.test_rmse to turn a row into absolute [high, low] prices
def out_builder(xy,xyhat):
  """Return ([high, low] from `xy`, [high, low] from `xyhat`) as two numpy arrays.

  For each row the OUT_HIGH(t+1) and OUT_LOW(t+1) entries are added to the CLOSE(t) entry.
  NOTE(review): presumably `xy` is the row holding the targets and `xyhat` the row holding the
  predictions, both already de-normalized by the caller — confirm in utils.test_rmse.
  """
  def _high_low_prices(row):
    return np.asarray([row[ih]+row[ic], row[il]+row[ic]])
  return _high_low_prices(xy), _high_low_prices(xyhat)

# Executes test process and visualizes RMSE result
scores, targets, preds, rmse = utils.test_rmse(model, x_test, y_test, num_inputs, n_in, num_outputs, n_out, scaler, out_builder, True)

In [None]:
# Dataframe update: collect test targets, predictions and errors into one frame

# Turn the sequences returned by utils.test_rmse into numpy arrays
atgt, apreds, armse = (np.asarray(sequence) for sequence in (targets, preds, rmse))

# 'df_predict' starts as a copy of the df_nn rows that come after the training rows
df_predict = df_nn.iloc[x_train.shape[0]:].copy()

# Predictions: column 0 is the high, column 1 is the low
df_predict['PREDICT_HIGH'] = apreds[:, 0]
df_predict['PREDICT_LOW'] = apreds[:, 1]

# Targets rebuilt from the frame itself: close plus the stored movement
close_now = df_predict['CLOSE(t)']
df_predict['TARGET_HIGH'] = close_now + df_predict['OUT_HIGH(t+1)']
df_predict['TARGET_LOW'] = close_now + df_predict['OUT_LOW(t+1)']

# Data-debugging columns: the targets as reported by utils.test_rmse, for comparison
df_predict['TARGET_HIGH_CHECK'] = atgt[:, 0]
df_predict['TARGET_LOW_CHECK'] = atgt[:, 1]

# RMSE error per row
df_predict['RMSE'] = armse

# Visualize first rows for visual debugging
df_predict.head()

Now I carry out a visual inspection of the results obtained. I can change the plotted range to zoom in or out.

Target range (real range between high and low prices) is coloured in grey, while predicted range (area between predicted high and low) is coloured in yellow.

In [None]:
# Plot the last `n_last` rows of df_predict: real high/low range (grey area) against the
# predicted range (yellow area). Change n_last to zoom in or out.
n_last = 10
plt.figure(figsize=(16,16))
i = -n_last
# x is derived from the same window size as the slices, so the lengths always match
x = np.arange(n_last)
y1 = df_predict['TARGET_HIGH'].values[i:]
y2 = df_predict['TARGET_LOW'].values[i:]
y3 = df_predict['PREDICT_HIGH'].values[i:]
y4 = df_predict['PREDICT_LOW'].values[i:]
plt.plot(x,y1, color='black')
plt.plot(x,y2, color = 'brown')
plt.plot(x,y3, color = 'red')
plt.plot(x,y4, color = 'green')
plt.legend(['target_hi','target_lo', 'predict_hi', 'predict_lo'])
# alpha must be a number; matplotlib rejects the string '0.5'
plt.fill_between(x,y1,y2, color='grey', alpha=0.5)
plt.fill_between(x,y3,y4, color='yellow', alpha=0.5)


As can be seen, there is a small prediction error. In general, the predicted range between High and Low prices is greater than the real High-Low range.


### Next steps

Once verified that prediction is valid for my purposes, it's time to build a dataframe for the next stage: A3C-LSTM agent.

In this new stage, the agent will require real time market data (bars, quotes, account info and predictions) to start its training process as a RL problem.

Now we are going to build a dataframe (only with bars, targets, predictions) for this next stage:

In [None]:
# split historic data into input (X) and output (Y) variables and executes predictions
# X: first num_inputs*n_in columns of dfnorm; Y: the remaining (target) columns
X = dfnorm.values[:,:(num_inputs*n_in)]
Y = dfnorm.values[:,(num_inputs*n_in):]
# Predict on every row of dfnorm (training rows included), reshaped to (samples, n_in, num_inputs)
# NOTE(review): batch_size=1 runs one sample per step, which is slow on a large frame; a larger
# batch should give the same predictions unless the network is stateful — confirm before changing.
predictions = model.predict(X.reshape(X.shape[0], n_in, num_inputs), batch_size=1)

In [None]:
# Copy dfnorm and overwrite its two target columns with the model's (still normalized) predictions
# NOTE(review): this rebinds 'df_predict', discarding the test-set frame built earlier under the same name.
df_predict = dfnorm.copy()
df_predict['OUT_HIGH(t+1)'] = predictions[:,0]
df_predict['OUT_LOW(t+1)']  = predictions[:,1]
df_predict.tail()

In [None]:
# de-normalize data with the scaler returned by utils.normalize_data
# NOTE(review): the following cells assign df_nn columns into this frame, which relies on the
# helper returning a frame with the same index as df_nn — confirm in MyUtils.py.
df_predict = utils.denormalize_data(df_predict, scaler)
df_predict.tail()

In [None]:
# Add new columns for expected targets (the true movements stored in df_nn)
df_predict['TARGET_HIGH'] = df_nn['OUT_HIGH(t+1)']
df_predict['TARGET_LOW'] = df_nn['OUT_LOW(t+1)']

def _pair_rmse(frame, target_cols, predicted_cols):
  """Row-wise RMSE between two pairs of columns, returned as a numpy array.

  Computes sqrt(mean(squared error)) over the listed columns of each row, which is the value
  that math.sqrt(mean_squared_error(..., multioutput='uniform_average')) gives for one row,
  but in a single vectorised pass instead of one Python call per row.
  Rows containing NaN yield NaN instead of raising.
  """
  errors = frame[list(target_cols)].values - frame[list(predicted_cols)].values
  return np.sqrt(np.mean(np.square(errors), axis=1))

#... calculate RMSE error between targets and predictions
df_predict['RMSE'] = _pair_rmse(df_predict, ['TARGET_HIGH', 'TARGET_LOW'], ['OUT_HIGH(t+1)', 'OUT_LOW(t+1)'])

# add new columns to get targets and predictions as absolute price values
df_predict['PREDICT_HIGH_PRICE'] = df_predict['CLOSE(t)'] + df_predict['OUT_HIGH(t+1)']
df_predict['PREDICT_LOW_PRICE'] = df_predict['CLOSE(t)'] + df_predict['OUT_LOW(t+1)']
df_predict['TARGET_HIGH_PRICE'] = df_predict['CLOSE(t)'] + df_predict['TARGET_HIGH']
df_predict['TARGET_LOW_PRICE'] = df_predict['CLOSE(t)'] + df_predict['TARGET_LOW']

#... and calculate RMSE error using absolute price values
# CLOSE(t) is added to both the target and the prediction, so it cancels in the difference:
# RMSE_PRICE equals RMSE up to floating-point rounding. It is kept because later cells read it.
df_predict['RMSE_PRICE'] = _pair_rmse(df_predict, ['TARGET_HIGH_PRICE', 'TARGET_LOW_PRICE'], ['PREDICT_HIGH_PRICE', 'PREDICT_LOW_PRICE'])

df_predict.head()

In [None]:
# Keep only the columns needed by the next stage: the current bar, the predicted and real
# movements, the absolute predicted/real prices and both error columns
df_predict = df_predict.loc[:, [
  'OPEN(t)', 'HIGH(t)', 'LOW(t)', 'CLOSE(t)',
  'OUT_HIGH(t+1)', 'OUT_LOW(t+1)',
  'TARGET_HIGH', 'TARGET_LOW', 'RMSE',
  'PREDICT_HIGH_PRICE', 'PREDICT_LOW_PRICE',
  'TARGET_HIGH_PRICE', 'TARGET_LOW_PRICE', 'RMSE_PRICE',
]]
df_predict.tail()

In [None]:
# Show targets (grey area) vs predictions (yellow area)
plt.figure(figsize=(16,16))
# Zoom in range to plot (can be changed): the last -i rows of df_predict
i=-20
x = np.arange(-i)
y1 = df_predict['TARGET_HIGH_PRICE'].values[i:]
y2 = df_predict['TARGET_LOW_PRICE'].values[i:]
y3 = df_predict['PREDICT_HIGH_PRICE'].values[i:]
y4 = df_predict['PREDICT_LOW_PRICE'].values[i:]
plt.plot(x,y1, color='black')
plt.plot(x,y2, color = 'brown')
plt.plot(x,y3, color = 'red')
plt.plot(x,y4, color = 'green')
plt.legend(['target_hi','target_lo', 'predict_hi', 'predict_lo'])
# alpha must be a number; matplotlib rejects the string '0.5'
plt.fill_between(x,y1,y2, color='grey', alpha=0.5)
plt.fill_between(x,y3,y4, color='yellow', alpha=0.5)

In [None]:
# Show RMSE errors side by side: movement-based RMSE (left, blue) and price-based RMSE (right, red)
# NOTE(review): RMSE_PRICE is computed after adding the same CLOSE(t) to targets and predictions,
# so the two panels are expected to look the same.
plt.subplot(1,2,1)
plt.plot(df_predict['RMSE'], color='b')
plt.legend(['RMSE_prediction'])
plt.subplot(1,2,2)
plt.plot(df_predict['RMSE_PRICE'], color = 'r')
plt.legend(['RMSE_price'])

In [None]:
# Save file on disk as a ';'-separated CSV for the next stage
# NOTE(review): the file name says H4, while the feed loaded at the top of the notebook is
# EURUSD_M15.csv (15-minute bars) — confirm the intended timeframe / file name.
df_predict.to_csv('../csv_data/EURUSD_H4_predictions.csv', sep=';')
print('File saved!')