<a href="https://colab.research.google.com/github/kconstable/crypto-ensemble-model-predictions/blob/main/models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Learning LSTM Models
This notebook contains code to train the LSTM model.  It takes the data output from the market_data notebook as input to the model.  The data is transformed,scaled and partitioned in preparation for training.  Visualizations are created to evaluate the model training performance.  The model hyper-parameters are optimized. The final output of this notebook are trained and optimized LSTM models

## Import Libraries 

In [2]:
# install keras tuner
!pip install -q -U keras-tuner

[?25l[K     |███▍                            | 10 kB 22.6 MB/s eta 0:00:01[K     |██████▊                         | 20 kB 25.7 MB/s eta 0:00:01[K     |██████████                      | 30 kB 12.5 MB/s eta 0:00:01[K     |█████████████▍                  | 40 kB 9.7 MB/s eta 0:00:01[K     |████████████████▊               | 51 kB 5.2 MB/s eta 0:00:01[K     |████████████████████            | 61 kB 5.6 MB/s eta 0:00:01[K     |███████████████████████▍        | 71 kB 6.1 MB/s eta 0:00:01[K     |██████████████████████████▊     | 81 kB 6.8 MB/s eta 0:00:01[K     |██████████████████████████████  | 92 kB 6.5 MB/s eta 0:00:01[K     |████████████████████████████████| 98 kB 3.7 MB/s 
[?25h

In [3]:
# install the prophet library
import sys
!{sys.executable} -m pip install fbprophet



In [None]:
# deep learning /RNN
import tensorflow as tf
from keras import backend as K
from tensorflow import keras
from keras.models import Sequential,backend
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
import keras_tuner as kt
from kerastuner.tuners import Hyperband
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

# stats, data structures and plotting
import random as rn
import math
import numpy as np
import pandas as pd
import timeit
import datetime
from datetime import timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# import plotly.express as px
from statsmodels.tsa.stattools import adfuller
from scipy import stats

# File operations
import os
import pickle
from google.colab import files
from google.colab import drive
drive.mount('/content/drive')

# install the prophet library
from fbprophet import Prophet

# Set Seeds
seed = 1985
np.random.seed(seed)
rn.seed(seed)
os.environ['PYTHONHASHSEED']=str(seed)

# constants
ticker = 'BTC'
path = '/content/drive/MyDrive/Colab Notebooks/capstone2/data/'
model_path = '/content/drive/MyDrive/Colab Notebooks/capstone2/models/'

In [None]:
# check the collab connection
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Not connected to a GPU


## Load Data
+ Load market data (output from market_data notebook)
+ The data was collected, cleaned and consolidated 
+ Optimal features were selected using RFE

In [5]:
# get the final dataset
df_features = pd.read_pickle(f'{path}{ticker}_market_data_final.pickle')

print(f"\n{ticker}")
print("="*60)
print(f"{ticker}-optimal features: ",df_features.shape)


BTC
BTC-optimal features:  (1363, 44)


In [25]:
# df_all = pd.read_pickle(f'{path}{ticker}_market_data_shifted.pickle')
cols = df_all.columns

Index(['ticker', 'open', 'high', 'low', 'close', 'volume', 'month', 'weekday',
       'futures_open', 'futures_high', 'futures_low', 'futures_close',
       'futures_volume', 'idx_fear_greed', 'idx_classification', 'ETH', 'DOGE',
       'LTC', 'weighted_sentiment', 'ma_sentiment_10', 'ma_sentiment_20',
       'ma_sentiment_40', 'b-upper-ma_sentiment_10',
       'b-middle-ma_sentiment_10', 'b-lower-ma_sentiment_10', 'news_count',
       'sentiment_title', 'ma_news_count', 'ma_sentiment_title',
       'b-upper-ma_news_count', 'b-middle-ma_news_count',
       'b-lower-ma_news_count', 'b-upper-ma_sentiment_title',
       'b-middle-ma_sentiment_title', 'b-lower-ma_sentiment_title',
       'google_trends', 'b-upper-google_trends', 'b-middle-google_trends',
       'b-lower-google_trends', 'USDEUR', 'USDJPY', 'USDGBP', 'GLD', 'OIL',
       'BOIL', 'VXX', 'SPY', 'XLE', 'QQQ', 'b-upper', 'b-middle', 'b-lower',
       'rsi', 'stoch_high', 'stoch_low', 'stoch_K', 'stoch_D', 'macd',
       'macd_si

# Data Preprocessing for LSTM Deep Learning

### Prepare the Data 
+ Reverse the date-time index
+ Convert the date-time index to an integer index
+ Convert to numpy arrays

In [35]:
def prepare_data(df,y='close',features=[]):
  """
  Filter, scale and convert dataframe data to numpy arrays

  Inputs: 
    df       => A dataframe of observations with features and y-labels
    y        => The name of the column that is the truth labels
    features => A list of features.  Used to subset columns

  Outputs:
    scaled_y => numpy array of the y-label data
    scaled_x => numpy array of the training features

  """

  # reverse the index such that dates are in chronological order
  df = df.iloc[::-1]

  # Subset features, get the y-label values
  df_y = df[y]
  df_X = df[features]

  # replace the date index with an integer index
  df_X.reset_index(drop=True,inplace=True)

  # convert to numpay arrays
  array_X = np.array(df_X)
  array_y = np.array(df_y).reshape(-1,1)

  # print the output
  print("\nData Preparation")
  print("="*60)
  print(f"=> {len(features)} Features")
  print(f"=> Input Dimensions :{array_X.shape}")
  print(f"=> Output Dimensions:{array_y.shape}")
  print("\n")

  return array_y, array_X

### Split the Data into Train/Test Sets
+ Time series data cannot be split randomly like  observational data because the order is important.  
+ The data is split into train/test based on dates
+ The close price at time t is the target variable, while the close price at t-1 is also included as a feature

In [36]:
def split_train_test(X,idx_close,train_pct=0.80):
  """
  Split the dataset into train/test based on input train percentage
  Input:
    X: time-series dataset of all features in a numpy array
    idx_close:  The index position of the target variable in the dataset
    train_pct: The percent of data allocated to train.  The remainder is test
  Output:
    Two numpay arrays, train_X and test_X
  """

  # Train
  train_num = math.ceil(X.shape[0]*train_pct)
  train_X = X[0:train_num,:]
  train_str = f'{round(train_X.shape[0]/X.shape[0],3)*100}%'

  # Test
  # test_X = X[train_num-n_steps:,:]  # if you use this, need to add n_steps as a param!
  test_X = X[train_num:,:]
  test_str =f'{round(test_X.shape[0]/X.shape[0],3)*100}%'

  # print the results
  print("\nSplit Data (X)")
  print("="*60)
  print(f'Data  :100.0% {X.shape}')
  print(f'Train :{train_str:>6} {train_X.shape}')
  print(f'Test  :{test_str:>6} {test_X.shape}')
  print('\n')

  # create the line plots
  plot_train_test_data(train_X,test_X,idx_close)

  return train_X,test_X


In [37]:
def plot_train_test_data(train_x,test_x,idx_close):
  """
  Plots the close price by date, showing the train/test split
  Input:
    train_x: the training dataset
    test_x:  the test dataset
    idx_close: the index position of the close price
  Output: 
     A time-series plot of close price
  """

  ts0 = list(range(0,train_x.shape[0]))
  ts1 = list(range(train_x.shape[0],train_x.shape[0]+test_x.shape[0]))

  fig = go.Figure()
  fig.add_trace(go.Scatter(x=ts0,y=train_x[:,idx_close],name='Train',line_color = '#a8b8d0'))
  fig.add_trace(go.Scatter(x=ts1,y=test_x[:,idx_close],name='Test',line_color= 'orange'))
  fig.add_shape(type ='rect',
                  x0=ts1[0],x1=ts1[-1],
                  y0=0,y1=1,
                  line=dict(color='#F6B28D'),
                  fillcolor='#F6B28D',
                  opacity=0.1)
  fig.add_shape(type ='rect',
                  x0=ts0[0],x1=ts0[-1],
                  y0=0,y1=1,
                  line=dict(color='#7BA1AA'),
                  fillcolor='#7BA1AA',
                  opacity=0.1)
  fig.update_layout(title = 'Train/Test Datasets (Scaled)',
                      template="plotly_white",
                      yaxis_title='Closing Price (Scaled)',
                      xaxis_title='time steps',
                      width = 600,
                      height =600)
  fig.update_shapes(dict(xref='x',yref='paper'))
  fig.show()

### Partition the Data

In [38]:
def create_partitions(data,idx_close,n_steps,n_predict,visualize=False):
  """
  This function partitions the train/test data into batches with times-step windows for training

  Each batch consists of n_steps of training data, and n_predict steps of label data
  The function outputs an x array [samples, time steps, features] and a y array [samples, time steps]

  Inputs:
    data      => train or test array
    idx_close => the position of y value in the data
    n_steps   => the number of time steps in each training batch
    n_predict => the number of time steps that will be predicted
    visualize => boolean, will plot a visual of training/prediction windows
  Outputs:
    array(i)  => np.array of batched & partitioned training data with features
    array(p)  => np.array of batched & partitioned y-lables 

  Reference:
    https://www.relataly.com/time-series-forecasting-multi-step-regression-using-neural-networks-with-multiple-outputs-in-python/5800/
  """
  n = data.shape[0]
  window = n_steps + n_predict
  i, p = [],[]

  # print the moving window
  if visualize:
    print("Data Window: I(Input), P(Predict),-(scanned), +(to be scanned)")
    print("="*max(n,100))

  # create the partitions
  for step in range(n_steps, n-n_predict):
    # get the input window + all features

    # train window
    i.append(data[step-n_steps:step,:])

    # get the prediction window + the closing price
    p.append(data[step:step+n_predict,idx_close])

    # print the moving window
    if visualize and step <= 50:
      scanned = n-((step-n_steps)+window)
      print("-"*(step-n_steps),'I'*n_steps,'P'*n_predict,"+"*scanned,sep="")
    
  return np.array(i),np.array(p)

In [39]:
def plot_training_window(x_array,y_array,idx_close,n_steps,n_predict,batch):
  """
  Plots a single training batch showing the train/prediction windows
  Inputs: 
    x_array/y_array => partitioned data from create_partitions
    idx_close       => the position of the y-label (close price)
    n_steps         => the number of time steps in each training batch
    n_predict       => the number of time steps predicted each training batch
    batch           => the batch number to plot
  Outputs:
    A line chart with the train/predict values
  """
  # convert the arrays to dataframes
  # align the x indexes to compare
  df_y = pd.DataFrame(y_array[batch],index=range(n_steps-1,n_predict+n_steps-1),columns=['y'])
  df_x = pd.DataFrame(x_array[batch+1])[idx_close]
  df_x = pd.DataFrame(df_x)
  df_x.columns = ['x']

  # create the plots
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=df_x.index,y=df_x['x'],name='train window',line_color='#a8b8d0',fill='tozeroy'))
  fig.add_trace(go.Scatter(x=df_y.index,y=df_y['y'],name='predict window',line_color='orange',fill='tozeroy'))
  fig.update_layout(template='plotly_white',
                    title='Train/Predict Windows',
                    yaxis_title = 'Closing Price (Scaled)',
                    xaxis_title='Period',
                    width = 700,
                    height = 500)
  fig.show()


### Data Transformations

#### Check for stationarity
+ Deep learning performs better with stationary data where the mean and variance is constant over time.  Market price data is rarely stationary as it includes trending. The time-series data is transformed to make it stationary 
+ The transform_stationary funciton applies data transformations and performs the Augmented Dickey-Fuller (ADF) to test for stationarity
  + null hypothesis: the data is non-stationary
  + alternative: the data is stationary
  + => if the test stat is less than all critical values,  we cannot reject the null, therefore the data is non-stationary

**Reference**
+ [Checking time-series data for stationarity](https://analyzingalpha.com/check-time-series-stationarity-python#augmented-dickey-fuller-adf)

In [40]:
def transform_stationary(df,features_to_transform,transform='log',verbose=False):
  """
  Transform time-series data using a log or boxcox transform.  Calculate the augmented
  dickey-fuller (ADF) test for stationarity after the transform
  Inputs:
    df: a dataframe of features
    features_to_transform: A list of features to apply the transform
    transform: The transform to apply (log, boxbox)
  Output
    Applies the transforms inplace in df
  """
  # transform each column in the features_to_transform list
  for feature in df.columns:
    if feature in features_to_transform:
      # log transform
      if transform=='log':
        df[feature] = df[feature].apply(np.log)

      # boxcox transform  
      elif transform=='boxcox':
        bc,_ = stats.boxcox(df[feature])
        df[feature] = bc

      else:
        print("Transformation not recognized")
        
  if verbose:
    # check the closing price for stationarity using the augmented dicky fuller test
    t_stat, p_value, _, _, critical_values, _  = adfuller(df.close.values, autolag='AIC')
    print('Augmented Dicky Fuller Test for Stationarity')
    print("="*60)
    print(f'ADF Statistic: {t_stat:.2f}')
    for key, value in critical_values.items():
      print('Critial Values:')
      if t_stat < value:
        print(f'   {key}, {value:.2f} => non-stationary')
      else:
        print(f'   {key}, {value:.2f} => stationary')

### Process the Data
+ Select the dataset (BTC,VMW,BLX.TO) for all features or the features selected using RFE
+ Transform the data to be stationary
+ Convert to numpy arrays
+ Scale the data for deep learning
+ Split the data into train/test sets
+ Partition the data into time-series windows
+ Plot the close price split into train/test sets
+ Plot a single time-series train/predict batch

In [41]:
def pre_process(df_features,features_to_transform,n_steps,n_predict,transform='log',train_pct=0.80,visualize=False):
  """
  """

  # Training Config
  df = df_features.copy()

  # index postiion variable (closing price)
  idx_close = df.columns.get_loc("close") 

  # transform the data
  transform_stationary(df,features_to_transform,transform,False)

  # Convert to numpy arrays
  array_y,array_X = prepare_data(df,'close',df.columns)

  # scale the input and outputs
  scaler_X = MinMaxScaler(feature_range=(0,1))
  scaler_y = MinMaxScaler(feature_range=(0,1))
  scaled_X = scaler_X.fit_transform(array_X)
  scaled_y = scaler_y.fit_transform(array_y)

  # split into train, test  # split into train test datasets
  if train_pct < 1.0:
    # split into train, test
    train_x,test_x=split_train_test(scaled_X,idx_close,train_pct)

    # Partition the train/test data into time series windows for training
    # LSTM input format: [samples, time steps, features]
    x_train, y_train = create_partitions(train_x,idx_close,n_steps, n_predict)
    x_test,  y_test  = create_partitions(test_x, idx_close,n_steps, n_predict)

    # Print the results
    print('Train/Test Dimensions')
    print('='*60)
    print("Train Data Dimensions: ","x",x_train.shape,"y",y_train.shape)
    print("Test Data Dimensions : ","x",x_test.shape," y",y_test.shape)

  # don't split into train/test
  else:
    # don't split into train/test
    train_x = scaled_X

    # Partition the train/test data into time series windows for training
    # LSTM input format: [samples, time steps, features]
    x_train, y_train = create_partitions(train_x,idx_close,n_steps, n_predict)

      # Print the results
    print('Train Dimensions')
    print('='*60)
    print("Train Data Dimensions: ","x",x_train.shape,"y",y_train.shape)

  print("\n\nCheck the dimensions of the training windows")
  print("="*60)
  print('The last value in the training batch should match the first value in the predict batch')
  print("Last Training Value:   ",x_train[1][n_steps-1][idx_close])
  print("First Prediction Value:",y_train[0][0])

  # Make sure the train/predict batch windows are aligned
  if visualize:
    plot_training_window(x_train,y_train,idx_close,n_steps,n_predict,seed//10)

  if train_pct < 1.0:
    return x_train, y_train, x_test, y_test, scaler_y,idx_close
  else:
    return x_train, y_train, scaler_y,idx_close
    

# Deep Learning LSTM

### Build the LSTM Model

In [None]:
def create_lstm_model(config):
  """
  Builds an LSTM deep learning model
  Inputs:
    config: A dictionary containing the network topology
    x_train: The train set with all features
    lr: the learning rate for the adam optimizer
  Output:
    An LSTM model built according to config
  """
  # get window size
  n_steps = config['window']['n_steps']
  n_predict = config['window']['n_predict']
  n_features = config['data']['x_train'].shape[2]

  # clear previous models
  backend.clear_session()

  # LSTM Model+ first layer
  model = Sequential(name='LSTM')
  model.add(LSTM(n_steps,return_sequences=True,input_shape=(n_steps,n_features)))

  # add additional layers
  for layer,nodes,ret_seq,drop in config['layers']:
    # add LSTM layers + dropout
    if layer=='lstm':
      model.add(LSTM(nodes,return_sequences =ret_seq))
      if drop is not None:
        model.add(Dropout(drop))

    # Add Dense Layers + dropout
    elif layer =='dense':
      model.add(Dense(nodes))
      if drop is not None:
        model.add(Dropout(drop))

  # add the prediction layer
  model.add(Dense(n_predict))
  
  # compile
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=config['train']['lr']),
                loss='mse')
  
  # print the model summary
  model.summary()

  return model


### Calculate Model Performance

In [None]:
def calculate_performance(x_test,y_test,model,scaler_y):
  """
  Predicts the price output and calculates the performance of the model
  Inputs:
    x_test: the test dataset
    y_test: the test labels
    model: the trained LSTM model
    scaler_y: the transfrom applied to the labels
  Output:
    Prints the performance metrics and returns the predictions
  """

  # Predict the prices
  y_pred = model.predict(x_test)

  # convert units back to the original scale
  y_pred_unscaled = scaler_y.inverse_transform(y_pred)
  y_test_unscaled = scaler_y.inverse_transform(y_test)

  # # Root mean squarred error,mean abs error, mean abs percent error, median abs percent error
  rmse  = math.sqrt(mean_squared_error(y_test_unscaled, y_pred_unscaled))
  mae   = mean_absolute_error(y_test_unscaled, y_pred_unscaled)
  mape  = np.mean((np.abs(np.subtract(y_test_unscaled, y_pred_unscaled)/ y_test_unscaled))) * 100
  mdape = np.median((np.abs(np.subtract(y_test_unscaled, y_pred_unscaled)/ y_test_unscaled)) ) * 100

  print("\nModel Error")
  print("="*62)
  print(f'{"Mean Absolute Error (MAE)" :-<55} {np.round(mae, 2):>5}')
  print(f'{"Root Mean Squared Error (MSE)" :-<55} {np.round(rmse,2):>5}')
  print(f'{"Mean Absolute Percentage Error (MAPE)" :-<55} {np.round(mape, 2):>5}%')
  print(f'{"Median Absolute Percentage Error (MDAPE)" :-<55} {np.round(mdape, 2):>5}%')

  return y_pred

### Plot Training Metrics

In [None]:

def plot_training_metrics(history):
  """
  Plots the training metrics: loss and val_loss by epochs
  Input:
    history: the train history
  """
  # get the number of epochs
  epochs = list(range(1, len(history.history['loss']) + 1))

  # create the line plots
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=epochs,
                           y=history.history['loss'],
                           name = 'train-loss',
                           line=dict(width=3,color='orange')))
  fig.add_trace(go.Scatter(x=epochs,
                           y=history.history['val_loss'],
                           name='val-loss',
                           line=dict(width=3,color='#8D4004')))

  fig.update_layout(title = 'Training Metrics',
                    template="plotly_white",
                    width = 500,
                    height= 500,
                    yaxis_title='loss',
                    xaxis_title='epochs')

  fig.show()

In [None]:
def plot_price_predictions(batch, idx_close, x_test, y_pred_scaled,scaler_y): 
  """
  Plots the predicted vs actual prices for a single batch
  Inputs:
    batch: the batch index to plot
    idx_close: the index position of the close price in the feature set
    x_test: the test dataset
    y_pred_scaled: the predicted y values (scaled)
    scaler_y: the scaler used to scale y
  """

  # unscale the y predictions
  y_pred_unscaled = scaler_y.inverse_transform(y_pred_scaled)

  # unscale the x_test data
  x_test_np = np.array(pd.DataFrame(x_test[batch])[idx_close]).reshape(-1,1)
  x_test_unscaled = scaler_y.inverse_transform(x_test_np)
  x_test_df = pd.DataFrame(x_test_unscaled)

  # set the indexes for plotting
  max_test_idx=x_test_df.shape[0]
  max_pred_idx =y_pred_unscaled[0].shape[0]
  test_idx = list(range(batch,batch + max_test_idx))
  pred_idx = list(range(batch + max_test_idx,batch + max_test_idx + max_pred_idx))


  # combine the actual + predicted prices
  data = pd.DataFrame(list(zip(y_pred_unscaled[batch], x_test_df[0])), columns=['pred', 'actual'])

  # create the plot
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=test_idx, y=x_test_df[0],
                        mode='lines',
                        name='Test Data',
                        fill='tozeroy',
                        line_color='#ccc'))
  fig.add_trace(go.Scatter(x=pred_idx, y=data['actual'],
                        mode='lines+markers', 
                        name='Actual Price',
                        fill='tozeroy',
                        line_color ='#ccc')) 
  fig.add_trace(go.Scatter(x=pred_idx, y=data['pred'],
                        mode='lines+markers',
                        name='Predicted Price',
                        line_color='orange'))

  fig.update_layout(template = 'plotly_white',
                      title= 'Actual vs Predicted Price',
                      xaxis_title = 'Batch',
                      yaxis_title = 'Price',
                      width=500,
                      height=500)

  fig.show()

In [None]:
def plot_n_price_predictions(idx_close,x_test,y_pred_scaled,scaler_y,n_batches=2):
  """
  Plots random batches of predictions
  """

  # select random batch numbers
  batches=[]
  [batches.append(rn.randint(0,y_pred_scaled.shape[0]-1)) for r in list(range(n_batches))]

  # plot predictions
  for batch in batches:
    plot_price_predictions(batch, idx_close, x_test, y_pred_scaled,scaler_y)

## Train the Model
Training config file.

In [None]:
def train_lstm_model(config,final_model=False):
  """
  """
  # create the lstm model
  model = create_lstm_model(config)

  # Train the model
  tic = timeit.default_timer()
  history = model.fit(config['data']['x_train'],
                      config['data']['y_train'],
                      batch_size=config['train']['batch_size'],
                      epochs=config['train']['epochs'],
                      callbacks = [config['train']['early_stop']],
                      validation_split =config['window']['val_split'], 
                      verbose = 0
                      )

  # print the training time
  toc =timeit.default_timer()
  print('\nTraining Time')
  print('='*60)
  print(f'Minutes:{round((toc-tic)/60,2)}\n')


  # predict prices
  # calculate the performance metrics
  # plot training metrics
  # plot predictions for random batches
  if final_model is False:
    y_pred_scaled = calculate_performance(config['data']['x_test'],config['data']['y_test'],model,config['data']['scaler_y'])
    plot_training_metrics(history)
    plot_n_price_predictions(config['data']['idx_close'],config['data']['x_test'],y_pred_scaled,config['data']['scaler_y'],1)

  if config['model']['save_model']:
    # save model and training performance
    model.save(config['model']['model_name'])

    df_hist = pd.DataFrame(history.history) 
    df_hist.to_csv(f"{config['model']['model_name']}/train_history.csv")


In [None]:
# the network topology is contained in a config dictionary
n_steps = 60
n_predict = 3
model_name = f'{model_path}{ticker}_model_medium_64'

# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data
x_train, y_train, x_test, y_test, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',0.80,True)

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',512,True,0.5),('lstm',256,False,0.1),('dense',128,None,0.1),('dense',64,None,0.1)],
    'train':{'epochs':250,'batch_size':64,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
train_lstm_model(config)



Data Preparation
=> 44 Features
=> Input Dimensions :(1363, 44)
=> Output Dimensions:(1363, 1)



Split Data (X)
Data  :100.0% (1363, 44)
Train : 80.0% (1091, 44)
Test  : 20.0% (272, 44)




Train/Test Dimensions
Train Data Dimensions:  x (1028, 60, 44) y (1028, 3)
Test Data Dimensions :  x (209, 60, 44)  y (209, 3)


Check the dimensions of the training windows
The last value in the training batch should match the first value in the predict batch
Last Training Value:    0.8865002241167672
First Prediction Value: 0.8865002241167672


Model: "LSTM"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 60, 60)            25200     
_________________________________________________________________
lstm_1 (LSTM)                (None, 60, 512)           1173504   
_________________________________________________________________
dropout (Dropout)            (None, 60, 512)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               787456    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0      



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/capstone2/models/BTC_model_medium_64/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/capstone2/models/BTC_model_medium_64/assets


## Results
Different batch sizes and number of input days were evaluated to determine the best performance. The MAPE/MDAPE are shown in the table.  The batch size with the best performance for each set of input days were optimized with hyper-parameter tuning 

| Batch Size | Long (100 days) | Medium (60 days) | Short (20 days) |
|------------|-----------------|------------------|-----------------|
| 64         | 0.98/0.76%      | 0.92/0.75%       | 0.92/0.71%      |
| 128        | 0.91/0.75%      | 1.21/1.23%       | **0.68/0.57%**  |
| 256        | 0.72/0.53%      | 1.17/1.12%       | 0.89/0.87%      |
| 512        | 0.74/0.51%      | **0.88/0.69%**   | 1.63/1.56%      |
| 1024       | **0.67/0.53%**  | 1.10/1.08%       | 3.12/3.15%      |

# Hyperparameter Tuning

## Load Saved Model

In [None]:
# get saved training evaluation
modelname = f'{ticker}_model_medium_512'
model = keras.models.load_model(f'{model_path}{modelname}')
df_hist = pd.read_csv(f'{model_path}{modelname}/train_history.csv')

## Seach for hyper-parmameters

In [None]:
def build_tuner_model(hp):
  """
  Builds an LSTM model for hyper-parameter tuning using keras-tuner
  """
  # Clear previous session
  backend.clear_session()

  # searchable parameters
  lstm_nodes    = hp.Choice('lstm_units',values=LSTM_UNITS)
  dense_nodes   = hp.Choice('dense_units',values=DENSE)
  dropout_lstm  = hp.Choice('lstm_dropout',values=DROPOUT_LSTM)
  dropout_dense = hp.Choice('dense_dropout',values=DROPOUT_DENSE)
  learn_rate    = hp.Choice('learn_rate',values = LEARN_RATE)


  # LSTM Model+ first layer
  model = Sequential()
  model.add(LSTM(N_STEPS,return_sequences=True,input_shape=(N_STEPS,N_FEATURES)))


  # add layers
  model.add(LSTM(units=lstm_nodes,return_sequences=True))
  model.add(Dropout(dropout_lstm))
  model.add(LSTM(lstm_nodes//2,return_sequences=False))
  model.add(Dropout(max(0.1,dropout_lstm//2)))
  model.add(Dense(units=dense_nodes))
  model.add(Dropout(dropout_dense))
  model.add(Dense(dense_nodes//2))
  model.add(Dropout(max(0.1,dropout_dense//2)))

  # add prediction layer
  model.add(Dense(N_PREDICT))

  # compile
  model.compile(loss='mse',optimizer =tf.keras.optimizers.Adam(learning_rate=learn_rate))
  model.summary()

  return model

In [None]:
# BTC - tuned with optimzed features
# the network topology is contained in a config dictionary
n_steps = 60
n_predict = 3

# pre-process the data
x_train, y_train, x_test, y_test, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',0.80,True)

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',512,True,0.5),('lstm',256,False,0.1),('dense',128,None,0.1),('dense',64,None,0.1)],
    'train':{'epochs':250,'batch_size':512,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# Searchable hyperparameters
LSTM_UNITS    = [32,64,128,256,512,1024,2048]
DENSE         = [32,64,128,256,512,1024,2048]
DROPOUT_LSTM  = [0.1,0.20,0.3,0.50]
DROPOUT_DENSE = [0.1,0.20,0.3,0.50]
N_FEATURES    = x_train.shape[2]
N_STEPS       = config['window']['n_steps']
N_PREDICT     = config['window']['n_predict']
LEARN_RATE    = [0.001,0.0001]

# initialize
hp = kt.HyperParameters()


# Setup the Tuner
tuner = Hyperband(
    build_tuner_model,
    max_epochs = 50,
    objective = 'val_loss', 
    directory = 'tune_hyperband',
    project_name = 'prices_tune_hyperband',
    overwrite = True
)

# Implement early stopping
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)


# Perform hyper
tuner.search(x_train,
             y_train,
             batch_size = config['train']['batch_size'],
             epochs = 50,
             validation_split=0.10,
             callbacks=[stop_early]
)



# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.get_best_models(num_models=1)[0]

print("Hyperparmeter Search")
print("="*60)
print(f"First LSTM Node Size:          {best_hps.get('lstm_units')}")
print(f"First LSTM Dropout Rate:       {best_hps.get('lstm_dropout')}")
print(f"Second LSTM Node Size:         {best_hps.get('lstm_units')//2}")
print(f"Second LSTM Dropout Rate:      {max(0.1,best_hps.get('lstm_dropout')//2)}")
print(f"First Dense Node Size:         {best_hps.get('dense_units')}")
print(f"First Dense Node Droput Rate:  {best_hps.get('dense_dropout')}")
print(f"Second Dense Node Size:        {best_hps.get('dense_units')//2}")
print(f"Second Dense Node Dropout Rate:{max(0.1,best_hps.get('dense_dropout')//2)}")
print(f"Learning Rate:                 {best_hps.get('learn_rate')}")


print('\n\n')
# show the optimal hyperparameters
tuner.oracle.get_best_trials(num_trials=1)[0].hyperparameters.values

NameError: ignored

## Train with optimal Parameters



### Short Model

In [None]:
# the network topology is contained in a config dictionary
n_steps = 20
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_short_optimal'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data
x_train, y_train, x_test, y_test, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',0.80,True)

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',1024,True,0.2),('lstm',512,False,0.1),('dense',2048,None,0.2),('dense',1024,None,0.1)],
    'train':{'epochs':250,'batch_size':64,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config)

### Medium Model

In [None]:
# the network topology is contained in a config dictionary
n_steps = 60
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_medium_optimal'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data
x_train, y_train, x_test, y_test, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',0.80,True)

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',1024,True,0.2),('lstm',512,False,0.1),('dense',512,None,0.5),('dense',256,None,0.1)],
    'train':{'epochs':250,'batch_size':512,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config)

### Long Model

In [None]:
# the network topology is contained in a config dictionary
n_steps = 100
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_long_optimal'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data
x_train, y_train, x_test, y_test, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',0.80,True)

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',512,True,0.5),('lstm',256,False,0.1),('dense',2048,None,0.5),('dense',1024,None,0.1)],
    'train':{'epochs':250,'batch_size':1024,'early_stop':EarlyStopping(monitor='loss',patience=20)},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config)

## Train Final LSTM Models

In [None]:
# LONG FINAL
# the network topology is contained in a config dictionary
n_steps = 100
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_long_final'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data (all training data)
x_train, y_train, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',1.0,True)
x_test = None
y_test = None

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',512,True,0.5),('lstm',256,False,0.1),('dense',2048,None,0.5),('dense',1024,None,0.1)],
    'train':{'epochs':250,'batch_size':1024,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config,True)

In [None]:
# MEDIUM FINAL
# the network topology is contained in a config dictionary
n_steps = 60
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_medium_final'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data (all training data)
x_train, y_train, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',1.0,True)
x_test = None
y_test = None

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',1024,True,0.2),('lstm',512,False,0.1),('dense',512,None,0.5),('dense',256,None,0.1)],
    'train':{'epochs':250,'batch_size':512,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config,True)

In [None]:
# SHORT FINAL
# the network topology is contained in a config dictionary
n_steps = 20
n_predict = 3

# save model path + name
model_name = f'{model_path}{ticker}_model_short_final'


# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# pre-process the data (all training data)
x_train, y_train, scaler_y,idx_close = pre_process(df_features,features_to_transform,n_steps,n_predict,'log',1.0,True)
x_test = None
y_test = None

# build training config file
config ={
    'window': {'n_steps':n_steps,'n_predict':n_predict,'val_split':0.10},
    'layers': [('lstm',1024,True,0.2),('lstm',512,False,0.1),('dense',2048,None,0.2),('dense',1024,None,0.1)],
    'train':{'epochs':250,'batch_size':64,'early_stop':EarlyStopping(monitor='loss',patience=20),'lr':0.001},
    'data':{'x_train':x_train,'y_train':y_train,'x_test':x_test,'y_test':y_test,'scaler_y':scaler_y,'idx_close':idx_close},
    'model':{'model_name':model_name,'save_model':True}
}

# train the model
model = train_lstm_model(config,True)

# FB Prophat Model
+ facebooks prophet algorithm was used to capture trend and seasonality in the price movements.
+ prophet is a variation of an ARIMA model which only uses the time-series of closing prices to estimate the next closing price

In [47]:
def get_prophet_df(df):
  """
  Convert a dataframe into prophet format
  Input:
    Dataframe of prices
  Output:
    dataframe matching the prohet requirements of datestamp,
    target variable(ds,y)
  """

  df_prophet = df[['close']].copy()
  df_prophet.reset_index(inplace= True)
  df_prophet.rename(columns={'date':'ds','close':'y'},inplace=True)

  return df_prophet


def create_prophet_model(df):
  """
  Creates and fits a FB Prophet model on the input time-series
  Input:
    df: a dataframe containing the closing price time-series
  Output:
    a trained prophet model
  """
  # convert the data into prophet format
  df_prophet = get_prophet_df(df,growth = 'linear')

  # init the prophet model and fit to the dataset
  m = Prophet(daily_seasonality=False)
  m.fit(df_prophet)

  return m



def get_prophet_forecast(df,periods=3,visualize= False):
  """
  Predicts the closing price using FB Prophat algorithm for the input 
  number of periods
  Input:
    df: a dataframe of closing prices
    periods: the number of periods to predict
    visualize: boolen, determines if a plot should be displayed
  Output:
    a dataframe of predcited closing prices
  """

  # get lastest date
  latest_date =df.index.max()

  # convert the data into prophet format
  df_prophet = get_prophet_df(df)

  # init the prophet model and fit to the dataset
  model = Prophet(daily_seasonality=False)
  model.fit(df_prophet)

  # create a df to hold the predictions
  future = model.make_future_dataframe(periods=periods)


  # forecast the data
  forecast = model.predict(future[future['ds']>latest_date])

  # show history + forcast
  if visualize:
    model.plot(forecast)

  # subset rows/cols
  # df_subset = forecast[forecast['ds']>latest_date]
  df_subset = forecast.loc[:,('ds','yhat','yhat_lower','yhat_upper')]

  return df_subset

# Historical Back Test
The model was tested by creating a historical back test of predcitions from the beginning of January 2021 to September 2021
+ Pull all data to current date and create the df_hist file of actual prices
+ Start on Jan 4,2021 and make the 3 days of predictions
+ On Jan 5
  + get the actual price for Jan 5
  + calculate the prediction error
  + roll the model forward 1 day and make the next 3 day prediction
+ Repeat for everyday up until the end of september

In [27]:
def prepare_prediction_data(df,n_steps,features=[],verbose=False):
  """
  Filter, scale and convert dataframe data to numpy arrays

  Inputs: 
    df       => A dataframe of observations with features and y-labels
    y        => The name of the column that is the truth labels
    features => A list of features.  Used to subset columns

  Outputs:
    scaled_y => numpy array of the y-label data
    scaled_x => numpy array of the training features

  """

  # subset the latest n_steps rows to be used for prediction
  df = df.iloc[0:n_steps,:]

  # reverse the index such that dates are in chronological order
  df = df.iloc[::-1]

  # Subset features, get the y-label values
  df_y = df['close']
  df_X = df[features]

  # replace the date index with an integer index
  idx_dates = df.index
  df_X.reset_index(drop=True,inplace=True)

  # convert to numpay arrays
  array_X = np.array(df_X)
  array_y = np.array(df_y).reshape(-1,1)

  if verbose:
    # print the output
    print("\nData Preparation")
    print("="*60)
    print(f"=> {len(features)} Features")
    print(f"=> Input Dimensions :{array_X.shape}")
    print("\n")

  return idx_dates, array_y,array_X

## Backtest: FB Prophet

In [48]:
def backtest_prophet(df,features_to_transform,start_date,end_date,n_predict=3):
  """
  """
  # subset the df by start/end dates
  df_new = df.loc[df.index <= end_date].copy()

  # init
  d_1day = dict()
  d_3day = dict()
  df_actuals = pd.DataFrame(df_new.close)

  # transform the data to stationary
  transform_stationary(df_new,features_to_transform)

  # get date list
  datelist = df_new[start_date:end_date].index

  print('Historical Backtest:Facebook Prophet Model')
  print('='*60)
  for dt in datelist:
    if dt.day==1:
      print(dt)

    # subset the df to the current date (dt) to make predictions
    df_tmp = df_new.loc[df_new.index <=dt].copy()

    # make the next n_predictions from the current date (dt)
    fbp = get_prophet_forecast(df_tmp,n_predict,False)

    # update predictions
    # get the dates 1 day and 3 days from dt
    dt_1day = dt + timedelta(days=1)
    dt_3day = dt + timedelta(days=3)

    # get the predictions for those dates
    day1 = fbp[fbp.ds == dt_1day].yhat.item()
    day3 = fbp[fbp.ds == dt_3day].yhat.item()

    # update the dicts with the predicted values
    d_1day[dt_1day] = np.exp(day1)
    d_3day[dt_3day] = np.exp(day3)

  # convert to dfs
  df_1day = pd.DataFrame([d_1day]).T
  df_1day.columns = ['prophet_1day']

  df_3day = pd.DataFrame([d_3day]).T
  df_3day.columns = ['prophet_3day']

  # combine dfs
  df_pred = df_actuals.join(df_1day,how='left').join(df_3day,how='left')

  # calc the daily and cumulative errors
  df_pred['prophet_1day_error'] = df_pred['prophet_1day']-df_pred['close']
  df_pred['prophet_3day_error'] = df_pred['prophet_3day']-df_pred['close']
  df_pred['prophet_1day_cum'] = df_pred['prophet_1day_error'].cumsum()
  df_pred['prophet_3day_cum'] = df_pred['prophet_3day_error'].cumsum()

  return df_pred

In [49]:
# fb preds -back-test
# constants
ticker = 'BTC'
model_type = 'prophet'
n_predict = 3
start_date = '2021-01-26'
end_date = '2021-10-25' 

# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# get the data
df = pd.read_pickle(f'{path}{ticker}_market_data_final.pickle')

# run the backtest and save the results
df_prophet = backtest_prophet(df,features_to_transform,start_date,end_date)
df_prophet.to_pickle(f'{path}{ticker}_{model_type}_backtest_ytd.pickle')


Historical Backtest:Facebook Prophet Model
2021-02-01 00:00:00
2021-03-01 00:00:00
2021-04-01 00:00:00
2021-05-01 00:00:00
2021-06-01 00:00:00
2021-07-01 00:00:00
2021-08-01 00:00:00
2021-09-01 00:00:00
2021-10-01 00:00:00


## Backtest: LSTM

In [29]:
def make_lstm_predictions(df,lstm_model,scaler,scaled_X,n_steps,n_features,n_pred,start_date):
  """
  Predict the next n_pred days with n_steps of daily data as input to the model
  Input:
    lstm_model: A trained LSTM model
    scaler: The scaler used
    scaled_X: scaled input features
    n_steps: the number of input days used in the model
    n_features: the number of features used in the model
    n_pred: the number of days predicted in the model
    start_date: the start date of the prediction window
  Output:
    a data frame of predicted prices
  """
  # LSTM Prediction
  # Predict the prices
  y_pred_scaled = lstm_model.predict(scaled_X.reshape(1,n_steps,n_features))

  # convert units back to the original scale
  y_pred_unscaled = scaler.inverse_transform(y_pred_scaled)

  # convert from log transform back to original scale
  y_pred_np = np.exp(y_pred_unscaled)

  # set the date index
  pred_dates = pd.date_range(start_date + datetime.timedelta(days=1), periods=n_pred,freq='D').tolist()

  # convert to dataframe
  lstm = pd.DataFrame(y_pred_np.T,columns=['lstm'])
  lstm['date'] = pred_dates
  lstm.set_index(['date'],inplace=True)
  lstm.index = pd.to_datetime(lstm.index)

  return lstm

In [30]:

def backtest_lstm(df,model,model_type,features_to_transform,start_date,end_date,n_predict=3):
  """
  """
  # subset the df by start/end dates
  df_new = df.loc[df.index <= end_date].copy()
  features = [f for f in df.columns if f not in ['ticker']]

  # init
  d_1day = dict()
  d_3day = dict()
  df_actuals = pd.DataFrame(df_new.close)

  # get date list
  datelist = df_new[start_date:end_date].index

  print(f'Historical Backtest:LSTM Model ({model_type})')
  print('='*60)
  for dt in datelist:
    if dt.day==1:
      print(dt)

    # subset the df to the current date (dt) to make predictions
    df_tmp = df_new.loc[df_new.index <=dt].copy()
    df_tmp = df_tmp[::-1]

    # transform the data to stationary
    transform_stationary(df_tmp,features_to_transform)

    # prepare
    idx_dates, array_y, array_X = prepare_prediction_data(df_tmp,n_steps,features)

    # scale the input and outputs
    scaler_X = MinMaxScaler(feature_range=(0,1))
    scaled_X = scaler_X.fit_transform(array_X)
    scaler_y = MinMaxScaler(feature_range=(0,1))
    scaled_y = scaler_y.fit_transform(array_y)

    # make the next n_predictions from the current date (dt)
    # make predictions
    df_pred = make_lstm_predictions(df_tmp,model,scaler_y,scaled_X,n_steps,len(features),n_predict,df_tmp.index.max())

    # update predictions
    # get the dates 1 day and 3 days from dt
    dt_1day = dt + timedelta(days=1)
    dt_3day = dt + timedelta(days=3)

    # get the predictions for those dates
    day1 = df_pred[df_pred.index == dt_1day].lstm.item()
    day3 = df_pred[df_pred.index == dt_3day].lstm.item()

    # update the dicts with the predicted values
    d_1day[dt_1day] = day1
    d_3day[dt_3day] = day3

  # convert to dfs
  df_1day = pd.DataFrame([d_1day]).T
  df_1day.columns = [f'lstm_{model_type}_1day']

  df_3day = pd.DataFrame([d_3day]).T
  df_3day.columns = [f'lstm_{model_type}_3day']

  # combine dfs
  df_pred = df_actuals.join(df_1day,how='left').join(df_3day,how='left')

  # calc the daily and cumulative errors
  df_pred[f'lstm_{model_type}_1day_error'] = df_pred[f'lstm_{model_type}_1day']-df_pred['close']
  df_pred[f'lstm_{model_type}_3day_error'] = df_pred[f'lstm_{model_type}_3day']-df_pred['close']
  df_pred[f'lstm_{model_type}_1day_cum'] = df_pred[f'lstm_{model_type}_1day_error'].cumsum()
  df_pred[f'lstm_{model_type}_3day_cum'] = df_pred[f'lstm_{model_type}_3day_error'].cumsum()

  return df_pred

In [42]:
# constants
ticker = 'BTC'
model_type = 'short'
n_steps = 20
n_predict = 3
start_date = '2021-01-26'
end_date = '2021-10-25' 

# Transform the data to be stationary
features_to_transform = ['open','high','low','close', 'b-upper', 'b-middle', 'b-lower',
            'futures_open', 'futures_high', 'futures_low', 'futures_close', 
            'ETH', 'DOGE', 'LTC',
            'USDEUR', 'USDGBP', 'OIL_shift', 'BOIL', 'VXX_shift', 'XLE', 'QQQ',
            ]

# get the trained lstm model
model = keras.models.load_model(f'{model_path}{ticker}_model_{model_type}_final')

# get the data
df = pd.read_pickle(f'{path}{ticker}_market_data_final.pickle')

# return df_pred
df_pred_short = backtest_lstm(df,model,model_type,features_to_transform,start_date,end_date,n_predict=3)  
df_pred_short.to_pickle(f'{path}{ticker}_{model_type}_backtest_ytd.pickle')


Historical Backtest:LSTM Model (short)
2021-02-01 00:00:00
2021-03-01 00:00:00
2021-04-01 00:00:00
2021-05-01 00:00:00
2021-06-01 00:00:00
2021-07-01 00:00:00
2021-08-01 00:00:00
2021-09-01 00:00:00
2021-10-01 00:00:00


In [43]:
# constants
ticker = 'BTC'
model_type = 'medium'
n_steps = 60
n_predict = 3
start_date = '2021-01-26'
end_date = '2021-10-25' 

# get the trained lstm model
model = keras.models.load_model(f'{model_path}{ticker}_model_{model_type}_final')

# get the data
df = pd.read_pickle(f'{path}{ticker}_market_data_final.pickle')

# return df_pred
df_pred_medium = backtest_lstm(df,model,model_type,features_to_transform,start_date,end_date,n_predict=3)  
df_pred_medium.to_pickle(f'{path}{ticker}_{model_type}_backtest_ytd.pickle')

Historical Backtest:LSTM Model (medium)
2021-02-01 00:00:00
2021-03-01 00:00:00
2021-04-01 00:00:00
2021-05-01 00:00:00
2021-06-01 00:00:00
2021-07-01 00:00:00
2021-08-01 00:00:00
2021-09-01 00:00:00
2021-10-01 00:00:00


In [44]:
# constants
ticker = 'BTC'
model_type = 'long'
n_steps = 100
n_predict = 3
start_date = '2021-01-26'
end_date = '2021-10-25' 

# get the trained lstm model
model = keras.models.load_model(f'{model_path}{ticker}_model_{model_type}_final')

# get the data
df = pd.read_pickle(f'{path}{ticker}_market_data_final.pickle')

# return df_pred
df_pred_long = backtest_lstm(df,model,model_type,features_to_transform,start_date,end_date,n_predict=3)  
df_pred_long.to_pickle(f'{path}{ticker}_{model_type}_backtest_ytd.pickle')

Historical Backtest:LSTM Model (long)
2021-02-01 00:00:00
2021-03-01 00:00:00
2021-04-01 00:00:00
2021-05-01 00:00:00
2021-06-01 00:00:00
2021-07-01 00:00:00
2021-08-01 00:00:00
2021-09-01 00:00:00
2021-10-01 00:00:00


## Compare Models

In [9]:
# model config to plot models and build an ensemble model
# key: (column-name, model-name,weight, color)
model_dict ={
    'short-1day':{'col_name':'lstm_short_1day','name':'LSTM-Short (1day)','weight':0.20,'color':'cornflowerblue'},
    'short-3day':{'col_name':'lstm_short_3day','name':'LSTM-Short (3day)','weight':0.20,'color':'cornflowerblue'},
    'medium-1day':{'col_name':'lstm_medium_1day','name':'LSTM-Medium (1day)','weight':0.20,'color':'goldenrod'},
    'medium-3day':{'col_name':'lstm_medium_3day','name':'LSTM-Medium (3day)','weight':0.20,'color':'goldenrod'},
    'long-1day':{'col_name':'lstm_long_1day','name':'LSTM-Long (1day)','weight':0.20,'color':'#ff7f0e'},
    'long-3day':{'col_name':'lstm_long_3day','name':'LSTM-Long (3day)','weight':0.20,'color':'#ff7f0e'},
    'prophet-1day':{'col_name':'prophet_1day','name':'Prophet (1day)','weight':0.20,'color':'skyblue'},
    'prophet-3day':{'col_name':'prophet_3day','name':'Prophet (3day)','weight':0.20,'color':'skyblue'},
    'ensemble-1day':{'col_name':'ensemble_short','name':'Ensemble (1day)','color':'crimson'},
    'ensemble-3day':{'col_name':'ensemble_long','name':'Ensemble (3day)','color':'crimson'},
}


# creates the ensemble model (short/long)
ensemble_dict={
    'short':{'models':['lstm_short_1day','naive'],'weights':[0.80,0.20]},
    'long':{'models':['lstm_long_3day','prophet_3day'],'weights':[0.80,0.20]}
}


In [51]:
def combine_models(model_dict,models,ensemble_dict=None,dropna=True):
  """
  Combine backtest data (lstm models, prophet, ensemble)
  params:
    model_dict: dictionary of model info, weights for ensemble
    models: short, medium, long, prophet, ensemble
    dropna: boolean - remove nan or not
  """
  # combine models
  for i,m in enumerate(models):
    # get model hist
    if m != 'ensemble' and m != 'naive':
      # df_hist = pd.read_pickle(f'{path}{ticker}_{m}_hist.pickle')
      df_hist = pd.read_pickle(f'{path}{ticker}_{m}_backtest_ytd.pickle')
    
      if i==0:
        dff = df_hist.copy()
      else:
        df_hist.drop(columns=['close'],inplace=True)
        dff = dff.join(df_hist,how='inner')
  if dropna:
    dff.dropna(inplace=True)
  
  if 'naive' in models or 'ensemble' in models:
    # add the naive model
    dff['naive'] = dff.close.shift(1)
    dff['naive_error'] = dff['naive']-dff['close']
    dff['naive_cum'] = dff['naive_error'].cumsum()

  if 'ensemble' in models:
    # create ensemble models
    dff = create_ensemble_models(dff,ensemble_dict)
    dff['ensemble_long_error'] = dff['ensemble_long']-dff['close']
    dff['ensemble_long_cum'] = dff['ensemble_long_error'].cumsum()

    dff['ensemble_short_error'] = dff['ensemble_short']-dff['close']
    dff['ensemble_short_cum'] = dff['ensemble_short_error'].cumsum()

  if dropna:
    dff.dropna(inplace=True)
    
  return dff

In [11]:
def create_ensemble_models(df,ensemble_dict):
  """
  Add a long/short ensemble models to the df according to the specs in ensemble_dict
  Params:
    df: Dataframe output from combine_models
    ensemble_dict: A dictionary of ensemble models and weights
  """
  # short ensemble model: sum(model prediction * weights)
  df['ensemble_short'] = df[ensemble_dict['short']['models']].mul(ensemble_dict['short']['weights']).sum(1)

  # long ensemble model: sum(model prediction * weights)
  # need to shift the 3rd days prediction to the same rows as the 1 day prediction
  df['ensemble_long'] = df[ensemble_dict['long']['models']].mul(ensemble_dict['long']['weights']).sum(1)
  df['ensemble_long']  = df['ensemble_long'].shift(-2)

  return df

In [12]:
def plot_backtests(model_dict,models,ensemble_dict=None,daily='short',window='1day',dropna=False):
  """
  Compare LSTM models in a Plot of predictions vs actual with daily and cumulative errors
  """
  # combine backtest data
  dff = combine_models(model_dict,models,ensemble_dict,dropna)

  # plots
  fig = make_subplots(
    rows=3, 
    cols=1, 
    subplot_titles=("Predictions", "Daily Error", "Cumulative Errors")
  )

  # actual prices
  fig.add_trace(
      go.Scatter(
          name='Actual Prices',
          x = dff.index,
          y = dff.close,
          fill = 'tozeroy',
          line_color = '#536872'
      ),row=1,col=1
  )
  
  # add model predictions+cumulative errors
  for m in models:
    if 'naive' not in m:
      key = f'{m}-{window}'
      column_name = model_dict[key]['col_name']
      model_name = model_dict[key]['name']
      model_color = model_dict[key]['color']

      # add model predictions
      fig.add_trace(
          go.Scatter(
              name = model_name,
              x = dff.index,
              y = dff[column_name],
              line =dict(color = model_color,width=3)
          ),row=1,col=1
      )

      fig.add_trace(
          go.Scatter(
              name = f'{model_name}-Cumulative Error',
              x = dff.index,
              y = dff[f'{column_name}_cum'],
              line_color = model_color,
              showlegend = False
          ),row = 3,col=1
      )
  
  # add the daily error bar chart for the selected model
  key = f'{daily}-{window}'
  column_name = model_dict[key]['col_name']+'_error'
  fig.add_trace(
      go.Bar(
          name = f'Daily Error:{daily} Model',
          x = dff.index,
          y = dff[column_name],      
          marker_color = model_dict[key]['color'],    
          showlegend=False
      ),row = 2, col=1
  )
  
  fig.update_layout(
      title = "Model Backtest",
      template = 'plotly_white',
      height = 1200,
      width = 900
  )

  fig.show()
  

plot_backtests(model_dict,['short','long','prophet','ensemble'],ensemble_dict,'short','1day',True)


## Compare Model Errors

In [13]:
def plot_error_distributions(models,model_dict,ensemble_dict=None,compare_capstone=True):
  """
  Plot box-plots of model errors
  Params:
    models: a list of models to compare
    model_dict: a dictionary of model configurations for plotting
  """

  # get models
  df = combine_models(model_dict,models,ensemble_dict,True)
 
  # get error columns
  err_cols = [c for c in df.columns if 'error' in c]

  # get error columns that are in models
  cols = [c for c in err_cols for m in models if c.find(m)!=-1]
  cols = list(set(cols))
  cols.sort()
  dff = df[cols].copy()

  # model info
  d ={
    'lstm_short_1day_error':{'name':'LSTM-Short (1day)','color':'cornflowerblue'},
    'lstm_short_3day_error':{'name':'LSTM-Short (3day)','color':'cornflowerblue'},
    'lstm_medium_1day_error':{'name':'LSTM-Medium (1day)','color':'goldenrod'},
    'lstm_medium_3day_error':{'name':'LSTM-Medium (3day)','color':'goldenrod'},
    'lstm_long_1day_error':{'name':'LSTM-Long (1day)','color':'#ff7f0e'},
    'lstm_long_3day_error':{'name':'LSTM-Long (3day)','color':'#ff7f0e'},
    'prophet_1day_error':{'name':'Prophet (1day)','color':'skyblue'},
    'prophet_3day_error':{'name':'Prophet (3day)','color':'skyblue'},
    'ensemble_short_error':{'name':'Ensemble (1day)','color':'crimson'},
    'ensemble_long_error':{'name':'Ensemble (3day)','color':'crimson'},
}

  # create plot
  fig = go.Figure()

  # add models 
  for c in cols:
    fig.add_trace(
        go.Box(
            name = d[c]['name'],
            x = dff[c],
            marker_color = d[c]['color'],
            opacity = 0.7,
            boxpoints = 'suspectedoutliers'
        )
    )

  if compare_capstone:

    df_capstone1 = pd.read_pickle(f'{path}{ticker}_capstone1_hist.pickle')
    fig.add_trace(
        go.Box(
            name = 'Practicum 1 -LSTM Model',
            x = df_capstone1.lstm_diff,
            marker_color = 'black',
            opacity = 0.7,
            boxpoints = 'suspectedoutliers'
        )
    )

  fig.update_layout(
      title = 'Daily Error Distributions',
      template = 'plotly_white',
      height = 500,
      width = 600,
      showlegend =False
  )
  fig.show()

plot_error_distributions(['short','long','prophet','ensemble'],model_dict,ensemble_dict,compare_capstone=False)


# Evaluate Trading Strategy

In [60]:
def create_trading_strategy(ensemble_dict,model_dict,models,tolerance=0.01,max_loss =-15,cash=40000,start_date='2021-01-27',end_date='2021-09-28'):
  """
  Backtest a trading strategy based on long/short ensemble predictions.
  Params:
    ensemble_dict: A dictionary that defines the long/short models and their weights
    model_dict: A dictionary that define model properties
    models: A list of models to included
    tolerance: A percent that determines the return difference between the long/short models
    cash: The initial investment that is allocated between coin/cash
  """
  # get the model backtest data
  df = combine_models(model_dict,models,True)
  df = df.loc[start_date:end_date,:].copy()

  # create the long/short ensemble models
  df = create_ensemble_models(df,ensemble_dict)

  # Calculate the signal stength
  # return difference between the prediction in 3-days vs the next day
  df['signal_strength'] = (df['ensemble_long']-df['ensemble_short']) / df['naive']

  # Add the strength
  # if the strenth is less than the tolerance => hold
  # if the strength is negative => sell
  # if the strength is postive => buy
  # if the price is trending down, and at a loss of 15% or more, sell
  df['signal'] = df['signal_strength'].apply(lambda x: 'hold' if abs(x) < tolerance else ('sell' if x<0 else 'buy' ))

  # init columns
  df['coin'] = 0
  df['cash'] = 0
  df['transaction']=0
  df['action'] = ''
  df['profit_loss'] = 0
  df['return_since_buy']=0.0

  # Evaluate trade decisions at each time step, calculate profit/loss,holdings
  start_date = df.index.min()
  last_buy =  start_date
  if max_loss is None:
    max_loss = -np.inf

  for idx,row in df.iterrows():

    # get the previous date
    prev_date = idx - timedelta(days=1)

    # initial buy
    if idx == start_date:
      df.at[idx,'signal'] = 'buy'
      df.at[idx,'coin']  = row.close
      df.at[idx,'cash'] = cash - row.close
      df.at[idx,'transaction'] = -row.close
      df.at[idx,'action'] = 'buy'

    # buy- if coin isn't already owned
    elif row.signal == 'buy' and df.loc[prev_date,'coin'] == 0:
      df.at[idx,'cash'] = df.loc[prev_date,'cash'] - row.close
      df.at[idx,'transaction'] = -row.close
      df.at[idx,'coin'] = row.close 
      df.at[idx,'action'] = 'buy'
      last_buy = idx

    # sell -if coin is currently owned if there is a sell signal
    # Or, if the return since purchase is less than the max_loss
    # this corrects for the scenario where the price is trending down for 
    # both short /long models, but where the long model price is still higher than the short price 
    # In this scenario the model will hold until the short price is higher than the long price
    elif df.loc[prev_date,'coin']>0 and (row.signal =='sell' or df.loc[prev_date,'return_since_buy']<=max_loss):
      df.at[idx,'cash'] = df.loc[prev_date,'cash'] + row.close
      df.at[idx,'transaction'] = row.close
      df.at[idx,'coin'] = 0
      df.at[idx,'action'] = 'sell'
      df.at[idx,'profit_loss'] = row.close - df.loc[prev_date,'coin']
    
    # hold (hold cash or coin)-wait for the next signal
    else:
      df.at[idx,'action']= 'hold'
      df.at[idx,'cash'] = df.loc[prev_date,'cash']
      df.at[idx,'coin'] = df.loc[prev_date,'coin']

    # update the return since last purchase if coin is currently owned
    if df.loc[idx,'coin'] > 0:
      return_since_buy = round((df.loc[idx,'close'] / df.loc[last_buy,'close']-1)*100,2)
      df.at[idx,'return_since_buy'] = return_since_buy
    
  # add totals
  df['total'] = df.coin + df.cash

  # add invested amount
  df['invested'] = df.apply(lambda row:row.close if row.coin>0 else 0,axis=1)

  return df[['close','ensemble_short','ensemble_long','signal_strength','signal','action','cash','coin','transaction','total','profit_loss','invested','return_since_buy']]


In [15]:
def get_invested_dates(df):
  """
  Returns a dataframe of trades,dates,transaction amounts and profit/loss
  Params:
    df: The output from create_trading_strateguy
  """
  # get dates with transactions
  df_trans = df[df.transaction!=0].copy()

  # move the index to the from_date field
  df_trans['from_date'] = df_trans.index

  # create the to_date (the next date in the df)
  df_trans['to_date'] = df_trans.from_date.shift(-1)

  # the the row will have a null for to_date becuase of the 
  # shifted rows. Replace that value with the latest date in the df
  idx = df_trans[df_trans.to_date.isnull()].index
  df_trans.loc[idx,'to_date'] = dff.index.max()

  return df_trans


In [16]:
def plot_trading_strategy(df,annotate=None):
  """
  Plot the trading strategy (close prices, long/short-ensemble models, 
  invested periods, and total amount invested in coin/cash)
  Params:
    df: a dataframe of profit/loss. The output from create_trading_strategy
    annotate: A dictionary to define where to put the buy/sell annotations
              {date:buy-date,shift:y-axis offset}
  """
  # calculate profit/loss, return and the number of trades
  profit_loss = df.profit_loss.sum()
  return_strategy = round(profit_loss / df.loc[df.index.min(),'total']*100,1)
  return_hold = round((df.loc[df.index.max(),'close']/df.loc[df.index.min(),'close']-1)*100,1) 

  # number of trades
  no_trades = min(df[df.action=='buy'].shape[0]-1,df[dff.action=='sell'].shape[0])

  # get the dates of investment in coin
  df_trans = get_invested_dates(df)

  # create the plots
  fig = make_subplots(specs=[[{"secondary_y": True}]])

  # total invested
  fig.add_trace(
      go.Scatter(
          name = 'Total Portfolio Value (Cash+Coin)',
          x = df.index,
          y = df.total,
          line = dict(color = 'darkgrey',width=4,dash='dot'),
      ),secondary_y=False
  )

  # add the investment periods as shapes
  for idx,row in df_trans.iterrows():
    if row.coin >0:
        fig.add_shape(
            dict(
              name = 'Invested',
              type='rect',
              fillcolor = 'skyblue',
              x0 = row.from_date,
              x1=  row.to_date,
              y0= 0,
              y1=1,
              opacity = 0.1,
            ),secondary_y=False
    )
        
  # close price
  fig.add_trace(
      go.Scatter(
          name = 'Closing Price',
          x = df.index,
          y = df.close,  
          line_color = '#536872',
          fill = 'tozeroy',
      ),secondary_y=False
  )
  
  # short-ensemble-model
  fig.add_trace(
      go.Scatter(
          name = 'Ensemble Model (1day)',
          x = df.index,
          y = df.ensemble_short,
          line = dict(color = 'cornflowerblue',width=3)
      ),secondary_y=False
  )

  # long-ensemble-model
  fig.add_trace(
      go.Scatter(
          name = 'Ensemble Model (3day)',
          x = df.index,
          y = df.ensemble_long,
          line = dict(color='#ff7f0e',width=3)
      ),secondary_y=False
  )

  # add annotations
  txt = f'Profit/Loss:${profit_loss:,}|Strategy Return:{return_strategy}%|Buy & Hold Return:{return_hold}%|Trades:{no_trades}'

  # add sub-title
  fig.add_annotation(
      text=txt,
      xref="paper", yref="paper",
      x=-0.05, y=1.12, 
      showarrow=False
  )

  # add buy/sell lables if provided
  if annotate is not None:
    # get buy/sell dates and prices
    buy_date = df_trans.loc[annotate['date'],'from_date']
    buy_price = df_trans.loc[buy_date,'close']
    sell_date = df_trans.loc[annotate['date'],'to_date']
    sell_price = df_trans.loc[sell_date,'close']

    fig.add_annotation(
        x=buy_date, 
        y=buy_price,
        text="Buy",
        showarrow=True,
        yshift=annotate['shift'],
        arrowhead=1,
        bordercolor="#c7c7c7",
        borderwidth=2,
        borderpad=4,
        bgcolor="#ff7f0e"
    )
    fig.add_annotation(
        x = sell_date,
        y = sell_price,
        text = 'Sell',
        arrowhead = True,
        yshift = annotate['shift'],
        bordercolor="#c7c7c7",
        borderwidth=2,
        borderpad=4,
        bgcolor="#ff7f0e"
    )

  fig.update_shapes(dict(xref='x', yref='paper'))
  fig.update_layout(
      title = 'Trading Strategy Evaluation',
      template = 'plotly_white',
      width = 900,
      height = 500,
      legend = dict(
          yanchor = 'bottom',
          y = 0.01,
          xanchor = 'right',
          x = 0.935,
      )
  )

  fig.show()

In [66]:
dff=create_trading_strategy(ensemble_dict,model_dict,['short','long','prophet','naive'],tolerance=0.01,max_loss =-15,cash=40000,start_date='2021-01-27',end_date='2021-09-28')    
plot_trading_strategy(dff,{'date':'2021-03-05','shift':20})
# plot_trading_strategy(dff)


In [None]:
dff = combine_models(model_dict,['short','medium','long','prophet','ensemble'],False) 
dff.to_csv(f'{path}{ticker}_ensemble_model.csv')