In [1]:
# Select TensorFlow 2.x and install the market-data packages (yfinance, quandl)
# that the notebook imports later on.
# NOTE(review): the installs are unpinned, so a fresh run may pull newer releases
# than the ones shown in the recorded output.
%tensorflow_version 2.x
%pip install yfinance
%pip install quandl
import tensorflow as tf
# Abort early unless TensorFlow reports a GPU registered as device 0.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))


TensorFlow 2.x selected.
Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/c2/31/8b374a12b90def92a4e27d0fc595fc43635f395984e36a075244d98bd265/yfinance-0.1.54.tar.gz
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.54-py2.py3-none-any.whl size=22411 sha256=470fba5d9910cab8ce39f615c94f49cccae41550bad35f55743f054fbbd820d8
  Stored in directory: /root/.cache/pip/wheels/f9/e3/5b/ec24dd2984b12d61e0abf26289746c2436a0e7844f26f2515c
Successfully built yfinance
Installing collected packages: yfinance
Successfully installed yfinance-0.1.54
Collecting quandl
  Downloading https://files.pythonhosted.org/packages/07/ab/8cd479fba8a9b197a43a0d55dd534b066fb8e5a0a04b5c0384cbc5d663aa/Quandl-3.5.0-py2.py3-none-any.whl
Collecting inflection>=0.3.1
  Downloading https://files.pythonhosted.org/packages/d5/35/a6eb45b4e2356fe688b21570864d4aa0d0a880ce387defe9c589112077f8/infl

In [2]:
# Install and configure TensorBoard dependencies for Google Colab.
# Download and unpack the ngrok binary used to tunnel to the TensorBoard port.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip


# Directory TensorBoard reads event files from.
LOG_DIR = './logs'
# Start TensorBoard in the background, listening on port 6006.
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

# Start an ngrok HTTP tunnel to port 6006 in the background.
# NOTE(review): this makes the TensorBoard instance reachable through a public URL.
get_ipython().system_raw('./ngrok http 6006 &')

# Query ngrok's local API on port 4040 and print the public URL of the first tunnel.
!curl -s http://localhost:4040/api/tunnels | python3 -c "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

--2020-01-07 22:17:39--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 52.5.84.255, 34.206.126.139, 34.238.178.61, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.5.84.255|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13773305 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2020-01-07 22:17:39 (38.2 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13773305/13773305]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   
https://fb347612.ngrok.io


In [3]:
# Download the cryptocurrency historical data archive and unpack it into
# crypto_data/ (one CSV per ratio, as listed in the unzip output).
!wget https://pythonprogramming.net/static/downloads/machine-learning-data/crypto_data.zip
!unzip crypto_data.zip

--2020-01-07 22:17:41--  https://pythonprogramming.net/static/downloads/machine-learning-data/crypto_data.zip
Resolving pythonprogramming.net (pythonprogramming.net)... 104.237.143.20, 2600:3c00::f03c:91ff:fe84:176d
Connecting to pythonprogramming.net (pythonprogramming.net)|104.237.143.20|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5998694 (5.7M) [application/zip]
Saving to: ‘crypto_data.zip’


2020-01-07 22:17:42 (37.7 MB/s) - ‘crypto_data.zip’ saved [5998694/5998694]

Archive:  crypto_data.zip
  inflating: crypto_data/BCH-USD.csv  
  inflating: crypto_data/BTC-USD.csv  
  inflating: crypto_data/ETH-USD.csv  
  inflating: crypto_data/LTC-USD.csv  


In [0]:
import time
import quandl
import random 
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime 
from collections import deque
from sklearn import preprocessing
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization

In [0]:
# Global configuration variables
# Ratios whose CSV files (crypto_data/<ratio>.csv) are loaded by get_data().
RATIOS = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]
# Ratio whose close price is used for the technical features and the target label.
TARGET_RATIO = "ETH-USD"
# Column names applied to each CSV when it is read.
RATIO_LABELS = ["time", "low", "high", "open", "close", "volume"]
# Number of rows the close price is shifted by to obtain the "future" price.
FUTURE_PERIOD_PREDICT = 3
# Number of consecutive rows in each input sequence.
SEQ_LEN = 60
EPOCHS = 10
BATCH_SIZE = 64
# Fraction of rows (the ones with the largest index values) held out for validation.
VALIDATION_PERCENTAGE_SIZE = .05
# Hyperparameter grid: generate_models() trains one model per combination.
LSTM_MODEL_PARAMETERS = {
    "nodes_per_LSTM_layer": [128, 256], 
    "nodes_per_dense_layer": [32, 64],
    "num_of_LSTM_layers": [2, 3], #Assumes at minimum 2 LSTM layers
    "num_of_dense_layers": [1, 2]
}
# Prefix of every model/log directory name. The trailing dash combines with the
# separator added in generate_models(), giving names such as
# "ETH-USD-LSTM--128-32-2-1".
NAME = f"{TARGET_RATIO}-LSTM-"

In [0]:
def get_data():
  """Load close/volume data for every ratio and add technical features.

  Reads ``crypto_data/<ratio>.csv`` for each entry of RATIOS, keeps only the
  close and volume columns (renamed to ``<ratio>_Close`` / ``<ratio>_Volume``),
  and joins them on the ``time`` index of the first ratio loaded. Technical
  features for TARGET_RATIO are then appended, gaps are forward-filled and any
  row that still contains a missing value is dropped.

  Returns:
    pandas.DataFrame indexed by ``time`` with no missing values.
  """
  # Initialize empty main df
  df = pd.DataFrame()

  # Iteratively add Close and Volume columns of ratios to df
  for ratio in RATIOS:
    close_col = f"{ratio}_Close"
    volume_col = f"{ratio}_Volume"
    ratio_df = pd.read_csv(f"crypto_data/{ratio}.csv", names=RATIO_LABELS)
    ratio_df = ratio_df.rename(columns={"close": close_col, "volume": volume_col})
    ratio_df = ratio_df.set_index("time")[[close_col, volume_col]]
    # The first frame seeds the result; later frames are left-joined onto it.
    df = ratio_df if len(df) == 0 else df.join(ratio_df)

  # Add technical features corresponding to TARGET_RATIO'S pricing movement
  df = get_technical_features(df, TARGET_RATIO)

  # DataFrame.ffill() replaces the deprecated fillna(method="ffill") spelling.
  # Rows that are still incomplete (e.g. the rolling-window warm-up) are dropped.
  return df.ffill().dropna()

In [0]:
def get_technical_features(df, ratio):
    """Append technical-indicator columns derived from ``<ratio>_Close`` to ``df``.

    The frame is modified in place and also returned. Columns are added in this
    order: MA7, MA21, 26EMA, 12EMA, MACD, 20SD, Upper_Band, Lower_Band, EMA,
    each prefixed with ``<ratio>_``.
    """
    close = df[f"{ratio}_Close"]

    # Simple moving averages over 7 and 21 rows
    df[f"{ratio}_MA7"] = close.rolling(window=7).mean()
    ma21 = close.rolling(window=21).mean()
    df[f"{ratio}_MA21"] = ma21

    # MACD: difference between the 12-span and 26-span exponential averages
    ema26 = close.ewm(span=26).mean()
    ema12 = close.ewm(span=12).mean()
    df[f"{ratio}_26EMA"] = ema26
    df[f"{ratio}_12EMA"] = ema12
    df[f"{ratio}_MACD"] = ema12 - ema26

    # Bollinger Bands: MA21 plus/minus two 20-row standard deviations
    sd20 = close.rolling(20).std()
    band_width = sd20 * 2
    df[f"{ratio}_20SD"] = sd20
    df[f"{ratio}_Upper_Band"] = ma21 + band_width
    df[f"{ratio}_Lower_Band"] = ma21 - band_width

    # Fast exponential moving average (center of mass 0.5)
    df[f"{ratio}_EMA"] = close.ewm(com=0.5).mean()

    return df

In [0]:
def classify(current, future):
  """Return 1 when ``future`` is strictly greater than ``current``, else 0.

  Both arguments are converted with ``float()`` before the comparison.
  """
  price_rises = float(future) > float(current)
  return 1 if price_rises else 0

In [0]:
def get_target(df):
  """Add the ``Future`` price and binary ``Target`` label columns.

  ``Future`` is the TARGET_RATIO close price FUTURE_PERIOD_PREDICT rows ahead;
  ``Target`` is ``classify(close, Future)`` (1 when the future price is higher).
  Both columns are added to ``df`` itself.

  Rows whose ``Future`` value is missing (the trailing FUTURE_PERIOD_PREDICT
  rows produced by the shift) are excluded from the returned frame: with no
  future price to compare against, they would otherwise be labelled 0.

  Returns:
    pandas.DataFrame with the two new columns and only rows that have a
    known future price.
  """
  close = df[TARGET_RATIO + "_Close"]
  df['Future'] = close.shift(-FUTURE_PERIOD_PREDICT)
  df['Target'] = list(map(classify, close, df['Future']))
  # A NaN future compares as "not greater", so it must not become a label.
  return df.dropna(subset=['Future'])

In [0]:
def preprocess(df):
  """Turn a labelled frame into balanced, shuffled (sequence, target) arrays.

  Steps:
    1. Drop the ``Future`` column; ``Target`` must then be the last column,
       because each row is split positionally into features and label.
    2. Replace every feature column with its percentage change and scale it
       with ``preprocessing.scale``.
    3. Build sliding windows of SEQ_LEN consecutive rows, each paired with the
       target of the window's last row.
    4. Downsample the larger class so both classes have equally many windows.

  Args:
    df: frame containing feature columns plus ``Future`` and ``Target``.

  Returns:
    Tuple ``(X, y)`` of numpy arrays: the feature windows and their targets.
  """
  # Normalize data by converting to percentage change.
  # `columns=` is required: the positional axis argument of DataFrame.drop
  # is no longer accepted by pandas 2.x.
  df = df.drop(columns="Future")
  for col in df.columns:
    if col != "Target":
      df[col] = df[col].pct_change()
      # pct_change leaves a NaN in the first row; it is dropped before scaling.
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values)

  df.dropna(inplace=True)

  # List of [window, target] pairs
  sequential_data = []
  # Rolling window holding the most recent SEQ_LEN feature rows
  prev_days = deque(maxlen=SEQ_LEN)

  for row in df.values:
    prev_days.append(list(row[:-1]))
    if len(prev_days) == SEQ_LEN:
      sequential_data.append([np.array(prev_days), row[-1]])

  random.shuffle(sequential_data)

  # Partition the windows by label: target 1 is a buy, anything else a sell
  buys = [[sequence, target] for sequence, target in sequential_data if target == 1]
  sells = [[sequence, target] for sequence, target in sequential_data if target != 1]

  random.shuffle(buys)
  random.shuffle(sells)

  # Downsample the majority class so both classes are equally represented
  lower_class_count = min(len(buys), len(sells))
  sequential_data = buys[:lower_class_count] + sells[:lower_class_count]
  random.shuffle(sequential_data)

  # Separate the pairs into the feature list X and the target list y
  X = [sequence for sequence, _ in sequential_data]
  y = [target for _, target in sequential_data]

  return np.array(X), np.array(y)

In [0]:
def train_validation_split(df):
  """Split ``df`` by index value into a training part and a validation part.

  The threshold is the index value found ``int(VALIDATION_PERCENTAGE_SIZE * rows)``
  positions from the end of the sorted index. Rows at or above the threshold
  form the validation frame; rows below it form the training frame.

  Returns:
    Tuple ``(training_df, validation_df)``.
  """
  ordered_index = sorted(df.index.values)
  holdout_rows = int(VALIDATION_PERCENTAGE_SIZE * df.shape[0])
  split = ordered_index[-holdout_rows]

  validation_df = df[df.index >= split]
  training_df = df[df.index < split]
  return training_df, validation_df

In [0]:
def build_dataset():
  """Run the full data pipeline and return the model-ready arrays.

  Loads the data, attaches the target label, splits off the validation rows
  and preprocesses the training part first, then the validation part.

  Returns:
    Tuple ``(train_x, train_y, test_x, test_y)``.
  """
  labelled_df = get_target(get_data())
  training_df, validation_df = train_validation_split(labelled_df)

  train_x, train_y = preprocess(training_df)
  test_x, test_y = preprocess(validation_df)

  return train_x, train_y, test_x, test_y

In [0]:
def _build_model(input_shape, nodes_per_LSTM_layer, nodes_per_dense_layer, num_of_LSTM_layers, num_of_dense_layers):
  """Assemble one uncompiled stacked-LSTM classifier for a layer configuration."""
  model = Sequential()

  for layer_index in range(num_of_LSTM_layers):
    # Only the first layer declares the input shape.
    layer_kwargs = {"input_shape": input_shape} if layer_index == 0 else {}
    # Every LSTM except the last must return full sequences so the next LSTM
    # receives 3-D input; the last one returns only its final output. This
    # also makes a single-LSTM configuration valid.
    is_last_lstm = layer_index == num_of_LSTM_layers - 1
    model.add(LSTM(nodes_per_LSTM_layer, return_sequences=not is_last_lstm, **layer_kwargs))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())  #normalizes activation outputs

  for _ in range(num_of_dense_layers):
    model.add(Dense(nodes_per_dense_layer, activation='relu'))
    model.add(Dropout(0.2))

  # Two output units, one per class (0 = sell, 1 = buy)
  model.add(Dense(2, activation='softmax'))
  return model


def generate_models():
  """Train, evaluate and save one model per LSTM_MODEL_PARAMETERS combination.

  The dataset is built once and shared by every model. For each combination
  the model is trained for EPOCHS epochs with TensorBoard logging under
  ``logs/<model name>``, its validation loss and accuracy are printed, and it
  is saved to ``models/<model name>``.
  """
  from itertools import product

  train_x, train_y, test_x, test_y = build_dataset()

  # product() iterates in the same order as four nested loops would
  # (the last parameter varies fastest).
  parameter_grid = product(
    LSTM_MODEL_PARAMETERS["nodes_per_LSTM_layer"],
    LSTM_MODEL_PARAMETERS["nodes_per_dense_layer"],
    LSTM_MODEL_PARAMETERS["num_of_LSTM_layers"],
    LSTM_MODEL_PARAMETERS["num_of_dense_layers"],
  )

  for nodes_per_LSTM_layer, nodes_per_dense_layer, num_of_LSTM_layers, num_of_dense_layers in parameter_grid:
    model = _build_model(
      train_x.shape[1:],
      nodes_per_LSTM_layer,
      nodes_per_dense_layer,
      num_of_LSTM_layers,
      num_of_dense_layers,
    )

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    # NOTE(review): `decay` may be rejected by newer Keras optimizers - confirm
    # against the installed TensorFlow version.
    opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

    currrent_model_string = f"{NAME}-{nodes_per_LSTM_layer}-{nodes_per_dense_layer}-{num_of_LSTM_layers}-{num_of_dense_layers}"
    # `write_grads` and `batch_size` are ignored by the TF2 TensorBoard
    # callback, so they are not passed.
    tensorboard = TensorBoard(log_dir="logs/{}".format(currrent_model_string), histogram_freq=1, write_graph=True, write_images=True)

    model.compile(
      loss='sparse_categorical_crossentropy',
      optimizer=opt,
      metrics=['accuracy']
    )

    # Train model
    model.fit(
      train_x, train_y,
      batch_size=BATCH_SIZE,
      epochs=EPOCHS,
      validation_data=(test_x, test_y),
      callbacks=[tensorboard]
    )

    # Score model
    score = model.evaluate(test_x, test_y, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save model
    model.save("models/{}".format(currrent_model_string))


In [0]:
# Run the full hyperparameter sweep: trains, scores and saves one model per combination.
generate_models()

Train on 83384 samples, validate on 4112 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.6936686138698563
Test accuracy: 0.5464494
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models/ETH-USD-LSTM--128-32-2-1/assets
Train on 83384 samples, validate on 4112 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.6837433128969215
Test accuracy: 0.5671206
INFO:tensorflow:Assets written to: models/ETH-USD-LSTM--128-32-2-2/assets
Train on 83384 samples, validate on 4112 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.6854575587153898
Test accuracy: 0.56177044
INFO:tensorflow:Assets written to: models/ETH-USD-LSTM--128-32-3-1/assets
Train on 83384 samples, validate on 4112 samples
Ep