In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
cd /content/drive/MyDrive/RNN_Cryptocurrency-predicting

/content/drive/MyDrive/RNN_Cryptocurrency-predicting


In [25]:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.compat.v1.keras.layers import CuDNNLSTM 
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

In [21]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

def classify(current, future):
  if float(future) > float(current):
    return 1
  else:
    return 0

def preprocess_df(df):
  df = df.drop('future', 1)  #axis=1 means "column", default axis is 0
  for col in df.columns: 
    if col != "target":
      df[col] = df[col].pct_change()
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values) #for balance,  X-Meam/std for every column and then every column add up with 100%
  df.dropna(inplace=True)
  
  sequential_data = []  #empty list
  prev_days = deque(maxlen=SEQ_LEN)
  
  for i in df.values:  #df.values show every entry of col(col by col) which not including the index
    prev_days.append([n for n in i[:-1]])
    if len(prev_days) == SEQ_LEN:
      sequential_data.append([np.array(prev_days), i[-1]])  #store all but the target
  
  random.shuffle(sequential_data)
  buys = []
  sells = []
  
  for seq, target in sequential_data:
    if target == 0:
      sells.append([seq, target])
    elif target == 1:
      buys.append([seq, target])
  #Maybe we don't need following shuffle, but shuffle for good measure
  random.shuffle(buys)
  random.shuffle(sells)

  lower = min(len(buys), len(sells))

  buys = buys[:lower]
  sells = sells[:lower]

  sequential_data = buys + sells

  random.shuffle(sequential_data)

  x = []
  y = []

  for seq, target in sequential_data:
    x.append(seq)
    y.append(target)
  
  return np.array(x), y


df = pd.read_csv("crypto_data/LTC-USD.csv", names=["time", "low", "high", "open", "close", "volume"])

main_df = pd.DataFrame() #A new empty dataframe
#print(main_df)
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
  dataset = f"crypto_data/{ratio}.csv"
  df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
  #print(df.head()) 
  df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True) #Set inplace=True such that don't neet to define new dataframe
  df.set_index("time", inplace=True) #Set "time" column as index
  df = df[[f"{ratio}_close", f"{ratio}_volume"]]
  #print(df.head())
  if len(main_df) == 0:
    main_df = df
  else:
    main_df = main_df.join(df)

main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)

main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))
#print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(10))

times = sorted(main_df.index.values)
last_5pct = times[-int(0.05*len(times))]
#print(last_5pct)

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)



In [22]:
print(train_x.shape)
print(len(train_y))

(69188, 60, 8)
69188


In [23]:
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 69188 validation: 3062
Dont buys: 34594, buys: 34594
VALIDATION Dont buys: 1531, buys: 1531


In [28]:
model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))


es = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=5)
mc = ModelCheckpoint('models/RNN_Cryptocurrency.h5')
callbacks_list = [mc, es]
train_y = np.array(train_y)
validation_y = np.array(validation_y)

history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
