<a href="https://colab.research.google.com/github/xayron/ML-examples/blob/master/Cryptocurrency_using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook follows the tutorial at https://pythonprogramming.net/crypto-rnn-model-deep-learning-python-tensorflow-keras/.

In [0]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [0]:
# --- Run configuration ------------------------------------------------------
# Rows per input window; used as the deque length in preprocess_df.
SEQ_LEN = 60
# How many rows ahead the "future" close price is taken from.
FUTURE_PERIOD_PREDICT = 3
# Pair whose "<pair>_close" column is used to build the label.
RATIO_TO_PREDICT = "LTC-USD"

# --- Training settings (passed to model.fit) --------------------------------
EPOCHS = 10
BATCH_SIZE = 64

# Run identifier; the Unix timestamp keeps each run's TensorBoard log directory distinct.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [0]:
def classify(current, future):
  """Label one row: 1 if the future price is strictly above the current one, else 0.

  Both arguments are converted with ``float`` before the comparison, so
  numeric strings are accepted. A NaN on either side compares False and
  therefore yields 0.
  """
  return 1 if float(future) > float(current) else 0

In [0]:
def preprocess_df(df):
  """Turn a price/volume frame into balanced, shuffled sequences for training.

  Args:
    df: DataFrame with feature columns, a ``future`` column and a ``target``
      column of 0/1 labels. Once ``future`` is removed, ``target`` must be
      the last column: every row is split positionally into features (all
      values but the last) and label (the last value).

  Returns:
    A tuple ``(x, y)``. ``x`` is a NumPy array of windows, each made of
    ``SEQ_LEN`` consecutive rows of scaled features; ``y`` is a plain list
    holding the label of the last row of each window. Both classes occur
    equally often in ``y``.

  Raises:
    KeyError: if ``df`` has no ``future`` column.
  """
  # A missing future price compares False against any current price, so such
  # rows carry a 0 label that says nothing about the market. Discard them
  # while the helper column is still available.
  df = df.dropna(subset=['future'])
  # Keyword form: passing the axis positionally is rejected by pandas >= 2.0.
  # drop() returns a new frame, so the caller's frame is not modified below.
  df = df.drop(columns='future')

  for col in df.columns:
    if col != "target":
      # Work on relative changes so differently priced assets are comparable.
      # A change measured from a zero value is +/-inf; treat it as missing so
      # the row is dropped instead of reaching the scaler.
      df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values)

  df.dropna(inplace=True)

  # Slide a fixed-length window over the rows; the deque discards the oldest
  # row automatically once it is full.
  sequential_data = []
  prev_days = deque(maxlen=SEQ_LEN)

  for row in df.values:
    prev_days.append(list(row[:-1]))
    if len(prev_days) == SEQ_LEN:
      sequential_data.append([np.array(prev_days), row[-1]])

  # Split by label. Shuffling each class before truncating makes the
  # discarded surplus of the majority class a random subset.
  buys = [pair for pair in sequential_data if pair[1] == 1]
  sells = [pair for pair in sequential_data if pair[1] == 0]

  random.shuffle(buys)
  random.shuffle(sells)

  lower = min(len(buys), len(sells))

  # Equal class counts, then mix so the classes are not in two solid runs.
  sequential_data = buys[:lower] + sells[:lower]
  random.shuffle(sequential_data)

  x = [seq for seq, _ in sequential_data]
  y = [target for _, target in sequential_data]

  return np.array(x), y

In [5]:
# Quick look at one raw file; column names are supplied explicitly via `names`.
df = pd.read_csv(
    "crypto_data/LTC-USD.csv",
    names=["time", "low", "high", "open", "close", "volume"],
)
df.head()

Unnamed: 0,time,low,high,open,close,volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


In [0]:
# Combine close price and volume of every pair into one frame indexed by time.
main_df = pd.DataFrame()
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", 'BCH-USD']
for ratio in ratios:
  dataset = f"crypto_data/{ratio}.csv"

  # Load the pair and index it by timestamp.
  df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"]).set_index("time")
  # Keep only close and volume, prefixed with the pair name so the columns
  # of different pairs do not collide after joining.
  df = df[["close", "volume"]].rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"})

  # The first pair seeds the frame; later pairs are joined onto its index.
  main_df = df if len(main_df) == 0 else main_df.join(df)

In [7]:
# "future" is the predicted pair's close price FUTURE_PERIOD_PREDICT rows ahead;
# "target" is 1 when that future price is above the current close, else 0.
close_col = f"{RATIO_TO_PREDICT}_close"
main_df['future'] = main_df[close_col].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = [
    classify(now, later)
    for now, later in zip(main_df[close_col], main_df["future"])
]
print(main_df[[close_col, "future", "target"]].head())

            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1


In [8]:
# Timestamp that marks the start of the final 5% of rows (the validation span).
times = sorted(main_df.index.values)
validation_rows = int(0.05*len(times))
last_5pct = times[-validation_rows]
print(last_5pct)

1534922100


In [9]:
# Chronological split: rows at or after the cutoff are held out for validation.
# Both masks are computed before main_df is rebound to the training portion.
in_validation = main_df.index >= last_5pct
in_training = main_df.index < last_5pct
validation_main_df = main_df.loc[in_validation]
main_df = main_df.loc[in_training]

for frame in (validation_main_df, main_df):
  print(frame.head())

            BTC-USD_close  BTC-USD_volume  ...     future  target
time                                       ...                   
1534922100    6684.500000        0.969366  ...  57.509998       0
1534922160    6684.500000        0.611018  ...  57.509998       0
1534922220    6682.740234        1.121768  ...  57.509998       0
1534922280    6682.660156        0.912729  ...  57.500000       0
1534922340    6682.450195        0.334119  ...  57.509998       0

[5 rows x 10 columns]
            BTC-USD_close  BTC-USD_volume  ...     future  target
time                                       ...                   
1528968660    6489.549805        0.587100  ...  96.500000       0
1528968720    6487.379883        7.706374  ...  96.389999       0
1528968780    6479.410156        3.088252  ...  96.519997       0
1528968840    6479.410156        1.404100  ...  96.440002       0
1528968900    6479.979980        0.753000  ...  96.470001       1

[5 rows x 10 columns]


In [0]:
# Build sequences for both splits; the training frame is processed first.
(train_x, train_y), (validation_x, validation_y) = [
    preprocess_df(split) for split in (main_df, validation_main_df)
]

In [11]:
# Sample counts and class balance for both splits, one line each.
print(
    f"train data: {len(train_x)} validation: {len(validation_x)}",
    f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}",
    f"VALIDATION Don't buys: {validation_y.count(0)}, buys: {validation_y.count(1)}",
    sep="\n",
)

train data: 69188 validation: 3062
Dont buys: 34594, buys: 34594
VALIDATION Don't buys: 1531, buys: 1531


In [0]:
# First recurrent block. Each window has shape train_x.shape[1:].
# The standard LSTM layer is used instead of the compat.v1 CuDNNLSTM: with its
# default arguments it selects the cuDNN kernel on a GPU and still runs on CPU,
# whereas CuDNNLSTM only works when a GPU is present.
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

In [0]:
# Second recurrent block; return_sequences=True keeps the full sequence for
# the next recurrent layer. Standard LSTM (cuDNN-backed on GPU with default
# arguments, CPU-capable) replaces the GPU-only compat.v1 CuDNNLSTM.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

In [0]:
# Last recurrent block; without return_sequences it emits only the final step
# for the dense head. Standard LSTM (cuDNN-backed on GPU with default
# arguments, CPU-capable) replaces the GPU-only compat.v1 CuDNNLSTM.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

In [0]:
# Fully connected layer on top of the recurrent stack, followed by dropout.
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(rate=0.2))

In [0]:
# Output layer: one softmax probability per class (0 and 1).
model.add(Dense(units=2, activation='softmax'))

In [0]:
# `lr=` is a deprecated alias and `decay=` is rejected by the current Keras
# optimizers. The legacy decay meant lr / (1 + decay * iterations), which is
# exactly an inverse-time schedule evaluated at every step (decay_steps=1).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [0]:
# Sparse loss: labels are plain class indices, not one-hot vectors.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# TensorBoard callback; every run logs into its own directory named after NAME.
tensorboard = TensorBoard(
    log_dir=f'log/{NAME}',
)

In [0]:
# The {epoch} and {val_loss} placeholders are left in the path on purpose:
# str.format only fills the bare {} below, and Keras fills the rest on save.
filepath = "RNN_Final-{epoch:02d}-{val_loss:.3f}"
# The options belong to ModelCheckpoint, not to str.format (where they were
# silently ignored). val_loss is better when lower, so the mode is 'min'.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

In [0]:
# Convert every split to a NumPy array before fitting (the labels come back
# from preprocess_df as plain lists).
train_x, train_y, validation_x, validation_y = [
    np.asarray(part)
    for part in (train_x, train_y, validation_x, validation_y)
]

In [40]:
# Train and keep the per-epoch metrics in `history`.
# Note: callbacks=[tensorboard, checkpoint] is not passed here, so this run
# neither writes TensorBoard logs nor saves checkpoints.
history = model.fit(
    x=train_x,
    y=train_y,
    validation_data=(validation_x, validation_y),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
