In [12]:
import pandas as pd
import numpy as np
import random
from sklearn import preprocessing
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

from collections import deque
import time

In [5]:
# Load the LTC-USD candles, supplying the column names explicitly, and preview the first rows.
df = pd.read_csv(
    "LTC-USD.csv",
    names=["time", "low", "high", "open", "close", "volume"],
)
df.head()

Unnamed: 0,time,low,high,open,close,volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


In [6]:
# Experiment configuration.
SEQ_LEN = 60  # length of each input window: data from the preceding 60 minutes
FUT_PERIOD_PREDICT = 3  # prediction horizon: how many minutes into the future
RATIO_PREDICT = "LTC-USD"  # pair whose close price is used to build the target
EPOCHS = 10  # number of training epochs
BATCH_SIZE = 64  # batch size passed to model.fit
# Run label: window length, horizon and the current Unix timestamp.
NAME = f"{SEQ_LEN}-SEQ-{FUT_PERIOD_PREDICT}-PRED-{int(time.time())}"


def classify(current, future):
    """Label a price move: 1 when the future price is above the current one, else 0.

    Both arguments are coerced with ``float`` before comparing, so numeric
    strings are accepted. A label of 1 means the price rose (good for buying);
    equal prices, falls, and NaN comparisons all yield 0.
    """
    price_rises = float(future) > float(current)
    return int(price_rises)
    
def df_preprocess(df):
    """Turn a price/volume frame into balanced, shuffled training sequences.

    Steps:
      1. Drop the look-ahead ``future`` column so it is not used as a feature.
      2. Replace every column except ``target`` with its percent change,
         standardised via ``preprocessing.scale``.
      3. Slide a window of ``SEQ_LEN`` consecutive rows over the frame and pair
         each full window with the ``target`` value of the window's last row.
      4. Balance the two classes by truncating the larger group to the size of
         the smaller one, then shuffle.

    Args:
        df: DataFrame with a ``future`` column, feature columns and a
            ``target`` column. ``target`` must be the LAST column, because the
            label is read positionally from each row.

    Returns:
        tuple: ``(x, y)`` where ``x`` is ``np.array`` of the windows (each
        window holds ``SEQ_LEN`` rows of feature values) and ``y`` is a plain
        Python list of the matching targets.
    """
    # Keyword form: the positional axis argument of ``drop("future", 1)`` was
    # removed in pandas 2.0 and raises TypeError there.
    df = df.drop(columns="future")

    for column in df.columns:
        if column != "target":
            df[column] = df[column].pct_change()
            # pct_change leaves NaN in the first row; drop it before scaling.
            df.dropna(inplace=True)
            df[column] = preprocessing.scale(df[column].values)

    df.dropna(inplace=True)

    # Rolling window: the deque discards the oldest row once SEQ_LEN is reached.
    sequential_data = []
    window = deque(maxlen=SEQ_LEN)

    for row in df.values:
        window.append(list(row[:-1]))  # every value except the trailing target
        if len(window) == SEQ_LEN:
            sequential_data.append([np.array(window), row[-1]])

    random.shuffle(sequential_data)

    # Split by class; rows whose target is neither 0 nor 1 are left out.
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    # Truncate both classes to the size of the smaller one.
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    # Re-shuffle so the samples are not ordered as all buys followed by all sells.
    random.shuffle(sequential_data)

    x = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(x), y
    
    


In [7]:
# Build one wide frame, indexed by timestamp, holding the close price and
# volume of every pair listed in ``cryptos``.
master_df = pd.DataFrame()
cryptos = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
for crypto in cryptos:
    dataset = f"{crypto}.csv"
    df = pd.read_csv(dataset, names=["time","low","high","open","close","volume"])
    # Prefix the columns with the pair name so they stay distinct after joining.
    df = df.rename(columns={"close": f"{crypto}_close", "volume": f"{crypto}_volume"})
    df = df.set_index("time")
    df = df[[f"{crypto}_close",f"{crypto}_volume"]]

    if len(master_df) == 0:
        master_df = df
    else:
        master_df = master_df.join(df)

# Price of the predicted pair FUT_PERIOD_PREDICT rows ahead of each row.
master_df["future"] = master_df[f"{RATIO_PREDICT}_close"].shift(-FUT_PERIOD_PREDICT)

master_df["target"] = list(map(classify, master_df[f"{RATIO_PREDICT}_close"], master_df["future"]))

# Rows with no future price (e.g. the final FUT_PERIOD_PREDICT rows produced by
# the shift) have no real label: comparing against NaN makes classify return 0.
# Drop them so these spurious 0 labels never reach training or validation.
master_df = master_df[master_df["future"].notna()]

master_df[[f"{RATIO_PREDICT}_close", "future", "target"]].head(10)


Unnamed: 0_level_0,LTC-USD_close,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1528968660,96.580002,96.5,0
1528968720,96.660004,96.389999,0
1528968780,96.57,96.519997,0
1528968840,96.5,96.440002,0
1528968900,96.389999,96.470001,1
1528968960,96.519997,96.400002,0
1528969020,96.440002,96.400002,0
1528969080,96.470001,96.400002,0
1528969140,96.400002,96.400002,0
1528969200,96.400002,96.400002,0


In [10]:
# Chronological hold-out: rows at or after the boundary timestamp (the most
# recent ~5% of the index) become validation data, everything earlier is training.
times = sorted(master_df.index.values)
last_5pct_bound = times[-int(0.05*len(times))]

validation_master_df = master_df[(master_df.index >= last_5pct_bound)]
# NOTE: master_df is narrowed to the training slice here, so re-running this
# cell without rebuilding master_df carves off a further 5%.
master_df = master_df[(master_df.index < last_5pct_bound)]

# Preprocess each split exactly once (a previous stray call whose result was
# discarded doubled the preprocessing work for no effect).
x_train, y_train = df_preprocess(master_df)
x_valid, y_valid = df_preprocess(validation_master_df)

print(f"train data: {len(x_train)} validation: {len(x_valid)}")
print(f"Buy_No: {y_train.count(0)} Buy_Yes: {y_train.count(1)}")
print(f"VALIDATION: Buy_No: {y_valid.count(0)} Buy_Yes: {y_valid.count(1)}")


train data: 65962 validation: 3174
Buy_No: 32981 Buy_Yes: 32981
VALIDATION: Buy_No: 1587 Buy_Yes: 1587


In [9]:
# Three stacked LSTM layers followed by a small dense head with a 2-way softmax.
model = Sequential()
# Only the first layer needs input_shape; it is taken from one training sample.
model.add(LSTM(128, input_shape=(x_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, feeding the dense head.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# The call was previously left unclosed, which made the whole cell a SyntaxError.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)

tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

# The {epoch} and {val_acc} placeholders are filled in by ModelCheckpoint on save.
# NOTE(review): 'val_acc' is the key logged by older tf.keras releases; newer
# ones report 'val_accuracy' - confirm against the installed version.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# monitor/verbose/save_best_only/mode belong to ModelCheckpoint; they used to be
# passed to str.format by mistake, where they were silently ignored.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    mode='max',
)

# df_preprocess returns the labels as plain Python lists; convert them to arrays
# so Keras treats each as a single target tensor rather than a list of outputs.
history = model.fit(
    x_train, np.asarray(y_train),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_valid, np.asarray(y_valid)),
    callbacks=[tensorboard, checkpoint],
)
              
