In [1]:
import pandas as pd
import os
import tensorflow as tf
from sklearn import preprocessing
from collections import deque
import numpy as np 
import random
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import time
 

print(tf.__version__)

main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
for ratio in ratios:
    dataset = f"D:/projects/dataset/crypto_data/{ratio}.csv"

    df = pd.read_csv(dataset, names = ["time", "low", "high", "open", "close", "volume"])

    df.rename(columns={"close":f"{ratio}_close", "volume":f"{ratio}_volume"}, inplace=True)

    df.set_index("time", inplace = True)

    df = df[[f"{ratio}_close", f"{ratio}_volume"]]

    if len(main_df)==0:
        main_df = df
    else:
        main_df=main_df.join(df)

    print(main_df.head())


2.2.0
            BTC-USD_close  BTC-USD_volume
time                                     
1528968660    6489.549805        0.587100
1528968720    6487.379883        7.706374
1528968780    6479.410156        3.088252
1528968840    6479.410156        1.404100
1528968900    6479.979980        0.753000
            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume
time                                                                    
1528968660    6489.549805        0.587100      96.580002        9.647200
1528968720    6487.379883        7.706374      96.660004      314.387024
1528968780    6479.410156        3.088252      96.570000       77.129799
1528968840    6479.410156        1.404100      96.500000        7.216067
1528968900    6479.979980        0.753000      96.389999      524.539978
            BTC-USD_close  BTC-USD_volume  LTC-USD_close  LTC-USD_volume  \
time                                                                       
1528968660    6489.549805        0.58

In [2]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3 #3MINUTES INTO THE FUTURE
RATIO_TO_PREDICT = "LTC-USD"
EPOCHS = 32
BATCH_SIZE = 64



main_df['future'] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)



def classify(current,future):
    if float(future)>float(current):
        return 1
    else:
        return 0

main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))

# print(main_df[[f"{RATIO_TO_PREDICT}_close","future", "target"]].head(10))

def preprocess_df(df):
    df.drop('future', 1)
    for col in df.columns:
        if col != "target":
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)#updates data to the latest 60min batch
    for i in df.values:
        prev_days.append([n for n in i[:-1]]) #all columns except target
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), i[-1]]) #Xandy

    random.shuffle(sequential_data)

#balancing the data

    buys = []
    sells = []

    for seq , target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])
    
    random.shuffle(buys)
    random.shuffle(sells)

    lower =  min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X), np.array(y)
    


In [3]:
times = sorted(main_df.index.values)
# print(times)
last_20pct = times[int(len(times)-1)]
last_5pct = times[int(0.80*len(times))] #separating last 5pct to test against the model as taking random 5% cannot lead to overfitting
threshold = times[int(0.75*len(times))]
print(last_20pct)


test_data = main_df[np.logical_and(main_df.index > last_5pct, main_df.index <= last_20pct)]
validation_main_df = main_df[np.logical_and(main_df.index > threshold,main_df.index <= last_5pct)] #timestamp after last 5%
main_df =  main_df[(main_df.index <= threshold)]

# print(test_data[-1:])
# # print(main_df[-1:])
# # print(validation_main_df[0:])

1535215200


In [4]:
train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)
test_x, test_y = preprocess_df(test_data)

uniquet, countst = np.unique(train_y, return_counts=True)
uniquev, countsv = np.unique(train_y, return_counts=True)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {countst[0]}, buys: {countst[1]}")
print(f"VALIDATION Dont buys: {countsv[0]}, buys: {countsv[1]}")

train data: 53960 validation: 3730
Dont buys: 26980, buys: 26980
VALIDATION Dont buys: 26980, buys: 26980


In [5]:
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(lr=0.001, decay = 1e-6)

model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])

tensorboard = TensorBoard(log_dir = f"logs/{NAME}")

filepath = "RNN_Final-{epoch:02d}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

history = model.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=EPOCHS,validation_data=(validation_x, validation_y), callbacks=[tensorboard, checkpoint])




Epoch 1/32
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models/RNN_Final-01.model\assets
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [10]:

CATEGORIES = ["Sell", "Buy"]
model = tf.keras.models.load_model("D:/projects/rnn/models/RNN_Final-32.model")

prediction = model.predict(test_x)
for i in range(len(prediction)):
    predictionfinal = max(prediction[i])
    print("Prediction: "+CATEGORIES[int(predictionfinal)],"Answer: "+CATEGORIES[int(test_y[i])])


 Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Buy
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Buy Answer: Buy
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Sell
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Sell
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Buy
Prediction: Buy Answer: Buy
Prediction: Buy Answer: Buy
Prediction: Sell Answer: Buy
Prediction: Sell Answer: Sell


In [15]:
loss, accuracy = model.evaluate(test_x, test_y)
print("Accuracy: " +str(accuracy*100))
print("Loss: " +str(loss*100))

Accuracy: 96.44533395767212
Loss: 11.565238982439041
