In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import pandas as pd
from sklearn import preprocessing
import random
import numpy as np
from collections import deque
import time

In [None]:
# Load the LTC-USD file on its own for a quick look. Column names are supplied
# explicitly, so the first row of the CSV is read as data rather than a header.
df = pd.read_csv(
    '../data/crypto_data/LTC-USD.csv',
    names=['time', 'low', 'high', 'open', 'close', 'volume'],
)

In [None]:
# Preview the first rows of the frame currently bound to `df`.
df.head()

In [None]:
# Experiment configuration.
SEQ_LEN = 60                  # number of consecutive rows in one input sequence
FUTURE_PERIOD_PREDICT = 3     # how many rows ahead the 'future' price is taken from
RATIO_TO_PREDICT = "LTC-USD"  # pair whose '<pair>_close' column is being predicted
EPOCHS = 3
BATCH_SIZE = 64

# Run identifier used for the TensorBoard log directory; the trailing Unix
# timestamp makes each run's name distinct.
NAME = f"{SEQ_LEN}_SEQ_{FUTURE_PERIOD_PREDICT}_PRED_{int(time.time())}"

In [None]:
# Empty placeholder; the loading loop in the next cell rebinds main_df to the
# merged per-pair close/volume data.
main_df = pd.DataFrame()

In [None]:
fileNames = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']

# Accumulate into a local variable instead of testing `len(main_df) == 0`:
# the cell then gives the same result every time it is run, rather than
# trying to join duplicate columns onto a main_df left over from an earlier run.
merged_df = None

for fileName in fileNames:
    dataPath = '../data/crypto_data/{}.csv'.format(fileName)
    df = pd.read_csv(dataPath, names = ['time', 'low', 'high', 'open', 'close', 'volume'])

    # Prefix close/volume with the pair name so columns stay distinct after joining.
    df = df.rename(columns = {'close': '{}_close'.format(fileName), 'volume': '{}_volume'.format(fileName)})
    df = df.set_index('time')
    df = df[['{}_close'.format(fileName), '{}_volume'.format(fileName)]]

    # DataFrame.join defaults to a left join on the index, so the merged frame
    # keeps the 'time' index of the first pair in fileNames.
    merged_df = df if merged_df is None else merged_df.join(df)

main_df = merged_df

In [None]:
# Show the merged column names (the loader creates one '<pair>_close' and one
# '<pair>_volume' column per pair).
main_df.columns

In [None]:
# Look-ahead price: the tracked pair's close FUTURE_PERIOD_PREDICT rows further
# down the frame. The final FUTURE_PERIOD_PREDICT rows have no such row and get NaN.
main_df['future'] = main_df[f'{RATIO_TO_PREDICT}_close'].shift(periods=-FUTURE_PERIOD_PREDICT)

In [None]:
# Inspect main_df now that the 'future' column has been added.
main_df.head()

In [None]:
def classify(current, future):
    """Label a price move: 1 if `future` is strictly above `current`, else 0.

    Both arguments are converted with float() before comparing, so numeric
    strings are accepted. A NaN on either side makes the comparison False
    and therefore yields 0.
    """
    return 1 if float(future) > float(current) else 0
    
main_df['target'] = list(map(classify, main_df['{}_close'.format(RATIO_TO_PREDICT)], main_df['future']))

# Rows with no look-ahead price (NaN 'future', e.g. the last
# FUTURE_PERIOD_PREDICT rows produced by the shift) cannot be labelled:
# classify() returns 0 for them because any comparison with NaN is False.
# Drop them so they are not used as spurious "don't buy" examples.
main_df = main_df.dropna(subset = ['future'])

In [None]:
# Inspect main_df now that the 'target' column has been added.
main_df.head()

In [None]:
# Out-of-sample split: the last 5% of the sorted index values become the
# validation set, everything before the cut-off stays in main_df for training.
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]  # first index value belonging to validation

validation_main_df = main_df.loc[main_df.index >= last_5pct]
main_df = main_df.loc[main_df.index < last_5pct]

In [None]:
def preprocess_df(df):
    """Turn a labelled price frame into balanced, shuffled sequence data.

    Steps:
      1. Drop the look-ahead 'future' column so it is not used as a feature.
      2. Replace each feature column (every column except 'target') with its
         percent change and standardise it with ``preprocessing.scale``.
         Rows containing NaN are dropped along the way.
      3. Slide a window of ``SEQ_LEN`` consecutive rows over the frame; each
         full window is paired with the 'target' of its last row.
      4. Truncate the larger class so targets 0 and 1 are equally represented,
         then shuffle the combined samples.

    Args:
        df: DataFrame with a 'future' column, a 'target' column of 0/1 labels,
            and numeric feature columns.

    Returns:
        Tuple ``(X, y)``. ``X`` is a NumPy array stacking the selected windows
        (shape ``(n_samples, SEQ_LEN, n_features)`` when at least one window is
        produced) and ``y`` is a plain list of the matching labels.
    """
    df = df.drop('future', axis = 1)
    feature_cols = [col for col in df.columns if col != 'target']

    for col in feature_cols:
        df[col] = df[col].pct_change()
        # pct_change produces +/-inf when the previous value is 0. dropna()
        # alone would let those values through to preprocessing.scale, so
        # mark them as missing first.
        df[col] = df[col].replace([np.inf, -np.inf], np.nan)
        df = df.dropna()
        df[col] = preprocessing.scale(df[col].values)

    df = df.dropna()

    # Select features and label by name rather than by position, so the result
    # does not depend on 'target' being the last column.
    feature_rows = df[feature_cols].values
    targets = df['target'].values

    window = deque(maxlen = SEQ_LEN)  # rolling buffer of the most recent rows
    buys = []
    sells = []
    for features, target in zip(feature_rows, targets):
        window.append(features)
        if len(window) < SEQ_LEN:
            continue  # not enough history yet for a full sequence
        sample = [np.array(window), target]
        if target == 0:
            sells.append(sample)
        elif target == 1:
            buys.append(sample)

    # Shuffle each class before truncating so the discarded surplus is random.
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    # Mix the two classes so they are not presented in two contiguous blocks.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y


# Build the model inputs for the training and validation frames.
train_x, train_y = preprocess_df(main_df)
val_x, val_y = preprocess_df(validation_main_df)

# Report sample counts and the class balance of each split.
for label, count in (
    ('train data: ', len(train_x)),
    ('validation data: ', len(val_x)),
    ('Dont buys: ', train_y.count(0)),
    ('Buys: ', train_y.count(1)),
    ('Validation dont buys: ', val_y.count(0)),
    ('Validation buys: ', val_y.count(1)),
):
    print(label, count)

In [None]:
# Three stacked LSTM layers followed by a small dense head with a 2-way softmax.
model = Sequential()
# Only the first layer needs input_shape; later layers take their input shape
# from the preceding layer's output.
model.add(LSTM(128, activation = 'tanh', input_shape = train_x.shape[1:], return_sequences = True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, activation = 'tanh', return_sequences = True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, which feeds the dense head.
model.add(LSTM(128, activation = 'tanh'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation = 'softmax'))

opt = tf.keras.optimizers.Adam(lr = 0.001, decay = 1e-6)

# Labels are integer class ids (0/1), hence the sparse categorical loss.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = opt, metrics = ['accuracy'])

tensorboard = TensorBoard(log_dir = f'logs/{NAME}')

# Checkpoint name template: Keras fills in {epoch} and {val_acc} from the epoch
# number and the logged metrics each time it saves.
# NOTE(review): the logged key for metrics=['accuracy'] differs between TF
# versions ('val_acc' vs 'val_accuracy') - confirm it matches the installed one.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# monitor/verbose/save_best_only/mode must be arguments of ModelCheckpoint.
# They were previously passed to str.format() by a misplaced parenthesis and
# silently ignored, so "save only the best" never took effect.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor = 'val_acc',
    verbose = 1,
    save_best_only = True,
    mode = 'max',
)

# preprocess_df returns labels as plain Python lists; pass them as arrays so the
# inputs and labels reach fit() as the same container type.
history = model.fit(
    train_x, np.asarray(train_y),
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    validation_data = (val_x, np.asarray(val_y)),
    callbacks = [tensorboard, checkpoint],
)

In [None]:
# Optional cleanup: uncomment to delete the TensorBoard log directory ('logs/').
# import shutil
# shutil.rmtree('logs/')