In [None]:
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random

In [None]:
import pandas as pd
# Load one pair on its own for a quick look at the raw layout.
# Column labels are supplied via names= rather than read from the file.
ohlcv_columns = ['time', 'low', 'high', 'open', 'close', 'volume']
df = pd.read_csv('crypto_data/LTC-USD.csv', names=ohlcv_columns)

In [None]:
# Show the first rows of the frame loaded above as a sanity check.
print(df.head())

In [None]:
# Merge the close price and volume of all four pairs into one frame keyed by 'time'.
main_df = pd.DataFrame()
ratios = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']
for ratio in ratios:
    dataset = f'crypto_data/{ratio}.csv'

    # Column labels are supplied via names= rather than read from the file.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])

    # Prefix the two kept columns with the pair name so they stay distinct
    # once every pair sits side by side in main_df.
    df = df.rename(columns={'close': f'{ratio}_close', 'volume': f'{ratio}_volume'})
    df = df.set_index('time')
    df = df[[f'{ratio}_close', f'{ratio}_volume']]

    if len(main_df) == 0:
        main_df = df
    else:
        # Left join on the 'time' index of the frame built so far.
        main_df = main_df.join(df)

# fillna(method="ffill") is deprecated in recent pandas; ffill() is the
# equivalent call. Gaps take the previously known value, then any row that
# still holds a NaN (nothing earlier to copy from) is removed.
main_df = main_df.ffill().dropna()
        
# Number of consecutive rows that make up one input sequence
# (used as the deque length in preprocess_df).
SEQ_LEN = 60
# How many rows ahead the 'future' price is taken from (used as the shift amount).
FUTURE_PERIOD_PREDICT = 3
# Pair whose '<pair>_close' column is used to build 'future' and 'target'.
RATIO_TO_PREDICT = 'LTC-USD'

def classify(current,future):
    """Return 1 when ``future`` is strictly greater than ``current``, else 0.

    Both arguments are passed through float() before the comparison, so
    numeric strings are accepted. A NaN on either side compares False and
    therefore yields 0.
    """
    rose = float(future) > float(current)
    return 1 if rose else 0
        
    
target_close = f'{RATIO_TO_PREDICT}_close'

# 'future' holds the close price FUTURE_PERIOD_PREDICT rows further down;
# the last rows have nothing to look at and end up NaN.
main_df['future'] = main_df[target_close].shift(-FUTURE_PERIOD_PREDICT)

print(main_df[[target_close, 'future']].head())

# 1 when the later price is above the current one, otherwise 0.
main_df['target'] = [
    classify(now, later)
    for now, later in zip(main_df[target_close], main_df['future'])
]

print(main_df[[target_close, 'future', 'target']].head(10))


In [None]:
# Find the index value that marks the start of the final 5% of rows.
times = sorted(main_df.index.values)
holdout_count = int(0.05 * len(times))
last_5pct = times[-holdout_count]
print(last_5pct)

In [None]:
# Split by index value: everything from last_5pct onwards is held out for
# validation, everything before it stays in main_df for training.
in_validation = main_df.index >= last_5pct
in_training = main_df.index < last_5pct
validation_main_df = main_df.loc[in_validation]
main_df = main_df.loc[in_training]

In [None]:
def preprocess_df(df):
    """Convert a merged price/volume frame into balanced, shuffled sequences.

    Steps: discard rows without a known look-ahead price together with the
    'future' helper column, turn every feature column into scaled percentage
    changes, slide a SEQ_LEN-row window over the rows, then keep an equal
    number of target == 1 and target == 0 windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns plus 'future' and 'target'.
        The caller's frame is not modified.

    Returns
    -------
    tuple
        ``(X, y)``. ``X`` is a numpy array of windows, each SEQ_LEN rows by
        number-of-feature-columns. ``y`` is a plain list holding, for every
        window, the target of its last row.
    """
    # Rows whose 'future' is NaN never had a real comparison behind their
    # target, so remove them before the helper column goes away.
    # drop(columns=...) replaces the positional axis argument, which
    # pandas 2.x no longer accepts.
    df = df.dropna(subset=['future']).drop(columns='future')

    feature_cols = [col for col in df.columns if col != 'target']
    for col in feature_cols:
        # A zero in the previous row makes pct_change produce +/-inf, which
        # preprocessing.scale rejects; treat those like missing values.
        df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
        # df is a local frame at this point, so in-place dropping is safe.
        df.dropna(inplace=True)
        df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    # Put 'target' explicitly last so the slicing below does not depend on
    # the column order of the incoming frame.
    rows = df[feature_cols + ['target']].values

    sequential_data = []
    window = deque(maxlen=SEQ_LEN)
    for row in rows:
        window.append(list(row[:-1]))
        # Emit a sample only once the window is full; the label is the
        # target of the newest row in the window.
        if len(window) == SEQ_LEN:
            sequential_data.append([np.array(window), row[-1]])

    buys = [sample for sample in sequential_data if sample[1] == 1]
    sells = [sample for sample in sequential_data if sample[1] == 0]

    # Shuffle each class before truncating so the discarded surplus is random.
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    balanced = buys[:lower] + sells[:lower]

    # Mix the two classes so batches are not all-buys followed by all-sells.
    random.shuffle(balanced)

    X = [seq for seq, _ in balanced]
    y = [target for _, target in balanced]

    return np.array(X), y

In [None]:
# Build the sequence arrays and label lists for the training and the
# held-out validation frames with the same preprocessing routine.
train_X,train_y = preprocess_df(main_df)
validation_X, validation_y = preprocess_df(validation_main_df)

In [None]:
# Report sample counts and class balance.
# NOTE: .count() is a list method, so this cell must run while train_y and
# validation_y are still the lists returned by preprocess_df, i.e. before
# they are converted to numpy arrays further down.
print(f"train data: {len(train_X)} validation: {len(validation_X)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

In [None]:
import time
# Number of passes over the training data handed to model.fit.
EPOCHS = 3
# Samples per gradient update handed to model.fit.
BATCH_SIZE = 64
# Run identifier built from the sequence settings plus the current Unix time
# in whole seconds; used for the TensorBoard log directory and the saved model.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Three stacked LSTM layers followed by a small dense head with two output classes.
model = Sequential()
# Only the first layer needs input_shape: (rows per sequence, features per row).
model.add(LSTM(128, input_shape=train_X.shape[1:], return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# The last recurrent layer must return only its final output. With
# return_sequences=True the Dense head would emit one prediction per timestep,
# which does not match the single label supplied per sequence.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

# `lr` is a deprecated alias; `learning_rate` is the supported argument name.
# NOTE(review): newer Keras optimizers reject `decay`; if this raises, switch
# to tf.keras.optimizers.legacy.Adam or a learning-rate schedule. TODO confirm
# against the installed TensorFlow version.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# os.path.join keeps the log directory valid on every platform instead of
# hard-coding a Windows backslash.
tensorboard = TensorBoard(log_dir=os.path.join('logs', NAME))

# Checkpoint file name template; Keras fills in the epoch number and the
# validation accuracy of that epoch.
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}.hdf5"
# The keyword arguments belong to ModelCheckpoint, not to str.format():
# previously they were swallowed by format(), so every epoch was saved and
# val_accuracy was never monitored.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,  # keep only checkpoints that improve val_accuracy
    mode='max',
)

#checkpoint = ModelCheckpoint('deep-learning-model-full-v0.03.01.weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5' , monitor='val_loss' , verbose=1 , save_best_only=True , period=3)



In [None]:
# model.fit expects array-like labels, so turn both label lists into numpy arrays.
train_y, validation_y = np.asarray(train_y), np.asarray(validation_y)

In [None]:
# Fit the network; TensorBoard logging and checkpointing run as callbacks,
# and the held-out frames are scored after every epoch.
history = model.fit(
    x=train_X,
    y=train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_X, validation_y),
    callbacks=[tensorboard, checkpoint],
)

In [None]:
# Persist the final model under the run identifier.
final_model_path = "models/{}".format(NAME)
model.save(final_model_path)