In [3]:
import pandas as pd 
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random


SEQ_LEN=60
FUTURE_PERIOD_PREDICT=3
RATIO_TO_PREDICT='LTC-USD'
data_files=os.listdir('crypto_data')
columns_name=['time','low','high','open','close','volume']
print(data_files)
df=pd.read_csv('crypto_data/LTC-USD.csv',names=columns_name)


def classify(current,future):
    if float(future)> float(current):
        return 1
    else:
        return 0

def preprocess_df(df):
    df = df.drop('future',1)
    for col in df.columns:
        if col != 'target':
            df[col]=df[col].pct_change()
            df.dropna(inplace=True)
            df[col]= preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    
    sequential_data=[]
    prev_days=deque(maxlen=SEQ_LEN)
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days)==SEQ_LEN:
            sequential_data.append((prev_days,i[-1]))
    random.shuffle(sequential_data)

    buys=[]
    sells=[]

    for seq,target in sequential_data:
        if target==0:
            sells.append([seq,target])
        elif target==1:
            buys.append([seq,target])
    lower = min(len(buys),len(sells))

    buys= buys[:lower]
    sells=sells[:lower]

    sequential_data=buys + sells
    random.shuffle(sequential_data)
    X=[]
    Y=[]
    for seq,target in sequential_data:
        X.append(seq)
        Y.append(target)
    return np.array(X),np.array(Y)
    




main_df=pd.DataFrame()
for i in data_files:
    dataset=f'crypto_data/{i}'
    coin=i.split('.')[0]
    df=pd.read_csv(dataset,names=columns_name)
    df.rename(columns={'close':f'{coin}_close','volume':f'{coin}_volume'},inplace=True)

    df.set_index('time',inplace=True)
    df=df[[f'{coin}_close',f'{coin}_volume']]
    if len(main_df)==0:
        main_df=df
    else:
        main_df=main_df.join(df)

main_df['future']=main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)


main_df['target']=list(map(classify,main_df[f"{RATIO_TO_PREDICT}_close"],main_df['future']))

times = sorted(main_df.index.values)
last_5pct= times[-int(0.05*len(times))]

validation_main_df=main_df[(main_df.index>= last_5pct)]
main_df=main_df[(main_df<last_5pct)]

train_x, train_y = preprocess_df(main_df)
val_x, val_y = preprocess_df(validation_main_df)

print(len(train_x))

['BCH-USD.csv', 'BTC-USD.csv', 'ETH-USD.csv', 'LTC-USD.csv']
73152


In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM,CuDNNLSTM,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint
import time

In [10]:
EPOCHS=10
BATCH_SIZE=64

NAME=f'{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}'
data_files=os.listdir('crypto_data')
model=Sequential()
model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:]),return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128,input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2,activation='softmax'))

opt=tf.keras.optimizers.Adam(lr=0.001,decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy',optimizer=opt,metrics=['accuracy'])

tensorboard=TensorBoard(log_dir=f'logs/{NAME}')

filepath='RNN_Final-{epoch:02d}'

checkpoint = ModelCheckpoint('models/{}.model'.format(filepath,monitor='val_acc',verbose=1,save_best_only=True,mode='max'))


In [11]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_3 (CuDNNLSTM)     (None, 60, 128)           70656     
_________________________________________________________________
dropout_4 (Dropout)          (None, 60, 128)           0         
_________________________________________________________________
batch_normalization_v1_3 (Ba (None, 60, 128)           512       
_________________________________________________________________
cu_dnnlstm_4 (CuDNNLSTM)     (None, 60, 128)           132096    
_________________________________________________________________
dropout_5 (Dropout)          (None, 60, 128)           0         
_________________________________________________________________
batch_normalization_v1_4 (Ba (None, 60, 128)           512       
_________________________________________________________________
cu_dnnlstm_5 (CuDNNLSTM)     (None, 128)               132096    
__________

In [13]:
hist = model.fit(train_x,train_y,batch_size= BATCH_SIZE, epochs=EPOCHS, validation_data=(val_x,val_y),callbacks=[tensorboard,checkpoint])

Train on 73152 samples, validate on 3688 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
train_x.shape

(73152, 60, 8)