In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM,CuDNNLSTM


mnist=tf.keras.datasets.mnist
(x_train,y_train),(x_test,y_test)=mnist.load_data()

x_train=x_train/255.0
x_test=x_test/255.0

print(x_train.shape)

model=Sequential()

#model.add(LSTM(128,input_shape=(x_train.shape[1:]),activation='relu',return_sequences=True))
model.add(CuDNNLSTM(128,input_shape=(x_train.shape[1:]),return_sequences=True))
model.add(Dropout(0.2))

#model.add(LSTM(128,activation='relu'))
model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(10,activation='softmax'))

opt=tf.keras.optimizers.Adam(lr=0.001,decay=1e-6)

model.compile(loss='sparse_categorical_crossentropy',
             optimizer=opt,
             metrics=['accuracy'])

model.fit(x_train,y_train,epochs=3,validation_data=(x_test,y_test))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)
Train on 60000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f67002a0f98>

In [2]:
from sklearn import preprocessing
import numpy as np
import pandas as pd
import random
from collections import deque
pd.set_option('display.max_columns', 500)



SEQ_LEN=60 #length of prediction sequence to collect for rnn
FUTURE_PERIOD_PREDICT=3  #how far into future are we going to predict
RATIO_TO_PREDICT='LTC-USD'
#RATIO_TO_PREDICT='BTC-USD'

ratios = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]  

main_df=pd.DataFrame()
for ratio in ratios:
    print(ratio)
    dataset=f'../input/top4bitcoinsdata/{ratio}.csv'
    df=pd.read_csv(dataset,names=['time','low','high','open','close','volume'])
    df.rename(columns={'close':f'{ratio}_close','volume':f'{ratio}_volume'},inplace=True)
    df.set_index('time',inplace=True)
    df=df[[f'{ratio}_close',f'{ratio}_volume']]
    if len(main_df)==0:
        main_df=df
    else:
        main_df=main_df.join(df)
main_df.fillna(method='ffill',inplace=True)
main_df.dropna(inplace=True)
#creating target


def classify(current,future):
    if float(future)>float(current):
        return 1
    else:
        return 0
main_df['future']=main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target']=list(map(classify,main_df[f'{RATIO_TO_PREDICT}_close'],main_df['future']))
main_df[[f'{RATIO_TO_PREDICT}_close','future','target']].head(5)
#seperating validation and train set
times=sorted(main_df.index.values)
last_5pcs=sorted(main_df.index.values)[-int(len(times)*0.05)]
validation_main_df=main_df[(main_df.index>=last_5pcs)]
main_df=main_df[(main_df.index<last_5pcs)]

def preprocess_df(df):
    df=df.drop('future',1)
    #scaling the data
    for col in df.columns:
        if col!='target':
            df[col]=df[col].pct_change()
            df.dropna(inplace=True)
            df[col]=preprocessing.scale(df[col].values)
    df.dropna(inplace=True)
    #creating sequential data
    sequential_data=[]
    prev_days=deque(maxlen=SEQ_LEN)
    
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days)==SEQ_LEN:
            sequential_data.append([np.array(prev_days),i[-1]])
    random.shuffle(sequential_data)
    #Doing process to ensure buys and sells in equal weight
    buys=[]
    sells=[]
    
    for seq,target in sequential_data:
        if target==0:
            sells.append([seq,target])
        else:
            buys.append([seq,target])
    random.shuffle(buys)
    random.shuffle(sells)
    lower=min(len(buys),len(sells))
    
    buys=buys[:lower]
    sells=sells[:lower]
    
    sequential_data=buys+sells
    random.shuffle(sequential_data)
    
    X=[]
    y=[]
    for seq,target in sequential_data:
        X.append(seq)
        y.append(target)
    return np.array(X),y
            
            
train_x,train_y=preprocess_df(main_df)
validation_x,validation_y=preprocess_df(validation_main_df)
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"TRAIN Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

BTC-USD
LTC-USD
BCH-USD
ETH-USD
train data: 77922 validation: 3860
TRAIN Dont buys: 38961, buys: 38961
VALIDATION Dont buys: 1930, buys: 1930


In [3]:
#training model
import tensorflow as tf
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM,CuDNNLSTM,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint
from tensorflow.keras.callbacks import ModelCheckpoint, ModelCheckpoint

EPOCHS = 10  # how many passes through our data
BATCH_SIZE = 64  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.



model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))


opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
NAME=RATIO_TO_PREDICT
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("../input/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

# Train model
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard,checkpoint],
)

# Score model
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
model.save("../input/{}".format(NAME))

Train on 77922 samples, validate on 3860 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.6970174242177776
Test accuracy: 0.56865287


In [4]:
%reload_ext tensorboard.notebook
%tensorboard --logdir logs