## Importing the requirements

In [None]:
import pandas as pd
import numpy as np

from collections import deque
import random


from sklearn import preprocessing

from google.colab import files


import time



import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

## Uploading the data set

In [None]:
# Download the data set from https://www.kaggle.com/prasoonkottarathil/btcinusd and upload it here.
# `files` is the google.colab upload helper imported above; its return value is kept in `uploaded`.
uploaded = files.upload()

## Working on the data set

Storing the data into a variable

In [None]:
# Read the uploaded CSV (BTCUSD_day.csv) into a DataFrame.
df = pd.read_csv("BTCUSD_day.csv")

# Show the first 7 rows of data
print(df.head(7))

In [None]:
# Index the raw frame by date and keep only the closing price and traded volume.
# The guard keeps the cell re-runnable: after the first run "Date" is already the
# index, and calling set_index("Date") again would raise KeyError.
if "Date" in df.columns:
    df = df.set_index("Date")
df = df[["Close", "Volume BTC"]]  # ignore the other columns besides price and volume

# Forward-fill gaps with the previously known value, then drop rows that are still
# empty (leading rows with nothing to fill from). Building a new frame instead of
# filling in place leaves `df` untouched and avoids the deprecated
# fillna(method="ffill") spelling.
main_df = df.ffill().dropna()
print(main_df.head())  # how did we do??

In [None]:
SEQ_LEN = 60  # number of preceding rows collected into each input sequence for the RNN
FUTURE_PERIOD_PREDICT = 3  # how many rows ahead the prediction target looks

EPOCHS = 10  # how many passes through our data
BATCH_SIZE = 64  # samples per training batch. Try smaller batch if you're getting OOM (out of memory) errors.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"  # run name; the Unix timestamp makes it unique per run

# Disabled alternative that also puts the predicted ratio in the name
# (RATIO_TO_PREDICT is not defined in the visible cells):
#NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [None]:
def classify(current, future):
    """Label a price move: 1 when `future` is strictly above `current`, else 0.

    Both arguments are converted with float() before being compared, so
    numeric strings are accepted; equal values are labelled 0.
    """
    went_up = float(future) > float(current)
    return 1 if went_up else 0




def preprocess_df(df):
    """Turn a labelled price frame into class-balanced (sequence, label) data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "future" column (dropped here) and have "target" as the
        last remaining column; every column before it is used as a feature.

    Returns
    -------
    tuple (numpy.ndarray, list)
        X: the stacked feature windows of SEQ_LEN consecutive rows, and
        y: the matching targets as a plain list (callers use ``list.count``).
        Both classes are truncated to the size of the smaller one and the
        result is shuffled.
    """
    # Keyword form: the positional axis argument used before
    # (`df.drop("future", 1)`) is rejected by pandas 2.x.
    df = df.drop(columns="future")

    for col in df.columns:
        if col in ("target", "Volume BTC"):
            continue  # the label and the volume column are left untouched
        df[col] = df[col].pct_change()  # relative change instead of absolute price level
        df.dropna(inplace=True)  # remove the NaN row created by pct_change
        df[col] = preprocessing.scale(df[col].values)  # standardize to zero mean / unit variance

    df.dropna(inplace=True)  # cleanup again... jic.

    sequential_data = []  # will hold [window, target] pairs
    prev_days = deque(maxlen=SEQ_LEN)  # rolling window; old rows fall out as new ones come in

    for row in df.values:
        prev_days.append(list(row[:-1]))  # store all but the target
        if len(prev_days) == SEQ_LEN:  # only emit once the window is full
            sequential_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequential_data)  # shuffle for good measure.

    # Split by class so both can be cut to the same size.
    buys = [pair for pair in sequential_data if pair[1] == 1]
    sells = [pair for pair in sequential_data if pair[1] == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))  # size of the smaller class

    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)  # mix the classes so batches are not all one label

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    return np.array(X), y  # X as a numpy array, y stays a list


# Load the price history used for training.
dataset = "BTCUSD_day.csv"  # path to the CSV file
ratio = dataset.split('.csv')[0]  # file name without the extension

df = (
    pd.read_csv(dataset)
    # Map lower-case headers to the names used below; this is a no-op when the
    # file already has "Close" / "Volume BTC" columns.
    .rename(columns={"close": "Close", "volume": "Volume BTC"})
    .set_index("Date")  # index by date
)
df = df[["Close", "Volume BTC"]]  # ignore the other columns besides price and volume

# Forward-fill gaps with the previously known value, then drop rows that are
# still empty. Building a fresh frame (instead of the deprecated in-place
# fillna(method="ffill") on a column slice) means new columns can be assigned
# to `main_df` later without touching `df`.
main_df = df.ffill().dropna()


In [None]:
# Label every row: 'future' is the close FUTURE_PERIOD_PREDICT rows further down,
# 'target' is 1 when that future close is higher than the current one.
# NOTE(review): shift(-N) follows row order, so 'future' only means "N periods
# later" if the rows are in ascending date order — confirm the CSV's ordering.
main_df['future'] = main_df['Close'].shift(-FUTURE_PERIOD_PREDICT)
main_df['target'] = [
    classify(now, later)
    for now, later in zip(main_df['Close'], main_df['future'])
]

# The last FUTURE_PERIOD_PREDICT rows have no future value; drop them.
main_df.dropna(inplace=True)

# Hold out the rows whose index falls in the top 5% of sorted index values.
times = sorted(main_df.index.values)
holdout_size = int(0.05 * len(times))
last_5pct = times[-holdout_size]

validation_main_df = main_df[main_df.index >= last_5pct]
main_df = main_df[main_df.index < last_5pct]

train_x, train_y = preprocess_df(main_df)
validation_x, validation_y = preprocess_df(validation_main_df)

print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

In [None]:
# Three stacked CuDNNLSTM layers (each followed by dropout and batch
# normalisation), then a small dense head ending in a 2-way softmax.
model = Sequential([
    CuDNNLSTM(128, input_shape=train_x.shape[1:], return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),

    CuDNNLSTM(128, return_sequences=True),
    Dropout(0.1),
    BatchNormalization(),

    CuDNNLSTM(128),  # last recurrent layer returns only its final output
    Dropout(0.2),
    BatchNormalization(),

    Dense(32, activation='relu'),
    Dropout(0.2),

    Dense(2, activation='softmax'),
])

# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): some newer Keras optimizer implementations reject `decay`;
# confirm against the installed TensorFlow version.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)

# Compile model. Targets are integer class ids, hence the sparse loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# File name template filled in by the callback with the epoch number and that
# epoch's validation accuracy.
filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}.hdf5"

# The options are arguments of ModelCheckpoint. They were previously passed to
# str.format() by a misplaced parenthesis and silently ignored, so a file was
# written every epoch instead of only on improvement.
checkpoint = ModelCheckpoint(
    "models/checkpoint/{}".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,  # keep only checkpoints that improve val_accuracy
    mode='max',
)







In [None]:
# Train model: fit on the training sequences and evaluate on the validation
# split after each epoch; both callbacks run during training.
history = model.fit(
    train_x,
    np.array(train_y),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, np.array(validation_y)),
    callbacks=[tensorboard, checkpoint],
)


In [None]:
# Score model
# Evaluated on validation_x / validation_y, so the values printed below as
# "Test ..." are metrics on the validation split.
score = model.evaluate(validation_x, np.array(validation_y), verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Save model under the run's name inside the models/ directory.
model.save(f"models/{NAME}.hdf5")

## Loading the model and predicting

In [None]:
# Reload the model from the path the notebook saves it to (models/<NAME>.hdf5).
# The previous hard-coded absolute path pointed at a different directory and at
# a file name from an earlier run, so it broke on a fresh Run All.
model = tf.keras.models.load_model("models/{}.hdf5".format(NAME))

In [None]:
# Predict on one real, preprocessed validation window. The network is built with
# input_shape=train_x.shape[1:], and validation_x comes from the same
# preprocess_df call path, so it has the matching shape and scaling; the earlier
# hand-typed single pair of raw numbers had neither.
model_predict = model.predict(validation_x[:1])

In [None]:
# Show the raw array returned by model.predict.
print(model_predict)