In [1]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout,Dense,LSTM,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint

SEQ_LEN=60
FUTURE_PERIOD_PREDICT=3
RATIO_TO_PREDICT='LTC-USD'

NAME=f'{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{time.time()}' 


categories=['BTC-USD','LTC-USD','ETH-USD','BCH-USD']

main_df=pd.DataFrame()

for category in categories:
    
    df=pd.read_csv(f'D:\\pyhton\\crypto_data\\{category}.csv',names=['time','low','high','open','close','volume'])
    
    df.rename(columns={'close':f'{category}_close','volume':f'{category}_volume'},inplace=True)
    
    df.set_index('time',inplace=True)
    
    df=df[[f'{category}_close',f'{category}_volume']]
  
    if len(main_df)==0:
        main_df=df
    else:
        main_df=main_df.join(df)
     
    

In [2]:
def preprocess(df):
    df=df.drop('future',axis=1)
    for col in df.columns:
        if col!= 'target':
            df[col]=df[col].pct_change()
            df.dropna(inplace=True)
            df[col]=preprocessing.scale(df[col].values)
            
        
        df.dropna(inplace=True)
        sequential_data=[]
        prev_days=deque(maxlen=SEQ_LEN)
        
        for i in df.values:
            prev_days.append([n for n in i[:-1]])
            if len(prev_days)==SEQ_LEN:
                sequential_data.append([np.array(prev_days),i[-1]])
          
        random.shuffle(sequential_data)
        
        buys=[]
        sells=[]
        for seq,target in sequential_data:
            if target == 0 :
                sells.append([seq,target])
             
            else :
                buys.append([seq,target])
        
        minlen=min(len(buys),len(sells))
        
        buys=buys[:minlen]
        sells=sells[:minlen]
        
        sequential_data=buys+sells
        
        random.shuffle(sequential_data)
        
        X=[]
        y=[]
        
        for seq,target in sequential_data:
            X.append(seq)
            y.append(target)
            
            
        return np.array(X),np.array(y)  

In [3]:
main_df.head()

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,LTC-USD_close,LTC-USD_volume,ETH-USD_close,ETH-USD_volume,BCH-USD_close,BCH-USD_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,6489.549805,0.5871,96.580002,9.6472,,,871.719971,5.675361
1528968720,6487.379883,7.706374,96.660004,314.387024,486.01001,26.019083,870.859985,26.856577
1528968780,6479.410156,3.088252,96.57,77.129799,486.0,8.4494,870.099976,1.1243
1528968840,6479.410156,1.4041,96.5,7.216067,485.75,26.994646,870.789978,1.749862
1528968900,6479.97998,0.753,96.389999,524.539978,486.0,77.355759,870.0,1.6805


In [4]:
def classify(current,future):
    if float(current)<float(future):
        return 1
    else :
        return 0
    

In [5]:
main_df['future']=main_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)

In [6]:
main_df.dropna(inplace=True)
main_df.shape

(85323, 9)

In [7]:
main_df['target']=list(map(classify,main_df[f'{RATIO_TO_PREDICT}_close'],main_df['future']))

In [8]:
main_df.head(10)

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,LTC-USD_close,LTC-USD_volume,ETH-USD_close,ETH-USD_volume,BCH-USD_close,BCH-USD_volume,future,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1528968720,6487.379883,7.706374,96.660004,314.387024,486.01001,26.019083,870.859985,26.856577,96.389999,0
1528968780,6479.410156,3.088252,96.57,77.129799,486.0,8.4494,870.099976,1.1243,96.519997,0
1528968840,6479.410156,1.4041,96.5,7.216067,485.75,26.994646,870.789978,1.749862,96.440002,0
1528968900,6479.97998,0.753,96.389999,524.539978,486.0,77.355759,870.0,1.6805,96.470001,1
1528968960,6480.0,1.4909,96.519997,16.991997,486.0,7.5033,869.98999,1.669014,96.400002,0
1528969020,6477.220215,2.73195,96.440002,95.524078,485.98999,85.877251,869.450012,0.8652,96.400002,0
1528969080,6480.0,2.17424,96.470001,175.205307,485.98999,160.915192,869.98999,23.534929,96.400002,0
1528969140,6479.990234,0.9031,96.400002,43.652802,485.98999,61.371887,870.0,2.3,96.400002,0
1528969200,6478.660156,3.258786,96.400002,8.16,485.98999,42.687656,870.320007,9.255514,96.400002,0
1528969260,6478.660156,1.970352,96.400002,20.4259,486.0,97.693878,870.650024,2.7956,96.449997,1


In [9]:
times=sorted(main_df.index.values)
last_5pct=times[-int(0.05*len(times))]

validation_main_df=main_df[(main_df.index>=last_5pct)]
main_df=main_df[(main_df.index<last_5pct)]

x_train,y_train=preprocess(main_df)
x_test,y_test=preprocess(validation_main_df)

print(len(x_train),len(y_train))
print(len(x_test),len(y_test))
                         
                    


68840 68840
3400 3400


In [10]:
model=Sequential()
model.add(LSTM(128,input_shape=x_train.shape[1:],return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128,input_shape=x_train.shape[1:],return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128,input_shape=x_train.shape[1:]))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2,activation='softmax'))



  super().__init__(**kwargs)


In [11]:
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [12]:
import os
import datetime

logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback =TensorBoard(logdir, histogram_freq=1)
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback]
)

Epoch 1/5
[1m2152/2152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 137ms/step - accuracy: 0.5063 - loss: 0.7586 - val_accuracy: 0.5306 - val_loss: 0.6920
Epoch 2/5
[1m2152/2152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m295s[0m 137ms/step - accuracy: 0.5177 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6937
Epoch 3/5
[1m2152/2152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 132ms/step - accuracy: 0.5099 - loss: 0.6934 - val_accuracy: 0.5212 - val_loss: 0.6911
Epoch 4/5
[1m2152/2152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 131ms/step - accuracy: 0.5180 - loss: 0.6925 - val_accuracy: 0.5000 - val_loss: 0.6943
Epoch 5/5
[1m2152/2152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 129ms/step - accuracy: 0.5103 - loss: 0.6929 - val_accuracy: 0.5112 - val_loss: 0.6920


<keras.src.callbacks.history.History at 0x189d2e7f450>

In [14]:
from tensorboard import notebook
notebook.list()
notebook.display(port=6006, height=1000)

No known TensorBoard instances running.
