In [1]:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np

In [2]:
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

2023-01-12 12:49:07.433561: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = 'LTC-USD'
EPOCHS = 10
BATCH_SIZE = 64
NAME = f'{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}'

In [4]:
from imblearn.over_sampling import SMOTE

def preprocess(df): 
    df = df.drop('future', axis=1)
    for col in df.columns: 
        if col != 'target':
            # data normalization
            df[col] = df[col].pct_change()
            # data scaling
            df[col] = preprocessing.scale(df[col].values)
    
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)
    # convert df to array
    for i in df.values: 
        # features
        prev_days.append([n for n in i[:-1]]) # exclude target col
        if len(prev_days) == SEQ_LEN: 
            sequential_data.append([np.array(prev_days), i[-1]])
    random.shuffle(sequential_data)
        
    # SMOTE to prevent undersampling of buys

    seq_array = np.array([a[0] for a in sequential_data])
    target_list = [b[1] for b in sequential_data]
    
    # Integer encoding
    target_list = [1 if b else 0 for b in target_list]
    
    nsamples, nx, ny = seq_array.shape
    seq_array = seq_array.reshape((nsamples, nx*ny))
    
    sm = SMOTE(random_state=42)
    X_res, y_res = sm.fit_resample(seq_array, target_list)
    
    return X_res, y_res

In [5]:
# stores the merged csvs
joined_df = pd.DataFrame() 

ratios = ['LTC-USD','BTC-USD','ETH-USD','BCH-USD']

for ratio in ratios:
    # path to each csv
    dataset = f'crypto_data/{ratio}.csv'
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])
    
    # rename columns to prevent errors when merging
    df.rename(columns={'close': f'{ratio}_close', 'volume': f'{ratio}_volume'}, inplace=True)
    
    # set a common index time
    df.set_index('time', inplace=True)
    df = df[[f'{ratio}_close', f'{ratio}_volume']]
    
    # join dataframes
    if len(joined_df): 
        joined_df = joined_df.join(df)
    else:
        joined_df = df

joined_df.fillna(method='ffill', inplace=True)
joined_df.dropna(inplace=True)
print(joined_df.head())

            LTC-USD_close  LTC-USD_volume  BTC-USD_close  BTC-USD_volume  \
time                                                                       
1528968720      96.660004      314.387024    6487.379883        7.706374   
1528968780      96.570000       77.129799    6479.410156        3.088252   
1528968840      96.500000        7.216067    6479.410156        1.404100   
1528968900      96.389999      524.539978    6479.979980        0.753000   
1528968960      96.519997       16.991997    6480.000000        1.490900   

            ETH-USD_close  ETH-USD_volume  BCH-USD_close  BCH-USD_volume  
time                                                                      
1528968720      486.01001       26.019083     870.859985       26.856577  
1528968780      486.00000        8.449400     870.099976        1.124300  
1528968840      485.75000       26.994646     870.789978        1.749862  
1528968900      486.00000       77.355759     870.000000        1.680500  
1528968960      4

In [6]:
joined_df['future'] = joined_df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)

In [7]:
# create target column to identify targets with future prices greater than current prices
joined_df['target'] = joined_df[f'{RATIO_TO_PREDICT}_close'] < joined_df['future']
print(joined_df['target'])

time
1528968720    False
1528968780    False
1528968840    False
1528968900     True
1528968960    False
              ...  
1535215020     True
1535215080     True
1535215140    False
1535215200    False
1535215260    False
Name: target, Length: 101882, dtype: bool


In [8]:
times = sorted(joined_df.index.values)

last_20percent = times[-int(0.2*len(times))]
print(last_20percent)

1533977280


In [9]:
# split data into training and validation sets using an 80-20 split
val_joined_df = joined_df[joined_df.index >= last_20percent]
joined_df = joined_df[joined_df.index < last_20percent]

In [10]:
X_train, y_train = preprocess(joined_df)
X_val, y_val = preprocess(val_joined_df)

In [11]:
print(f'training data: {len(X_train)} validation: {len(X_val)}')
print (f'do not buy: {y_train.count(0)}, buy: {y_train.count(1)}')
print(f'validation do not buy: {y_val.count(0)}, buy: {y_val.count(1)}')

training data: 94966 validation: 22892
do not buy: 47483, buy: 47483
validation do not buy: 11446, buy: 11446


In [12]:
X_train.shape

(94966, 480)

In [13]:
from numpy import newaxis 
X_train = X_train[:, :, newaxis]
X_val = X_val[:, :, newaxis]
print(X_train.shape)
print(X_val.shape)

(94966, 480, 1)
(22892, 480, 1)


In [14]:
model = Sequential() 

model.add(LSTM(128, input_shape=(X_train.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='tanh'))
model.add(Dropout(0.2))

# Binary choice so 2 output channels
model.add(Dense(2, activation='softmax'))

model.summary()

2023-01-12 12:49:24.768857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 480, 128)          66560     
                                                                 
 dropout (Dropout)           (None, 480, 128)          0         
                                                                 
 batch_normalization (BatchN  (None, 480, 128)         512       
 ormalization)                                                   
                                                                 
 lstm_1 (LSTM)               (None, 480, 128)          131584    
                                                                 
 dropout_1 (Dropout)         (None, 480, 128)          0         
                                                                 
 batch_normalization_1 (Batc  (None, 480, 128)         512       
 hNormalization)                                        

In [15]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy',
             optimizer=optimizer, 
             metrics=['accuracy'])

In [16]:
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')

filepath = 'RNN_Final-{epoch:02d}-{val_accuracy:.3f}'
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'))

In [17]:
# Train Model 

history = model.fit(
    X_train, np.array(y_train), 
    batch_size=BATCH_SIZE, 
    epochs=EPOCHS,
    validation_data=(X_val, np.array(y_val)),
    callbacks=[tensorboard, checkpoint])

Epoch 1/10



INFO:tensorflow:Assets written to: models/RNN_Final-01-0.535.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-01-0.535.model/assets


Epoch 2/10



INFO:tensorflow:Assets written to: models/RNN_Final-02-0.533.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-02-0.533.model/assets


Epoch 3/10



INFO:tensorflow:Assets written to: models/RNN_Final-03-0.536.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-03-0.536.model/assets


Epoch 4/10



INFO:tensorflow:Assets written to: models/RNN_Final-04-0.538.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-04-0.538.model/assets


Epoch 5/10



INFO:tensorflow:Assets written to: models/RNN_Final-05-0.544.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-05-0.544.model/assets


Epoch 6/10



INFO:tensorflow:Assets written to: models/RNN_Final-06-0.548.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-06-0.548.model/assets


Epoch 7/10



INFO:tensorflow:Assets written to: models/RNN_Final-07-0.547.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-07-0.547.model/assets


Epoch 8/10



INFO:tensorflow:Assets written to: models/RNN_Final-08-0.551.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-08-0.551.model/assets


Epoch 9/10



INFO:tensorflow:Assets written to: models/RNN_Final-09-0.549.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-09-0.549.model/assets


Epoch 10/10



INFO:tensorflow:Assets written to: models/RNN_Final-10-0.546.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-10-0.546.model/assets


