In [28]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import os
import time
import matplotlib.pyplot as plt
import tensorflow as tf

In [29]:
SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDICT = "BTC-USD"
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"{RATIO_TO_PREDICT}-{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [30]:
def load_data(ratios):
    main_df = pd.DataFrame()
    for ratio in ratios:
        file_path = f'crypto_data/{ratio}.csv'
        df = pd.read_csv(file_path, names=['time', 'low', 'high', 'open', 'close', 'volume'])
        df.rename(columns={'close': f'close_{ratio}', 'volume': f'volume_{ratio}'}, inplace=True)
        df.set_index('time', inplace=True)
        df = df[[f'close_{ratio}', f'volume_{ratio}']]
        if main_df.empty:
            main_df = df
        else:
            main_df = main_df.join(df[[f'close_{ratio}', f'volume_{ratio}']])
            main_df.fillna(method='ffill', inplace=True)
            main_df.dropna(inplace=True)
    return main_df

In [31]:
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

In [32]:
# Getting the data
ratios = ["BTC-USD", "ETH-USD", "LTC-USD", "BCH-USD"]
df = load_data(ratios)

# Creating the target column
df['future'] = df[f'close_{RATIO_TO_PREDICT}'].shift(-FUTURE_PERIOD_PREDICT)
df['target'] = list(map(classify, df[f'close_{RATIO_TO_PREDICT}'], df['future']))

  main_df.fillna(method='ffill', inplace=True)
  main_df.fillna(method='ffill', inplace=True)
  main_df.fillna(method='ffill', inplace=True)


Date Preprocessing

In [33]:
# Separate the data into out_of_sample and in_sample
times = sorted(df.index.values)
last_5pct = times[-int(0.05*len(times))]

validation_df = df[(df.index >= last_5pct)]
df = df[(df.index < last_5pct)]

def preprocess_df(df):
    df = df.drop('future', axis=1)

    # Scaling and normalizing the data
    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    # Creating sequences
    sequential_data = []
    prev_days = deque(maxlen=SEQ_LEN)

    for c in df.values:
        prev_days.append([n for n in c[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), c[-1]])
    random.shuffle(sequential_data)

    # Balancing the data to have balanced classes
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        else:
            buys.append([seq, target])
    
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), np.array(y)
        

Building the RNN Model

In [34]:
X_train, y_train = preprocess_df(df)
X_val, y_val = preprocess_df(validation_df)

def build_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(X_train.shape[1:])),
        tf.keras.layers.LSTM(128, return_sequences=True),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LSTM(128),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(2, activation='softmax')
    ])

    opt = tf.keras.optimizers.Adam(learning_rate=0.001)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    tensorboard = tf.keras.callbacks.TensorBoard(log_dir="logs/{}".format(NAME))

    filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"
    checkpoint = tf.keras.callbacks.ModelCheckpoint("models/{}.keras".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max'))
    
    return model, tensorboard, checkpoint

In [None]:
def train_model():
    model, tensorboard, checkpoint = build_model()
    history = model.fit(X_train, y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(X_val, y_val),
                        callbacks=[tensorboard, checkpoint])
    score = model.evaluate(X_val, y_val, verbose=0)
    print('Validation loss:', score[0])
    print('Validation accuracy:', score[1])
    model.save(f"models/{NAME}.keras")
    accuracy = history.history['accuracy']
    loss = history.history['loss']
    plt.plot(accuracy, label='accuracy')
    plt.plot(loss, label='loss')
    plt.xlabel('Epoch')
    plt.ylabel('Value')
    plt.legend()
    plt.show()
    
    return model, history

In [36]:
train_model()

Epoch 1/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 174ms/step - accuracy: 0.5423 - loss: 0.7055 - val_accuracy: 0.5510 - val_loss: 0.6827
Epoch 2/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 176ms/step - accuracy: 0.5704 - loss: 0.6778 - val_accuracy: 0.5774 - val_loss: 0.6764
Epoch 3/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m229s[0m 176ms/step - accuracy: 0.5779 - loss: 0.6742 - val_accuracy: 0.5751 - val_loss: 0.6761
Epoch 4/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 173ms/step - accuracy: 0.5849 - loss: 0.6720 - val_accuracy: 0.5827 - val_loss: 0.6740
Epoch 5/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 167ms/step - accuracy: 0.5880 - loss: 0.6696 - val_accuracy: 0.5763 - val_loss: 0.6795
Epoch 6/10
[1m1300/1300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 195ms/step - accuracy: 0.5941 - loss: 0.6668 - val_accuracy: 0.5756 - val_loss:

(<Sequential name=sequential_3, built=True>,
 <keras.src.callbacks.history.History at 0x12fa53050>)