Data Collection

In [11]:
import ccxt
import pandas as pd

def fetch_data(symbol='BTC/USDT', time_frame='1h', from_ts_str='2021-01-01T00:00:00Z',
               limit=1000, exchange=None):
    """Download OHLCV candles page by page and return them as one DataFrame.

    Starting at ``from_ts_str`` the exchange is queried repeatedly, each
    request asking for at most ``limit`` candles and resuming one millisecond
    after the last candle already received. Paging stops as soon as a request
    returns fewer than ``limit`` candles.

    Parameters
    ----------
    symbol : str
        Market symbol passed to ``fetch_ohlcv``.
    time_frame : str
        Candle interval passed to ``fetch_ohlcv``.
    from_ts_str : str
        ISO-8601 start time; converted with the exchange's ``parse8601``.
    limit : int
        Page size requested per call. A short page is treated as the end of
        the data, so this should not exceed what the exchange will actually
        return per request (NOTE(review): confirm the cap for the exchange
        in use).
    exchange : object, optional
        Any object exposing ``parse8601`` and ``fetch_ohlcv`` (for example a
        ccxt exchange instance). Defaults to a fresh ``ccxt.binance()``.

    Returns
    -------
    pandas.DataFrame
        Columns ``timestamp, open, high, low, close, volume``; one row per
        candle in the order the exchange returned them. Empty (but with the
        same columns) when the exchange returns nothing.

    Raises
    ------
    ValueError
        If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    # Default to the public Binance client; accepting an injected exchange
    # lets the paging logic be reused with other clients and exercised offline.
    if exchange is None:
        exchange = ccxt.binance()

    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    since = exchange.parse8601(from_ts_str)

    # First page.
    batch = exchange.fetch_ohlcv(symbol, time_frame, since=since, limit=limit)

    # Accumulate raw rows in a plain list and build the DataFrame once at the
    # end; concatenating a growing DataFrame on every page re-copies all
    # previously fetched rows each time.
    rows = list(batch)

    batch_number = 1
    # A full page means there may be more data after it.
    while len(batch) == limit:
        print(f"Fetching batch {batch_number}...")
        since = batch[-1][0] + 1  # add 1 millisecond to avoid duplicates
        batch = exchange.fetch_ohlcv(symbol, time_frame, since=since, limit=limit)
        rows.extend(batch)
        batch_number += 1

    print("Data fetching completed!")
    return pd.DataFrame(rows, columns=columns)


Data Preprocessing & Feature Engineering

In [12]:
import talib
from sklearn.preprocessing import MinMaxScaler

def preprocess_data(df, rsi_period=16, atr_period=14):
    """Add RSI/ATR indicators and min-max scaled feature columns.

    The input frame is not modified: indicators are attached to a new frame,
    rows containing any NaN (which includes the indicator warm-up rows) are
    dropped, and then ``close``, ``rsi`` and ``atr`` are each scaled to
    ``[0, 1]`` with their own ``MinMaxScaler``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``high``, ``low`` and ``close`` columns.
    rsi_period : int
        ``timeperiod`` passed to ``talib.RSI``.
    atr_period : int
        ``timeperiod`` passed to ``talib.ATR``.

    Returns
    -------
    (pandas.DataFrame, MinMaxScaler)
        The new frame with added columns ``rsi``, ``atr``, ``scaled_close``,
        ``scaled_rsi`` and ``scaled_atr``, and the scaler fitted on ``close``
        (needed to map scaled prices back to price units).

    Raises
    ------
    ValueError
        If no rows remain after dropping NaNs.
    """
    # Indicators are computed on float arrays.
    # NOTE(review): TA-Lib is understood to require float64 input - confirm.
    high = df['high'].to_numpy(dtype=float)
    low = df['low'].to_numpy(dtype=float)
    close = df['close'].to_numpy(dtype=float)

    # `assign` returns a new frame, so the caller's DataFrame is left as-is and
    # calling this function twice on the same raw data gives the same result.
    out = df.assign(
        rsi=talib.RSI(close, timeperiod=rsi_period),
        atr=talib.ATR(high, low, close, timeperiod=atr_period),
    )

    # Drop rows with missing values; the explicit copy makes the result an
    # independent frame before new columns are written to it.
    out = out.dropna().copy()
    if out.empty:
        raise ValueError(
            "No rows left after dropping NaNs; the input is too short for "
            f"rsi_period={rsi_period} / atr_period={atr_period}."
        )

    # One scaler per feature; only the price scaler is returned because it is
    # the one needed to invert predictions.
    scaler_price = MinMaxScaler(feature_range=(0, 1))
    scaler_rsi = MinMaxScaler(feature_range=(0, 1))
    scaler_atr = MinMaxScaler(feature_range=(0, 1))

    for source, target, scaler in (
        ('close', 'scaled_close', scaler_price),
        ('rsi', 'scaled_rsi', scaler_rsi),
        ('atr', 'scaled_atr', scaler_atr),
    ):
        # The scaler works on a 2-D (n, 1) array; flatten before storing it
        # as a single column.
        out[target] = scaler.fit_transform(out[[source]].to_numpy()).ravel()

    return out, scaler_price


Prepare the LSTM dataset

In [13]:
import numpy as np

def create_dataset(df, window=60):
    """Turn the scaled columns into supervised (window -> next value) samples.

    For every row index ``i >= window`` one sample is produced: the input is
    the ``window`` preceding rows of ``(scaled_close, scaled_rsi)`` and the
    target is ``scaled_close`` at row ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``scaled_close`` and ``scaled_rsi`` columns.
    window : int
        Number of past rows in each input sample.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape ``(n_samples, window, 2)`` and ``y`` with shape
        ``(n_samples,)`` where ``n_samples = max(len(df) - window, 0)``.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    close = df['scaled_close'].to_numpy()
    rsi = df['scaled_rsi'].to_numpy()

    # Stack the two features once, shape (len(df), 2), instead of re-slicing
    # the DataFrame columns for every sample.
    features = np.column_stack((close, rsi))

    n_rows = len(features)
    if n_rows <= window:
        # Not enough history for a single sample: return correctly shaped
        # empty arrays so downstream shape checks fail loudly and clearly.
        return np.empty((0, window, features.shape[1])), np.empty((0,))

    X = np.stack([features[i - window:i] for i in range(window, n_rows)])
    y = close[window:].copy()
    return X, y

Build the LSTM model

In [14]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

def create_model(window=60, n_features=2, units=100, dropout=0.2):
    """Build and compile a two-layer stacked LSTM that outputs one value.

    Parameters
    ----------
    window : int
        Number of time steps per input sample; must match the window length
        used when building the dataset.
    n_features : int
        Number of features per time step.
    units : int
        Width of each LSTM layer.
    dropout : float
        Rate used by the Dropout layer that follows each LSTM layer.

    Returns
    -------
    keras.models.Sequential
        Model compiled with the Adam optimizer and mean-squared-error loss.
    """
    model = Sequential()

    # First recurrent layer keeps the full output sequence so that the second
    # LSTM layer receives one vector per time step.
    model.add(LSTM(units, return_sequences=True, input_shape=(window, n_features)))
    model.add(Dropout(dropout))

    # Second recurrent layer returns only its final output.
    model.add(LSTM(units))
    model.add(Dropout(dropout))

    # Single linear unit: the predicted value.
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

Run the logic flow

In [15]:
# End-to-end run: download candles -> engineer features -> window them -> train -> predict.

# With its default arguments fetch_data() pulls BTC/USDT 1h candles from
# Binance starting at 2021-01-01 (this performs network requests).
df = fetch_data()
# Adds the rsi/atr and scaled_* columns; scaler_price is the scaler fitted on
# `close` and is needed later to convert scaled predictions back to prices.
df, scaler_price = preprocess_data(df)
# X: 60-step windows of (scaled_close, scaled_rsi); y: the scaled_close that
# immediately follows each window.
X, y = create_dataset(df)

model = create_model()
    
# Train on every available sample; no validation or test split is held out.
model.fit(X, y, epochs=10, batch_size=64)

# In-sample prediction: the model is scored on the same X it was trained on,
# so these values say nothing about out-of-sample accuracy.
predictions = model.predict(X)

# Only the last prediction is reported. It is the model's output for the final
# window, i.e. its estimate of the close of the last row in the dataset, which
# is a value the model already saw as a training target.
# inverse_transform expects a 2-D array, hence the reshape; it maps the scaled
# value back to price units.
predicted_price = scaler_price.inverse_transform(predictions[-1].reshape(-1, 1))
print(f"Predicted BTC Price for the last hour in dataset: ${predicted_price[0][0]}")

Fetching batch 1...
Fetching batch 2...
Fetching batch 3...
Fetching batch 4...
Fetching batch 5...
Fetching batch 6...
Fetching batch 7...
Fetching batch 8...
Fetching batch 9...
Fetching batch 10...
Fetching batch 11...
Fetching batch 12...
Fetching batch 13...
Fetching batch 14...
Fetching batch 15...
Fetching batch 16...
Fetching batch 17...
Fetching batch 18...
Fetching batch 19...
Fetching batch 20...
Fetching batch 21...
Fetching batch 22...
Fetching batch 23...
Fetching batch 24...
Data fetching completed!
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Predicted BTC Price for the last hour in dataset: $28722.642578125
