**Data Collection**

In [156]:
# Installation (if needed)
# !pip install ccxt

import ccxt
import pandas as pd

# Initialize the exchange (using Binance as an example)
exchange = ccxt.binance()

# Define the symbol and timeframe
symbol = 'BTC/USDT'
timeframe = '1h'  # hourly candlesticks

# Fetch OHLCV data
ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=5000)

# Convert to DataFrame
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

# Convert timestamp to datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)

print(df['close'].head(10))
print(df['close'].tail(10))

timestamp
2023-09-14 05:00:00    26194.76
2023-09-14 06:00:00    26291.88
2023-09-14 07:00:00    26344.45
2023-09-14 08:00:00    26306.51
2023-09-14 09:00:00    26287.60
2023-09-14 10:00:00    26334.44
2023-09-14 11:00:00    26446.15
2023-09-14 12:00:00    26458.68
2023-09-14 13:00:00    26582.42
2023-09-14 14:00:00    26731.40
Name: close, dtype: float64
timestamp
2023-10-25 11:00:00    34199.20
2023-10-25 12:00:00    34337.01
2023-10-25 13:00:00    34483.99
2023-10-25 14:00:00    34769.68
2023-10-25 15:00:00    34839.80
2023-10-25 16:00:00    34480.71
2023-10-25 17:00:00    34660.15
2023-10-25 18:00:00    34661.22
2023-10-25 19:00:00    34738.54
2023-10-25 20:00:00    34777.60
Name: close, dtype: float64


**Feature Engineering**

In [157]:
import talib

features = {
    '7_day_avg': ('SMA', 7),
    '30_day_avg': ('SMA', 30),
    'momentum': ('MOM', 4),
    'rsi': ('RSI', 14),
    'volatility': ('ATR', 14)
}

for feature, (method, timeperiod) in features.items():
    if method == 'SMA':
        df[feature] = talib.SMA(df['close'], timeperiod=timeperiod)
    elif method == 'MOM':
        df[feature] = talib.MOM(df['close'], timeperiod=timeperiod)
    elif method == 'RSI':
        df[feature] = talib.RSI(df['close'], timeperiod=timeperiod)
    elif method == 'ATR':
        df[feature] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=timeperiod)

In [158]:
df.dropna(inplace=True)

**Normalize the data**

In [159]:
from sklearn.preprocessing import MinMaxScaler

# Fit close_scaler only on the training set's close prices
train_size = int(len(df) * 0.8)
train_data = df.iloc[:train_size]
close_scaler = MinMaxScaler(feature_range=(0, 1))
close_scaler.fit(train_data['close'].values.reshape(-1, 1))

# Normalize all features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df.values)
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

**Create sequences for LSTM**

In [160]:
import numpy as np

def create_dataset(data, look_back=48):
    close_col_idx = data.columns.get_loc('close')
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data.iloc[i:(i + look_back)].values)
        Y.append(data.iloc[i + look_back, close_col_idx])
    return np.array(X), np.array(Y)

X_train, Y_train = create_dataset(train_data)
X_test, Y_test = create_dataset(df.iloc[train_size - 48:])

**Implement, and train the LSTM model**

In [161]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

num_features = df.shape[1]
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], num_features)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=50, batch_size=64)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2510f76f1f0>

**Making Predictions**

In [162]:
predicted_prices = model.predict(X_test)
predicted_prices = close_scaler.inverse_transform(predicted_prices)



**Extracting the Actual Prices**

In [163]:
# Extracting the actual prices from the test data and inverting the transformation
actual_prices = close_scaler.inverse_transform(Y_test.reshape(-1, 1))
time_range = df.index[-len(actual_prices):]

**Visualization**

In [164]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(x=time_range, y=actual_prices.flatten(), mode='lines', name='Actual BTC Price'))
fig.add_trace(go.Scatter(x=time_range, y=predicted_prices.flatten(), mode='lines', name='Predicted BTC Price'))
fig.update_layout(title='BTC Price Prediction', xaxis_title='Time', yaxis_title='BTC Price (USDT)', template="plotly_dark", height=800)
fig.show()