In [1]:
# Step 1: Import required libraries and initialize variables
import ccxt
import pandas as pd
import talib
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
import plotly.graph_objects as go

In [11]:
# Step 2: Collect data from Binance exchange
exchange = ccxt.binance()
symbol = 'BTC/USDT'
timeframe = '1h'
ohlcv = exchange.fetch_ohlcv(symbol, timeframe)
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)

df

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-10-05 01:00:00,27811.79,27812.41,27688.22,27732.16,889.76376
2023-10-05 02:00:00,27732.15,27738.13,27660.89,27677.67,621.91168
2023-10-05 03:00:00,27677.67,27709.53,27650.01,27651.73,437.85462
2023-10-05 04:00:00,27651.73,27698.25,27638.36,27692.91,503.48328
2023-10-05 05:00:00,27692.91,27702.20,27641.30,27641.31,482.42741
...,...,...,...,...,...
2023-10-25 16:00:00,34839.80,35132.85,34457.01,34480.71,5462.19006
2023-10-25 17:00:00,34480.72,34704.32,34380.85,34660.15,2157.62890
2023-10-25 18:00:00,34660.16,34734.82,34429.83,34661.22,1933.18457
2023-10-25 19:00:00,34661.22,34792.15,34547.37,34738.54,1359.04309


In [23]:
# Step 3: Feature Engineering
features = {
    '7_day_avg': ('SMA', 7),
    '30_day_avg': ('SMA', 30),
    'momentum': ('MOM', 4),
    'rsi': ('RSI', 14),
    'volatility': ('ATR', 14)
}
for feature, (method, timeperiod) in features.items():
    if method == 'SMA':
        df[feature] = talib.SMA(df['close'], timeperiod=timeperiod)
    elif method == 'MOM':
        df[feature] = talib.MOM(df['close'], timeperiod=timeperiod)
    elif method == 'RSI':
        df[feature] = talib.RSI(df['close'], timeperiod=timeperiod)
    elif method == 'ATR':
        df[feature] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=timeperiod)
        
df

Unnamed: 0_level_0,open,high,low,close,volume,7_day_avg,30_day_avg,momentum,rsi,volatility
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-10-05 01:00:00,27811.79,27812.41,27688.22,27732.16,889.76376,,,,,
2023-10-05 02:00:00,27732.15,27738.13,27660.89,27677.67,621.91168,,,,,
2023-10-05 03:00:00,27677.67,27709.53,27650.01,27651.73,437.85462,,,,,
2023-10-05 04:00:00,27651.73,27698.25,27638.36,27692.91,503.48328,,,,,
2023-10-05 05:00:00,27692.91,27702.20,27641.30,27641.31,482.42741,,,-90.85,,
...,...,...,...,...,...,...,...,...,...,...
2023-10-25 16:00:00,34839.80,35132.85,34457.01,34480.71,5462.19006,34478.827143,34156.351333,143.70,58.342836,407.611229
2023-10-25 17:00:00,34480.72,34704.32,34380.85,34660.15,2157.62890,34538.648571,34160.259333,176.16,61.306635,401.601141
2023-10-25 18:00:00,34660.16,34734.82,34429.83,34661.22,1933.18457,34604.651429,34163.087000,-108.46,61.324305,394.700345
2023-10-25 19:00:00,34661.22,34792.15,34547.37,34738.54,1359.04309,34662.012857,34173.332000,-101.26,62.651621,383.991749


In [24]:
# Step 4: Preprocess the data
# df.dropna(inplace=True)

df

Unnamed: 0_level_0,open,high,low,close,volume,7_day_avg,30_day_avg,momentum,rsi,volatility
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-10-05 01:00:00,27811.79,27812.41,27688.22,27732.16,889.76376,,,,,
2023-10-05 02:00:00,27732.15,27738.13,27660.89,27677.67,621.91168,,,,,
2023-10-05 03:00:00,27677.67,27709.53,27650.01,27651.73,437.85462,,,,,
2023-10-05 04:00:00,27651.73,27698.25,27638.36,27692.91,503.48328,,,,,
2023-10-05 05:00:00,27692.91,27702.20,27641.30,27641.31,482.42741,,,-90.85,,
...,...,...,...,...,...,...,...,...,...,...
2023-10-25 16:00:00,34839.80,35132.85,34457.01,34480.71,5462.19006,34478.827143,34156.351333,143.70,58.342836,407.611229
2023-10-25 17:00:00,34480.72,34704.32,34380.85,34660.15,2157.62890,34538.648571,34160.259333,176.16,61.306635,401.601141
2023-10-25 18:00:00,34660.16,34734.82,34429.83,34661.22,1933.18457,34604.651429,34163.087000,-108.46,61.324305,394.700345
2023-10-25 19:00:00,34661.22,34792.15,34547.37,34738.54,1359.04309,34662.012857,34173.332000,-101.26,62.651621,383.991749


In [13]:
# Fit close_scaler only on the training set's close prices
train_size = int(len(df) * 0.8)
train_data = df.iloc[:train_size]
close_scaler = MinMaxScaler(feature_range=(0, 1))
close_scaler.fit(train_data['close'].values.reshape(-1, 1))

print(train_data)

                         open      high       low     close      volume
timestamp                                                              
2023-10-05 01:00:00  27811.79  27812.41  27688.22  27732.16   889.76376
2023-10-05 02:00:00  27732.15  27738.13  27660.89  27677.67   621.91168
2023-10-05 03:00:00  27677.67  27709.53  27650.01  27651.73   437.85462
2023-10-05 04:00:00  27651.73  27698.25  27638.36  27692.91   503.48328
2023-10-05 05:00:00  27692.91  27702.20  27641.30  27641.31   482.42741
...                       ...       ...       ...       ...         ...
2023-10-21 12:00:00  29804.99  29833.03  29744.98  29768.27  1029.79584
2023-10-21 13:00:00  29768.28  29798.32  29678.69  29775.48  1018.96783
2023-10-21 14:00:00  29775.49  29921.68  29752.76  29884.10  1621.77159
2023-10-21 15:00:00  29884.10  29980.00  29771.46  29937.36  1895.23623
2023-10-21 16:00:00  29937.37  29974.26  29846.00  29867.41  1190.57162

[400 rows x 5 columns]


In [14]:
# Normalize all features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df.values)
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)

print(scaled_data)

[[0.14371193 0.13039646 0.14172258 0.13401092 0.02770342]
 [0.13400865 0.12174652 0.13835322 0.1273719  0.01793977]
 [0.12737084 0.11841604 0.13701189 0.1242114  0.01123059]
 ...
 [0.97811278 0.93651359 0.97285652 0.97824196 0.06573783]
 [0.97824193 0.94318969 0.98734734 0.98766256 0.04480943]
 [0.98766255 0.94995313 1.         0.98893335 0.05323587]]


In [17]:
# Create training and testing data
def create_dataset(data, look_back=48):
    close_col_idx = data.columns.get_loc('close')
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data.iloc[i:(i + look_back)].values)
        Y.append(data.iloc[i + look_back, close_col_idx])
    return np.array(X), np.array(Y)

X_train, Y_train = create_dataset(train_data)
X_test, Y_test = create_dataset(df.iloc[train_size - 48:])

print(Y_train)

[27725.68 27724.35 27992.76 27960.61 27555.   27446.63 27483.43 27495.08
 27471.28 27472.65 27450.   27435.26 27410.39 27457.17 27516.87 27541.63
 27559.2  27527.87 27499.99 27491.66 27638.52 27616.68 27690.6  27670.5
 27760.   27252.99 27526.1  27670.   27941.12 27877.74 27958.52 27904.12
 27952.36 27977.6  28042.37 27971.25 27931.09 27912.91 27901.94 27878.83
 27900.41 27915.01 27900.44 27890.01 27890.8  28006.61 27951.74 27947.1
 27942.12 27958.99 27938.91 27952.48 27939.8  27940.88 27933.15 27891.19
 27865.48 27925.55 27931.9  27951.03 27956.67 27964.93 28016.27 28012.64
 28064.87 27924.19 27933.91 27891.82 27905.61 27911.55 27878.8  27818.77
 27811.63 27810.2  27910.75 27930.72 27892.74 27912.59 27878.8  27871.88
 27894.99 27904.04 27916.89 27919.01 27917.05 27822.13 27961.86 27938.22
 27921.73 27929.49 27864.2  27902.44 27813.27 27745.76 27542.97 27532.84
 27468.92 27436.01 27518.87 27498.19 27443.31 27403.03 27589.76 27565.31
 27616.66 27569.6  27620.21 27586.73 27590.12 27553.1

In [18]:
# Step 5: Implement and train the LSTM model
print(df)

num_features = df.shape[1]
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], num_features)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=50, batch_size=64)

                         open      high       low     close      volume
timestamp                                                              
2023-10-05 01:00:00  27811.79  27812.41  27688.22  27732.16   889.76376
2023-10-05 02:00:00  27732.15  27738.13  27660.89  27677.67   621.91168
2023-10-05 03:00:00  27677.67  27709.53  27650.01  27651.73   437.85462
2023-10-05 04:00:00  27651.73  27698.25  27638.36  27692.91   503.48328
2023-10-05 05:00:00  27692.91  27702.20  27641.30  27641.31   482.42741
...                       ...       ...       ...       ...         ...
2023-10-25 16:00:00  34839.80  35132.85  34457.01  34480.71  5462.19006
2023-10-25 17:00:00  34480.72  34704.32  34380.85  34660.15  2157.62890
2023-10-25 18:00:00  34660.16  34734.82  34429.83  34661.22  1933.18457
2023-10-25 19:00:00  34661.22  34792.15  34547.37  34738.54  1359.04309
2023-10-25 20:00:00  34738.54  34850.23  34650.00  34748.97  1590.21071

[500 rows x 5 columns]
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4

<keras.src.callbacks.History at 0x2872b956bf0>

In [19]:
# Step 6: Make Predictions
predicted_prices = model.predict(X_test)
predicted_prices = close_scaler.inverse_transform(predicted_prices)



In [22]:
# Step 7: Visualization
actual_prices = close_scaler.inverse_transform(Y_test.reshape(-1, 1))
time_range = df.index[-len(actual_prices):]
fig = go.Figure()
fig.add_trace(go.Scatter(x=time_range, y=actual_prices.flatten(), mode='lines', name='Actual BTC Price'))
fig.add_trace(go.Scatter(x=time_range, y=predicted_prices.flatten(), mode='lines', name='Predicted BTC Price'))
fig.update_layout(title='BTC Price Prediction', xaxis_title='Time', yaxis_title='BTC Price (USDT)', template="plotly_dark", height=800)
fig.show()