Step 1: Environment Setup

In [19]:
# %pip install ccxt numpy pandas ta-lib plotly scikit-learn

Step 2: Fetching Data

In [20]:
import ccxt
import pandas as pd
from datetime import datetime

# Market to download and the candle size to request
symbol = "BTC/USDT"  # replace with the pair you want to analyse
timeframe = "1d"  # one candle per day

# ccxt client for the MEXC exchange, used for every market-data request below
exchange = ccxt.mexc()

# Fetch historical data
def fetch_data(symbol, timeframe, since=None, limit=None):
    """Download OHLCV candles and return them as a DataFrame.

    The request is made through the module-level ``exchange`` client.

    Parameters
    ----------
    symbol : str
        Market symbol forwarded to ``exchange.fetch_ohlcv`` (e.g. 'BTC/USDT').
    timeframe : str
        Candle interval forwarded to ``exchange.fetch_ohlcv`` (e.g. '1d').
    since : int, optional
        Earliest candle to request, as a millisecond epoch timestamp.
        ``None`` (the default) leaves the start of the window to the exchange.
    limit : int, optional
        Maximum number of candles to request. ``None`` (the default) leaves
        the count to the exchange.

    Returns
    -------
    pandas.DataFrame
        One row per candle with columns ``timestamp``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``. ``timestamp`` is converted from
        epoch milliseconds to pandas datetimes.
    """
    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=limit)
    data = pd.DataFrame(ohlcv, columns=columns)
    # The exchange reports candle times as integer milliseconds since the epoch.
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    return data

# Download the candles for the configured market and preview the first rows
data = fetch_data(symbol=symbol, timeframe=timeframe)
data.head(5)

Unnamed: 0,timestamp,open,high,low,close,volume
0,2022-06-29,20278.52,20424.45,19860.15,20120.8,24062.67444
1,2022-06-30,20120.8,20168.98,18629.14,19947.18,26024.79006
2,2022-07-01,19947.18,20862.31,18979.41,19279.12,30998.650268
3,2022-07-02,19279.12,19452.12,18989.94,19251.81,13266.074649
4,2022-07-03,19251.81,19645.37,18787.5,19314.82,13665.058084


Step 3: Feature Engineering
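We use TA-Lib to compute the technical-analysis features: 20- and 50-period simple moving averages of the close price, plus the 14-period linear-regression slope of the close price.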

In [21]:
import talib

# All indicators below are computed from the close price
close_prices = data['close']

# Simple moving averages over 20 and 50 periods
data['SMA_20'] = talib.SMA(close_prices, timeperiod=20)
data['SMA_50'] = talib.SMA(close_prices, timeperiod=50)

# Slope of a 14-period linear regression of the close price
data['slope'] = talib.LINEARREG_SLOPE(close_prices, timeperiod=14)

data.tail(5)

Unnamed: 0,timestamp,open,high,low,close,volume,SMA_20,SMA_50,slope
495,2023-11-06,35011.88,35276.33,34725.91,35046.08,8549.363589,33202.933,29584.2166,90.933802
496,2023-11-07,35046.08,35887.99,34523.07,35399.13,9517.144032,33556.8895,29756.9378,91.564286
497,2023-11-08,35399.13,36102.57,35101.68,35620.81,9821.577793,33902.245,29925.1488,110.885143
498,2023-11-09,35620.81,37969.6,35539.61,36700.73,10409.875916,34253.829,30116.6634,144.763626
499,2023-11-10,36700.73,37450.74,36330.64,37309.89,8711.177635,34623.8335,30331.4902,176.282088


Step 4: Data Visualization

In [22]:
import plotly.graph_objects as go

# Candlestick trace built from the OHLC columns
candles = go.Candlestick(
    x=data['timestamp'],
    open=data['open'],
    high=data['high'],
    low=data['low'],
    close=data['close'],
)
fig = go.Figure(data=[candles])

# Overlay each moving average as its own line trace
for column, label in (('SMA_20', 'SMA 20'), ('SMA_50', 'SMA 50')):
    fig.add_trace(go.Scatter(x=data['timestamp'], y=data[column], mode='lines', name=label))

fig.update_layout(
    title='Price Chart with Moving Averages',
    xaxis_title='Date',
    yaxis_title='Price',
)
fig.show()

Step 5: Preparing Data for Model Training
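We split the data chronologically into training and testing sets (no shuffling, so the test set is the most recent 20% of rows) and standardize the features with a scaler fitted on the training set only.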

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target: the close price of the following row (the last row gets NaN)
data['target'] = data['close'].shift(-1)

# Feature columns, listed once so the NaN filter and the feature matrix stay in sync
feature_columns = ['open', 'high', 'low', 'close', 'volume', 'SMA_20', 'SMA_50', 'slope']

# Keep only rows where every feature and the target are present
data_cleaned = data.dropna(subset=feature_columns + ['target'])
features_cleaned = data_cleaned[feature_columns]
target_cleaned = data_cleaned['target']

# Ordered split: shuffle=False keeps the last 20% of rows as the test set
(
    X_train_cleaned,
    X_test_cleaned,
    y_train_cleaned,
    y_test_cleaned,
) = train_test_split(features_cleaned, target_cleaned, test_size=0.2, shuffle=False)

# Standardize features; the scaler is fitted on the training rows only
# (scaling is optional depending on the model chosen)
scaler = StandardScaler()
scaler.fit(X_train_cleaned)
X_train_scaled_cleaned = scaler.transform(X_train_cleaned)
X_test_scaled_cleaned = scaler.transform(X_test_cleaned)


Step 6: Model Building and Training
Let's use a simple linear regression model as a starting point. You can explore more complex models such as LSTM or ARIMA depending on your requirements.

In [24]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Fit a linear regression on the scaled training features
model_revised = LinearRegression().fit(X_train_scaled_cleaned, y_train_cleaned)

# Predict the held-out rows and score them with mean squared error
predictions_revised = model_revised.predict(X_test_scaled_cleaned)
mse_revised = mean_squared_error(y_true=y_test_cleaned, y_pred=predictions_revised)
print(f'Mean Squared Error: {mse_revised}')

Mean Squared Error: 360988.8133275415


Step 7: Model Evaluation and Visualization
Evaluate the model performance and visualize the predictions.

In [25]:
# Visualizing the predictions with revised data.
# Each test row's target is the NEXT row's close (target = close.shift(-1)), so the
# actual and predicted values are labelled with the next row's timestamp rather than
# the feature row's own timestamp; otherwise both lines are drawn one bar too early.
# Selecting by the test-set index keeps the dates aligned with y_test_cleaned.
test_dates_revised = data['timestamp'].shift(-1).loc[y_test_cleaned.index]

fig_revised = go.Figure()
fig_revised.add_trace(go.Scatter(x=test_dates_revised, y=y_test_cleaned, mode='lines', name='Actual'))
fig_revised.add_trace(go.Scatter(x=test_dates_revised, y=predictions_revised, mode='lines', name='Predicted'))

fig_revised.update_layout(title='Model Predictions vs Actual Data (Revised)', xaxis_title='Date', yaxis_title='Price')
fig_revised.show()
