Step 1: Environment Setup

In [10]:
# pip install ccxt numpy pandas ta-lib plotly

Step 2: Fetching Data

In [11]:
import ccxt
import pandas as pd

# Initialize the exchange
exchange = ccxt.mexc()

# Define the trading pair and timeframe
symbol = 'BTC/USDT:USDT'  # Replace with your desired symbol
timeframe = '1d'  # Daily data

# Fetch historical data
def fetch_data(symbol, timeframe):
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe)
    data = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    return data

# Fetch and display the data
data = fetch_data(symbol, timeframe)
data.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2020-06-05,9779.5,9849.5,9605.5,9619.5,12648208.0
1,2020-06-06,9619.5,9735.0,9531.0,9679.5,27077653.0
2,2020-06-07,9679.5,9802.0,9401.0,9741.5,22092848.0
3,2020-06-08,9741.5,9799.0,9651.0,9781.5,25817097.0
4,2020-06-09,9781.5,9870.5,9608.5,9774.0,44778753.0


Step 3: Feature Engineering
Using TA-Lib for technical analysis and linear slope as the first feature.

In [12]:
import talib

# Calculating simple moving averages as an example feature
data['SMA_20'] = talib.SMA(data['close'], timeperiod=20)
data['SMA_50'] = talib.SMA(data['close'], timeperiod=50)

# Linear slope
data['slope'] = talib.LINEARREG_SLOPE(data['close'], timeperiod=14)

data.tail()

Unnamed: 0,timestamp,open,high,low,close,volume,SMA_20,SMA_50,slope
1252,2023-11-09,35626.8,38200.3,35360.0,36719.6,2801233000.0,34256.985,30109.632,145.366374
1253,2023-11-10,36719.6,37566.2,36350.3,37330.3,1323597000.0,34628.74,30325.18,177.087033
1254,2023-11-11,37330.3,37422.2,36600.0,37162.1,877322500.0,34987.77,30537.09,195.26967
1255,2023-11-12,37162.1,37258.4,36753.7,37085.5,572769500.0,35188.31,30747.538,212.313187
1256,2023-11-13,37085.5,37460.4,36555.0,37014.8,547846100.0,35343.2,30963.13,214.623516


Step 4: Data Visualization

In [13]:
import plotly.graph_objects as go

# Create an interactive candlestick chart
fig = go.Figure(data=[go.Candlestick(x=data['timestamp'],
                open=data['open'],
                high=data['high'],
                low=data['low'],
                close=data['close'])])

# Add moving averages to the chart
fig.add_trace(go.Scatter(x=data['timestamp'], y=data['SMA_20'], mode='lines', name='SMA 20'))
fig.add_trace(go.Scatter(x=data['timestamp'], y=data['SMA_50'], mode='lines', name='SMA 50'))

fig.update_layout(title='Price Chart with Moving Averages', xaxis_title='Date', yaxis_title='Price', template="plotly_dark", height=800)
fig.show()

Step 5: Preparing Data for Model Training
We need to split the data into training and testing sets and normalize it if necessary.

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Creating the target variable as the next day's close price
data['target'] = data['close'].shift(-1)

# Drop rows with NaN values in the features and the target variable
data_cleaned = data.dropna(subset=['open', 'high', 'low', 'close', 'volume', 'SMA_20', 'SMA_50', 'slope', 'target'])

# Define features and target variable again
features_cleaned = data_cleaned[['open', 'high', 'low', 'close', 'volume', 'SMA_20', 'SMA_50', 'slope']]
target_cleaned = data_cleaned['target']

# Split the dataset again
X_train_cleaned, X_test_cleaned, y_train_cleaned, y_test_cleaned = train_test_split(features_cleaned, target_cleaned, test_size=0.2, shuffle=False)

# Scaling features (optional based on model choice)
scaler = StandardScaler()
X_train_scaled_cleaned = scaler.fit_transform(X_train_cleaned)
X_test_scaled_cleaned = scaler.transform(X_test_cleaned)

Step 6: Model Building and Training
Let's use a simple linear regression model as a starting point. You can explore more complex models like LSTM or ARIMA based on your requirement.

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Initialize and train the model with revised data
model_revised = LinearRegression()
model_revised.fit(X_train_scaled_cleaned, y_train_cleaned)

# Predictions and evaluation with revised data
predictions_revised = model_revised.predict(X_test_scaled_cleaned)
mse_revised = mean_squared_error(y_test_cleaned, predictions_revised)
print(f'Mean Squared Error: {mse_revised}')

Mean Squared Error: 350859.6522051639


Step 7: Model Evaluation and Visualization
Evaluate the model performance and visualize the predictions.

In [16]:
# Visualizing the predictions with revised data
test_dates_revised = data_cleaned['timestamp'].iloc[len(data_cleaned) - len(predictions_revised):]

fig_revised = go.Figure()
fig_revised.add_trace(go.Scatter(x=test_dates_revised, y=y_test_cleaned, mode='lines', name='Actual'))
fig_revised.add_trace(go.Scatter(x=test_dates_revised, y=predictions_revised, mode='lines', name='Predicted'))

fig_revised.update_layout(title='Model Predictions vs Actual Data (Revised)', xaxis_title='Date', yaxis_title='Price', template="plotly_dark", height=800)
fig_revised.show()


**Next Day Prediction**

Step 1: Prepare the Most Recent Data Point

In [17]:
# Select the most recent data point (excluding the 'target' column)
latest_data_point = data_cleaned[features_cleaned.columns].iloc[-1].to_frame().T

# Apply the same scaling to this data point
latest_data_point_scaled = scaler.transform(latest_data_point)

# Display the latest data point for reference
print("Latest Data Point (Scaled):")
print(latest_data_point_scaled)

Latest Data Point (Scaled):
[[ 0.29480517  0.23689437  0.34456631  0.28910189  0.21917252  0.17427369
  -0.10393527  0.48855547]]


Step 2: Predict the Next Day's Price

In [18]:
# Predict the next day's price
next_day_prediction = model_revised.predict(latest_data_point_scaled)

# Display the prediction
print(f"Predicted Price for the Next Day: {next_day_prediction[0]}")

Predicted Price for the Next Day: 37089.96735429899
