**Import the required libraries**

In [None]:
import ccxt
import pandas as pd
import numpy as np
import talib
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import itertools
import plotly.graph_objects as go
import datetime

**Fetch data from Binance**

In [None]:
def fetch_data():
    """Download roughly two years of hourly BTC/USDT candles from Binance.

    The exchange returns a limited number of candles per request, so the
    history is paged: each request starts one candle after the last one
    received. (Stepping the cursor by a fixed 30 days, i.e. 720 hourly
    candles, skips data whenever a page holds fewer candles than that.)

    Returns:
        DataFrame indexed by ``timestamp`` (naive datetimes converted from the
        exchange's millisecond epoch values) with ``open``, ``high``, ``low``,
        ``close`` and ``volume`` columns, de-duplicated and sorted by time.
    """
    binance = ccxt.binance()

    timeframe = '1h'
    # Width of one candle in milliseconds, used to advance the cursor.
    candle_ms = binance.parse_timeframe(timeframe) * 1000
    # Candles requested per page.
    page_limit = 1000

    # Work in UTC: the exchange's `since` argument is a UTC epoch in ms, so a
    # local naive timestamp would shift the window by the UTC offset.
    end_time = datetime.datetime.now(datetime.timezone.utc)
    start_time = end_time - datetime.timedelta(days=730)
    since = int(start_time.timestamp() * 1000)
    end_ms = int(end_time.timestamp() * 1000)

    ohlcv_list = []
    while since < end_ms:
        ohlc = binance.fetch_ohlcv('BTC/USDT', timeframe, since=since, limit=page_limit)

        # No more data available from the exchange.
        if not ohlc:
            break

        ohlcv_list.extend(ohlc)

        # Continue right after the last candle actually received.
        next_since = ohlc[-1][0] + candle_ms
        if next_since <= since:
            # Cursor did not advance; stop rather than loop forever.
            break
        since = next_since

    # Convert to DataFrame
    df = pd.DataFrame(ohlcv_list, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('timestamp', inplace=True)

    # Remove duplicates, if any, and sort by timestamp
    df = df[~df.index.duplicated(keep='first')].sort_index()

    return df

# Download the hourly candles once; the cells below reuse this frame.
df = fetch_data()

# Bare expression so the notebook renders the frame as the cell output.
df

**Create Technical Indicators using TA-Lib**

In [None]:
def add_technical_indicators(df):
    """Append TA-Lib indicator columns to ``df`` and return it.

    Reads the ``close``, ``high``, ``low`` and ``volume`` columns and adds, in
    this order: ``rsi``, ``ma5``, ``ma10``, ``obv``, ``macd``, ``macdsignal``,
    ``macdhist``, ``atr``, ``slowk``, ``slowd``, ``bb_upper``, ``bb_middle``
    and ``bb_lower``.

    Args:
        df: Price frame holding the four input columns listed above.

    Returns:
        The same frame object that was passed in; the new columns are written
        onto it in place.
    """
    closes = df['close'].values
    highs = df['high'].values
    lows = df['low'].values
    volumes = df['volume'].values

    # Momentum: relative strength index (library default period)
    df['rsi'] = talib.RSI(closes)

    # Trend: 5- and 10-period simple moving averages
    for period in (5, 10):
        df[f'ma{period}'] = talib.SMA(closes, timeperiod=period)

    # Volume: on-balance volume
    df['obv'] = talib.OBV(closes, volumes)

    # MACD line, its signal line and the histogram
    df['macd'], df['macdsignal'], df['macdhist'] = talib.MACD(
        closes, fastperiod=12, slowperiod=26, signalperiod=9
    )

    # Volatility: 14-period average true range
    df['atr'] = talib.ATR(highs, lows, closes, timeperiod=14)

    # Stochastic oscillator (%K and %D)
    df['slowk'], df['slowd'] = talib.STOCH(
        highs,
        lows,
        closes,
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    )

    # Bollinger Bands over 20 periods
    df['bb_upper'], df['bb_middle'], df['bb_lower'] = talib.BBANDS(closes, timeperiod=20)

    return df

# Rebind df to the frame returned with the indicator columns added.
df = add_technical_indicators(df)

**Handle NaN values after adding technical indicators**

In [None]:
# Fill NaN values: each NaN takes the next valid value below it in its column.
# NOTE(review): presumably this targets the leading NaNs left by the indicator
# look-back windows, in which case those rows borrow later values - confirm
# whether dropping them would be preferable.
df.bfill(inplace=True)

**Compute the Correlation Matrix**

In [None]:
# Technical-indicator columns: everything except the raw OHLCV fields
indicators = [col for col in df.columns if col not in ['open', 'high', 'low', 'close', 'volume']]

# Feature correlation analysis: close price against every indicator
# (computed once; the result is reused for the heatmap below)
correlation_matrix = df[['close'] + indicators].corr()

import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap with the colour scale pinned to the full [-1, 1] range
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title("Feature Correlation Matrix")
plt.show()

In [None]:
# Column whose correlations are listed
feature = 'close'

# Correlation of every column in df with every other, then the slice for `feature`
correlation_matrix = df.corr()
correlations = correlation_matrix[feature]

# One "<column> - <coefficient>" line per column, rounded to two decimals
for col, val in zip(correlations.index, correlations.to_numpy()):
    print(f"{col} - {val:.2f}")

**Prepare data for Prophet**

In [None]:
def prepare_prophet_data(df):
    """Reshape the price frame into Prophet's ``ds`` / ``y`` layout.

    The index is moved into a regular column, after which ``timestamp`` is
    renamed to ``ds`` and ``close`` to ``y``. The input frame is not modified.

    Args:
        df: Frame with the time index and price/indicator columns.

    Returns:
        A ``(df_prophet, indicators)`` tuple: the reshaped frame and the list
        of its column names that are neither ``ds``/``y`` nor raw
        open/high/low/volume fields.
    """
    # Columns that are never treated as extra regressors
    excluded = ('ds', 'y', 'open', 'high', 'low', 'volume')

    flattened = df.reset_index()
    df_prophet = flattened.rename(columns={"timestamp": "ds", "close": "y"})

    indicators = []
    for name in df_prophet.columns:
        if name not in excluded:
            indicators.append(name)

    return df_prophet, indicators

# prepare_prophet_data returns a (frame, indicator names) pair, so unpack it
# instead of binding the whole tuple to df_prophet.
df_prophet, indicators = prepare_prophet_data(df)

**Hyperparameter tuning for Prophet model using Grid Search**

In [None]:
def tune_prophet_parameters(df_prophet, indicators):
    """Grid-search Prophet settings on a chronological 90/10 hold-out split.

    Every combination in the parameter grid is fitted on the first 90% of the
    rows, with each indicator registered as an extra regressor, and scored by
    RMSE on the remaining 10%.

    Args:
        df_prophet: Frame with ``ds``, ``y`` and one column per indicator,
            ordered by time.
        indicators: Names of the columns to add as extra regressors.

    Returns:
        The parameter dict with the lowest validation RMSE. An empty dict is
        returned if no combination produced an RMSE below infinity.
    """
    # Splitting data into training and validation sets (no shuffling: the
    # validation rows are the most recent ones)
    train_size = int(0.9 * len(df_prophet))
    train_data = df_prophet.iloc[:train_size]
    validation_data = df_prophet.iloc[train_size:]

    # Define parameter grid
    param_grid = {
        'seasonality_mode': ['additive', 'multiplicative'],
        'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.5],
        'yearly_seasonality': [True, False],
        'weekly_seasonality': [True, False],
        'daily_seasonality': [True, False]
    }

    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]

    # Placeholder for best params and RMSE
    best_params = {}
    lowest_rmse = float('inf')

    def train_evaluate_model(params, train, validation, indicators):
        """Fit one configuration on ``train`` and return its RMSE on ``validation``."""
        model = Prophet(**params)
        for indicator in indicators:
            model.add_regressor(indicator)
        model.fit(train)

        # Predict on the validation rows themselves. Building the horizon with
        # make_future_dataframe() and its default daily frequency would yield
        # timestamps that do not match the validation rows being scored.
        predict_input = validation[['ds'] + list(indicators)]
        forecast = model.predict(predict_input)

        # Compare positionally: predict() returns a freshly indexed frame.
        rmse = np.sqrt(mean_squared_error(validation['y'].to_numpy(), forecast['yhat'].to_numpy()))
        return rmse

    # Grid search loop
    for params in all_params:
        rmse = train_evaluate_model(params, train_data, validation_data, indicators)
        if rmse < lowest_rmse:
            best_params = params
            lowest_rmse = rmse

    print(f"Best parameters: {best_params}")

    return best_params

**Forecast using Prophet**

In [None]:
def forecast(df, hours=24, best_params=None):
    """Fit Prophet on the full history and predict the next ``hours`` hours.

    Args:
        df: Price frame as accepted by ``prepare_prophet_data``.
        hours: Number of hourly steps to predict beyond the last row.
        best_params: Optional Prophet keyword arguments. When omitted, they are
            found with ``tune_prophet_parameters``, which fits one model per
            grid combination; pass a previously tuned dict to skip that cost.

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_lower`` and ``yhat_upper`` for
        the last ``hours`` rows of the prediction.
    """
    df_prophet, indicators = prepare_prophet_data(df)

    # Get the best parameters unless the caller supplied them
    if best_params is None:
        best_params = tune_prophet_parameters(df_prophet, indicators)

    model = Prophet(**best_params)

    # Dynamically add regressors based on the indicators list
    for indicator in indicators:
        model.add_regressor(indicator)

    model.fit(df_prophet)

    future = model.make_future_dataframe(periods=hours, freq='H')

    # IMPORTANT: every regressor needs a value on each future row. The true
    # future indicator values are unknown, so the last observed value is
    # carried forward across the horizon.
    for indicator in indicators:
        padding = pd.Series([np.nan] * hours)
        future[indicator] = pd.concat([df_prophet[indicator], padding], ignore_index=True).ffill()

    # Named `prediction` so the local does not shadow this function's own name
    prediction = model.predict(future)
    return prediction[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(hours)

# Tune, fit and predict the next 24 hourly closing prices.
forecast_data = forecast(df, hours=24)

**Visualize the forecast**

In [None]:
def plot_forecast(forecast_data):
    """Plot the predicted price with its uncertainty band.

    Args:
        forecast_data: Frame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.
    """
    fig = go.Figure()

    # Lower bound first, drawn as an invisible line: it is the baseline that
    # the next trace fills down to.
    fig.add_trace(go.Scatter(
        x=forecast_data['ds'],
        y=forecast_data['yhat_lower'],
        mode='lines',
        line=dict(width=0),
        name='Lower Bound',
    ))

    # Upper bound with fill='tonexty' shades the area back to the previous
    # trace (the lower bound), giving a single band between the two bounds.
    fig.add_trace(go.Scatter(
        x=forecast_data['ds'],
        y=forecast_data['yhat_upper'],
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        name='Upper Bound',
    ))

    # Predicted values last so the line is drawn on top of the band
    fig.add_trace(go.Scatter(x=forecast_data['ds'], y=forecast_data['yhat'], mode='lines', name='Predicted'))

    fig.update_layout(template="plotly_dark")

    fig.show()

# Render the predicted values together with their upper and lower bounds.
plot_forecast(forecast_data)

**Actual price vs. the predicted price**

In [None]:
def plot_actual_vs_predicted(df_actual, df_forecast):
    """
    Draw the actual and the predicted price as two lines on one figure.

    Args:
    - df_actual: DataFrame whose index supplies the x values and whose 'close'
      column supplies the actual prices.
    - df_forecast: DataFrame with a 'ds' column (x values) and a 'yhat' column
      (predicted prices).
    """
    fig = go.Figure(
        data=[
            # Actual prices
            go.Scatter(
                x=df_actual.index,
                y=df_actual['close'],
                mode='lines',
                name='Actual Price',
            ),
            # Predicted prices
            go.Scatter(
                x=df_forecast['ds'],
                y=df_forecast['yhat'],
                mode='lines',
                name='Predicted Price',
            ),
        ],
        layout=go.Layout(
            title='Actual vs. Predicted Prices',
            xaxis=dict(title='Date'),
            yaxis=dict(title='Price'),
            template='plotly_dark',  # dark theme
        ),
    )

    # Display the figure
    fig.show()
    
# Overlay the forecast on the actual closing prices.
plot_actual_vs_predicted(df, forecast_data)

**Merge the current price with the predicted prices**

In [None]:
def merge_actual_with_predicted(df_actual, df_forecast):
    """
    Pair every forecast row with the most recent actual closing price.

    Args:
    - df_actual: DataFrame containing the actual prices in a 'close' column;
      its last row is taken as the current price.
    - df_forecast: DataFrame containing the predicted prices with a 'ds' column
      and a 'yhat' column.

    Returns:
    - merged_df: DataFrame with columns 'ds', 'current_price' and
      'predicted_price', one row per row of df_forecast, indexed from 0.

    Raises:
    - IndexError: if df_actual has no rows.
    """
    # Get the most recent (i.e., current) price
    current_price = df_actual['close'].iloc[-1]

    # Build the result straight from the forecast rows. Joining the forecast
    # against a frame derived from itself would multiply rows whenever 'ds'
    # contains a repeated timestamp.
    merged_df = (
        df_forecast[['ds', 'yhat']]
        .reset_index(drop=True)
        .rename(columns={'yhat': 'predicted_price'})
    )

    # Same constant on every row, placed between 'ds' and 'predicted_price'
    merged_df.insert(1, 'current_price', current_price)

    return merged_df

# Create the merged DataFrame: each forecast row next to the latest close
merged_data = merge_actual_with_predicted(df, forecast_data)

# Display the merged data, selecting the three columns explicitly
print(merged_data[['ds', 'current_price', 'predicted_price']])