In [73]:
import pandas as pd
import numpy as np
from binance.client import Client
from prophet import Prophet
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import warnings

# Suppress ARIMA depreciation warning
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Binance API credentials
api_key = "5bb2b139e2f3693d8b35ca1f8252723355505d562b8e20625b513d0415f2a1e9"
api_secret = "7bbec5768461b201caba13b9605b0bc7fa0332a06f9480d5ef3405edd582ae80"

client = Client(api_key, api_secret)

# Get historical candlestick data
candlesticks = client.get_historical_klines("BTCUSDT", Client.KLINE_INTERVAL_1DAY)

# Extract relevant data from the candlesticks
data = []
for candlestick in candlesticks:
    timestamp = candlestick[0]
    open_, high, low, close, volume, close_time, _ = candlestick[1:8]  # Extract OHLCV values
    data.append([timestamp, open_, high, low, close, volume, close_time])

# Create DataFrame with proper column names
df = pd.DataFrame(data, columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close_Time'])

# Convert the 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'], unit='ms')

# Convert relevant columns to numeric
cols_to_numeric = ['Open', 'High', 'Low', 'Close', 'Volume']
df[cols_to_numeric] = df[cols_to_numeric].apply(pd.to_numeric)



# Function to calculate Exponential Moving Average (EMA)
def calculate_ema(data, window):
    ema = data.ewm(span=window, adjust=False).mean()
    return ema

# Function to calculate Relative Strength Index (RSI)
def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# Function to calculate Average Directional Index (ADX)
def calculate_dx(high, low, close, window):
    tr = high - low
    dm_plus = high.diff().where((high.diff() > low.diff()) & (high.diff() > 0), 0)
    dm_minus = -low.diff().where((low.diff() > high.diff()) & (low.diff() > 0), 0)
    
    ema_dm_plus = calculate_ema(dm_plus, window)
    ema_dm_minus = calculate_ema(dm_minus, window)
    ema_tr = calculate_ema(tr, window)
    
    dx = 100 * abs(ema_dm_plus - ema_dm_minus) / ema_tr
    return dx

def calculate_adx(high, low, close, window):
    dx = calculate_dx(high, low, close, window)
    adx = calculate_ema(dx, window)
    return adx

# Calculate EMA, RSI, and ADX
def calculate_indicators(df):
    df['EMA'] = calculate_ema(df['Close'], 55)
    df['RSI'] = calculate_rsi(df['Close'])
    df['ADX'] = calculate_adx(df['High'], df['Low'], df['Close'], 14)

calculate_indicators(df)

# Implement the long and short strategies
def long_strategy(row):
    if row['RSI'] < 30 and row['ADX'] > 20 and row['Close'] > row['EMA']:
        return 1
    return 0

def short_strategy(row):
    if row['RSI'] > 70 and row['ADX'] > 20 and row['Close'] < row['EMA']:
        return -1
    return 0

df['Buy_Signal'] = df.apply(long_strategy, axis=1)
df['Sell_Signal'] = df.apply(short_strategy, axis=1)

# Data Preprocessing for Models

# Splitting data into train and test sets
train_size = int(0.8 * len(df))
train_data = df[:train_size]
test_data = df[train_size:]

# Model Training (FBProphet)

prophet_model = Prophet()
prophet_model.fit(train_data.rename(columns={'Date': 'ds', 'Close': 'y'}))

# Model Training (ARIMA)

from statsmodels.tsa.arima.model import ARIMA as sm_ARIMA

# Fit ARIMA model
arima_model = sm_ARIMA(train_data['Close'], order=(5, 1, 0))
arima_model_fit = arima_model.fit()


# Normalize the data for LSTM model
scaler = MinMaxScaler()
train_data.loc[:, 'Close'] = scaler.fit_transform(train_data[['Close']])
test_data.loc[:, 'Close'] = scaler.transform(test_data[['Close']])


# Create sequences for LSTM
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        sequence = data[i:i + seq_length]
        sequences.append(sequence)
    return np.array(sequences)

seq_length = 10  # Define the sequence length
train_sequences = create_sequences(train_data['Close'].values, seq_length)
test_sequences = create_sequences(test_data['Close'].values, seq_length)

X_train = train_sequences[:, :-1]
y_train = train_sequences[:, -1]
X_test = test_sequences[:, :-1]
y_test = test_sequences[:, -1]

# Model Training (LSTM)

lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(seq_length - 1, 1)))  # Reshaped input shape
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')

lstm_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Model Training (GRU)

gru_model = Sequential()
gru_model.add(GRU(50, activation='relu', input_shape=(seq_length - 1, 1)))  # Reshaped input shape
gru_model.add(Dense(1))
gru_model.compile(optimizer='adam', loss='mse')

gru_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Prediction (Prophet)
prophet_pred = prophet_model.predict(test_data.rename(columns={'Date': 'ds'}))
test_data['Prophet_Predicted_Close'] = prophet_pred['yhat']

# Handle NaN values in Prophet predictions
prophet_pred_non_nan = test_data['Prophet_Predicted_Close'].fillna(0)
test_data['Prophet_Predicted_Close'] = (prophet_pred_non_nan > threshold) * 1

# Prediction (ARIMA)
arima_pred = arima_model_fit.forecast(steps=len(test_data))
test_data['ARIMA_Predicted_Close'] = arima_pred

lstm_pred = lstm_model.predict(X_test)
lstm_pred = scaler.inverse_transform(np.concatenate((X_test[:, :-1], lstm_pred), axis=1))[:, -1]
test_data.loc[lstm_pred_index, 'LSTM_Predicted_Close'] = (lstm_pred > threshold) * 1

gru_pred = gru_model.predict(X_test)
gru_pred = scaler.inverse_transform(np.concatenate((X_test[:, :-1], gru_pred), axis=1))[:, -1]
test_data.loc[gru_pred_index, 'GRU_Predicted_Close'] = (gru_pred > threshold) * 1


# Convert predictions to binary based on your strategy
test_data['Prophet_Predicted_Close'] = (test_data['Prophet_Predicted_Close'] > threshold) * 1
test_data['ARIMA_Predicted_Close'] = (test_data['ARIMA_Predicted_Close'] > threshold) * 1
test_data['LSTM_Predicted_Close'] = (test_data['LSTM_Predicted_Close'] > threshold) * 1
test_data['GRU_Predicted_Close'] = (test_data['GRU_Predicted_Close'] > threshold) * 1

# Calculate evaluation metrics for ARIMA, LSTM, and GRU
arima_pred_index = test_data.index[seq_length - 1:]
lstm_pred_index = test_data.index[seq_length - 1 : seq_length - 1 + len(lstm_pred)]
gru_pred_index = test_data.index[seq_length - 1 : seq_length - 1 + len(gru_pred)]

arima_accuracy = accuracy_score(test_data.loc[arima_pred_index, 'Buy_Signal'], test_data.loc[arima_pred_index, 'ARIMA_Predicted_Close'])
lstm_accuracy = accuracy_score(test_data.loc[lstm_pred_index, 'Buy_Signal'], test_data.loc[lstm_pred_index, 'LSTM_Predicted_Close'])
gru_accuracy = accuracy_score(test_data.loc[gru_pred_index, 'Buy_Signal'], test_data.loc[gru_pred_index, 'GRU_Predicted_Close'])

# Calculate weights based on accuracies
model_accuracies = [arima_accuracy, lstm_accuracy, gru_accuracy]
model_weights = [1 / accuracy for accuracy in model_accuracies]
total_weight = sum(model_weights)
normalized_weights = [weight / total_weight for weight in model_weights]

# Calculate weighted average predictions
weighted_avg_pred = (
    normalized_weights[0] * test_data['ARIMA_Predicted_Close'] +
    normalized_weights[1] * test_data['LSTM_Predicted_Close'] +
    normalized_weights[2] * test_data['GRU_Predicted_Close']
)

# Convert weighted average predictions to binary based on strategy
test_data['Ensemble_Predicted_Close'] = (weighted_avg_pred > threshold) * 1

# Calculate accuracy of the ensemble model
ensemble_accuracy = accuracy_score(test_data.loc[arima_pred_index, 'Buy_Signal'], test_data.loc[arima_pred_index, 'Ensemble_Predicted_Close'])
print(f"Ensemble Accuracy: {ensemble_accuracy}")

# Save ensemble results to CSV
result_df = test_data[['Date', 'Buy_Signal', 'Prophet_Predicted_Close', 'ARIMA_Predicted_Close', 'LSTM_Predicted_Close', 'GRU_Predicted_Close', 'Ensemble_Predicted_Close']]
result_df.to_csv('prediction_results.csv', index=False)








10:17:39 - cmdstanpy - INFO - Chain [1] start processing
10:17:40 - cmdstanpy - INFO - Chain [1] done processing


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Prophet_Predicted_Close'] = prophet_pred['yhat']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Prophet_Predicted_Close'] = (prophet_pred_non_nan > threshold) * 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['ARIMA_Predicted_Close'] = arima_pred




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data.loc[lstm_pred_index, 'LSTM_Predicted_Close'] = (lstm_pred > threshold) * 1


Ensemble Accuracy: 0.02617801047120419
Ensemble results saved to 'prediction_results.csv' successfully.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data.loc[gru_pred_index, 'GRU_Predicted_Close'] = (gru_pred > threshold) * 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Prophet_Predicted_Close'] = (test_data['Prophet_Predicted_Close'] > threshold) * 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['ARIMA_Predicted_Cl