In [None]:
"enter your code"

In [None]:
import pandas as pd

In [None]:
# Load the merged daily dataset from the attached Kaggle input and display it
# as this cell's output.
merged_data_path = "/kaggle/input/merge-data-1h-and-1d/merged_data_1d.csv"
merged_data = pd.read_csv(filepath_or_buffer=merged_data_path)
merged_data

In [None]:
# Show every available column name as a plain Python list.
print(list(merged_data.columns))

In [None]:
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import LSTM, Dense, Dropout  
from sklearn.preprocessing import MinMaxScaler  
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau  


# Column that the model predicts. It must stay FIRST in `features`: the
# windowing code in a later cell reads column 0 as the label, and the
# inverse-scaling code also pulls column 0 back out.
TARGET_COLUMN = 'close_price'

# Columns fed to the model, in the exact order they are scaled and windowed.
features = [
    TARGET_COLUMN,
    'S&P 500 Index',
    'VIX Volatility Index',
    'WTI Crude Oil Futures',
    'US Dollar Index',
    'Gold Futures',
    'volume',
    'positive',
    'neutral',
    'negative',
    'Average Block Size',
    'Difficulty',
    'Hash Rate',
    'Miners Revenue',
    'Number Of Unique Addresses Used',
    'open_price',
    'high_price',
    'low_price',
]

# Restrict the merged frame to the modelling columns (raises KeyError if any
# listed column is absent from `merged_data`).
data = merged_data.loc[:, features]

# Flat (un-windowed) predictors / target.
# NOTE: `X` and `y` are rebound to the windowed arrays in a later cell.
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def create_dataset(data, time_step, target_col=0):
    """Slice a 2-D series into overlapping look-back windows and next-step targets.

    Sample ``i`` consists of rows ``i .. i + time_step - 1`` (all columns) and
    its target is the value of ``target_col`` in row ``i + time_step``.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_features)
        Anything ``np.asarray`` can turn into a 2-D array (ndarray, DataFrame, ...).
    time_step : int
        Number of consecutive rows in each window; must be >= 1.
    target_col : int, default 0
        Column index used as the prediction target.

    Returns
    -------
    X : np.ndarray of shape (n_samples, time_step, n_features)
    y : np.ndarray of shape (n_samples,)
        ``n_samples = max(n_rows - time_step, 0)``. When the input is too short
        the arrays are empty but still have the shapes above, so downstream
        code can keep reading ``X.shape[1]`` / ``X.shape[2]``.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D or ``time_step`` is smaller than 1.
    """
    values = np.asarray(data)
    if values.ndim != 2:
        raise ValueError(f"data must be 2-D (rows x features), got ndim={values.ndim}")
    if time_step < 1:
        raise ValueError(f"time_step must be >= 1, got {time_step}")

    n_samples = max(len(values) - time_step, 0)

    # Row indices of every window, built by broadcasting:
    # window_rows[i, j] == i + j, so values[window_rows][i] is rows i..i+time_step-1.
    window_rows = np.arange(n_samples)[:, None] + np.arange(time_step)[None, :]
    X = values[window_rows]

    # Target of window i is the row right after it; copy so the result does
    # not alias the caller's array.
    y = values[time_step:time_step + n_samples, target_col].copy()
    return X, y



# Look-back window length (rows per sample).
time_step = 7

# Chronological 80/20 split, counted in windows. create_dataset yields one
# window per row after the first `time_step` rows.
n_windows = max(len(data) - time_step, 0)
train_size = int(n_windows * 0.8)

# Fit the scaler ONLY on rows the training windows and their targets can see,
# i.e. rows [0, train_size + time_step). Fitting on the whole frame would leak
# the test period's min/max into the training features. As a consequence,
# scaled test values may fall outside [0, 1]; that is expected.
scaler = MinMaxScaler()
scaler.fit(data.iloc[:train_size + time_step])
data_scaled = scaler.transform(data)

X, y = create_dataset(data_scaled, time_step)

# Earlier windows train the model, later windows are held out for testing.
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]




# Two stacked bidirectional LSTM layers followed by a small dense head that
# outputs a single (scaled) value per window.
#
# The input shape is declared with an explicit Input layer instead of passing
# `input_shape=` to the Bidirectional wrapper: newer Keras versions deprecate
# `input_shape` on layers and recommend an Input object as the first layer of
# a Sequential model. The resulting architecture is unchanged.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),  # (time steps, features)
    # First recurrent layer returns the full sequence so the next LSTM can consume it.
    tf.keras.layers.Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.3),
    # Second recurrent layer returns only its final state.
    tf.keras.layers.Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')


# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights; halve the learning rate after 5 stagnant epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.0001)

# Two fixes relative to the previous single-epoch run:
#  * `epochs` is an upper bound large enough for the callbacks above to act
#    (with epochs=1 neither patience window could ever elapse).
#  * Validation uses the tail of the TRAINING windows (`validation_split`
#    takes the last fraction of the arrays, before shuffling) instead of the
#    test set, so early stopping / best-weight restoration never sees the data
#    the final metrics are computed on.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stopping, reduce_lr],
)

In [None]:

def _inverse_close_price(scaled_close):
    """Map scaled column-0 values back to the original price scale.

    `scaler` was fitted on every column of `data`, so its inverse transform
    expects a full-width matrix. The values are placed in column 0, the other
    columns are zero-filled placeholders, and only column 0 of the result is
    returned.
    """
    padded = np.zeros((len(scaled_close), data.shape[1]))
    padded[:, 0] = np.ravel(scaled_close)
    return scaler.inverse_transform(padded)[:, 0]


# Predictions for the held-out windows, still in scaled units.
y_pred = model.predict(X_test)

# Bring predictions and ground truth back to the original scale.
y_pred_rescaled = _inverse_close_price(y_pred)
y_test_rescaled = _inverse_close_price(y_test)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# --- Error metrics on the rescaled values --------------------------------
mse = mean_squared_error(y_true=y_test_rescaled, y_pred=y_pred_rescaled)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=y_test_rescaled, y_pred=y_pred_rescaled)
r2 = r2_score(y_true=y_test_rescaled, y_pred=y_pred_rescaled)

# --- Directional agreement ------------------------------------------------
# Sign of each step-to-step change (+1 up, -1 down, 0 flat) for both series;
# the accuracy is the share of steps where the two signs coincide.
direction_actual = np.sign(y_test_rescaled[1:] - y_test_rescaled[:-1])
direction_predicted = np.sign(y_pred_rescaled[1:] - y_pred_rescaled[:-1])
direction_accuracy = (direction_actual == direction_predicted).mean()

# --- Report ---------------------------------------------------------------
print(f'Mean Squared Error (MSE): {mse:.2f}')
print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
print(f'Mean Absolute Error (MAE): {mae:.2f}')
print(f'R² (Coefficient of Determination): {r2:.2f}')
print(f'Direction Accuracy: {direction_accuracy:.2f}')

# --- Persist the metrics as a two-column table ----------------------------
evaluation_metrics = {
    'Metric': ['MSE', 'RMSE', 'MAE', 'R²', 'Direction Accuracy'],
    'Value': [mse, rmse, mae, r2, direction_accuracy],
}
df_metrics = pd.DataFrame(data=evaluation_metrics)
df_metrics.to_csv('evaluation_metrics.csv', index=False)


# Overlay the actual and predicted series on one wide figure.
plt.figure(figsize=(14, 7))
for series, colour, legend_label in (
    (y_test_rescaled, 'blue', 'Actual Price'),
    (y_pred_rescaled, 'red', 'Predicted Price'),
):
    plt.plot(series, color=colour, label=legend_label, linewidth=2)
plt.title('Actual vs Predicted Close Prices')
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.legend()
plt.show()


In [None]:
import pandas as pd
import numpy as np


# File the per-row results are written to; also used in the final message so
# the two can no longer disagree.
results_path = 'pred.csv'

# Dates for the test targets. The test targets are the last len(y_test) rows
# of the frame (each window's target is the row right after the window, and
# the test split is the tail of the windows), so the trailing rows line up.
# NOTE(review): assumes `merged_data` has a 'date' column - it is not in the
# feature list, so confirm against the CSV.
dates = merged_data['date'].tail(len(y_test_rescaled)).values

# Signed error: positive when the model under-predicts.
prediction_error = y_test_rescaled - y_pred_rescaled

# Per-row flag: did the predicted series move in the same direction as the
# actual series between consecutive rows? Stored under its own name so the
# scalar `direction_accuracy` from the metrics cell is not overwritten.
direction_match = np.sign(np.diff(y_test_rescaled)) == np.sign(np.diff(y_pred_rescaled))
# The first row has no previous row to compare with; pad with NaN (this also
# turns the booleans into 1.0 / 0.0 floats).
direction_match = np.concatenate(([np.nan], direction_match))

results_df = pd.DataFrame({
    'Date': dates,
    'Actual Price': y_test_rescaled,
    'Predicted Price': y_pred_rescaled,
    'Prediction Error': prediction_error,
    'Direction Accuracy': direction_match,
})

results_df.to_csv(results_path, index=False)

print(f"Results with additional data have been saved to '{results_path}'.")
