In [6]:
# to get the all subsets of a set for implement exhaustive feature selection
from itertools import chain, combinations

# to read and manipulation data
import pandas as pd

# to download data of the cryptocurrency
import yfinance as yf

# to preprocess the data, make, train and evaluate the model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
import sklearn.metrics as metrics
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

In [7]:
# download data of the cryptocurrency

df_xmr = yf.download(tickers="XMR-USD", period="max", interval="1d", start="2023-01-01", end="2023-10-09")

[*********************100%%**********************]  1 of 1 completed


In [8]:
# show the downloaded data
df_xmr.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-01,147.309662,148.93103,146.437485,148.576935,148.576935,36453347
2023-01-02,148.582184,149.623535,147.943558,147.943558,147.943558,47050925
2023-01-03,147.933929,149.027832,147.62886,148.48793,148.48793,48662135
2023-01-04,148.466995,152.488983,148.342621,150.743652,150.743652,83915181
2023-01-05,150.790253,155.921738,150.769043,155.921738,155.921738,78049428


In [9]:
actual_class = (
    (df_xmr.loc["2023-01-04":, "Close"].to_numpy() > df_xmr.loc["2023-01-03":"2023-10-07", "Close"]).astype(int)
)
df_xmr = df_xmr.drop("2023-10-08")
df_xmr["Price increase (in the next day)"] = actual_class

In [10]:
# Split the data into training, validation, and test sets
train_data, test_data = train_test_split(df_xmr, test_size=0.2, shuffle=False)
train_data, val_data = train_test_split(train_data, test_size=0.2, shuffle=False)

# Define feature columns and target variable
feature_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']
target_col = 'Price increase (in the next day)'

# Split features and target variable for training, validation, and test sets
X_train, y_train = train_data[feature_cols], train_data[target_col]
X_val, y_val = val_data[feature_cols], val_data[target_col]
X_test, y_test = test_data[feature_cols], test_data[target_col]

# Handle missing values in the features and target variables
imputer = SimpleImputer(strategy="mean")
X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)
X_test = imputer.transform(X_test)

# Handle missing values in the target variable
imputer_target = SimpleImputer(strategy="most_frequent")
y_train = imputer_target.fit_transform(y_train.values.reshape(-1, 1)).ravel()
y_val = imputer_target.transform(y_val.values.reshape(-1, 1)).ravel()
y_test = imputer_target.transform(y_test.values.reshape(-1, 1)).ravel()

# Reshape features into 3D format (samples, timesteps, features)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_val = np.reshape(X_val, (X_val.shape[0], 1, X_val.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the LSTM model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=1)

# Make predictions on the validation set
val_probabilities = model.predict(X_val)
val_predictions = (val_probabilities > 0.5).astype(int)

# Evaluate model performance on the validation set
accuracy = accuracy_score(y_val, val_predictions)
precision = precision_score(y_val, val_predictions)
recall = recall_score(y_val, val_predictions)
f1 = f1_score(y_val, val_predictions)
auc = roc_auc_score(y_val, val_probabilities)
confusion = confusion_matrix(y_val, val_predictions)

# Print evaluation metrics and confusion matrix
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'AUC: {auc}')
print('Confusion Matrix:')
print(confusion)

# Make predictions on the test set
test_probabilities = model.predict(X_test)
test_predictions = (test_probabilities > 0.5).astype(int)

# Evaluate model performance on the test set
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)
test_recall = recall_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions)
test_auc = roc_auc_score(y_test, test_probabilities)
test_confusion = confusion_matrix(y_test, test_predictions)

# Print evaluation metrics and confusion matrix for the test set
print('\nTest Set Performance:')
print(f'Test Accuracy: {test_accuracy}')
print(f'Test Precision: {test_precision}')
print(f'Test Recall: {test_recall}')
print(f'Test F1 Score: {test_f1}')
print(f'Test AUC: {test_auc}')
print('Test Confusion Matrix:')
print(test_confusion)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [12]:
val_predictions

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1]])

In [11]:
# Print evaluation metrics and confusion matrix for the test set
print('\nTest Set Performance:')
print(f'Test Accuracy: {test_accuracy}')
print(f'Test Precision: {test_precision}')
print(f'Test Recall: {test_recall}')
print(f'Test F1 Score: {test_f1}')
print(f'Test AUC: {test_auc}')
print('Test Confusion Matrix:')
print(test_confusion)


Test Set Performance:
Test Accuracy: 0.5892857142857143
Test Precision: 0.5892857142857143
Test Recall: 1.0
Test F1 Score: 0.7415730337078651
Test AUC: 0.5
Test Confusion Matrix:
[[ 0 23]
 [ 0 33]]
