In [None]:
#random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# `btc_df` is not defined in this cell; it must already exist and contain the
# 'Open', 'High', 'Low', 'Close', 'Volume' and 'Candlestick Pattern' columns.

# Feature extraction: candle range (High - Low) and absolute body size |Close - Open|
btc_df['range'] = btc_df['High'] - btc_df['Low']
btc_df['body'] = abs(btc_df['Close'] - btc_df['Open'])
features = btc_df[['Open', 'High', 'Low', 'Close', 'Volume', 'range', 'body']]

# Encode labels as integer category codes, keeping the categorical around so the
# codes can be translated back into pattern names for the report.
pattern_categorical = btc_df['Candlestick Pattern'].astype('category')
labels = pattern_categorical.cat.codes
code_to_name = dict(enumerate(pattern_categorical.cat.categories))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Initialize and train the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Report only the classes that actually occur in the test split, by name.
# A missing pattern is encoded as -1 by `.cat.codes`; it is shown as 'UNLABELED'
# instead of being silently mapped onto a real category.
classes_in_test = sorted(int(code) for code in y_test.unique())
target_names = [str(code_to_name.get(code, 'UNLABELED')) for code in classes_in_test]

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "Classification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=classes_in_test,
        target_names=target_names,
        zero_division=0,  # classes that are never predicted score 0 without a warning
    ),
)


Accuracy: 0.5298701298701298
Classification Report:
               precision    recall  f1-score   support

           0       0.25      0.06      0.09        35
           1       0.10      0.02      0.04        41
           2       1.00      0.11      0.20        18
           3       0.74      0.87      0.80        61
           4       0.00      0.00      0.00         7
           5       0.27      0.10      0.15        39
           6       0.00      0.00      0.00        30
           7       0.25      0.06      0.09        36
           8       0.00      0.00      0.00        18
          10       0.00      0.00      0.00        38
          11       0.33      0.09      0.14        11
          12       0.00      0.00      0.00        22
          13       0.50      0.74      0.60       117
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         5
          16       0.57      0.88      0.69       291

    accuracy               

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Feature Engineering: Adding Technical Indicators
def compute_sma(data, window):
    """Return the simple moving average of `data` over `window` observations.

    `data` must expose a pandas-style `.rolling()` method. The result comes from
    `rolling(window=window).mean()`, so positions with fewer than `window`
    observations are NaN.
    """
    rolling_view = data.rolling(window=window)
    return rolling_view.mean()

def compute_ema(data, window):
    """Return the exponential moving average of `data` with span `window`.

    Uses `ewm(span=window, adjust=False)`, i.e. the recursive form in which the
    first output equals the first input.
    """
    weighted_view = data.ewm(span=window, adjust=False)
    return weighted_view.mean()

def compute_rsi(data, window):
    """Return a Relative Strength Index computed from rolling simple means.

    Upward and downward moves of `data.diff()` are averaged separately with
    `rolling(window=window).mean()` and combined as `100 - 100 / (1 + gain/loss)`.
    Positions without a full window are NaN; a window with gains but no losses
    evaluates to 100 because the ratio becomes infinite.
    """
    change = data.diff()
    # Keep only the positive moves (everything else, including the leading NaN, becomes 0)
    upward = change.where(change > 0, 0)
    # Keep only the negative moves, flipped to positive magnitudes
    downward = -change.where(change < 0, 0)
    avg_gain = upward.rolling(window=window).mean()
    avg_loss = downward.rolling(window=window).mean()
    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

# `btc_df` is not defined in this cell; it must already exist and contain the
# 'Open', 'High', 'Low', 'Close', 'Volume' and 'Candlestick Pattern' columns.

# Add technical indicators (SMA, EMA, RSI) and the candle geometry features
btc_df['SMA_10'] = compute_sma(btc_df['Close'], 10)
btc_df['EMA_10'] = compute_ema(btc_df['Close'], 10)
btc_df['RSI'] = compute_rsi(btc_df['Close'], 14)
btc_df['range'] = btc_df['High'] - btc_df['Low']
btc_df['body'] = abs(btc_df['Close'] - btc_df['Open'])

# Prepare the feature set.
# The rolling indicators leave NaNs in their warm-up rows; back-fill them on a new
# frame. (The previous `fillna(method='bfill', inplace=True)` mutated a selection of
# btc_df in place and relied on the deprecated `method=` argument.)
# NOTE(review): back-filling copies later values into earlier rows — confirm this is
# acceptable for the warm-up rows, or drop those rows instead.
features = btc_df[
    ['Open', 'High', 'Low', 'Close', 'Volume', 'range', 'body', 'SMA_10', 'EMA_10', 'RSI']
].bfill()

# Encode labels as integer category codes, keeping the categorical around so the
# codes can be translated back into pattern names without relying on a
# `label_encoder` created in a different cell.
pattern_categorical = btc_df['Candlestick Pattern'].astype('category')
labels = pattern_categorical.cat.codes
code_to_name = dict(enumerate(pattern_categorical.cat.categories))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

# Feature scaling: fit on the training split only, then apply to both splits
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter search space for RandomizedSearchCV.
# 'auto' is intentionally absent from max_features: recent scikit-learn releases
# reject it for RandomForestClassifier, which would make every sampled candidate
# using it fail to fit.
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

rf_model = RandomForestClassifier(random_state=42)

# Using RandomizedSearchCV to find the best hyperparameters
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_dist,
    n_iter=20,  # Number of parameter settings sampled
    cv=3,  # 3-fold cross-validation
    verbose=2,
    n_jobs=-1,
    random_state=42
)

# Fit the search on the scaled training data
random_search.fit(X_train_scaled, y_train)

# Get the best model from RandomizedSearchCV
best_rf_model = random_search.best_estimator_

# Make predictions with the best model
y_pred = best_rf_model.predict(X_test_scaled)

# Report only the classes that occur in the test split, by name.
# A missing pattern is encoded as -1 by `.cat.codes`; it is shown as 'UNLABELED'.
classes_in_test = np.unique(y_test)
target_names = [str(code_to_name.get(int(code), 'UNLABELED')) for code in classes_in_test]

# Evaluate the model
print("Best Hyperparameters:", random_search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "Classification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=classes_in_test,
        target_names=target_names,
        zero_division=0,  # classes that are never predicted score 0 without a warning
    ),
)


Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Hyperparameters: {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 20}
Accuracy: 0.5493506493506494
Classification Report:
                       precision    recall  f1-score   support

   BEARISH ENGULFING       1.00      0.03      0.06        35
   BULLISH ENGULFING       0.00      0.00      0.00        41
    DARK CLOUD COVER       0.00      0.00      0.00        18
                DOJI       0.73      0.84      0.78        61
   EVENING DOJI STAR       0.00      0.00      0.00         7
        EVENING STAR       0.00      0.00      0.00        39
              HAMMER       0.00      0.00      0.00        30
         HANGING MAN       0.00      0.00      0.00        36
     INVERTED HAMMER       0.00      0.00      0.00        18
        MORNING STAR       0.00      0.00      0.00        38
       PIERCING LINE       0.00      0.00      0.00        11
       S

In [None]:
#lstm+cnn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, accuracy_score

# `btc_df` is expected to exist already, holding the OHLCV columns and the
# 'Candlestick Pattern' label column used below.

# Derived candle features: full range and absolute body size
btc_df['range'] = btc_df['High'] - btc_df['Low']
btc_df['body'] = (btc_df['Close'] - btc_df['Open']).abs()
features = btc_df[
    ['Open', 'High', 'Low', 'Close', 'Volume', 'range', 'body']
]

# Integer-encode the pattern names; the fitted encoder is kept so the codes can be
# mapped back to names when reporting
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(btc_df['Candlestick Pattern'])

# Number of consecutive candles in each CNN-LSTM input window
sequence_length = 10

# Helper function to create sequences
def create_sequences(features, labels, sequence_length):
    """Build sliding windows over `features` paired with the label that follows each.

    Window `k` is `features[k:k + sequence_length]` and its target is
    `labels[k + sequence_length]`, i.e. the label of the row immediately after
    the window. Produces `len(features) - sequence_length` pairs (none when that
    difference is not positive).

    Returns:
        A tuple `(X, y)` of NumPy arrays built from the windows and targets.
    """
    n_windows = len(features) - sequence_length
    windows = [features[start:start + sequence_length] for start in range(n_windows)]
    targets = [labels[start + sequence_length] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build the windows from the raw (unscaled) features; scaling is deferred until
# after the split so that the scaler never sees test-set values.
X, y = create_sequences(np.asarray(features, dtype=float), labels, sequence_length)

# Split the data into training and testing sets
# NOTE(review): consecutive windows overlap, so a shuffled split places
# near-duplicate windows on both sides — consider a chronological split
# (shuffle=False) if an honest out-of-sample estimate is needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the data: fit min/max on the training windows only, then apply the
# same transform to both splits. MinMaxScaler works on 2-D input, so the windows
# are flattened to (rows, n_features) and reshaped back afterwards.
n_features = X_train.shape[2]
scaler = MinMaxScaler()
scaler.fit(X_train.reshape(-1, n_features))
X_train = scaler.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)

# Kept for any code that still reads `features_scaled`; it now uses the
# training-fitted scaler.
features_scaled = scaler.transform(np.asarray(features, dtype=float))

# Step 1: Build the CNN-LSTM Model
model = models.Sequential()
model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length, X.shape[2])))
model.add(layers.MaxPooling1D(pool_size=2))
model.add(layers.LSTM(50, return_sequences=False))
model.add(layers.Dense(50, activation='relu'))
# One softmax output per distinct encoded label
model.add(layers.Dense(len(np.unique(labels)), activation='softmax'))

# Compile the model (labels are integer codes, hence the sparse loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 2: Train the CNN-LSTM Model
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_split=0.2)

# Step 3: Make Predictions and Evaluate the Model
# Pick the class with the highest predicted probability for each window
y_pred = np.argmax(model.predict(X_test), axis=1)

# Get unique classes in y_test to ensure the report contains only relevant classes
unique_classes_in_test = np.unique(y_test)

# Step 4: Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, labels=unique_classes_in_test, target_names=label_encoder.inverse_transform(unique_classes_in_test)))



Epoch 1/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.3689 - loss: 2.5580 - val_accuracy: 0.3715 - val_loss: 2.2067
Epoch 2/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3853 - loss: 2.1525 - val_accuracy: 0.3715 - val_loss: 2.1815
Epoch 3/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3990 - loss: 2.0382 - val_accuracy: 0.3715 - val_loss: 2.1646
Epoch 4/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3826 - loss: 2.1101 - val_accuracy: 0.3715 - val_loss: 2.1640
Epoch 5/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4013 - loss: 2.0676 - val_accuracy: 0.3715 - val_loss: 2.1660
Epoch 6/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4000 - loss: 2.0976 - val_accuracy: 0.3715 - val_loss: 2.1669
Epoch 7/20
[1m90/90[0m [32m━━━━━━━━━

In [None]:
pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
#autoencoder+MLP
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras import layers, models
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

# `btc_df` is expected to exist already, holding the OHLCV columns and the
# 'Candlestick Pattern' label column used below.

# Derived candle features: full range and absolute body size
btc_df['range'] = btc_df['High'] - btc_df['Low']
btc_df['body'] = (btc_df['Close'] - btc_df['Open']).abs()
features = btc_df[
    ['Open', 'High', 'Low', 'Close', 'Volume', 'range', 'body']
]

# Integer-encode the pattern names; the encoder is reused below to name classes
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(btc_df['Candlestick Pattern'])

# Hold out 30% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=42,
)

# Scale every feature to [0, 1] using statistics from the training split only
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 1: Autoencoder used as a feature extractor
input_dim = X_train_scaled.shape[1]  # width of the scaled feature matrix
encoding_dim = 5  # size of the compressed representation

# Single-hidden-layer autoencoder: input -> 5-unit ReLU code -> sigmoid reconstruction
input_layer = layers.Input(shape=(input_dim,))
encoded = layers.Dense(encoding_dim, activation='relu')(input_layer)
decoded = layers.Dense(input_dim, activation='sigmoid')(encoded)

# `autoencoder` reconstructs its input; `encoder` shares the same layers and
# exposes only the compressed code
autoencoder = models.Model(input_layer, decoded)
encoder = models.Model(input_layer, encoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder to reproduce the scaled training features; the scaled
# test features are passed as validation data
autoencoder.fit(
    X_train_scaled,
    X_train_scaled,
    epochs=50,
    batch_size=16,
    shuffle=True,
    validation_data=(X_test_scaled, X_test_scaled),
)

# Step 2: Project both splits into the learned 5-dimensional code
X_train_encoded = encoder.predict(X_train_scaled)
X_test_encoded = encoder.predict(X_test_scaled)

# Step 3: Classify the encoded features with an MLP
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    max_iter=200,
    random_state=42,
)
mlp_classifier.fit(X_train_encoded, y_train)

# Predict on the encoded test features
y_pred = mlp_classifier.predict(X_test_encoded)

# Restrict the report to classes that actually occur in the test split
unique_classes_in_test = np.unique(y_test)

# Step 4: Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "Classification Report:\n",
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.inverse_transform(unique_classes_in_test),
        labels=unique_classes_in_test,
    ),
)

Epoch 1/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.1202 - val_loss: 0.0966
Epoch 2/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0913 - val_loss: 0.0749
Epoch 3/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0710 - val_loss: 0.0570
Epoch 4/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0544 - val_loss: 0.0411
Epoch 5/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0392 - val_loss: 0.0284
Epoch 6/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0266 - val_loss: 0.0193
Epoch 7/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0185 - val_loss: 0.0132
Epoch 8/50
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0123 - val_loss: 0.0095
Epoch 9/50
[1m113/113[0m [32m━━━━━━━━