In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Attention
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import joblib
from tensorflow.keras.layers import Layer

# Custom Attention Layer (simplified)
class AttentionLayer(Layer):
    """Attention pooling that collapses axis 1 of a 3-D input.

    Each position along axis 1 gets a score ``tanh(x @ W + b)``; the scores
    are softmax-normalised across axis 1 and used to form a weighted sum of
    the inputs over that axis. In this file the layer is applied to the
    output of an ``LSTM(..., return_sequences=True)``, so axis 1 is the
    timestep axis there.

    The bias has one entry per position on axis 1, so that dimension must be
    known when the layer is built.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the score projection ``W`` and the per-position bias ``b``."""
        n_positions = input_shape[1]
        n_features = input_shape[-1]
        # W is created before b so the layer's weight order stays W, b.
        self.W = self.add_weight(
            name='attention_weight',
            shape=(n_features, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(n_positions, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        # `tf` is imported at module level further down the file; the name is
        # looked up only when this method runs.
        scores = tf.tanh(tf.matmul(inputs, self.W) + self.b)
        weights = tf.nn.softmax(scores, axis=1)
        # `weights` has a trailing dimension of 1 and broadcasts over features.
        return tf.reduce_sum(inputs * weights, axis=1)

import tensorflow as tf

# Tickers and parameters
# Each ticker selects the input file "<ticker>_processed.csv" that the
# training loop reads; one model is trained per entry.
tickers = ["BNB-USD", "BTC-USD", "ETH-USD", "XRP-USD"]
# Number of consecutive rows in every input window; also the sequence length
# declared for the first LSTM layer.
lookback = 30  # Increased to capture longer trends

def prepare_data(df):
    """Split a processed price frame into a feature matrix and a target vector.

    A ``Volatility`` column (``High`` minus ``Low``) is written onto ``df``
    itself, so the caller's frame is modified in place.

    Args:
        df: DataFrame holding the columns ``Open``, ``High``, ``Low``,
            ``Close``, ``Volume``, ``Lag1``, ``Lag7``, ``SMA7``, ``RSI14``,
            ``MACD``, ``Returns`` and ``Target``.

    Returns:
        ``(X, y)`` where ``X`` is the 2-D array of the twelve feature columns
        (in the order listed below, ``Volatility`` last) and ``y`` is the
        1-D array of ``Target`` values.
    """
    df["Volatility"] = df["High"].sub(df["Low"])

    # Column order defines the feature order seen by the model.
    feature_columns = [
        "Open", "High", "Low", "Close", "Volume",
        "Lag1", "Lag7", "SMA7", "RSI14", "MACD",
        "Returns", "Volatility",
    ]
    feature_matrix = df[feature_columns].values
    target_vector = df["Target"].values
    return feature_matrix, target_vector

def create_sequences(X, y, lookback):
    """Build sliding-window samples for a sequence model.

    Sample ``k`` pairs the window ``X[k:k + lookback]`` with the target
    ``y[k + lookback]``, i.e. the target of the row immediately after the
    window.

    Args:
        X: Array-like of per-row features, indexed by row on axis 0.
        y: Array-like of targets aligned row-for-row with ``X``.
        lookback: Number of consecutive rows in each window.

    Returns:
        ``(X_seq, y_seq)`` as NumPy arrays. With 2-D ``X`` of ``n`` rows,
        ``X_seq`` has shape ``(n - lookback, lookback, n_features)`` and
        ``y_seq`` has ``n - lookback`` entries. When there are not enough
        rows for a single window, correctly shaped empty arrays
        (``(0, lookback, n_features)`` and ``(0,)``) are returned instead of
        shapeless ``(0,)`` arrays, so downstream shape checks still hold.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    target_rows = range(lookback, len(X))
    if not target_rows:
        # No full window fits: keep the window/feature dimensions so callers
        # (e.g. model.predict) see a well-formed, zero-length batch.
        empty_windows = np.empty((0, lookback) + X.shape[1:], dtype=X.dtype)
        empty_targets = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_windows, empty_targets

    X_seq = np.array([X[i - lookback:i] for i in target_rows])
    y_seq = np.array([y[i] for i in target_rows])
    return X_seq, y_seq

# Train and evaluate one LSTM-with-attention model per ticker.
for ticker in tickers:
    print(f"\nTraining LSTM with Attention for {ticker}...")
    df = pd.read_csv(f"{ticker}_processed.csv", index_col="Date", parse_dates=True)
    X, y = prepare_data(df)

    # Split data: first 80% of rows for training, remaining 20% for testing.
    # The rows are sliced in file order, never shuffled.
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Scale data: both scalers are fitted on the training slice only and then
    # applied to the test slice, so test statistics do not leak into training.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
    y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

    # Create sequences. Windows are built separately inside each slice, so the
    # first `lookback` rows of each slice serve only as context and produce no
    # sample of their own.
    X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled.flatten(), lookback)
    X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled.flatten(), lookback)

    # Build and train LSTM with Attention
    model = Sequential()
    # Declare the input with an explicit Input object as the first entry.
    # Passing `input_shape=` to a layer inside a Sequential model is the
    # pattern recent Keras versions warn about.
    model.add(tf.keras.Input(shape=(lookback, X_train_scaled.shape[1])))
    model.add(LSTM(128, return_sequences=True))  # Increased units
    model.add(Dropout(0.3))
    # return_sequences=True keeps the per-timestep outputs that the attention
    # layer pools over.
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(AttentionLayer())  # Custom attention layer
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.0005), loss='mse')
    # Stop once val_loss has not improved for 10 epochs and roll back to the
    # best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    model.fit(X_train_seq, y_train_seq, epochs=100, batch_size=32, validation_split=0.1,
              callbacks=[early_stopping], verbose=1)

    # Predict and evaluate. Predictions and targets are mapped back to the
    # original target scale before the metrics are computed.
    y_pred_scaled = model.predict(X_test_seq)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    # Drop the first `lookback` test targets: they have no matching window.
    y_test_actual = scaler_y.inverse_transform(y_test_scaled[lookback:])
    mse = mean_squared_error(y_test_actual, y_pred)
    mape = mean_absolute_percentage_error(y_test_actual, y_pred) * 100
    # Index labels of the rows whose targets were actually predicted.
    test_dates = df.index[train_size + lookback:]
    actual_series = pd.Series(y_test_actual.flatten(), index=test_dates)
    pred_series = pd.Series(y_pred.flatten(), index=test_dates)
    # Direction = whether the next value is higher than the current one. The
    # last row has no successor, so it is dropped from both series.
    actual_direction = (actual_series.shift(-1) > actual_series).iloc[:-1].astype(int)
    predicted_direction = (pred_series.shift(-1) > pred_series).iloc[:-1].astype(int)
    directional_accuracy = (actual_direction == predicted_direction).mean() * 100

    print(f"LSTM with Attention Results for {ticker}:")
    print(f"MSE: {mse:.2f}, MAPE: {mape:.2f}%, Directional Accuracy: {directional_accuracy:.2f}%")

    # Save model and scalers
    #model.save(f"{ticker}_lstm_attention_model.h5")
    #joblib.dump(scaler_X, f"{ticker}_scaler_X_lstm_attention.pkl")
    #joblib.dump(scaler_y, f"{ticker}_scaler_y_lstm_attention.pkl")


Training LSTM with Attention for BNB-USD...
Epoch 1/100


  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 54ms/step - loss: 0.1338 - val_loss: 0.0038
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0111 - val_loss: 0.0035
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0078 - val_loss: 0.0038
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0090 - val_loss: 0.0033
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0079 - val_loss: 0.0037
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0069 - val_loss: 0.0034
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0068 - val_loss: 0.0033
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0060 - val_loss: 0.0042
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 48ms/step - loss: 0.1276 - val_loss: 0.0061
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0141 - val_loss: 0.0122
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0101 - val_loss: 0.0047
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0094 - val_loss: 0.0053
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0076 - val_loss: 0.0049
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0085 - val_loss: 0.0041
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0077 - val_loss: 0.0044
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0071 - val_loss: 0.0045
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 47ms/step - loss: 0.0462 - val_loss: 0.0047
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0145 - val_loss: 0.0075
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0136 - val_loss: 0.0073
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0117 - val_loss: 0.0123
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0144 - val_loss: 0.0092
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0127 - val_loss: 0.0104
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0132 - val_loss: 0.0060
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.0116 - val_loss: 0.0048
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - loss: 0.0954 - val_loss: 0.0071
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0288 - val_loss: 0.0073
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0271 - val_loss: 0.0081
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0276 - val_loss: 0.0070
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0231 - val_loss: 0.0072
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0215 - val_loss: 0.0073
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0243 - val_loss: 0.0082
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0238 - val_loss: 0.0083
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━