# architecture-27-3333

## What's new:

1-


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Reshape, TimeDistributed, Lambda, LayerNormalization, Bidirectional, RepeatVector
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import talib
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight


In [None]:
# 1- Load and Scaling Features
# Load and preprocess
df = pd.read_csv('datasets-27-3333/XAGUSD-H1-rates.csv', sep='\t')

# Rename columns for easier access
df.rename(columns={
    '<DATE>': 'DATE',
    '<TIME>': 'TIME',
    '<OPEN>': 'OPEN',
    '<HIGH>': 'HIGH',
    '<LOW>': 'LOW',
    '<CLOSE>': 'CLOSE',
    '<TICKVOL>': 'TICKVOL',
    '<VOL>': 'VOL',
    '<SPREAD>': 'SPREAD'
}, inplace=True)

# Optional: Combine DATE and TIME into a single datetime column
df['DATETIME'] = pd.to_datetime(df['DATE'] + ' ' + df['TIME'], errors='coerce')

# Drop rows with missing values
df.dropna(inplace=True)

# Sort data chronologically by DATETIME
df.sort_values(by='DATETIME', inplace=True)

# Reset index to ensure clean row order
df.reset_index(drop=True, inplace=True)

# Select features to scale
feature_cols = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL']

# Normalize features
scaler = MinMaxScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])

In [None]:
# 2- Label trend reversals
def label_reversal_points(prices, window=8, threshold=0.002):
    prices = np.asarray(prices)
    labels = [0] * len(prices)
    prev_trend = 0  # 1 = up, -1 = down, 0 = unknown

    for i in range(len(prices) - window):
        past = prices[i:i + window // 2]
        future = prices[i + window // 2:i + window]

        past_mean = np.mean(past)
        future_mean = np.mean(future)
        change = (future_mean - past_mean) / past_mean

        if change > threshold:
            curr_trend = 1  # Uptrend
        elif change < -threshold:
            curr_trend = -1  # Downtrend
        else:
            curr_trend = 0  # No significant trend

        # Detect a reversal (trend direction changed)
        if prev_trend == -1 and curr_trend == 1:
            labels[i + window // 2] = 1  # Buy signal at start of uptrend
        elif prev_trend == 1 and curr_trend == -1:
            labels[i + window // 2] = 2  # Sell signal at start of downtrend

        # Update previous trend only if there is a new clear trend
        if curr_trend != 0:
            prev_trend = curr_trend

    return labels


df['Label'] = label_reversal_points(df['CLOSE'].values)

In [None]:
# 3- Create sequences
SEQ_LEN = 60  # past candles for input
FORECAST_HORIZON = 10  # predict next 10 candles
NUM_CLASSES = 3  # 0 = no signal, 1 = buy, 2 = sell
X, y = [], []
for i in range(len(df) - SEQ_LEN - FORECAST_HORIZON + 1):
    seq_x = df[feature_cols].iloc[i: i + SEQ_LEN].values
    seq_y = df['Label'].iloc[i + SEQ_LEN: i + SEQ_LEN + FORECAST_HORIZON].values
    X.append(seq_x)
    y.append(seq_y)

X = np.array(X)
y = np.array(y)

In [None]:
# 4- One-hot encode labels for each timestep
y_onehot = np.array([to_categorical(seq, num_classes=NUM_CLASSES) for seq in y])

In [None]:
# 5- Train-test split
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y_onehot[:split], y_onehot[split:]

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)

In [None]:
# 6- Handle Class Imbalance

from sklearn.utils.class_weight import compute_class_weight

# Flatten labels to compute weights
y_flat = np.argmax(y_train, axis=-1).flatten()
class_weights_values = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(NUM_CLASSES),
    y=y_flat
)
class_weights = dict(enumerate(class_weights_values))
print("Class weights:", class_weights)

In [None]:
# 7- Build LSTM Classification Model

n_features = len(feature_cols)

model = Sequential([
    Input(shape=(SEQ_LEN, n_features)),
    LSTM(128, return_sequences=False),
    Dropout(0.3),
    RepeatVector(FORECAST_HORIZON),
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    TimeDistributed(Dense(NUM_CLASSES, activation='softmax'))
])

In [None]:


import tensorflow.keras.backend as K

# Convert your class_weights dict to a tensor
weights = np.array([class_weights[i] for i in range(NUM_CLASSES)], dtype=np.float32)

# ---- weighted loss (make broadcasting explicit & reduce over time) ----
# Ensure weights array is float32
weights = np.array([class_weights[i] for i in range(NUM_CLASSES)], dtype=np.float32)


def weighted_categorical_crossentropy(weights_vec):
    weights_tensor = K.constant(weights_vec)

    def loss_fn(y_true, y_pred):
        # y_true: (batch, T, C) one-hot; y_pred: (batch, T, C)
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        w = K.reshape(weights_tensor, (1, 1, -1))  # shape (1,1,C) to broadcast over batch & time
        per_timestep = -K.sum(y_true * K.log(y_pred) * w, axis=-1)  # (batch, T)
        return K.mean(per_timestep, axis=-1)  # (batch,)

    return loss_fn


model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=weighted_categorical_crossentropy(weights),
    metrics=['accuracy', 'mae']
)


In [10]:
# 8- Fit model with EarlyStopping

es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
rc = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=[es, rc],
    verbose=1
)
# shuffle=False,  # Important: keep time order! ?????

Epoch 1/50


I0000 00:00:1755275040.120309   18591 cuda_dnn.cc:529] Loaded cuDNN version 91001


[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.3098 - loss: 1.0980 - mae: 0.4439 - val_accuracy: 0.8191 - val_loss: 1.0580 - val_mae: 0.4380 - learning_rate: 0.0010
Epoch 2/50
[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.3055 - loss: 1.0997 - mae: 0.4443 - val_accuracy: 0.0552 - val_loss: 1.0594 - val_mae: 0.4427 - learning_rate: 0.0010
Epoch 3/50
[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.3252 - loss: 1.1007 - mae: 0.4444 - val_accuracy: 0.8912 - val_loss: 1.0580 - val_mae: 0.4333 - learning_rate: 0.0010
Epoch 4/50
[1m683/688[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.3976 - loss: 1.0965 - mae: 0.4436
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.3974 - loss: 1.0965 - mae: 0.4436 - val_accuracy: 0.8715 

In [11]:
# 11- Save Model

from datetime import datetime
import os
import pandas as pd
import matplotlib.pyplot as plt

# 11-1 Create timestamp and paths
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_filename = f'model_{timestamp}.keras'
model_path = os.path.join('saved_models', model_filename)

# 11-2 Directory to hold logs and extras
log_dir = os.path.join('saved_models', f'model_{timestamp}_logs')
os.makedirs(log_dir, exist_ok=True)

# 11-3 Save model
model.save(model_path)

# 11-4 Save training history
history_df = pd.DataFrame(history.history)
history_df.to_csv(os.path.join(log_dir, 'training_history.csv'), index=False)

# 11-5 Save training loss plot
plt.figure()
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(log_dir, 'training_loss.png'))
plt.close()

# 11-6 Save model summary and final performance
with open(os.path.join(log_dir, 'model_log.txt'), 'w') as f:
    # Model architecture
    model.summary(print_fn=lambda x: f.write(x + '\n'))

    # Final metrics
    final_train_loss = history.history['loss'][-1]

    # Use y_test directly (already one-hot encoded from Step 4)
    final_test_loss, final_test_accuracy, final_test_mae = model.evaluate(X_test, y_test, verbose=0)

    f.write(f'\nFinal Training Loss: {final_train_loss:.6f}\n')
    f.write(f'Final Test Loss: {final_test_loss:.6f}\n')
    f.write(f'Final Test Accuracy: {final_test_accuracy:.6f}\n')
    f.write(f'Final Test MAE: {final_test_mae:.6f}\n')

print(f"✅ Model and logs saved in: {log_dir}")

✅ Model and logs saved in: saved_models/model_20250815_195753_logs
