# ta-lib-005 (Feature‑Engineering + Sequence Models): Add train_test_split

In [13]:
# Cell 1: Imports & Config
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import talib
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
import matplotlib.pyplot as plt

# Ensure plots inline
%matplotlib inline

In [None]:
# Cell 2: Parameters
# --- Data source -----------------------------------------------------------
DATA_PATH = "../datasets/XAGUSD-H1-rates.csv"  # CSV exported from MT5

# --- Windowing -------------------------------------------------------------
SEQUENCE_LENGTH = 30   # candles fed to the model per sample
PREDICT_HORIZON = 5    # candles after the window inspected for a trend change

# --- Split / training ------------------------------------------------------
TEST_SIZE = 0.2        # fraction of samples held out for testing
RANDOM_STATE = 42      # seed passed to the split
BATCH_SIZE = 64        # mini-batch size used by model.fit
EPOCHS = 50            # upper bound on training epochs

In [None]:
# Cell 3: Load & Inspect Data
# Read the raw CSV, then build the timestamp index explicitly.
# Combining columns through read_csv(parse_dates=[['DATE', 'TIME']]) is
# deprecated in recent pandas releases, so the DATE and TIME text columns are
# joined and parsed with pd.to_datetime instead.
raw_df = pd.read_csv(DATA_PATH)
timestamps = pd.to_datetime(
    raw_df['DATE'].astype(str) + ' ' + raw_df['TIME'].astype(str)
).rename('datetime')

# Drop the two source columns (as the combined parse did) and index by time.
df = raw_df.drop(columns=['DATE', 'TIME']).set_index(timestamps)
df.head()

In [None]:
# Cell 4: Compute TA indicators
# Adds RSI(14), the MACD histogram (library-default periods) and ATR(14) as
# new columns on df, then removes every row that contains a NaN.
macd, macd_sig, macd_hist = talib.MACD(df['CLOSE'])
indicator_columns = {
    'rsi': talib.RSI(df['CLOSE'], timeperiod=14),
    'macd_hist': macd_hist,
    'atr': talib.ATR(df['HIGH'], df['LOW'], df['CLOSE'], timeperiod=14),
}
for column_name, column_values in indicator_columns.items():
    df[column_name] = column_values

# In-place so that `df` stays the same object for the following cells.
df.dropna(inplace=True)

In [None]:
# Cell 5: Label trend‑change points
# slope        : CLOSE minus CLOSE `window` rows earlier
# trend        : sign of that slope (-1, 0, +1; NaN where the slope is NaN)
# trend_change : sign of the row-to-row change in `trend`
#                 1 = trend moved up, -1 = trend moved down, 0 = no change
window = 5
df['slope'] = df['CLOSE'] - df['CLOSE'].shift(window)
df['trend'] = np.sign(df['slope'])
# Rows whose trend difference is undefined (NaN) count as "no change".
df['trend_change'] = np.sign(df['trend'].diff().fillna(0)).astype('int64')

In [None]:
# Cell 6: Feature Engineering & Sequence Creation
# Builds the model inputs:
#   X : (n_samples, SEQUENCE_LENGTH, len(feature_cols)) scaled feature windows
#   y : (n_samples, 3) one-hot labels, column 0/1/2 <-> class -1/0/+1
feature_cols = ['CLOSE','rsi','macd_hist','atr']

n_samples = len(df) - SEQUENCE_LENGTH - PREDICT_HORIZON + 1
if n_samples <= 0:
    raise ValueError(
        f"Need at least {SEQUENCE_LENGTH + PREDICT_HORIZON} rows to build one "
        f"sample, but df has {len(df)}."
    )

# Fit the scaler on the earliest (1 - TEST_SIZE) share of rows only, then apply
# it to every row. Fitting on the full history would let the mean/variance of
# the most recent candles leak into the features of earlier samples.
n_fit_rows = max(1, int(len(df) * (1 - TEST_SIZE)))
scaler = StandardScaler()
scaler.fit(df[feature_cols].iloc[:n_fit_rows])
scaled = scaler.transform(df[feature_cols])
labels = df['trend_change'].values

X, y = [], []
for i in range(n_samples):
    horizon_start = i + SEQUENCE_LENGTH
    # Net direction of the trend changes inside the horizon: opposite changes
    # cancel each other, so the label is the sign of their sum
    # (+1 up, -1 down, 0 none / cancelled out).
    net_change = labels[horizon_start : horizon_start + PREDICT_HORIZON].sum()
    X.append(scaled[i:horizon_start])
    y.append(int(np.sign(net_change)))
X = np.array(X)
y = tf.keras.utils.to_categorical(np.asarray(y) + 1, num_classes=3)  # map -1,0,1 to 0,1,2

In [None]:
# Cell 7: Train/Test Split
# Chronological hold-out: the first (1 - TEST_SIZE) share of samples trains the
# model and the most recent TEST_SIZE share tests it.
# Neighbouring samples are sliding windows that share SEQUENCE_LENGTH - 1 rows,
# so a shuffled (or stratified) split would place near-copies of every test
# window in the training set and inflate the test score. shuffle=False keeps
# the time order; stratify is not supported without shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, shuffle=False
)
print("Train:", X_train.shape, y_train.shape)
print("Test :", X_test.shape, y_test.shape)

In [None]:
# Cell 8: Build the Sequence Model
def build_model(input_shape, num_classes=3):
    """Create and compile the stacked-LSTM classifier.

    Layer order: Input -> LSTM(64, returns sequences) -> Dropout(0.2)
    -> LSTM(32) -> Dropout(0.2) -> Dense(32, relu) -> Dense(num_classes, softmax).

    Args:
        input_shape: shape of one sample, passed to ``layers.Input``.
        num_classes: width of the softmax output layer.

    Returns:
        The model compiled with Adam, categorical cross-entropy and accuracy.
    """
    net = models.Sequential()
    net.add(layers.Input(shape=input_shape))

    # Two recurrent stages, each followed by dropout; only the first one
    # passes the full sequence on to the next layer.
    for units, keep_sequence in ((64, True), (32, False)):
        net.add(layers.LSTM(units, return_sequences=keep_sequence))
        net.add(layers.Dropout(0.2))

    net.add(layers.Dense(32, activation='relu'))
    net.add(layers.Dense(num_classes, activation='softmax'))

    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Seed Python, NumPy and TensorFlow before the weights are created so that
# initialisation and dropout are repeatable from run to run.
tf.keras.utils.set_random_seed(RANDOM_STATE)

# One sample is SEQUENCE_LENGTH time steps of len(feature_cols) features.
model = build_model((SEQUENCE_LENGTH, len(feature_cols)))
model.summary()


In [None]:
# Cell 9: Training
# Early stopping watches the validation loss, waits 5 epochs without
# improvement, and then restores the best weights seen.
es = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# 20% of the training data is set aside by Keras for validation.
fit_options = dict(
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es],
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Cell 10: Evaluation
# model.evaluate returns the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test)
loss, acc = test_metrics
print(f"Test Loss: {loss:.4f}  Test Acc: {acc:.4f}")

In [None]:
# Cell 11: Plot Training History
# One curve per recorded accuracy series: training and validation.
plt.figure()
for metric_key, curve_label in (('accuracy', 'train acc'),
                                ('val_accuracy', 'val acc')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title("Accuracy over epochs")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [None]:
# Cell 12: Sample Predictions & Visualization
# Predict one test sample and plot the candles it was built from, followed by
# the PREDICT_HORIZON candles the label refers to.
idx = -1  # index in test set
seq = X_test[idx][None, ...]  # add the batch axis expected by predict()
pred = model.predict(seq)[0]
labels_map = {-1:'down‑change', 0:'no‑change', 1:'up‑change'}
print("Predicted:", labels_map[np.argmax(pred)-1])  # argmax 0,1,2 -> class -1,0,1

# Find where this sample sits in df. Sample k of X was cut from rows
# k .. k+SEQUENCE_LENGTH-1 of df, so its position is recovered by looking the
# window up in X. This stays correct however X was split into train and test,
# whereas deriving the row from len(X_test) is only meaningful for a tail split.
matches = np.flatnonzero((X == X_test[idx]).all(axis=(1, 2)))
if matches.size == 0:
    raise ValueError(
        "Test sample not found in X; re-run the sequence-creation and split cells."
    )
orig_idx = int(matches[0]) + SEQUENCE_LENGTH  # first row after the input window

# Input window followed by the future horizon.
slice_df = df.iloc[orig_idx-SEQUENCE_LENGTH:orig_idx+PREDICT_HORIZON]
plt.figure(figsize=(12,4))
plt.plot(slice_df.index, slice_df['CLOSE'], label='Close')
plt.scatter(slice_df.index[SEQUENCE_LENGTH:],
            slice_df['CLOSE'].iloc[SEQUENCE_LENGTH:],
            c='red', marker='x', label='Future window')
plt.title("Price around predicted trend‑change point")
plt.legend(); plt.show()