# classification-15

## What's new:

1- https://claude.ai/chat/ef207056-eb79-4bfc-a8d7-c1f4c5d1fe84

## Next steps:

1-


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Input, Reshape, TimeDistributed, Lambda, RepeatVector, Dropout, \
    BatchNormalization
from tensorflow.keras import Input, layers, models, callbacks, metrics
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models, callbacks
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split
from scipy.signal import savgol_filter, find_peaks, peak_prominences
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_class_weight


In [None]:
# 1- Load and Scaling Features

# The file is tab-separated and its headers are wrapped in angle brackets
# (e.g. '<CLOSE>'); map each known header to its bare name for easier access.
RAW_COLUMN_NAMES = ('DATE', 'TIME', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL', 'VOL', 'SPREAD')
df = pd.read_csv('XAGUSD-197001010000--H1-rates.csv', sep='\t')
df = df.rename(columns={f'<{name}>': name for name in RAW_COLUMN_NAMES})

# Force DATE/TIME to stripped strings so they can be concatenated and parsed.
for text_col in ('DATE', 'TIME'):
    df[text_col] = df[text_col].astype(str).str.strip()

# errors='coerce' turns unparseable stamps into NaT; fail loudly rather than
# carrying them into the index.
df['DATETIME'] = pd.to_datetime(df['DATE'] + ' ' + df['TIME'], dayfirst=False, errors='coerce')
if df['DATETIME'].isna().any():
    raise ValueError("Some DATETIME values could not be parsed. Check date/time formats.")

# Index by timestamp, in chronological order, so the frame can be reindexed.
df = df.set_index('DATETIME').sort_index()

# --------------------------
# Create continuous hourly index & fill weekend gaps
# --------------------------
# Reindexing onto the full hourly grid makes weekends/missing hours show up as
# all-NaN rows, which are then filled column by column:
# - OPEN/HIGH/LOW/CLOSE and SPREAD: carry the last known value forward.
# - TICKVOL / VOL: 0 (no ticks during the gap).
# Alternative: leave the NaNs and drop sequences that cross a gap (safer, less data).
full_index = pd.date_range(start=df.index.min(), end=df.index.max(), freq='h')
df = df.reindex(full_index)

carry_forward_cols = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'SPREAD']
zero_fill_cols = ['TICKVOL', 'VOL']
df[carry_forward_cols] = df[carry_forward_cols].ffill()
df[zero_fill_cols] = df[zero_fill_cols].fillna(0)

# The reindexed index is unnamed, so reset_index() yields a column called
# 'index'; rename it back to DATETIME.
df = df.reset_index().rename(columns={'index': 'DATETIME'})

In [None]:
# Sanity check: (rows, columns) of the hourly-reindexed frame.
df.shape

In [None]:
# Inspect a slice of the series chosen by positional row range (both ends inclusive).
start_row = 32200
end_row = 33000

# .copy() detaches the slice from df so the assignment below does not trigger
# SettingWithCopyWarning.
subset = df.iloc[start_row:end_row + 1].copy()
subset['DATETIME'] = pd.to_datetime(subset['DATETIME'])

# CLOSE price over time for the selected rows.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(subset['DATETIME'], subset['CLOSE'], linewidth=1.0, color='blue')
ax.set_title(f"Price Chart from Row {start_row} to {end_row}", fontsize=14)
ax.set_xlabel("Datetime", fontsize=12)
ax.set_ylabel("Close Price", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
fig.tight_layout()
plt.show()


In [None]:
# Rows trimmed from both ends before modelling.
nn = 33000  # leading rows to drop: they do not follow the one-hour timeframe
mm = 500  # trailing rows to hold back so the model never sees them

# Keep positions [nn, len(df) - mm) and renumber them from zero.
keep_rows = slice(nn, len(df) - mm)
df_model = df.iloc[keep_rows].reset_index(drop=True)

In [None]:
def label_reversal_points(
        close,
        high=None,
        low=None,
        smoothing_window=31,
        polyorder=3,
        base_prom_factor=0.02,
        distance=3,
        snap_window=5,
        min_dev_pct=0.0015,  # 0.15% minimum leg size
        min_dev_sigma=2.0,  # >= 2x local abs-return EMA
        vol_window=100,  # EMA window for local volatility
        verbose=False
):
    """
    Label swing reversal points (valleys and peaks) on a price series.

    Steps:
      1. Smooth ``close`` with a Savitzky-Golay filter and detect candidate
         peaks/valleys on the smoothed curve with ``scipy.signal.find_peaks``.
      2. Force candidates to alternate peak/valley, dropping the less
         prominent of two same-type neighbours.
      3. Snap each candidate to the extremum of the raw data within
         ``snap_window`` bars (HIGH for peaks / LOW for valleys when both are
         given, otherwise ``close``).
      4. Drop swings whose leg is below an adaptive threshold and extrema that
         are closer together than ``distance`` bars.

    Parameters
    ----------
    close : array-like of float
        Closing prices. NaNs are filled with ``np.interp`` before processing.
    high, low : array-like of float, optional
        Used for snapping only when BOTH are provided.
        Example: label_reversal_points(df['CLOSE'], df['HIGH'], df['LOW'])
    smoothing_window : int
        Savitzky-Golay window length; shrunk for short series and forced odd.
    polyorder : int
        Savitzky-Golay polynomial order; clamped to stay below the effective
        window so short series do not raise.
    base_prom_factor : float
        Minimum candidate prominence, as a fraction of ``np.std(close)``.
    distance : int
        Minimum spacing (in bars) between kept extrema.
    snap_window : int
        Half-width (in bars) of the snapping search window.
    min_dev_pct : float
        Minimum leg size as a fraction of price.
    min_dev_sigma : float
        Minimum leg size as a multiple of the local volatility (EMA of
        absolute returns, converted to price units).
    vol_window : int
        EMA span used for the local volatility.
    verbose : bool
        Print the label counts when True.

    Returns
    -------
    numpy.ndarray of int, length n
        0 = none, 1 = valley, 2 = peak. For n >= 3 the first and last samples
        always carry a label (edge markers). For n < 3, or when every value is
        NaN, all labels are 0.
    """
    close = np.asarray(close, dtype=float)
    n = close.size
    if n < 3:
        return np.zeros(n, dtype=int)

    # Fill NaNs by interpolation; with no valid sample there is nothing to label.
    nan_mask = np.isnan(close)
    if nan_mask.any():
        if nan_mask.all():
            return np.zeros(n, dtype=int)
        positions = np.arange(n)
        close = close.copy()  # do not write into the caller's array
        close[nan_mask] = np.interp(positions[nan_mask], positions[~nan_mask], close[~nan_mask])

    # Convert HIGH/LOW once instead of on every snap.
    use_high_low = high is not None and low is not None
    if use_high_low:
        high_arr = np.asarray(high, dtype=float)
        low_arr = np.asarray(low, dtype=float)

    # Helper: simple EMA for local abs-return volatility
    def ema(x, span):
        x = np.asarray(x, dtype=float)
        alpha = 2.0 / (span + 1.0)
        out = np.empty_like(x)
        out[0] = x[0]
        for i in range(1, len(x)):
            out[i] = alpha * x[i] + (1 - alpha) * out[i - 1]
        return out

    # Local volatility in price terms via EMA of absolute returns
    ret = np.zeros(n)
    ret[1:] = np.abs(np.diff(close) / np.maximum(1e-12, close[:-1]))
    vol_absret = ema(ret, vol_window)
    local_vol_price = vol_absret * close  # convert to price units

    # Smoothing to get robust candidates. The window must be odd and no longer
    # than the series; the polynomial order must stay below the window.
    win = smoothing_window
    if win >= n:
        win = n - 1 if (n - 1) % 2 == 1 else n - 2
    if win % 2 == 0:
        win += 1
    if win < 3:
        # Too short to smooth meaningfully: detect extrema on the raw series.
        smoothed = close.copy()
    else:
        smoothed = savgol_filter(close, win, min(polyorder, win - 1))

    # Base prominence threshold (fall back to 1.0 for a constant series)
    global_std = np.std(close) or 1.0
    prom = global_std * base_prom_factor

    # Candidate peaks/valleys on smoothed
    peak_idx, _ = find_peaks(smoothed, distance=distance, prominence=prom)
    val_idx, _ = find_peaks(-smoothed, distance=distance, prominence=prom)

    # Prominences for tie-breaking
    peak_prom = peak_prominences(smoothed, peak_idx)[0] if peak_idx.size else np.array([])
    val_prom = peak_prominences(-smoothed, val_idx)[0] if val_idx.size else np.array([])

    # Combine into (index, type, prominence) tuples ordered by index
    candidates = []
    for i, p in enumerate(peak_idx):
        candidates.append((int(p), 2, float(peak_prom[i]) if peak_prom.size else 0.0))
    for i, v in enumerate(val_idx):
        candidates.append((int(v), 1, float(val_prom[i]) if val_prom.size else 0.0))
    candidates.sort(key=lambda x: x[0])

    if not candidates:
        labels = np.zeros(n, dtype=int)
        # still mark edges for completeness
        labels[0] = 1 if close[1] > close[0] else 2
        labels[-1] = 1 if close[-1] > close[-2] else 2
        return labels

    # Enforce alternation (remove weaker when two same-type neighbors)
    def enforce_alternation(ext):
        ext = ext[:]  # list of (idx, typ, prom)
        while True:
            removed = False
            i = 0
            while i < len(ext) - 1:
                if ext[i][1] == ext[i + 1][1]:
                    # drop the smaller prominence
                    if ext[i][2] < ext[i + 1][2]:
                        ext.pop(i)
                    else:
                        ext.pop(i + 1)
                    removed = True
                else:
                    i += 1
            if not removed:
                break
        return ext

    candidates = enforce_alternation(candidates)

    # SNAP: move each extreme to the true local extremum on raw close (or HIGH/LOW)
    def snap_index(idx, typ):
        L = max(0, idx - snap_window)
        R = min(n, idx + snap_window + 1)
        if use_high_low:
            if typ == 2:  # peak
                j = np.argmax(high_arr[L:R])
            else:  # valley
                j = np.argmin(low_arr[L:R])
        else:
            if typ == 2:
                j = np.argmax(close[L:R])
            else:
                j = np.argmin(close[L:R])
        return L + int(j)

    # Several candidates of the same type can snap onto one bar; keep the most
    # prominent per (index, type).
    seen_at = {}
    for idx, typ, pr in candidates:
        j = snap_index(idx, typ)
        key = (j, typ)
        if key not in seen_at or pr > seen_at[key][2]:
            seen_at[key] = (j, typ, pr)
    snapped = sorted(seen_at.values(), key=lambda x: x[0])

    # Enforce alternation again after snapping
    snapped = enforce_alternation(snapped)

    # Filter micro-legs using adaptive threshold (min % move and sigma*local_vol)
    pruned = []
    for idx, typ, pr in snapped:
        if not pruned:
            pruned.append((idx, typ, pr))
            continue
        prev_idx, prev_typ, prev_pr = pruned[-1]
        # time spacing
        if idx - prev_idx < distance:
            # keep the more prominent of the two
            if pr > prev_pr:
                pruned[-1] = (idx, typ, pr)
            continue
        leg = abs(close[idx] - close[prev_idx])
        # thresholds at both ends; the stricter one applies
        thr = max(min_dev_pct * close[prev_idx],
                  min_dev_sigma * max(local_vol_price[prev_idx], 1e-12))
        thr = max(thr, max(min_dev_pct * close[idx],
                           min_dev_sigma * max(local_vol_price[idx], 1e-12)))
        # too small a swing -> drop the later point
        if leg >= thr:
            pruned.append((idx, typ, pr))

    # One more alternation pass (paranoid) and spacing check
    pruned = enforce_alternation(pruned)
    final_ext = []
    for idx, typ, pr in pruned:
        if final_ext and idx - final_ext[-1][0] < distance:
            # keep stronger
            if pr > final_ext[-1][2]:
                final_ext[-1] = (idx, typ, pr)
        else:
            final_ext.append((idx, typ, pr))

    # Build labels
    labels = np.zeros(n, dtype=int)
    for idx, typ, _ in final_ext:
        labels[idx] = typ

    # Mark edges as trend boundaries for continuity
    if labels[0] == 0:
        labels[0] = 1 if close[min(1, n - 1)] > close[0] else 2
    if labels[-1] == 0 and n >= 2:
        labels[-1] = 1 if close[-1] > close[-2] else 2

    if verbose:
        c0 = int((labels == 0).sum())
        c1 = int((labels == 1).sum())
        c2 = int((labels == 2).sum())
        print(f"labels -> 0:{c0}  1:{c1}  2:{c2}  (extrema kept: {len(final_ext)})")

    return labels


In [None]:
# Baseline labelling: CLOSE only, so snapping uses close prices rather than HIGH/LOW.
close_prices = df_model['CLOSE'].values
df_model['Label'] = label_reversal_points(close_prices, verbose=True)

# Class balance of the resulting labels.
label_counts = df_model['Label'].value_counts()
print(label_counts)

In [None]:
def plot_labeled_candles(df_model, n=1000):
    """
    Draw the closing price of the last ``n`` rows and mark labelled reversals.

    Rows whose CLOSE is NaN are discarded before the tail is taken. Label 1 is
    drawn as a green dashed vertical line tagged 'BUY', label 2 as a red one
    tagged 'SELL'; every other label value is left unmarked.

    Expects the columns DATETIME, CLOSE and Label.
    """
    df_plot = df_model.dropna(subset=['CLOSE']).tail(n).copy()

    # Coerce DATETIME only when it is not already a datetime dtype.
    if not pd.api.types.is_datetime64_any_dtype(df_plot['DATETIME']):
        df_plot['DATETIME'] = pd.to_datetime(df_plot['DATETIME'])

    # Price line
    plt.figure(figsize=(15, 6))
    plt.plot(df_plot['DATETIME'], df_plot['CLOSE'], label='Close Price', color='black', linewidth=1.5)

    # label value -> (annotation text, colour, vertical anchor of the text)
    marker_style = {
        1: ('BUY', 'green', 'bottom'),
        2: ('SELL', 'red', 'top'),
    }
    for when, price, label in zip(df_plot['DATETIME'], df_plot['CLOSE'], df_plot['Label']):
        if label not in marker_style:
            continue
        text, colour, anchor = marker_style[label]
        plt.axvline(x=when, color=colour, linestyle='--', linewidth=1)
        plt.text(when, price, text, color=colour, ha='center', va=anchor, fontsize=9)

    # Titles, axes and layout
    plt.title(f'Last {n} Candles with Trend Reversal Labels')
    plt.xlabel('Datetime')
    plt.ylabel('Close Price')
    plt.xticks(rotation=45)
    plt.grid(True, linestyle='--', alpha=0.4)
    plt.tight_layout()
    plt.legend()
    plt.show()



In [None]:
# Visual check of the labels on the most recent rows (default n=1000).
plot_labeled_candles(df_model)

In [None]:
# ============================================================================
# HYPERPARAMETERS
# ============================================================================
WINDOW_SIZE = 60  # number of consecutive rows in each model input sequence
FORECAST_HORIZON = 10  # number of label steps predicted after each input window
FEATURES = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'TICKVOL']  # df_model columns used as model inputs
NUM_CLASSES = 3  # 0=no signal, 1=buy, 2=sell

In [None]:
# ============================================================================
# DATA PREPARATION
# ============================================================================
print("\n[1/6] Loading and preparing data...")

# df_model is expected to exist already, with the FEATURES columns plus
# DATETIME and Label.
feature_data = df_model[FEATURES].values
labels = df_model['Label'].values

# Boolean mask: True for rows whose features contain no NaN.
row_has_nan = np.isnan(feature_data).any(axis=1)
valid_indices = ~row_has_nan

# Apply the same mask to features, labels and timestamps so they stay aligned.
feature_data, labels = feature_data[valid_indices], labels[valid_indices]
datetime_index = df_model['DATETIME'].values[valid_indices]

label_histogram = np.bincount(labels.astype(int))
print(f"Total valid samples: {len(feature_data)}")
print(f"Label distribution: {label_histogram}")

In [None]:
# ============================================================================
# FEATURE SCALING
# ============================================================================
print("\n[2/6] Scaling features...")

# Fit the scaler on the training portion only. Fitting on the full series lets
# the mean/std of the held-out test period leak into the training inputs and
# makes the test metrics optimistic. The fitted scaler is still applied to
# every row (and can be reused for prediction).
#
# The boundary mirrors the chronological 80/20 split applied to the sequences:
# the first int(0.8 * n_sequences) windows are used for training, and the last
# of those windows covers rows up to (start + WINDOW_SIZE - 1). Rows of the
# validation slice, which is carved out of the training sequences, are still
# included in the fit.
n_sequences = max(len(feature_data) - WINDOW_SIZE - FORECAST_HORIZON + 1, 0)
n_train_rows = int(n_sequences * 0.8) + WINDOW_SIZE - 1  # 0.8 must match the train/test split ratio
n_train_rows = min(max(n_train_rows, 1), len(feature_data))

scaler = StandardScaler()
scaler.fit(feature_data[:n_train_rows])
scaled_features = scaler.transform(feature_data)

print(f"Feature scaling complete. Shape: {scaled_features.shape}")

In [None]:
# ============================================================================
# CREATE SEQUENCES FOR TRAINING
# ============================================================================
print("\n[3/6] Creating sequences...")


def create_sequences(features, labels, window_size, forecast_horizon):
    """
    Slide a window over the data and pair each input window with the labels
    that immediately follow it.

    For every start position ``s`` the input is ``features[s:s + window_size]``
    and the target is the next ``forecast_horizon`` entries of ``labels``.
    When the data is shorter than ``window_size + forecast_horizon`` no
    sequence is produced and both arrays are empty.

    Returns:
        X: shape (num_samples, window_size, num_features)
        y: shape (num_samples, forecast_horizon) - raw class labels
    """
    span = window_size + forecast_horizon
    starts = range(len(features) - span + 1)

    X = np.array([features[s:s + window_size] for s in starts])
    y = np.array([labels[s + window_size:s + span] for s in starts])
    return X, y


# Turn the scaled feature matrix and the label vector into supervised samples.
X, y = create_sequences(
    features=scaled_features,
    labels=labels,
    window_size=WINDOW_SIZE,
    forecast_horizon=FORECAST_HORIZON,
)

print("Sequences created:")
print(f"  X shape: {X.shape} (samples, window_size, features)")
print(f"  y shape: {y.shape} (samples, forecast_horizon)")

In [None]:
# ============================================================================
# TRAIN/TEST SPLIT
# ============================================================================
print("\n[4/6] Splitting data...")

# Chronological 80/20 split: the earliest 80% of sequences train, the rest
# test. No shuffling, so temporal order is preserved.
split_idx = int(len(X) * 0.8)
train_part, test_part = slice(0, split_idx), slice(split_idx, None)
X_train, y_train = X[train_part], y[train_part]
X_test, y_test = X[test_part], y[test_part]

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")

In [None]:
# ============================================================================
# HANDLE CLASS IMBALANCE WITH SAMPLE WEIGHTS (CORRECTED)
# ============================================================================
print("\n[5/6] Computing sample weights for class imbalance...")

# Weight applied to every (sample, forecast step) target of a given class.
# Labels not listed here keep the default weight of 1.0.
class_weight_by_label = {
    0: 0.1,   # Lower weight for no-signal
    1: 10.0,  # Very high weight for buy signals
    2: 10.0,  # Very high weight for sell signals
}

# Vectorized assignment instead of a Python loop over every (sample, step).
y_train_int = y_train.astype(int)
sample_weights = np.ones(y_train.shape, dtype=np.float32)
for class_id, class_weight in class_weight_by_label.items():
    sample_weights[y_train_int == class_id] = class_weight


def one_hot_per_step(step_labels):
    """One-hot encode (samples, horizon) labels into (samples, horizon, classes)."""
    return np.stack(
        [to_categorical(step_labels[:, step], num_classes=NUM_CLASSES)
         for step in range(FORECAST_HORIZON)],
        axis=1,
    )


# Convert labels to categorical for each timestep
y_train_cat = one_hot_per_step(y_train)
y_test_cat = one_hot_per_step(y_test)

print(f"Sample weights shape: {sample_weights.shape}")
print(f"y_train_cat shape: {y_train_cat.shape}")

# Count labels in training set. minlength guarantees one slot per class even
# when a class never occurs, so the indexing below cannot raise IndexError.
train_labels_flat = y_train.flatten()
train_counts = np.bincount(train_labels_flat.astype(int), minlength=NUM_CLASSES)
total_labels = max(len(train_labels_flat), 1)  # avoid dividing by zero on empty data
print(f"Training label distribution: {train_counts}")
for class_id, class_name in enumerate(("No Signal", "Buy", "Sell")):
    share = 100 * train_counts[class_id] / total_labels
    print(f"  Class {class_id} ({class_name}): {train_counts[class_id]} ({share:.1f}%)")



In [None]:
# ============================================================================
# BUILD MODEL WITH CUSTOM WEIGHTED LOSS
# ============================================================================
print("\n[6/6] Building and training model...")

def build_reversal_model(input_shape, forecast_horizon, num_classes):
    """
    Assemble the (uncompiled) encoder-decoder LSTM classifier.

    Encoder: two bidirectional LSTMs (128 then 64 units) with dropout; the
    second returns only its final output vector.
    Bridge: RepeatVector copies that vector ``forecast_horizon`` times.
    Decoder: an LSTM (64 units) returning one output per forecast step.
    Head: TimeDistributed Dense(32, relu) followed by a TimeDistributed
    softmax over ``num_classes``, i.e. one class distribution per step.

    Args:
        input_shape: (timesteps, features) of one input sequence.
        forecast_horizon: number of output steps.
        num_classes: size of the softmax at every output step.

    Returns:
        A keras Sequential model.
    """
    encoder = [
        layers.Input(shape=input_shape),
        # Bidirectional so each step sees context from both directions.
        layers.Bidirectional(layers.LSTM(128, return_sequences=True)),
        layers.Dropout(0.3),
        # Collapses the whole input window into a single vector.
        layers.Bidirectional(layers.LSTM(64, return_sequences=False)),
        layers.Dropout(0.3),
    ]

    decoder = [
        # Repeat the encoded vector once per forecast step.
        layers.RepeatVector(forecast_horizon),
        layers.LSTM(64, return_sequences=True),
        layers.Dropout(0.2),
    ]

    head = [
        # Applied independently at every forecast step.
        layers.TimeDistributed(layers.Dense(32, activation='relu')),
        layers.TimeDistributed(layers.Dense(num_classes, activation='softmax')),
    ]

    model = models.Sequential(encoder + decoder + head)
    return model

# Build model
model = build_reversal_model(
    (WINDOW_SIZE, len(FEATURES)),
    FORECAST_HORIZON,
    NUM_CLASSES,
)

# Loss helper kept for reference
def weighted_categorical_crossentropy(y_true, y_pred):
    """
    Plain categorical cross-entropy between ``y_true`` and ``y_pred``.

    Despite the name, no weighting is applied inside this function, and the
    compile() call below selects the loss by its string name rather than
    passing this function.
    """
    return tf.keras.losses.categorical_crossentropy(y_true, y_pred)

# Compile with categorical crossentropy loss
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

print("\nModel Architecture:")
model.summary()


In [None]:
# ============================================================================
# CUSTOM DATA GENERATOR WITH SAMPLE WEIGHTS
# ============================================================================

class WeightedSequence(tf.keras.utils.Sequence):
    """
    Batch generator yielding ``(X, y, sample_weight)`` tuples.

    Each batch takes the same rows from ``X``, ``y`` and ``sample_weights``,
    so per-sample (and per-timestep) weights stay aligned with their targets.
    When ``shuffle`` is True the row order is reshuffled at construction and
    at the end of every epoch, using NumPy's global random state.

    Args:
        X: input array, indexed along its first axis.
        y: target array with the same first-axis length as ``X``.
        sample_weights: weight array with the same first-axis length as ``X``.
        batch_size: number of rows per batch (the last batch may be smaller).
        shuffle: whether to reshuffle rows between epochs.
        **kwargs: forwarded to the base ``Sequence`` constructor.

    Raises:
        ValueError: if ``X``, ``y`` and ``sample_weights`` differ in length.
    """
    def __init__(self, X, y, sample_weights, batch_size, shuffle=True, **kwargs):
        # Keras 3's Sequence (PyDataset) expects subclasses to call the base
        # constructor; without it Keras emits a warning on use.
        super().__init__(**kwargs)
        if not (len(X) == len(y) == len(sample_weights)):
            raise ValueError(
                "X, y and sample_weights must have the same number of rows, got "
                f"{len(X)}, {len(y)} and {len(sample_weights)}."
            )
        self.X = X
        self.y = y
        self.sample_weights = sample_weights
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(X))
        self.on_epoch_end()

    def __len__(self):
        # Number of batches per epoch, counting a final partial batch.
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        X_batch = self.X[batch_indices]
        y_batch = self.y[batch_indices]
        w_batch = self.sample_weights[batch_indices]

        # Return (X, y, sample_weight) tuple
        return X_batch, y_batch, w_batch

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
# ============================================================================
# TRAINING WITH PROPER SAMPLE WEIGHTS
# ============================================================================
banner = "=" * 80
print("\n" + banner)
print("TRAINING MODEL")
print(banner)

# Hold out the chronologically last 15% of the training sequences for validation.
val_split = 0.15
val_samples = int(len(X_train) * val_split)
train_samples = len(X_train) - val_samples

X_train_final, X_val = X_train[:train_samples], X_train[train_samples:]
y_train_final, y_val = y_train_cat[:train_samples], y_train_cat[train_samples:]
w_train_final, w_val = sample_weights[:train_samples], sample_weights[train_samples:]

print(f"Training samples: {len(X_train_final)}")
print(f"Validation samples: {len(X_val)}")

# Generators deliver (X, y, sample_weight) batches; only the training one shuffles.
batch_size = 64
train_gen = WeightedSequence(X_train_final, y_train_final, w_train_final, batch_size=batch_size, shuffle=True)
val_gen = WeightedSequence(X_val, y_val, w_val, batch_size=batch_size, shuffle=False)

# Callbacks. With epochs=2 below, neither patience (15 and 5 epochs) can be
# reached; they only take effect once the epoch count is raised.
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

# Train model
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=2,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)

print("\n" + banner)
print("TRAINING COMPLETE")
print(banner)

In [None]:
# ============================================================================
# EVALUATE ON TEST SET
# ============================================================================
print("\nEvaluating on test set...")
test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

# Hard class predictions: argmax over the class axis of the predicted probabilities.
y_pred_proba = model.predict(X_test, verbose=0)
y_pred = y_pred_proba.argmax(axis=-1)

# Accuracy per forecast step: share of test sequences whose predicted class
# matches the true label at that step.
for step in range(FORECAST_HORIZON):
    hits = y_pred[:, step] == y_test[:, step]
    step_acc = hits.mean()
    print(f"  Forecast Hour {step + 1} Accuracy: {step_acc:.4f}")

closing_rule = "=" * 80
print("\n" + closing_rule)
print("MODEL TRAINING SECTION COMPLETE")
print(closing_rule)

# plot section

In [None]:
# --------------------------
# === Visualization Block ===
# --------------------------

# Keep the last 4 rows of input_df as the "historical" series for the plot.
# NOTE(review): input_df is not defined in any visible cell of this notebook,
# so this raises NameError on a fresh kernel unless it is created elsewhere — TODO confirm.
historical_df = input_df.tail(4).copy()


In [None]:
# Display the historical rows selected for the plot.
historical_df

In [None]:
# --- 2. Actual future 10 candles  ---
# Since input_df ends at index (start_idx - 1), actual_future_df starts right after that.
# NOTE(review): `idx` is not defined in any visible cell — TODO confirm where it comes from.
actual_future_start = idx + 1
actual_future_end = idx + FORECAST_HORIZON + 1
# The slice below covers positions idx .. idx + FORECAST_HORIZON inclusive, i.e.
# FORECAST_HORIZON + 1 rows beginning at idx itself, and it is taken from `df`
# rather than df_model.
# NOTE(review): confirm that the extra leading row and the choice of frame are intended.
actual_future_df = df.iloc[actual_future_start - 1:actual_future_end].copy()


In [None]:
# Display the rows selected as the actual future series.
actual_future_df

In [None]:
# --- 3. Create predicted_df (forecast for next 10 hours) ---
# Builds FORECAST_HORIZON hourly timestamps starting one hour after the last
# DATETIME in input_df.
# NOTE(review): this rebinds `datetime_index`, which the data-preparation cell
# also assigns, and nothing visible here uses it to build predicted_df — TODO confirm.
last_timestamp = input_df['DATETIME'].iloc[-1]
datetime_index = pd.date_range(
    start=last_timestamp + pd.Timedelta(hours=1),
    periods=FORECAST_HORIZON,
    freq='h'
)

# --- 4. Add text labels for clarity ---
# forecast_class 1 -> 'buy', 2 -> 'sell'; any other value becomes ''.
# NOTE(review): predicted_df is not defined in any visible cell — TODO confirm.
predicted_df['label'] = predicted_df['forecast_class'].map({1: 'buy', 2: 'sell'}).fillna('')

# --- 5. Plot title & output settings ---
plot_title = 'Actual vs Predicted Forex Trend Reversals'
output_plot_path = None  # e.g., 'forecast_plot.png'



In [None]:
# --- 6. Import your plotting utility ---

import sys

# Make the sibling ../utils directory importable before importing the helper module.
sys.path.insert(1, '../utils')
import forex_plot_utils_2

# --- 7. Plot all series ---
plot_inputs = {
    'historical_df': historical_df,
    'predicted_df': predicted_df,
    'actual_future_df': actual_future_df,
    'title': plot_title,
    'output_path': output_plot_path,
}
forex_plot_utils_2.plot_all_series(**plot_inputs)


In [None]:
# 11- Save Model

from datetime import datetime
import os

# 11-1 Create timestamp and paths
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_filename = f'model_{timestamp}.keras'
model_path = os.path.join('saved_models', model_filename)

# 11-2 Directory to hold logs and extras. makedirs also creates the parent
# 'saved_models' directory that model.save() writes into.
log_dir = os.path.join('saved_models', f'model_{timestamp}_logs')
os.makedirs(log_dir, exist_ok=True)

# 11-3 Save model
model.save(model_path)

# 11-4 Save training history
history_df = pd.DataFrame(history.history)
history_df.to_csv(os.path.join(log_dir, 'training_history.csv'), index=False)

# 11-5 Save training loss plot
plt.figure()
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(log_dir, 'training_loss.png'))
plt.close()

# 11-6 Final metrics: last-epoch training values from the history, plus a
# fresh evaluation on the validation arrays. No sample weights are passed
# here, so this loss can differ from the weighted val_loss recorded in history.
final_train_loss = history.history['loss'][-1]
final_train_acc = history.history['accuracy'][-1]
final_val_loss, final_val_acc = model.evaluate(X_val, y_val, verbose=0)

# 11-7 Save model summary and performance metrics. The encoding is explicit
# because the summary text may contain non-ASCII characters, which can raise
# UnicodeEncodeError under a platform default such as cp1252.
summary_path = os.path.join(log_dir, 'model_log.txt')
with open(summary_path, 'w', encoding='utf-8') as f:
    model.summary(print_fn=lambda x: f.write(x + '\n'))
    f.write('\n')
    f.write(f'Final Training Loss: {final_train_loss:.6f}\n')
    f.write(f'Final Training Accuracy: {final_train_acc:.6f}\n')
    f.write(f'Final Validation Loss: {final_val_loss:.6f}\n')
    f.write(f'Final Validation Accuracy: {final_val_acc:.6f}\n')

In [None]:
# Reload a previously saved model from a fixed path. This rebinds both
# `model_path` and `model`, replacing the model trained above.
# NOTE(review): safe_mode=False turns off Keras' safe-deserialization guard;
# only load files you trust, and confirm the flag is actually required here.
model_path = 'saved_models/model_20251112_071605.keras'
model = keras.models.load_model(model_path, safe_mode=False)