In [None]:
# ==============================================================================
# FINAL VERSION FOR NOTEBOOK 4: MODEL COMPARISON
# This script will load the LSTM model and retrain the Transformer model
# from scratch to bypass all loading errors.
# ==============================================================================

# --- PART 1: SETUP AND IMPORTS ---
print(">>> PART 1: Setting up environment...")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D

# Connect to Google Drive
from google.colab import drive
drive.mount('/content/drive')

# --- PART 2: DATA PREPARATION (Same as before) ---
print("\n>>> PART 2: Preparing data...")
# Read the processed table from Drive; the 'Date' column becomes a parsed index.
data_path = '/content/drive/MyDrive/Colab Notebooks/Thai_Quant_AI_Project/02_data/set50_processed_data.csv'
df = pd.read_csv(data_path, index_col='Date', parse_dates=True)

# Min-max scale every column into [0, 1].
# NOTE(review): the scaler is fitted on ALL rows, including the rows later
# used for testing. That leaks test-set min/max into training; it is kept
# as-is here because the saved LSTM was presumably trained with this same
# scaling -- confirm before changing.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df)

# Chronological 80/20 split. The test slice starts `time_step` rows early so
# the first test window has a full look-back history available.
time_step = 60
training_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[:training_size]
test_data = scaled_data[training_size - time_step:]

# Windowing function
def create_dataset(dataset, time_step=1, target_col=3):
    """Slice a 2-D array into look-back windows and next-row targets.

    Args:
        dataset: 2-D array indexed as ``dataset[row, column]``.
        time_step: Number of consecutive rows in each input window.
        target_col: Column whose value in the row immediately after a window
            becomes that window's target. Defaults to 3, the column this
            script has always used.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``dataset[i:i + time_step, :]`` and ``y[i]`` is
        ``dataset[i + time_step, target_col]``. Exactly
        ``len(dataset) - time_step`` samples are produced (none if the
        dataset is too short).
    """
    # A window starting at row i needs row i + time_step to exist as its
    # target, so valid starts are 0 .. len(dataset) - time_step - 1 inclusive.
    # The earlier `range(len(dataset) - time_step - 1)` stopped one short and
    # silently discarded the final sample.
    n_samples = max(len(dataset) - time_step, 0)
    dataX = [dataset[i:i + time_step, :] for i in range(n_samples)]
    dataY = [dataset[i + time_step, target_col] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

# Turn the scaled train/test slices into (window, next-value) samples using a
# look-back of `time_step` rows.
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)
print("Data preparation complete.")

# --- PART 3: LOAD THE LSTM MODEL (This part works fine) ---
print("\n>>> PART 3: Loading LSTM benchmark model...")
# The benchmark is a previously saved HDF5 (.h5) Keras model stored on Drive;
# it is loaded as-is and not retrained in this script.
lstm_model_path = '/content/drive/MyDrive/Colab Notebooks/Thai_Quant_AI_Project/03_models/lstm_benchmark_model.h5'
lstm_model = tf.keras.models.load_model(lstm_model_path)
print("LSTM model loaded successfully.")

# --- PART 4: REBUILD AND RETRAIN THE TRANSFORMER MODEL ---
print("\n>>> PART 4: Rebuilding and retraining the Transformer model...")
# Transformer architecture functions
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one post-norm Transformer encoder block to ``inputs``.

    The block is self-attention followed by a two-layer feed-forward network.
    Each sub-layer is followed by dropout, a residual connection back to the
    sub-layer's own input, and layer normalization.

    Args:
        inputs: Symbolic Keras tensor; its last dimension is reused as the
            output width of the feed-forward network so residuals line up.
            Presumably shaped (batch, steps, features) -- confirm with caller.
        head_size: Passed to ``MultiHeadAttention`` as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) layer of the feed-forward network.
        dropout: Dropout rate used inside attention and after each sub-layer.

    Returns:
        A tensor with the same last dimension as ``inputs``.
    """
    # --- Sub-layer 1: multi-head self-attention (query = value = inputs) ---
    attn = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(inputs, inputs)
    attn = Dropout(dropout)(attn)
    attn_out = LayerNormalization(epsilon=1e-6)(attn + inputs)

    # --- Sub-layer 2: position-wise feed-forward network ---
    ffn = tf.keras.Sequential([Dense(ff_dim, activation="relu"), Dense(inputs.shape[-1])])
    ffn_out = ffn(attn_out)
    ffn_out = Dropout(dropout)(ffn_out)
    # The residual must add the sub-layer's INPUT (attn_out). The previous
    # `x + x` merely doubled the feed-forward output, so the skip connection
    # around the feed-forward network did not exist at all.
    return LayerNormalization(epsilon=1e-6)(ffn_out + attn_out)

class _LearnedPositionalEmbedding(tf.keras.layers.Layer):
    """Add a learned per-position vector to every time step of the input."""

    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
        # Held as an attribute so Keras tracks its weights as part of this
        # layer (and therefore of any model that uses this layer).
        self.position_embedding = tf.keras.layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )

    def call(self, inputs):
        positions = tf.range(start=0, limit=self.sequence_length, delta=1)
        # (sequence_length, embed_dim) broadcasts over the leading batch axis.
        return inputs + self.position_embedding(positions)

    def get_config(self):
        config = super().get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "embed_dim": self.embed_dim,
        })
        return config


def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    """Build an (uncompiled) Transformer-encoder regression model.

    Args:
        input_shape: Per-sample shape; ``input_shape[0]`` is used as the
            number of positions and ``input_shape[1]`` as the embedding width.
        head_size: ``key_dim`` for every attention layer.
        num_heads: Number of attention heads per encoder block.
        ff_dim: Hidden width of each encoder block's feed-forward network.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of Dense layer widths for the head after pooling.
        dropout: Dropout rate inside the encoder blocks.
        mlp_dropout: Dropout rate after each Dense layer of the head.

    Returns:
        A ``tf.keras.Model`` mapping the input window to a single value.
    """
    inputs = Input(shape=input_shape)
    # The positional embedding has to live inside a layer that is applied to
    # the symbolic input. Calling `Embedding` directly on a concrete
    # `tf.range` tensor (as before) runs it eagerly: the result is baked into
    # the graph as a random constant and the embedding weights never train.
    x = _LearnedPositionalEmbedding(input_shape[0], input_shape[1])(inputs)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    # NOTE(review): with data_format="channels_first" the pooling runs over
    # the last axis (the feature axis here), leaving one value per time step.
    # Kept unchanged from the original; confirm this is intended rather than
    # pooling over time.
    x = GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = Dense(dim, activation="relu")(x)
        x = Dropout(mlp_dropout)(x)
    outputs = Dense(1)(x)
    return Model(inputs, outputs)

# Build a new Transformer model instance
# The input shape is taken from the training windows (everything after the
# batch axis). head_size is forwarded as the attention key_dim and ff_dim is
# the hidden width of the feed-forward network inside each encoder block.
transformer_model = build_transformer_model(
    X_train.shape[1:], head_size=256, num_heads=4, ff_dim=4,
    num_transformer_blocks=4, mlp_units=[128], mlp_dropout=0.4, dropout=0.25
)
transformer_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model. In the run recorded with this notebook each epoch took
# roughly 30-47 s, so 50 epochs is closer to 25-40 minutes than the 5-15
# minutes originally estimated. No validation data is passed to fit(), so
# only the training loss is reported per epoch.
print("Starting Transformer model training... Please wait.")
transformer_model.fit(X_train, y_train, epochs=50, batch_size=64, verbose=1)
print("Transformer model training complete.")

# --- PART 5: GENERATE PREDICTIONS & COMPARE RESULTS ---
print("\n>>> PART 5: Generating predictions and comparing results...")
# Generate predictions
# Both models are evaluated on the same test windows; outputs are still in
# the scaler's [0, 1] range at this point.
lstm_predictions = lstm_model.predict(X_test)
transformer_predictions = transformer_model.predict(X_test)

# Inverse transform function
def inverse_transform_predictions(predictions, original_scaled_data):
    """Map scaled predictions for column 3 back to original units.

    The module-level ``scaler`` works on full rows, so the predictions are
    placed in column 3 of a zero-filled matrix with the same number of
    columns as ``original_scaled_data``; after inverting, only column 3 is
    returned.

    Args:
        predictions: Array of scaled predictions (any shape; it is flattened).
        original_scaled_data: 2-D array used only for its column count.

    Returns:
        1-D array of the inverse-transformed column-3 values.
    """
    n_rows = len(predictions)
    n_cols = original_scaled_data.shape[1]
    padded = np.zeros((n_rows, n_cols))
    padded[:, 3] = predictions.flatten()
    restored = scaler.inverse_transform(padded)
    return restored[:, 3]

# Bring both models' predictions back to price units.
lstm_pred_inv = inverse_transform_predictions(lstm_predictions, scaled_data)
transformer_pred_inv = inverse_transform_predictions(transformer_predictions, scaled_data)

# Align the ground truth with the targets the models were actually asked to
# predict. test_data starts `time_step` rows before `training_size` and each
# target sits `time_step` rows after its window start, so y_test[i] is the
# row at position training_size + i of the full table. Taking the LAST
# len(y_test) rows instead shifts the actuals one row later whenever the
# windowing yields fewer targets than there are test rows, which compares
# each prediction with the following day's price.
test_start = training_size
test_stop = training_size + len(y_test)
actual_prices = df['Close'].values[test_start:test_stop]

# Quantitative Comparison (RMSE)
lstm_rmse = np.sqrt(mean_squared_error(actual_prices, lstm_pred_inv))
transformer_rmse = np.sqrt(mean_squared_error(actual_prices, transformer_pred_inv))
results_df = pd.DataFrame({
    'Model': ['LSTM (Benchmark)', 'Transformer (Upgrade)'],
    'Test RMSE (Baht)': [lstm_rmse, transformer_rmse]
})
# Positive percentage = Transformer has lower RMSE than the LSTM benchmark.
results_df['Improvement'] = ['-', f'{( (lstm_rmse - transformer_rmse) / lstm_rmse) * 100:.2f}%']
print("\n--- Performance Comparison ---")
print(results_df)

# Qualitative Comparison (Plot)
print("\nGenerating comparison plot...")
plt.style.use('seaborn-v0_8-whitegrid')
plt.figure(figsize=(18, 9))
plt.title('Model Comparison: Actual Price vs. Predictions', fontsize=18)
plt.xlabel('Date', fontsize=14)
plt.ylabel('SET50 Close Price (Baht)', fontsize=14)
# Same row range as actual_prices so every point is plotted on its own date.
test_dates = df.index[test_start:test_stop]
plt.plot(test_dates, actual_prices, label='Actual Price', color='black', linewidth=2)
plt.plot(test_dates, lstm_pred_inv, label=f'LSTM Prediction (RMSE: {lstm_rmse:.2f})', color='dodgerblue', alpha=0.8)
plt.plot(test_dates, transformer_pred_inv, label=f'Transformer Prediction (RMSE: {transformer_rmse:.2f})', color='red', alpha=0.8)
plt.legend(fontsize=12)
plt.show()

>>> PART 1: Setting up environment...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

>>> PART 2: Preparing data...
Data preparation complete.

>>> PART 3: Loading LSTM benchmark model...




LSTM model loaded successfully.

>>> PART 4: Rebuilding and retraining the Transformer model...
Starting Transformer model training... Please wait.
Epoch 1/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 1s/step - loss: 0.3773
Epoch 2/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 0.0440
Epoch 3/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - loss: 0.0339
Epoch 4/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 1s/step - loss: 0.0351
Epoch 5/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 0.0325
Epoch 6/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 0.0320
Epoch 7/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - loss: 0.0339
Epoch 8/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1s/step - loss: 0.0344
Epoch 9/50
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m