In [None]:
"""
train_imdb_models.py
--------------------
Data preprocessing + training multiple RNN-based models (SimpleRNN, LSTM, Bidirectional LSTM)
with basic hyperparameter tuning over batch size and loss function.

Requirements:
  - Python 3.8+
  - tensorflow==2.15.0
  - (optional) numpy

Notes:
  * Final layer uses a sigmoid activation (as requested).
  * Loss options include "binary_crossentropy" and "hinge". For "hinge", labels are mapped to {-1, +1}.
    Hinge on a sigmoid output is a compromise: the output is confined to (0, 1), so the margin
    of 1 is never fully met and the loss cannot reach zero. It is included only to satisfy the
    request for tuning loss functions.
  * Models are saved in .h5 format.
"""

In [1]:
import os
import random
import numpy as np
import tensorflow as tf

# Reproducibility: seed Python's, NumPy's and TensorFlow's global RNGs with one shared value
SEED = 42
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(SEED)

In [2]:
# Config
VOCAB_SIZE = 20_000                  # vocabulary cap handed to the dataset loader and Embedding
MAXLEN = 500                         # fixed sequence length every review is padded/truncated to
EMBEDDING_DIM = 128
EPOCHS_TUNE, EPOCHS_FINAL = 3, 6     # short runs while tuning, longer run for the chosen settings
VALIDATION_SPLIT = 0.2
OUTPUT_DIR = "models"

# Make sure the save directory exists up front; exist_ok keeps cell re-runs harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [3]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

print("[INFO] Loading IMDB dataset…")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=VOCAB_SIZE)

print("[INFO] Padding sequences…")
# Bring both splits to the common length MAXLEN in one pass.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=MAXLEN) for split in (x_train, x_test)
)

# Hinge-style targets: remap the 0/1 labels to -1/+1 and store them as float32.
y_train_hinge, y_test_hinge = (
    (labels * 2 - 1).astype(np.float32) for labels in (y_train, y_test)
)

[INFO] Loading IMDB dataset…
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
[INFO] Padding sequences…


In [4]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Bidirectional, Dense, Dropout

def _build_recurrent_classifier(make_recurrent_layer):
    """Assemble the Embedding -> recurrent -> Dropout -> sigmoid Dense stack shared by all models.

    Args:
        make_recurrent_layer: Zero-argument callable returning the recurrent layer to
            place after the embedding. A factory (rather than a ready-made layer) is
            used so the layers are still constructed in stack order, exactly as when
            each builder spelled out the full list itself.

    Returns:
        An uncompiled ``Sequential`` model ending in a single sigmoid unit.
    """
    return Sequential([
        Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=MAXLEN),
        make_recurrent_layer(),
        Dropout(0.3),
        Dense(1, activation="sigmoid"),
    ])


def build_simple_rnn_model():
    """Return the uncompiled classifier whose recurrent layer is ``SimpleRNN(128)``."""
    return _build_recurrent_classifier(lambda: SimpleRNN(128))


def build_lstm_model():
    """Return the uncompiled classifier whose recurrent layer is ``LSTM(128)``."""
    return _build_recurrent_classifier(lambda: LSTM(128))


def build_bi_lstm_model():
    """Return the uncompiled classifier whose recurrent layer is ``Bidirectional(LSTM(128))``."""
    return _build_recurrent_classifier(lambda: Bidirectional(LSTM(128)))


# Registry mapping a short model name to its zero-argument builder.
MODEL_BUILDERS = {
    "simple_rnn": build_simple_rnn_model,
    "lstm": build_lstm_model,
    "bilstm": build_bi_lstm_model,
}

In [5]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Search space walked by the tuning loop: every loss is tried with every batch size.
batch_sizes = [32, 64, 128]
loss_options = ["binary_crossentropy", "hinge"]


def compile_model(model, loss_name):
    """Compile ``model`` in place with Adam (lr=1e-3), the named loss and an accuracy metric.

    Args:
        model: Object exposing a Keras-style ``compile`` method.
        loss_name: Either ``"binary_crossentropy"`` or ``"hinge"``.

    Returns:
        The same ``model`` object, now compiled.

    Raises:
        ValueError: If ``loss_name`` is not one of the two supported names.
    """
    # Reject unknown names before any Keras object is created.
    if loss_name not in ("binary_crossentropy", "hinge"):
        raise ValueError(f"Unsupported loss: {loss_name}")

    if loss_name == "hinge":
        objective = tf.keras.losses.Hinge()
    else:
        # The models end in a sigmoid, so the loss receives probabilities, not logits.
        objective = tf.keras.losses.BinaryCrossentropy(from_logits=False)

    # NOTE(review): for a single-unit output Keras appears to resolve "accuracy" to
    # binary accuracy (predictions thresholded at 0.5, then compared with the labels),
    # so the metric is only meaningful when the labels passed to fit/evaluate are
    # 0/1 - confirm against the label arrays used by callers.
    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss=objective,
        metrics=["accuracy"],
    )
    return model

def get_labels_for_loss(loss_name, train=True):
    """Return the label array to pass to ``fit``/``evaluate`` for the given loss.

    Both supported losses receive the original 0/1 labels:

    * ``"binary_crossentropy"`` expects 0/1 targets.
    * ``"hinge"``: Keras' hinge loss converts 0/1 targets to -1/+1 internally, so the
      loss value is the same as with pre-mapped labels. The ``"accuracy"`` metric,
      however, thresholds the sigmoid output to 0/1 and compares it with the labels
      for equality; a label of -1 can never match, which would mark every negative
      review as wrong and cap the reported accuracy near 50%. That in turn would
      make hinge lose every val_accuracy comparison regardless of model quality.
      The pre-mapped ``y_train_hinge`` / ``y_test_hinge`` arrays are therefore
      intentionally not used here.

    Args:
        loss_name: ``"binary_crossentropy"`` or ``"hinge"``.
        train: If true return the training labels, otherwise the test labels.

    Returns:
        ``y_train`` or ``y_test`` (0/1 labels).

    Raises:
        ValueError: If ``loss_name`` is not a supported loss; the offending name is
            the exception's only argument.
    """
    if loss_name not in ("binary_crossentropy", "hinge"):
        raise ValueError(loss_name)
    return y_train if train else y_test

In [6]:
def tune_for_model(model_name):
    """Grid-search loss function and batch size for one architecture.

    Every combination of ``loss_options`` x ``batch_sizes`` trains a freshly built
    model for ``EPOCHS_TUNE`` epochs, holding out ``VALIDATION_SPLIT`` of the
    training data. The combination with the highest single-epoch ``val_accuracy``
    wins; ties keep the earlier combination.

    Args:
        model_name: Key into ``MODEL_BUILDERS``.

    Returns:
        ``{"loss": <loss name>, "batch_size": <int>}`` for the best combination.

    Raises:
        KeyError: If ``model_name`` is not in ``MODEL_BUILDERS``.
        RuntimeError: If no combination produced a comparable validation accuracy
            (empty search space, or every trial reported NaN).
    """
    print(f"\n[INFO] Tuning hyperparameters for: {model_name}")
    best_cfg = None
    best_val = -np.inf

    for loss_name in loss_options:
        y_tr = get_labels_for_loss(loss_name, train=True)
        for bs in batch_sizes:
            # Each trial builds a brand-new model; release the global Keras state left
            # behind by the previous one so memory does not grow across the grid.
            tf.keras.backend.clear_session()
            model = MODEL_BUILDERS[model_name]()
            compile_model(model, loss_name)
            callbacks = [EarlyStopping(patience=2, restore_best_weights=True, monitor="val_accuracy")]
            hist = model.fit(
                x_train, y_tr,
                batch_size=bs,
                epochs=EPOCHS_TUNE,
                validation_split=VALIDATION_SPLIT,
                verbose=2,
                callbacks=callbacks
            )
            # Score a trial by its best epoch, not its last one.
            val_acc = max(hist.history.get("val_accuracy", [0]))
            print(f"[TUNE] model={model_name} loss={loss_name} batch_size={bs} -> best val_acc={val_acc:.4f}")
            if val_acc > best_val:
                best_val = val_acc
                best_cfg = {"loss": loss_name, "batch_size": bs}

    # Fail here with a clear message instead of handing None to the caller, which
    # would only surface later as an opaque subscripting error.
    if best_cfg is None:
        raise RuntimeError(
            f"No usable tuning result for {model_name}: the search space is empty "
            "or no trial produced a comparable val_accuracy"
        )

    print(f"[BEST] {model_name}: {best_cfg} (val_acc={best_val:.4f})")
    return best_cfg

In [7]:
def train_and_save(model_name, best_cfg):
    """Train the chosen configuration, score it on the test split and save it as .h5.

    Args:
        model_name: Key into ``MODEL_BUILDERS``; also used in the saved file name.
        best_cfg: Mapping with ``"loss"`` and ``"batch_size"`` entries.

    Returns:
        ``(out_path, history_dict, test_metrics)``: the saved file path, the
        ``history`` attribute of the object returned by ``fit``, and whatever
        ``evaluate`` returned (indexed here as loss first, accuracy second).
    """
    chosen_loss = best_cfg["loss"]
    chosen_batch = best_cfg["batch_size"]
    train_labels = get_labels_for_loss(chosen_loss, train=True)
    test_labels = get_labels_for_loss(chosen_loss, train=False)

    net = MODEL_BUILDERS[model_name]()
    compile_model(net, chosen_loss)

    # Stop once val_accuracy has not improved for 3 epochs and roll back to the best weights.
    stopper = EarlyStopping(patience=3, restore_best_weights=True, monitor="val_accuracy")
    fit_options = dict(
        batch_size=chosen_batch,
        epochs=EPOCHS_FINAL,
        validation_split=VALIDATION_SPLIT,
        verbose=2,
        callbacks=[stopper],
    )

    print(f"[INFO] Training final {model_name} with loss={chosen_loss}, batch_size={chosen_batch}")
    fit_result = net.fit(x_train, train_labels, **fit_options)

    print("[INFO] Evaluating on test set…")
    scores = net.evaluate(x_test, test_labels, verbose=0)
    print(f"[RESULT] {model_name} test -> loss={scores[0]:.4f}, acc={scores[1]:.4f}")

    destination = os.path.join(OUTPUT_DIR, f"{model_name}_imdb.h5")
    net.save(destination)
    print(f"[SAVED] {destination}")
    return destination, fit_result.history, scores

In [8]:
if __name__ == "__main__":
    # Per architecture: tune, retrain with the winning settings, then record a compact summary.
    summaries = {}
    for name in MODEL_BUILDERS:
        best_cfg = tune_for_model(name)
        path, hist, test_metrics = train_and_save(name, best_cfg)
        summaries[name] = dict(
            best_cfg=best_cfg,
            model_path=path,
            history_keys=list(hist),
            # Plain floats keep the printed summary free of framework scalar types.
            test_loss=float(test_metrics[0]),
            test_acc=float(test_metrics[1]),
        )

    print("\n=== TRAINING SUMMARY ===")
    for model_key, summary in summaries.items():
        print(f"{model_key}: {summary}")
    print("\nAll done. Models saved under:", OUTPUT_DIR)


[INFO] Tuning hyperparameters for: simple_rnn




Epoch 1/3
625/625 - 30s - 48ms/step - accuracy: 0.6003 - loss: 0.6643 - val_accuracy: 0.5514 - val_loss: 0.6782
Epoch 2/3
625/625 - 24s - 39ms/step - accuracy: 0.6209 - loss: 0.6463 - val_accuracy: 0.6362 - val_loss: 0.6292
Epoch 3/3
625/625 - 42s - 67ms/step - accuracy: 0.7254 - loss: 0.5300 - val_accuracy: 0.6398 - val_loss: 0.6268
[TUNE] model=simple_rnn loss=binary_crossentropy batch_size=32 -> best val_acc=0.6398
Epoch 1/3
313/313 - 18s - 58ms/step - accuracy: 0.5277 - loss: 0.7083 - val_accuracy: 0.6088 - val_loss: 0.6566
Epoch 2/3
313/313 - 17s - 56ms/step - accuracy: 0.6812 - loss: 0.5833 - val_accuracy: 0.7168 - val_loss: 0.5541
Epoch 3/3
313/313 - 20s - 64ms/step - accuracy: 0.7976 - loss: 0.4453 - val_accuracy: 0.7412 - val_loss: 0.5469
[TUNE] model=simple_rnn loss=binary_crossentropy batch_size=64 -> best val_acc=0.7412
Epoch 1/3
157/157 - 14s - 87ms/step - accuracy: 0.5643 - loss: 0.6748 - val_accuracy: 0.6732 - val_loss: 0.5906
Epoch 2/3
157/157 - 7s - 47ms/step - accurac



[RESULT] simple_rnn test -> loss=0.4537, acc=0.8065
[SAVED] models/simple_rnn_imdb.h5

[INFO] Tuning hyperparameters for: lstm
Epoch 1/3
625/625 - 19s - 31ms/step - accuracy: 0.7682 - loss: 0.4744 - val_accuracy: 0.8278 - val_loss: 0.3823
Epoch 2/3
625/625 - 13s - 20ms/step - accuracy: 0.8897 - loss: 0.2837 - val_accuracy: 0.8354 - val_loss: 0.3947
Epoch 3/3
625/625 - 21s - 33ms/step - accuracy: 0.9232 - loss: 0.2087 - val_accuracy: 0.7468 - val_loss: 0.4854
[TUNE] model=lstm loss=binary_crossentropy batch_size=32 -> best val_acc=0.8354
Epoch 1/3
313/313 - 11s - 34ms/step - accuracy: 0.7581 - loss: 0.4782 - val_accuracy: 0.8096 - val_loss: 0.4185
Epoch 2/3
313/313 - 8s - 27ms/step - accuracy: 0.9058 - loss: 0.2388 - val_accuracy: 0.8478 - val_loss: 0.3340
Epoch 3/3
313/313 - 10s - 33ms/step - accuracy: 0.9332 - loss: 0.1832 - val_accuracy: 0.8444 - val_loss: 0.3892
[TUNE] model=lstm loss=binary_crossentropy batch_size=64 -> best val_acc=0.8478
Epoch 1/3
157/157 - 8s - 52ms/step - accur



[RESULT] lstm test -> loss=0.4039, acc=0.8516
[SAVED] models/lstm_imdb.h5

[INFO] Tuning hyperparameters for: bilstm
Epoch 1/3
625/625 - 25s - 40ms/step - accuracy: 0.7760 - loss: 0.4657 - val_accuracy: 0.8438 - val_loss: 0.3689
Epoch 2/3
625/625 - 41s - 65ms/step - accuracy: 0.9006 - loss: 0.2640 - val_accuracy: 0.8652 - val_loss: 0.3420
Epoch 3/3
625/625 - 41s - 66ms/step - accuracy: 0.9162 - loss: 0.2216 - val_accuracy: 0.8636 - val_loss: 0.3713
[TUNE] model=bilstm loss=binary_crossentropy batch_size=32 -> best val_acc=0.8652
Epoch 1/3
313/313 - 18s - 58ms/step - accuracy: 0.7417 - loss: 0.5142 - val_accuracy: 0.8548 - val_loss: 0.3536
Epoch 2/3
313/313 - 21s - 66ms/step - accuracy: 0.8999 - loss: 0.2565 - val_accuracy: 0.8752 - val_loss: 0.3124
Epoch 3/3
313/313 - 20s - 64ms/step - accuracy: 0.9387 - loss: 0.1691 - val_accuracy: 0.8638 - val_loss: 0.3462
[TUNE] model=bilstm loss=binary_crossentropy batch_size=64 -> best val_acc=0.8752
Epoch 1/3
157/157 - 14s - 88ms/step - accuracy:



[RESULT] bilstm test -> loss=0.3376, acc=0.8652
[SAVED] models/bilstm_imdb.h5

=== TRAINING SUMMARY ===
simple_rnn: {'best_cfg': {'loss': 'binary_crossentropy', 'batch_size': 128}, 'model_path': 'models/simple_rnn_imdb.h5', 'history_keys': ['accuracy', 'loss', 'val_accuracy', 'val_loss'], 'test_loss': 0.45372241735458374, 'test_acc': 0.8065199851989746}
lstm: {'best_cfg': {'loss': 'binary_crossentropy', 'batch_size': 128}, 'model_path': 'models/lstm_imdb.h5', 'history_keys': ['accuracy', 'loss', 'val_accuracy', 'val_loss'], 'test_loss': 0.40390223264694214, 'test_acc': 0.8515599966049194}
bilstm: {'best_cfg': {'loss': 'binary_crossentropy', 'batch_size': 64}, 'model_path': 'models/bilstm_imdb.h5', 'history_keys': ['accuracy', 'loss', 'val_accuracy', 'val_loss'], 'test_loss': 0.3375725746154785, 'test_acc': 0.8651599884033203}

All done. Models saved under: models
