In [3]:
import tensorflow as tf
import tensorflow_datasets as tfds
import time

In [1]:
# Load the IMDB reviews train/test splits as (text, label) pairs.
# With with_info=True, tfds.load returns a 2-tuple (datasets, info); because
# `split` is a list, `datasets` is itself a list with one dataset per split.
# The info object therefore has to be unpacked separately, otherwise
# `train_data` would be bound to the list of splits and `test_data` to the
# DatasetInfo.
(train_data, test_data), ds_info = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True
)

NameError: name 'tfds' is not defined

In [None]:
def preprocess_text(text, label):
    """Normalise one review string and pass its label through unchanged.

    The text is lower-cased and every literal ``<br />`` is replaced by a
    single space.

    Args:
        text: String tensor holding the review text.
        label: Label paired with the review; returned as-is.

    Returns:
        A ``(cleaned_text, label)`` tuple.
    """
    cleaned = tf.strings.regex_replace(tf.strings.lower(text), b"<br />", b" ")
    return cleaned, label

In [None]:
# Build the vocabulary from the first 10,000 training reviews.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000, oov_token="<OOV>")
# Decode the raw bytes rather than calling str() on them: str(b"...") yields
# the bytes *repr* ("b'...'" with the prefix, quotes and escape sequences),
# which would leak into the vocabulary. The reviews are also run through
# preprocess_text first so the tokenizer is fitted on the same normalised text
# that tokenize_map later receives.
texts = [
    ex.numpy().decode("utf-8")
    for ex, _ in train_data.take(10000).map(preprocess_text)
]
tokenizer.fit_on_texts(texts)

In [None]:
def tokenize_map(text, label):
    """Convert one review string into a fixed-length sequence of token ids.

    Intended for use with ``Dataset.map``. The Keras ``Tokenizer`` and
    ``pad_sequences`` are plain Python/NumPy utilities, so both run inside a
    single ``tf.py_function`` where the input is an eager tensor.

    Args:
        text: Scalar string tensor holding the (already preprocessed) review.
        label: Label paired with the review; returned as-is.

    Returns:
        A ``(token_ids, label)`` tuple where ``token_ids`` is an int64 tensor
        of shape ``(200,)``.
    """
    def _encode(raw):
        # Inside py_function `raw` is eager, so .numpy() is available here.
        sequences = tokenizer.texts_to_sequences([raw.numpy().decode("utf-8")])
        # Pad/truncate here rather than on the symbolic py_function output:
        # pad_sequences operates on Python/NumPy data, not graph tensors.
        padded = tf.keras.preprocessing.sequence.pad_sequences(
            sequences, maxlen=200, dtype="int64"
        )
        return padded[0]

    token_ids = tf.py_function(_encode, [text], Tout=tf.int64)
    # py_function outputs carry no static shape; restore it so that batching
    # and the downstream model see a known sequence length.
    token_ids.set_shape([200])
    return token_ids, label

In [None]:
def make_dataset(optimize=False, batch_size=32):
    """Build the batched training pipeline from ``train_data``.

    Args:
        optimize: When True, cache the tokenized examples and prefetch
            batches; when False, only map and batch.
        batch_size: Number of examples per batch (default 32).

    Returns:
        A batched ``tf.data.Dataset`` of ``(token_ids, label)`` pairs.
    """
    ds = train_data.map(preprocess_text)
    ds = ds.map(tokenize_map, num_parallel_calls=tf.data.AUTOTUNE)
    if optimize:
        # Cache per-example results so the tokenization work is not repeated
        # on later passes over the data.
        ds = ds.cache()
    ds = ds.batch(batch_size)
    if optimize:
        # Prefetch goes last so that whole batches (not single elements) are
        # prepared while the previous batch is being consumed.
        ds = ds.prefetch(tf.data.AUTOTUNE)
    return ds

In [None]:
def build_model():
    """Create a fresh, uncompiled sentiment classifier.

    Layers, in order: an embedding (10000-entry vocabulary, 16 dimensions,
    input length 200), a bidirectional LSTM with 32 units, and a single
    sigmoid output unit.

    Returns:
        A new ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(layers.Embedding(10000, 16, input_length=200))
    model.add(layers.Bidirectional(layers.LSTM(32)))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

In [None]:
# Train one fresh model per pipeline variant and record how long fit() takes.
# results maps the variant tag to elapsed seconds.
results = {}
for tag, opt in [('Unoptimized', False), ('Optimized', True)]:
    ds = make_dataset(optimize=opt)
    model = build_model()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    print(f"\n🔹 Training: {tag}")
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump if the
    # system time is adjusted mid-run.
    # NOTE(review): with a single epoch of 300 steps the cached data in the
    # optimized pipeline is never read back, so caching cannot pay off in
    # this measurement — consider timing more than one epoch.
    start = time.perf_counter()
    model.fit(ds, epochs=1, steps_per_epoch=300, verbose=2)
    end = time.perf_counter()
    results[tag] = end - start


In [None]:
# Report the elapsed training time recorded for each pipeline variant,
# in the order the variants were run.
print("\n🕒 Training Time Comparison:")
for variant in results:
    elapsed = results[variant]
    print(f"{variant}: {elapsed:.2f} seconds")