<a href="https://colab.research.google.com/github/mdzikrim/Hands-on_DL/blob/main/Chapter_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##TFRecord + Fashion MNIST

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import fashion_mnist

# Load
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0

# Add channel dim
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Split validation
X_valid, y_valid = X_train[-5000:], y_train[-5000:]
X_train, y_train = X_train[:-5000], y_train[:-5000]


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
def image_example(image, label):
    feature = {
        "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[tf.io.serialize_tensor(image).numpy()])),
        "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

def write_tfrecord(filename, images, labels):
    with tf.io.TFRecordWriter(filename) as writer:
        for img, lbl in zip(images, labels):
            example = image_example(img, lbl)
            writer.write(example.SerializeToString())

write_tfrecord("train.tfrecord", X_train, y_train)
write_tfrecord("valid.tfrecord", X_valid, y_valid)
write_tfrecord("test.tfrecord", X_test, y_test)


In [3]:
def parse_example(example_proto):
    feature_description = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "label": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    image = tf.io.parse_tensor(parsed["image"], out_type=tf.float32)
    image = tf.reshape(image, [28, 28, 1])
    label = parsed["label"]
    return image, label

def load_dataset(filename, batch_size=32):
    return (tf.data.TFRecordDataset(filename)
            .map(parse_example)
            .shuffle(1000)
            .batch(batch_size)
            .prefetch(1))


In [4]:
train_ds = load_dataset("train.tfrecord")
valid_ds = load_dataset("valid.tfrecord")
test_ds = load_dataset("test.tfrecord")

model = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255, input_shape=[28, 28, 1]),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),
])

model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(train_ds, validation_data=valid_ds, epochs=5)
model.evaluate(test_ds)


Epoch 1/5


  super().__init__(**kwargs)


   1719/Unknown [1m12s[0m 7ms/step - accuracy: 0.5697 - loss: 1.5216



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.5698 - loss: 1.5213 - val_accuracy: 0.7542 - val_loss: 0.7009
Epoch 2/5
[1m  21/1719[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 5ms/step - accuracy: 0.7457 - loss: 0.8065



[1m1710/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.7593 - loss: 0.6809



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.7594 - loss: 0.6807 - val_accuracy: 0.7898 - val_loss: 0.5870
Epoch 3/5
[1m1706/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.7914 - loss: 0.5857



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.7915 - loss: 0.5856 - val_accuracy: 0.8102 - val_loss: 0.5341
Epoch 4/5
[1m1714/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8113 - loss: 0.5360



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.8113 - loss: 0.5360 - val_accuracy: 0.8226 - val_loss: 0.5019
Epoch 5/5




[1m1718/1719[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.8218 - loss: 0.5042



[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.8218 - loss: 0.5042 - val_accuracy: 0.8304 - val_loss: 0.4780
     33/Unknown [1m0s[0m 3ms/step - accuracy: 0.8307 - loss: 0.4886



[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8219 - loss: 0.5078




[0.5114444494247437, 0.8184999823570251]

##IMDB Reviews + TextVectorization + Embedding

In [6]:
import tensorflow_datasets as tfds

# Load raw text
ds_train, ds_test = tfds.load("imdb_reviews", split=["train", "test"], as_supervised=True)

# Split validation and test directly from ds_test
ds_valid = ds_test.take(15000)
ds_test = ds_test.skip(15000)

In [7]:
from tensorflow.keras.layers import TextVectorization

# Prepare vectorizer
vocab_size = 10000
seq_len = 200

vectorizer = TextVectorization(max_tokens=vocab_size, output_mode="int", output_sequence_length=seq_len)
text_only = ds_train.map(lambda text, label: text)
vectorizer.adapt(text_only)

# Vectorized dataset
def vectorize(text, label):
    return vectorizer(text), label

train_ds = ds_train.map(vectorize).cache().shuffle(10000).batch(32).prefetch(1)
valid_ds = ds_valid.map(vectorize).batch(32).prefetch(1)
test_ds = ds_test.map(vectorize).batch(32).prefetch(1)


In [8]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=100),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
history = model.fit(train_ds, validation_data=valid_ds, epochs=5)
model.evaluate(test_ds)


Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 33ms/step - accuracy: 0.6854 - loss: 0.5791 - val_accuracy: 0.8367 - val_loss: 0.3737
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.8797 - loss: 0.2913 - val_accuracy: 0.8306 - val_loss: 0.3761
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.8967 - loss: 0.2530 - val_accuracy: 0.8589 - val_loss: 0.3413
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 28ms/step - accuracy: 0.9236 - loss: 0.1984 - val_accuracy: 0.8519 - val_loss: 0.3665
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.9341 - loss: 0.1778 - val_accuracy: 0.8353 - val_loss: 0.4077
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 16ms/step - accuracy: 0.8368 - loss: 0.3963


[0.40598738193511963, 0.8341000080108643]

In [9]:
# Load TFDS-ready IMDB
ds_train, ds_test = tfds.load("imdb_reviews", split=["train[:85%]", "train[85%:]"], as_supervised=True)
