# Text classification with TensorFlow Hub: Movie reviews

Tutorial URL: https://www.tensorflow.org/tutorials/keras/text_classification_with_hub

Valid as of: 2023.05.02

## Imports

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

print(f"TensorFlow version: {tf.__version__}")

  from .autonotebook import tqdm as notebook_tqdm


TensorFlow version: 2.10.1


# Loading the dataset

In [2]:
dataset_train, dataset_validate, dataset_test = tfds.load(
    name="imdb_reviews",
    split=("train[:60%]", "train[60%:]", "test"),
    as_supervised=True,
)

[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\sophi\tensorflow_datasets\imdb_reviews\plain_text\1.0.0...[0m


Dl Size...: 100%|██████████| 80/80 [00:11<00:00,  7.18 MiB/s]rl]
Dl Completed...: 100%|██████████| 1/1 [00:11<00:00, 11.14s/ url]
                                                                        

[1mDataset imdb_reviews downloaded and prepared to C:\Users\sophi\tensorflow_datasets\imdb_reviews\plain_text\1.0.0. Subsequent calls will reuse this data.[0m


# Declare model

In [3]:
layer_embedding = hub.KerasLayer(
    "https://tfhub.dev/google/nnlm-en-dim50/2",
    input_shape=[],
    dtype=tf.string,
    trainable=True,
)

model = tf.keras.Sequential(
    [
        layer_embedding,
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(1),
    ]
)

model.summary()

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense (Dense)               (None, 16)                816       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 48,191,433
Trainable params: 48,191,433
Non-trainable params: 0
_________________________________________________________________


# Train model

In [4]:
model.fit(
    dataset_train.shuffle(10000).batch(512),
    epochs=10,
    validation_data=dataset_validate.batch(512),
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21f624c1c10>

# Evaluate

In [5]:
model.evaluate(dataset_test.batch(512), verbose=2)

49/49 - 1s - loss: 0.3436 - accuracy: 0.8558 - 1s/epoch - 30ms/step


[0.34364956617355347, 0.8557599782943726]