## Text summarization on the CNN/DailyMail dataset

In [1]:
# Local mac OS
# %pip install -U tensorflow==2.16.2 tensorflow-macos==2.16.2 keras==3.4.1 keras-nlp
# AWS SageMaker
# %pip install -U tensorflow==2.16.2 tensorflow-datasets keras==3.4.1 keras-nlp datasets

Note: you may need to restart the kernel to use updated packages.


In [1]:
import tensorflow as tf
import keras
import keras_nlp
# Show the pinned versions next to what is actually installed so that a
# mismatch is visible right at the top of the notebook.
for expectation, running_version in (
    ("Tensorflow 2.16.2 is expected. The running version is", tf.__version__),
    ("Keras 3.4.1 is expected. The running version is", keras.__version__),
    ("KerasNLP 0.12.1 is expected. The running version is", keras_nlp.__version__),
):
    print(expectation, running_version)

import tensorflow_datasets as tfds

# SageMaker cannot use @keras.saving
from keras import saving

Tensorflow 2.16.2 is expected. The running version is 2.16.2
Keras 3.4.1 is expected. The running version is 3.4.1
KerasNLP 0.12.1 is expected. The running version is 0.12.1


***
# Utility functions

In [2]:
import platform

def is_running_on_apple_sillicon():
    """
    Tell whether the notebook is running on an Apple Silicon Mac.

    :return: True when ``platform.system()`` is "Darwin" and
        ``platform.processor()`` is "arm"; False otherwise.
    """
    if platform.system() != "Darwin":
        return False
    return platform.processor() == "arm"

In [3]:
import numpy as np
import matplotlib.pyplot as plt

def plot(history, title=None, keys=("loss", "masked_acc")):
    """
    Display the plot that indicates the loss and accuracy.

    One subplot is drawn per entry of ``keys``; each shows the training curve
    and the matching ``val_`` curve. After the figure is shown, the best
    validation value and the epoch at which it was reached are printed for
    every key (minimum for keys containing 'loss', maximum otherwise).

    :param history: history object from the tensorflow fit function. Its
        ``history`` dict must hold both ``key`` and ``val_`` + ``key`` for
        every plotted key.
    :param title: title text.
    :param keys: keys for plotting. Any number of keys is accepted.
    """
    keys = list(keys)
    # squeeze=False keeps `axes` two-dimensional whatever the number of columns,
    # so one key or many keys are handled by the same code path.
    fig, axes = plt.subplots(1, max(len(keys), 1), squeeze=False, tight_layout=True)
    if title is not None:
        fig.suptitle(t=title, fontsize=14)
    for ax, key in zip(axes[0], keys):
        training_values = history.history[key]
        validation_values = history.history[f"val_{key}"]
        epochs = range(1, len(training_values) + 1)
        ax.plot(epochs, training_values, label=f"Training {key}")
        ax.plot(epochs, validation_values, label=f"Validation {key}")
        ax.set_title(f"Training and validation {key}")
        ax.set_xlabel("epochs")
        ax.set_ylabel(key)
        ax.legend()
    plt.show()

    for key in keys:
        validation_values = history.history[f"val_{key}"]
        if 'loss' in key:
            # Lower is better for losses; epochs are reported 1-based.
            print(
                np.min(validation_values),
                "The best number of epocs for the validation loss is",
                np.argmin(validation_values) + 1,
            )
        else:
            # Every other key is treated as a higher-is-better metric.
            print(
                np.max(validation_values),
                "The best number of epocs for the validation accuracy is",
                np.argmax(validation_values) + 1,
            )
    

In [4]:
# @see https://www.tensorflow.org/text/tutorials/transformer
@saving.register_keras_serializable()
def masked_acc(y_true, y_pred):
    """
    Token accuracy computed only over positions whose target id is not 0.

    :param y_true: integer target token ids; id 0 is excluded from the metric.
    :param y_pred: per-token scores over the vocabulary; the vocabulary axis
        is the last axis.
    :return: scalar float32 tensor, correct non-zero positions divided by the
        number of non-zero positions; 0 when there is no non-zero position.
    """
    # axis=-1 equals the former axis=2 for (batch, sequence, vocabulary) input
    # and also works for inputs of another rank.
    y_pred = tf.argmax(y_pred, axis=-1)
    y_true = tf.cast(y_true, dtype=y_pred.dtype)
    match = y_true == y_pred
    mask = y_true != 0
    match = match & mask
    match = tf.cast(match, dtype=tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    # divide_no_nan returns 0 instead of NaN when every position is masked out.
    return tf.math.divide_no_nan(tf.reduce_sum(match), tf.reduce_sum(mask))

In [5]:
# @see https://www.tensorflow.org/text/tutorials/nmt_with_attention
@saving.register_keras_serializable()
def masked_loss(y_true, y_pred):
    """
    Sparse categorical cross-entropy averaged over positions whose target id is not 0.

    :param y_true: integer target token ids; id 0 is excluded from the loss.
    :param y_pred: per-token predictions over the vocabulary, as produced by
        the model's softmax output (not logits).
    :return: scalar tensor, the summed loss of the non-zero positions divided
        by their count; 0 when there is no non-zero position.
    """
    # Calculate the loss for each item in the batch.
    # `from_logits` is deliberately left at its default: with from_logits=True
    # Keras warned (nn.py:609) that `sparse_categorical_crossentropy` received
    # `from_logits=True` although the output was produced by a Softmax
    # activation and thus does not represent logits. from_logits=True is only
    # correct when no softmax has been applied to the values.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(
        reduction='none'
    )
    loss = loss_fn(y_true, y_pred)

    # Mask off the losses on padding.
    mask = tf.cast(y_true != 0, dtype=loss.dtype)
    loss *= mask

    # Return the mean over unmasked positions; divide_no_nan yields 0 instead
    # of NaN when the whole batch is masked, so the loss never turns into NaN.
    return tf.math.divide_no_nan(tf.reduce_sum(loss), tf.reduce_sum(mask))

***
# Dataset

In [6]:
# Hyperparameters
# All tunable sizes of the model and the training run are collected here.
# BATCH_SIZE = 32 # p3.2xlarge ResourceExhaustedError
EMBEDDING_DIM = 64  # embedding_dim of the encoder and decoder embeddings
NUM_HEADS = 8  # attention heads, used for both the encoder and decoder layers
INTERMIDIATE_DIM = 1024  # intermediate_dim of each transformer encoder/decoder layer
VOCAB_SIZE = 15000  # max_tokens of the TextVectorization layers and vocabulary size of the embeddings
NUM_ENCODERS = 1  # number of stacked transformer encoder layers
NUM_DECODERS = 1  # number of stacked transformer decoder layers
BATCH_SIZE = 16 # 1 does not work
NUM_EPOCHS = 10 # presumably 1 and 100 are other values that were tried -- TODO confirm
# Generally, 100 to 200 is used as the epoch number for generative models.
# However, because this is a prototype, the number is intentionally small.


In [7]:
# 'trust_remote_code' is passed (as False) only on Apple Silicon. Elsewhere no
# builder argument is given at all, because:
# When 'trust_remote_code' is False, it does not work on AWS SageMaker.
args = {'trust_remote_code': False} if is_running_on_apple_sillicon() else {}

dataset_splits = tfds.load(
    'huggingface:ccdv__cnn_dailymail/3.0.0',
    split=['train', 'validation', 'test'],
    builder_kwargs=args,
)
train_dataset, validation_dataset, test_dataset = dataset_splits

  hf_names = hf_datasets.list_datasets()
2024-07-02 22:00:31.407268: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-07-02 22:00:31.407287: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-07-02 22:00:31.407290: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-07-02 22:00:31.407305: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-07-02 22:00:31.407317: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
# max_input_length = max(len(row[0][0]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_target_length = max(len(row[0][1]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_decoder_target_length = max(len(row[1]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_input_length, max_target_length, max_decoder_target_length

# Bounds on the number of tokens of the summaries kept for training.
min_summarized_text_size = 32
summarized_text_size = 256 #  1437 is the longest summarized text in dataset

# @TODO The followings should programmatically be derived.
max_input_length = 2137
# Decoder input and decoder target share the same length: one more than the
# largest summary size.
max_target_length = max_decoder_target_length = summarized_text_size + 1

In [35]:
# For development, keep only about the last 1/10 of every split.
# NOTE: this cell rebinds train/validation/test_dataset, so running it twice
# without reloading the data shrinks the splits a second time.
DEVELOPMENT = True
if DEVELOPMENT:
    # Alternative that was used on Apple Silicon: take a fixed number of rows.
    # NUM_TAKE = 512 # 500
    # train_dataset = train_dataset.take(NUM_TAKE)
    # validation_dataset = validation_dataset.take(NUM_TAKE)
    # test_dataset = test_dataset.take(NUM_TAKE)

    # Use 10% dataset. The same subset is used on every platform.
    # Despite their names, the *_size values are the number of leading rows
    # that are skipped (9/10 of each split), not the size of what is kept.
    train_size = len(train_dataset) // 10 * 9
    validation_size = len(validation_dataset) // 10 * 9
    test_size = len(test_dataset) // 10 * 9
    train_dataset = train_dataset.skip(train_size)
    validation_dataset = validation_dataset.skip(validation_size)
    test_dataset = test_dataset.skip(test_size)

In [36]:
# @see https://github.com/keras-team/keras-nlp/blob/50e041487b1d8b30b34c5fb738db3ed3406363bc/examples/machine_translation/data.py
import string
import re

# Punctuation removed during standardization. The square brackets are kept so
# that the '[start]' and '[end]' markers survive as they are.
strip_chars = "".join(char for char in string.punctuation if char not in "[]")

@saving.register_keras_serializable()
def custom_standardization(input_string):
    """
    Lowercase the text and delete every character listed in ``strip_chars``.

    :param input_string: string tensor to standardize.
    :return: string tensor, lowercased and with the listed characters removed.
    """
    # Character class matching any single character of strip_chars.
    strip_pattern = "[%s]" % re.escape(strip_chars)
    lowered = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowered, strip_pattern, "")

# Ragged tokenizer whose vocabulary is learned from all three splits.
vectorization_layer = keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    standardize=custom_standardization,
    ragged=True,
    output_mode='int',
)

# Each text joins the article and its highlights and is wrapped in the
# '[start]' / '[end]' markers so that the markers enter the vocabulary.
adaptation_texts = (
    train_dataset
    .concatenate(validation_dataset)
    .concatenate(test_dataset)
    .batch(BATCH_SIZE)
    .map(lambda row: '[start] ' + row['article'] + ' ' + row['highlights'] + ' [end]')
)
# Warning: adapt, which clears the data already held inside, must be called only once.
vectorization_layer.adapt(adaptation_texts)

2024-07-02 22:05:00.049744: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [37]:
# Fixed-length tokenizers for the encoder and decoder sides. Their output
# lengths come from max_input_length / max_target_length, the same values the
# model embeddings are built with, so the sizes cannot drift apart.
input_vectorization_layer = keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    # @TODO max_input_length itself should be programmatically obtained
    output_sequence_length=max_input_length,
)
target_vectorization_layer = keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    output_sequence_length=max_target_length,
)
# Both layers reuse the vocabulary learned by vectorization_layer, so a token
# has the same id on the encoder side and the decoder side.
shared_vocabulary = vectorization_layer.get_vocabulary()
input_vectorization_layer.set_vocabulary(shared_vocabulary)
target_vectorization_layer.set_vocabulary(shared_vocabulary)

In [38]:
# Inspect the adapted vocabulary: its size and its first eight entries, special tokens included.
vectorization_layer.vocabulary_size(), vectorization_layer.get_vocabulary(include_special_tokens=True)[0:8]

(7220, ['', '[UNK]', 'the', 'to', 'a', 'and', 'of', 'in'])

In [39]:
# Sanity check: '[start]' and 'start' must be encoded differently, i.e. the
# equality below must be False. This holds because the square brackets are
# excluded from strip_chars and therefore kept by the standardization.
assert not vectorization_layer(['[start]'])[0] == vectorization_layer(['start'])[0]

In [40]:
"""
vectorization_layer(['This is a pen', 'I am a software engineer'])
#vectorization_layer(['This is a pen', 'I am a software engineer']).row_lengths().shape[0]
# 2
rows = vectorization_layer(['This is a pen', 'I am a software engineer']).row_lengths().shape[0]
vectorization_layer(['This is a pen', 'I am a software engineer']).to_tensor(shape=(rows, 10))
# .to_tensor()

RaggedTensor.to_tensor can make 0-filled Tensor
"""
def prepare_dataset(x):
    """
    Turn a batch of raw rows into the structure the model is trained on.

    :param x: batch of rows with the string entries 'article' and 'highlights'.
    :return: ``((encoder input, decoder input), decoder output)``. The decoder
        output is the decoder input shifted by one position.
    """
    encoder_input = input_vectorization_layer(x['article'])

    # Wrap every summary in the start/end markers and tokenize it (ragged).
    marked_highlights = tf.strings.join(['[start] ', x['highlights'], ' [end]'])
    ragged_tokens = vectorization_layer(marked_highlights)

    # Convert to a dense tensor of fixed width: the largest summary size plus
    # the two markers.
    batch_rows = ragged_tokens.row_lengths().shape[0]
    padded_width = summarized_text_size + 1 + 1
    padded_tokens = ragged_tokens.to_tensor(shape=(batch_rows, padded_width))

    # Dropping the last / first column leaves summarized_text_size + 1 columns each.
    decoder_input = padded_tokens[:, :-1]
    decoder_output = padded_tokens[:, 1:]
    return (encoder_input, decoder_input), decoder_output

def filter_by_length(row):
    """
    Keep a row only when its summary has an acceptable number of tokens.

    :param row: single row with a 'highlights' string entry.
    :return: whether the token count of the highlights lies between
        ``min_summarized_text_size`` and ``summarized_text_size``, both inclusive.
    """
    token_count = tf.size(vectorization_layer(row['highlights']))
    return min_summarized_text_size <= token_count and token_count <= summarized_text_size

def _to_model_batches(dataset):
    """
    Apply the shared preprocessing pipeline to one split.

    :param dataset: raw split with 'article' and 'highlights' entries.
    :return: dataset filtered by summary length, batched by BATCH_SIZE and
        mapped through prepare_dataset.
    """
    kept_rows = dataset.filter(filter_by_length)
    batched_rows = kept_rows.batch(BATCH_SIZE)
    return batched_rows.map(prepare_dataset, num_parallel_calls=tf.data.AUTOTUNE)


preprocessed_train_dataset = _to_model_batches(train_dataset)
preprocessed_validation_dataset = _to_model_batches(validation_dataset)
preprocessed_test_dataset = _to_model_batches(test_dataset)

In [41]:
# Show one raw row, then the shapes and leading values of one preprocessed batch.
for raw_row in train_dataset.take(1):
    article_bytes = raw_row['article'].numpy()
    highlights_bytes = raw_row['highlights'].numpy()
    print('article: ', article_bytes)
    print('highlights: ', highlights_bytes)
for (encoder_input, decoder_input), decoder_output in preprocessed_train_dataset.take(1):
    print('encoder input: ', encoder_input.shape, encoder_input[:10])
    print('decoder input: ', decoder_input.shape, decoder_input[:10])
    print('decoder output: ', decoder_output.shape, decoder_output[:10])

article:  b'HONG KONG, China (CNN) -- In the decade since the 1997 handover of Hong Kong to China, local movie-makers have faced daunting changes in the industry. A trend of fewer films being produced each year in Hong Kong at the time of the handover has continued into the 21st century. Stephen Chow\'s "Shaolin Soccer" is one of Hong Kong\'s all-time top-grossing films. People in Hong Kong\'s industry point to several causes for the comparatively leaner times: a lack of opportunities for new acting talent, inadequate training and schooling for people who produce movies and changing tastes within the Hong Kong public. At the same time, local film-makers have had to refocus their cameras for a new audience: mainland China. "The Hong Kong film industry came to a rude awakening [in the late 1990s] that the world was changing faster than it was in the age of new delivery systems for home entertainment and the Internet," says Bede Cheng, a local film archivist and curator. "Unfortunately, i

2024-07-02 22:05:01.486752: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-07-02 22:05:01.558838: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


# Model

In [42]:
@saving.register_keras_serializable()
class TransformerEncoderDecoderModel(keras.Model):
    """
    Build Transformer Encoder Decoder model

    The encoder and decoder token ids are embedded with
    ``TokenAndPositionEmbedding``, passed through stacks of
    ``TransformerEncoder`` / ``TransformerDecoder`` layers, and the decoder
    output is projected to a softmax over the decoder vocabulary.

    :param encoder_tokenizer: tokenizer layer for the encoder side. It is kept
        on the model and serialized with it, but not used inside ``call``.
    :param decoder_tokenizer: tokenizer layer for the decoder side; handled
        like ``encoder_tokenizer``.
    :param max_input_length: sequence length of the encoder embedding.
    :param max_target_length: sequence length of the decoder embedding.
    :param embedding_dim: embedding dimension of both embeddings.
    :param encoder_vocabulary_size: vocabulary size of the encoder embedding.
    :param decoder_vocabulary_size: vocabulary size of the decoder embedding
        and width of the output layer.
    :param num_encoders: number of stacked encoder layers.
    :param num_decoders: number of stacked decoder layers.
    :param encoder_num_heads: attention heads of each encoder layer.
    :param encoder_transformer_intermediate_dim: intermediate dimension of each encoder layer.
    :param decoder_num_heads: attention heads of each decoder layer.
    :param decoder_transformer_intermediate_dim: intermediate dimension of each decoder layer.
    :param kwargs: forwarded to ``keras.Model``.
    """
    def __init__(
        self,
        encoder_tokenizer,
        decoder_tokenizer,
        max_input_length,
        max_target_length,
        embedding_dim,
        encoder_vocabulary_size,
        decoder_vocabulary_size,
        num_encoders,
        num_decoders,
        encoder_num_heads,
        encoder_transformer_intermediate_dim,
        decoder_num_heads,
        decoder_transformer_intermediate_dim,
        **kwargs):
        super().__init__(**kwargs)

        self.encoder_tokenizer = encoder_tokenizer
        self.decoder_tokenizer = decoder_tokenizer
        self.encoders = []
        self.decoders = []

        # Per-layer hyperparameters are remembered here so that get_config
        # does not have to read them from the first layer of a stack, which
        # does not exist when a stack is empty.
        self._encoder_num_heads = encoder_num_heads
        self._encoder_transformer_intermediate_dim = encoder_transformer_intermediate_dim
        self._decoder_num_heads = decoder_num_heads
        self._decoder_transformer_intermediate_dim = decoder_transformer_intermediate_dim

        # Encoder
        self.encoder_embedding = keras_nlp.layers.TokenAndPositionEmbedding(
            vocabulary_size=encoder_vocabulary_size,
            sequence_length=max_input_length,
            embedding_dim=embedding_dim,
            mask_zero=True,
            name="encoder_embedding",
        )
        for i in range(num_encoders):
            self.encoders.append(
                keras_nlp.layers.TransformerEncoder(
                    num_heads=encoder_num_heads,
                    intermediate_dim=encoder_transformer_intermediate_dim,
                    name=f"transformer_encoder_{i}",
                )
            )

        # Decoder
        self.decoder_embedding = keras_nlp.layers.TokenAndPositionEmbedding(
            vocabulary_size=decoder_vocabulary_size,
            sequence_length=max_target_length,
            embedding_dim=embedding_dim,
            mask_zero=True,
            name="decoder_embedding",
        )
        for i in range(num_decoders):
            self.decoders.append(
                keras_nlp.layers.TransformerDecoder(
                    num_heads=decoder_num_heads,
                    intermediate_dim=decoder_transformer_intermediate_dim,
                    name=f"transformer_decoder_{i}",
                )
            )
        # Softmax output: the model returns probabilities, not logits.
        self.dense = keras.layers.Dense(
            decoder_vocabulary_size,
            activation="softmax"
        )

    def call(self, inputs):
        """
        Run the encoder-decoder forward pass.

        :param inputs: pair ``(encoder token ids, decoder token ids)``.
        :return: softmax output of the final dense layer for every decoder position.
        """
        encoded = self.encoder_embedding(inputs[0])
        for encoder in self.encoders:
            encoded = encoder(inputs=encoded)

        decoded = self.decoder_embedding(inputs[1])
        for decoder in self.decoders:
            decoded = decoder(
                decoder_sequence=decoded,
                encoder_sequence=encoded,
                use_causal_mask=True,
            )

        output = self.dense(decoded)
        return output

    def get_config(self):
        """
        Return the configuration needed to rebuild the model with ``from_config``.

        :return: the base model config extended with every constructor
            argument; the tokenizers are stored as their own configs.
        """
        config = super().get_config().copy()
        config.update({
            "encoder_tokenizer": self.encoder_tokenizer.get_config(),
            "decoder_tokenizer": self.decoder_tokenizer.get_config(),
            "max_input_length": self.encoder_embedding.sequence_length,
            "max_target_length": self.decoder_embedding.sequence_length,
            "embedding_dim": self.encoder_embedding.embedding_dim,
            "encoder_vocabulary_size": self.encoder_embedding.vocabulary_size,
            "decoder_vocabulary_size": self.decoder_embedding.vocabulary_size,
            "num_encoders": len(self.encoders),
            "num_decoders": len(self.decoders),
            # Taken from the stored constructor arguments, so this also works
            # when num_encoders or num_decoders is 0.
            "encoder_num_heads": self._encoder_num_heads,
            "encoder_transformer_intermediate_dim": self._encoder_transformer_intermediate_dim,
            "decoder_num_heads": self._decoder_num_heads,
            "decoder_transformer_intermediate_dim": self._decoder_transformer_intermediate_dim,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """
        Rebuild the model from a config produced by ``get_config``.

        :param config: configuration dict; it is not modified.
        :return: a new ``TransformerEncoderDecoderModel``.
        """
        # Work on copies so that neither the caller's dict nor its nested
        # tokenizer configs are changed by the pops and by the tokenizer's
        # own from_config.
        config = dict(config)
        encoder_tokenizer_config = dict(config.pop("encoder_tokenizer"))
        decoder_tokenizer_config = dict(config.pop("decoder_tokenizer"))
        encoder_tokenizer = keras.layers.TextVectorization.from_config(encoder_tokenizer_config)
        decoder_tokenizer = keras.layers.TextVectorization.from_config(decoder_tokenizer_config)
        return cls(
            encoder_tokenizer=encoder_tokenizer,
            decoder_tokenizer=decoder_tokenizer,
            **config
        )

In [43]:
# The learning rate starts at 0.001 and is multiplied by 0.99 every 100 steps.
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    decay_rate=0.99,
    decay_steps=100,
    initial_learning_rate=0.001,
)

# Apple Silicon mac shows the following warning with Keras 2:
#   WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam`
#   runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead,
#   located at `tf.keras.optimizers.legacy.Adam`
# Therefore, keras.optimizers.legacy.Adam is used in that case.
use_legacy_adam = str(keras.__version__).startswith("2.") and is_running_on_apple_sillicon()
if use_legacy_adam:
    adam_class = keras.optimizers.legacy.Adam
else:
    adam_class = keras.optimizers.Adam
optimizer = adam_class(learning_rate=learning_rate)

model_arguments = dict(
    encoder_tokenizer=input_vectorization_layer,
    decoder_tokenizer=target_vectorization_layer,
    max_input_length=max_input_length,
    max_target_length=max_target_length,
    embedding_dim=EMBEDDING_DIM,
    encoder_vocabulary_size=VOCAB_SIZE,
    decoder_vocabulary_size=VOCAB_SIZE,
    num_encoders=NUM_ENCODERS,
    num_decoders=NUM_DECODERS,
    encoder_num_heads=NUM_HEADS,
    encoder_transformer_intermediate_dim=INTERMIDIATE_DIM,
    decoder_num_heads=NUM_HEADS,
    decoder_transformer_intermediate_dim=INTERMIDIATE_DIM,
    name="transformer_text_summarization_model",
)
model = TransformerEncoderDecoderModel(**model_arguments)

# Note
# In the case that the dataset is large and the dimension is small,
# the learning rate of Adam needed to be smaller.
tracked_metrics = [
    masked_acc,
    # keras_nlp.metrics.RougeL()
]
model.compile(
    loss=masked_loss,
    metrics=tracked_metrics,
    optimizer=optimizer,
)

In [44]:
# Training
# Fit on the preprocessed training batches and evaluate on the validation
# batches after every epoch; the returned history is kept for plotting.
history = model.fit(
    x=preprocessed_train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=preprocessed_validation_dataset,
)
# model.summary()

Epoch 1/10
      1/Unknown [1m10s[0m 10s/step - loss: 9.6177 - masked_acc: 0.0000e+00

2024-07-02 22:05:12.708863: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:12.708881: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7181687681662453136
2024-07-02 22:05:12.708887: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/add_18/_202]]
2024-07-02 22:05:12.708896: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7303369528940347056
2024-07-02 22:05:12.708901: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9057391819082547123
2024-07-02 22:05:12.708905: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 387378873

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step - loss: 9.6177 - masked_acc: 0.0000e+00 - val_loss: 9.6026 - val_masked_acc: 0.0000e+00
Epoch 2/10


2024-07-02 22:05:14.044760: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:14.044775: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_6]]
2024-07-02 22:05:14.044781: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14866386119962510299
2024-07-02 22:05:14.044785: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3591497929172821875
2024-07-02 22:05:14.044790: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8039304267182418321
2024-07-02 22:05:14.044796: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9321964354606189143
2024-07-02

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 9.5329 - masked_acc: 0.0015

2024-07-02 22:05:15.363405: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:15.363424: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13178813949261504838
2024-07-02 22:05:15.363442: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3359335281358102826
2024-07-02 22:05:15.363447: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6637575230694712958
2024-07-02 22:05:15.363451: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 16045442193560201276
2024-07-02 22:05:15.363454: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous send item cancelled. Key hash: 2654816292227870078
2024-07-02 22:05:15.363458: I tensorflow/core/framework/local_rendezv

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 9.5329 - masked_acc: 0.0011 - val_loss: 9.5817 - val_masked_acc: 0.0000e+00
Epoch 3/10


2024-07-02 22:05:15.992713: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:15.992732: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[compile_loss/masked_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/All/_72]]
2024-07-02 22:05:15.992746: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3524763877106016780
2024-07-02 22:05:15.992756: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8071039381564072498
2024-07-02 22:05:15.992760: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7856431864848106169
2024-07-02 22:05:15.992767: I tensorflow/core/framework/local_r

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 933ms/step - loss: 9.4434 - masked_acc: 0.0393

2024-07-02 22:05:16.995266: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:16.995279: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous send item cancelled. Key hash: 11422837437334484912
2024-07-02 22:05:16.995293: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/query_1/einsum/Einsum/_234]]
2024-07-02 22:05:16.995302: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7181687681662453136
2024-07-02 22:05:16.995312: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8086039979185686000
2024-07-02 22:05:16.995317: I tensorflow/

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 9.4434 - masked_acc: 0.0295 - val_loss: 9.5523 - val_masked_acc: 0.0028
Epoch 4/10


2024-07-02 22:05:17.514621: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:17.514634: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/attention_output_1/einsum/Einsum/_54]]
2024-07-02 22:05:17.514639: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13753058815561699136
2024-07-02 22:05:17.514643: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14866386119962510299
2024-07-02 22:05:17.514650: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3591497929172821875
2024-07-02 22:05:17.514655: I tensorflow/core/framewor

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 938ms/step - loss: 9.3462 - masked_acc: 0.1193

2024-07-02 22:05:18.521361: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:18.521375: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/gradient_tape/transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/key_1/einsum/Einsum_1/_330]]
2024-07-02 22:05:18.521395: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4149232978214166850
2024-07-02 22:05:18.521400: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 12020224512409632535
2024-07-02 22:05:18.521413: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 17311167122851283223
2024-07-02 22:05:18.521421

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 9.3462 - masked_acc: 0.0895 - val_loss: 9.5154 - val_masked_acc: 0.0038
Epoch 5/10


2024-07-02 22:05:19.013084: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:19.013104: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/NotEqual/_10]]
2024-07-02 22:05:19.013119: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 583673868128984934
2024-07-02 22:05:19.013125: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9724994943831322853
2024-07-02 22:05:19.013129: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous send item cancelled. Key hash: 806039669590630677
2024-07-02 22:05:19.013133: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905ms/step - loss: 9.2451 - masked_acc: 0.1495

2024-07-02 22:05:19.985536: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:19.985554: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4750849862333000196
2024-07-02 22:05:19.985560: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13178813949261504838
2024-07-02 22:05:19.985567: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4149232978214166850
2024-07-02 22:05:19.985572: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3359335281358102826
2024-07-02 22:05:19.985580: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6637575230694712958
2024-07-02 22:05:19.985583: I tensorflow/core/framework/local_rendezvo

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 9.2451 - masked_acc: 0.1122 - val_loss: 9.4740 - val_masked_acc: 0.0066
Epoch 6/10


2024-07-02 22:05:20.489277: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:20.489297: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 583673868128984934
2024-07-02 22:05:20.489303: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13227948152418006060
2024-07-02 22:05:20.489306: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[Shape/_8]]
2024-07-02 22:05:20.489320: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9724994943831322853
2024-07-02 22:05:20.489325: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 17441739009884845392
2024-07-02 22:05:20.

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 9.1432 - masked_acc: 0.1631

2024-07-02 22:05:21.622597: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:21.622635: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/value_1/einsum/Einsum/_236]]
2024-07-02 22:05:21.622645: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 11378142067497230498
2024-07-02 22:05:21.622649: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13178813949261504838
2024-07-02 22:05:21.622654: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 4149232978214166850
2024-07-02 22:05:21.622659: I tensorflow

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 9.1432 - masked_acc: 0.1224 - val_loss: 9.4312 - val_masked_acc: 0.0066
Epoch 7/10


2024-07-02 22:05:22.119316: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:22.119339: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3524763877106016780
2024-07-02 22:05:22.119348: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9706992149152278874
2024-07-02 22:05:22.119351: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/attention_output_1/einsum/Einsum/_54]]
2024-07-02 22:05:22.119372: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14702601333373700796
2024-07-02 22:05:22.119382: I tensorflow/core/framework

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 9.0407 - masked_acc: 0.1662

2024-07-02 22:05:23.393388: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:23.393416: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13178813949261504838
2024-07-02 22:05:23.393425: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3359335281358102826
2024-07-02 22:05:23.393453: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/compile_loss/masked_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/All/_294]]
2024-07-02 22:05:23.393464: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 6637575230694712958
2024-07-02 22:05:23.393473: I tensorf

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 9.0407 - masked_acc: 0.1246 - val_loss: 9.3889 - val_masked_acc: 0.0056
Epoch 8/10


2024-07-02 22:05:23.920991: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:23.921007: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/attention_output_1/einsum/Einsum/_54]]
2024-07-02 22:05:23.921023: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14866386119962510299
2024-07-02 22:05:23.921030: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3591497929172821875
2024-07-02 22:05:23.921036: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8039304267182418321
2024-07-02 22:05:23.921040: I tensorflow/core/framework

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 897ms/step - loss: 8.9367 - masked_acc: 0.1601

2024-07-02 22:05:24.888328: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:24.888346: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/add_18/_202]]
2024-07-02 22:05:24.888361: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 5264887135522579776
2024-07-02 22:05:24.888378: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7625833597198135204
2024-07-02 22:05:24.888390: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7181687681662453136
2024-07-02 22:05:24.888402: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 905739181

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 8.9367 - masked_acc: 0.1201 - val_loss: 9.3478 - val_masked_acc: 0.0056
Epoch 9/10


2024-07-02 22:05:25.386840: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:25.386865: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[compile_loss/masked_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert/data_0/_74]]
2024-07-02 22:05:25.386883: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 13753058815561699136
2024-07-02 22:05:25.386889: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous send item cancelled. Key hash: 8596111050428461720
2024-07-02 22:05:25.386893: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9724994943831322853
2024-07-02 22:05:25.386900: I tensorflow/core

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 903ms/step - loss: 8.8302 - masked_acc: 0.1767

2024-07-02 22:05:26.360122: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:26.360137: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/transformer_text_summarization_model_1/transformer_decoder_0_1/BroadcastTo/_242]]
2024-07-02 22:05:26.360158: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7181687681662453136
2024-07-02 22:05:26.360174: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 5666092009480652652
2024-07-02 22:05:26.360182: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 5183682304234358996
2024-07-02 22:05:26.360187: I tensorflow/core/framework/local_rendezvous.cc

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 8.8302 - masked_acc: 0.1326 - val_loss: 9.3081 - val_masked_acc: 0.0066
Epoch 10/10


2024-07-02 22:05:26.962553: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:26.962570: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 583673868128984934
2024-07-02 22:05:26.962576: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[compile_loss/masked_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert/data_3/_78]]
2024-07-02 22:05:26.962590: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 2286092525749285196
2024-07-02 22:05:26.962603: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9724994943831322853
2024-07-02 22:05:26.962612: I tensorflow/core/f

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 8.7212 - masked_acc: 0.1828

2024-07-02 22:05:28.711754: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[StatefulPartitionedCall/adam/add_40/_142]]
2024-07-02 22:05:28.711783: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 10057838093013088228
2024-07-02 22:05:28.711810: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:28.711828: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7181687681662453136
2024-07-02 22:05:28.711836: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9057391819082547123
2024-07-02 22:05:28.711844: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 38737887

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 8.7212 - masked_acc: 0.1371 - val_loss: 9.2693 - val_masked_acc: 0.0066


2024-07-02 22:05:29.313638: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:29.313662: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/transformer_encoder_0_1/self_attention_layer_1/key_1/einsum/Einsum/_32]]
2024-07-02 22:05:29.313687: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 3524763877106016780
2024-07-02 22:05:29.313699: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 8071039381564072498
2024-07-02 22:05:29.313706: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 9706992149152278874
2024-07-02 22:05:29.313709: I tensorflow/core/framework/local_rendezv

In [45]:
# Persist the trained model to disk in the `.keras` archive format.
# NOTE(review): the file name says "classification" although this notebook
# trains a text-summarization model; a later cell reloads the model from this
# exact path, so the two literals must be renamed together if this is changed.
model.save('text_classification.keras')

In [46]:
# Evaluate the trained model on `preprocessed_test_dataset`. The bare name on
# the last line makes the notebook display the values returned by evaluate().
test_loss = model.evaluate(preprocessed_test_dataset)
test_loss

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 972ms/step - loss: 9.2575 - masked_acc: 0.0175


2024-07-02 22:05:30.485195: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-07-02 22:05:30.485211: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[transformer_text_summarization_model_1/transformer_decoder_0_1/BroadcastTo/_42]]
2024-07-02 22:05:30.485218: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 7856431864848106169
2024-07-02 22:05:30.485221: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14866386119962510299
2024-07-02 22:05:30.485225: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous recv item cancelled. Key hash: 14691011841256006551
2024-07-02 22:05:30.485229: I tensorflow/core/framework/local_rendezvous.cc:422] Local rendezvous 

[9.257521629333496, 0.011666666716337204]

In [47]:
def summarize(model, encoder_tokenizer, decoder_tokenizer, text, max_length=None):
    """
    Summarize text with greedy, token-by-token decoding.
    :param model: model whose predict() accepts [encoder token ids, decoder token ids]
        and returns an array indexed as [batch, position, vocabulary entry].
    :param encoder_tokenizer: callable that turns a list of strings into encoder token ids.
    :param decoder_tokenizer: callable that turns a string into decoder token ids
        and exposes get_vocabulary().
    :param text: original text
    :param max_length: number of decoder positions, i.e. the maximum number of
        tokens to generate. Defaults to the notebook-level max_target_length.
    :return: summarized text; the decoded tokens (including '[start]' and, when
        it was generated, '[end]') joined by single spaces.
    """
    if max_length is None:
        # Keep the original behaviour: fall back to the notebook-level setting.
        max_length = max_target_length

    vocabulary = decoder_tokenizer.get_vocabulary()
    input_sequence = encoder_tokenizer([text])

    # The first id produced for each marker string is used as its token id.
    start_token = decoder_tokenizer('[start]')[0].numpy()
    end_token = decoder_tokenizer('[end]')[0].numpy()

    decoded_sentence = [start_token]
    for i in range(max_length):
        # Right-pad the tokens decoded so far with zeros so the decoder input
        # always has exactly `max_length` positions.
        padding = [0] * (max_length - len(decoded_sentence))
        decoder_inputs = tf.convert_to_tensor(
            [decoded_sentence + padding],
            dtype="int64",
        )
        predictions = model.predict(
            [input_sequence, decoder_inputs],
            verbose=0
        )
        # Position i of the output holds the scores for the token that follows
        # the i + 1 tokens decoded so far; pick the highest-scoring one.
        predicted_token = np.argmax(predictions[0, i, :])
        decoded_sentence.append(predicted_token)
        if predicted_token == end_token:
            break

    return " ".join(vocabulary[token] for token in decoded_sentence)


# Reload the model that was saved to disk earlier and take both tokenizers
# from the reloaded instance.
loaded_model = keras.models.load_model('text_classification.keras')
encoder_tokenizer, decoder_tokenizer = (
    loaded_model.encoder_tokenizer,
    loaded_model.decoder_tokenizer,
)

# Sample
# sample_text = "Giant pig fell into the swimming pool at his home in Ringwood, Hampshire. It took the efforts of a team of firefighters to winch him out of the water. A wayward horse also had to be rescued from a swimming pool in Sussex."
# print("Original:", sample_text)
# print("Summary:", summarize(model, encoder_tokenizer, decoder_tokenizer, sample_text))
# Texts to summarize: two long articles followed by two short sentences.
sample_texts = [
    """
(CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third gold in Moscow as he anchored Jamaica to victory in the men\'s 4x100m relay. The fastest man in the world charged clear of United States rival Justin Gatlin as the Jamaican quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and Bolt won in 37.36 seconds. The U.S finished second in 37.56 seconds with Canada taking the bronze after Britain were disqualified for a faulty handover. The 26-year-old Bolt has now collected eight gold medals at world championships, equaling the record held by American trio Carl Lewis, Michael Johnson and Allyson Felix, not to mention the small matter of six Olympic titles. The relay triumph followed individual successes in the 100 and 200 meters in the Russian capital. "I\'m proud of myself and I\'ll continue to work to dominate for as long as possible," Bolt said, having previously expressed his intention to carry on until the 2016 Rio Olympics. Victory was never seriously in doubt once he got the baton safely in hand from Ashmeade, while Gatlin and the United States third leg runner Rakieem Salaam had problems. Gatlin strayed out of his lane as he struggled to get full control of their baton and was never able to get on terms with Bolt. Earlier, Jamaica\'s women underlined their dominance in the sprint events by winning the 4x100m relay gold, anchored by Shelly-Ann Fraser-Pryce, who like Bolt was completing a triple. Their quartet recorded a championship record of 41.29 seconds, well clear of France, who crossed the line in second place in 42.73 seconds. Defending champions, the United States, were initially back in the bronze medal position after losing time on the second handover between Alexandria Anderson and English Gardner, but promoted to silver when France were subsequently disqualified for an illegal handover. The British quartet, who were initially fourth, were promoted to the bronze which eluded their men\'s team. 
Fraser-Pryce, like Bolt aged 26, became the first woman to achieve three golds in the 100-200 and the relay. In other final action on the last day of the championships, France\'s Teddy Tamgho became the third man to leap over 18m in the triple jump, exceeding the mark by four centimeters to take gold. Germany\'s Christina Obergfoll finally took gold at global level in the women\'s javelin after five previous silvers, while Kenya\'s Asbel Kiprop easily won a tactical men\'s 1500m final. Kiprop\'s compatriot Eunice Jepkoech Sum was a surprise winner of the women\'s 800m. Bolt\'s final dash for golden glory brought the eight-day championship to a rousing finale, but while the hosts topped the medal table from the United States there was criticism of the poor attendances in the Luzhniki Stadium. There was further concern when their pole vault gold medalist Yelena Isinbayeva made controversial remarks in support of Russia\'s new laws, which make "the propagandizing of non-traditional sexual relations among minors" a criminal offense. She later attempted to clarify her comments, but there were renewed calls by gay rights groups for a boycott of the 2014 Winter Games in Sochi, the next major sports event in Russia.
""",
    """
Vice President Dick Cheney will serve as acting president briefly Saturday while President Bush is anesthetized for a routine colonoscopy, White House spokesman Tony Snow said Friday. Bush is scheduled to have the medical procedure, expected to take about 2 1/2 hours, at the presidential retreat at Camp David, Maryland, Snow said. Bush's last colonoscopy was in June 2002, and no abnormalities were found, Snow said. The president's doctor had recommended a repeat procedure in about five years. The procedure will be supervised by Dr. Richard Tubb and conducted by a multidisciplinary team from the National Naval Medical Center in Bethesda, Maryland, Snow said. A colonoscopy is the most sensitive test for colon cancer, rectal cancer and polyps, small clumps of cells that can become cancerous, according to the Mayo Clinic. Small polyps may be removed during the procedure. Snow said that was the case when Bush had colonoscopies before becoming president. Snow himself is undergoing chemotherapy for cancer that began in his colon and spread to his liver. Snow told reporters he had a chemo session scheduled later Friday. Watch Snow talk about Bush's procedure and his own colon cancer » . "The president wants to encourage everybody to use surveillance," Snow said. The American Cancer Society recommends that people without high-risk factors or symptoms begin getting screened for signs of colorectal cancer at age 50. E-mail to a friend .
""",
    "There are two chickens in the garden.",
    "Two chickens fell into the swimming pool in the garden.",
]

# Print each original text followed by its generated summary.
# NOTE(review): the summaries are produced with the in-memory `model`, while
# the tokenizers come from the reloaded `loaded_model`; confirm whether the
# reloaded model was meant to be exercised here.
for sample_text in sample_texts:
    print("Original:", sample_text)
    print("Summary:", summarize(model, encoder_tokenizer, decoder_tokenizer, sample_text))


Original: 
(CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third gold in Moscow as he anchored Jamaica to victory in the men's 4x100m relay. The fastest man in the world charged clear of United States rival Justin Gatlin as the Jamaican quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and Bolt won in 37.36 seconds. The U.S finished second in 37.56 seconds with Canada taking the bronze after Britain were disqualified for a faulty handover. The 26-year-old Bolt has now collected eight gold medals at world championships, equaling the record held by American trio Carl Lewis, Michael Johnson and Allyson Felix, not to mention the small matter of six Olympic titles. The relay triumph followed individual successes in the 100 and 200 meters in the Russian capital. "I'm proud of myself and I'll continue to work to dominate for as long as possible," Bolt said, having previously expressed his intention to carry on until the 2016 Rio Olympics. Victory was n