## Text summarization on the CNN/DailyMail dataset

In [1]:
# Install KerasNLP and the other packages this notebook depends on
%pip install keras-nlp rouge-score tensorflow-datasets datasets

Note: you may need to restart the kernel to use updated packages.


In [2]:
import platform
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds
import keras
import keras_nlp

# SageMaker cannot use @keras.saving
from keras import saving

# Hyperparameters shared by the tokenizers, the model and the training loop.
# BATCH_SIZE = 32 # p3.2xlarge ResourceExhaustedError
EMBEDDING_DIM = 64  # embedding size passed to both token-and-position embeddings
NUM_HEADS = 8  # attention heads per Transformer encoder/decoder layer
INTERMIDIATE_DIM = 1024  # intermediate_dim of each Transformer layer (name is misspelt; kept because later cells reference it)
VOCAB_SIZE = 15000  # max_tokens of the TextVectorization layers and vocabulary size of the model
NUM_ENCODERS = 1  # number of stacked TransformerEncoder layers
NUM_DECODERS = 1  # number of stacked TransformerDecoder layers
BATCH_SIZE = 16 # 1 does not work
NUM_EPOCHS = 10 # 1 100

Using TensorFlow backend


In [3]:
# Builder kwargs depend on the platform: Apple Silicon Macs pass
# 'trust_remote_code': False explicitly, every other platform passes nothing
# because, when 'trust_remote_code' is False, loading does not work on
# AWS SageMaker.
is_apple_silicon = platform.system() == "Darwin" and platform.processor() == "arm"
args = {'trust_remote_code': False} if is_apple_silicon else {}

# Load the three splits of the dataset in one call.
train_dataset, validation_dataset, test_dataset = tfds.load(
    'huggingface:ccdv__cnn_dailymail/3.0.0',
    builder_kwargs=args,
    split=['train', 'validation', 'test'],
)

  hf_names = hf_datasets.list_datasets()
2024-06-18 17:42:13.145574: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-06-18 17:42:13.145601: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-06-18 17:42:13.145605: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-06-18 17:42:13.145663: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-18 17:42:13.145684: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [4]:
# Disabled scratch code that measured the longest sequences over the preprocessed splits:
# max_input_length = max(len(row[0][0]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_target_length = max(len(row[0][1]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_decoder_target_length = max(len(row[1]) for ds in [preprocessed_train_dataset, preprocessed_validation_dataset, preprocessed_test_dataset] for row in ds)
# max_input_length, max_target_length, max_decoder_target_length

# Upper and lower bounds, in tokens, on the highlights kept by the length filter.
summarized_text_size = 256 #  1437 is the longest summarized text in dataset
min_summarized_text_size = 32

# TODO: the following values should be derived programmatically.
max_input_length = 2137  # fixed encoder input length in tokens
# The "+ 1" matches the decoder input/target width produced by prepare_dataset:
# the highlights plus the [start]/[end] markers, shifted by one position.
max_target_length = summarized_text_size + 1
max_decoder_target_length = summarized_text_size + 1

In [5]:
# Development switch: when True, each split is cut down to a small subset.
DEVELOPMENT = True
if DEVELOPMENT:
    running_on_apple_silicon = (
        platform.system() == "Darwin" and platform.processor() == "arm"
    )
    if not running_on_apple_silicon:
        # Skip the first nine tenths of every split, i.e. keep roughly the last 10%.
        train_size = len(train_dataset) // 10 * 9
        validation_size = len(validation_dataset) // 10 * 9
        test_size = len(test_dataset) // 10 * 9
        train_dataset, validation_dataset, test_dataset = (
            train_dataset.skip(train_size),
            validation_dataset.skip(validation_size),
            test_dataset.skip(test_size),
        )
    else:
        # On Apple Silicon keep only the first NUM_TAKE examples of every split.
        NUM_TAKE = 128  # 500
        train_dataset, validation_dataset, test_dataset = (
            split.take(NUM_TAKE)
            for split in (train_dataset, validation_dataset, test_dataset)
        )

In [6]:
# @see https://github.com/keras-team/keras-nlp/blob/50e041487b1d8b30b34c5fb738db3ed3406363bc/examples/machine_translation/data.py
import string
import re

# Punctuation characters removed during standardization. The square brackets
# are taken out of the set so that they survive standardization.
strip_chars = string.punctuation.replace("[", "").replace("]", "")

@saving.register_keras_serializable()
def custom_standardization(input_string):
    """Lowercase the input and delete every character listed in `strip_chars`.

    :param input_string: string tensor handed over by the TextVectorization layer
    :return: string tensor, lowercased, with the `strip_chars` characters removed
    """
    # Regex character class built from the (escaped) module-level strip_chars.
    punctuation_class = "[" + re.escape(strip_chars) + "]"
    lowered_text = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowered_text, punctuation_class, "")

# Ragged tokenizer whose vocabulary is learned from all three splits.
vectorization_layer = keras.layers.TextVectorization(
    output_mode='int',
    ragged=True,
    max_tokens=VOCAB_SIZE,
    standardize=custom_standardization,
)

# Corpus used for adapt(): each batch element is the article followed by its
# highlights, wrapped in the [start] / [end] markers.
adapt_corpus = (
    train_dataset
    .concatenate(validation_dataset)
    .concatenate(test_dataset)
    .batch(BATCH_SIZE)
    .map(lambda row: tf.strings.join(
        ['[start] ', row['article'], ' ', row['highlights'], ' [end]']
    ))
)
# Warning: adapt() clears the data already held by the layer, so it must be
# called exactly once.
vectorization_layer.adapt(adapt_corpus)

2024-06-18 17:42:13.500836: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


In [7]:
# Fixed-length tokenizers for the encoder and decoder sides. Their output
# lengths come from the constants that also size the model's position
# embeddings (max_input_length / max_target_length), so the two cannot drift
# apart. TODO: derive those constants programmatically.
input_vectorization_layer = keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    output_sequence_length=max_input_length,
)
target_vectorization_layer = keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=VOCAB_SIZE,
    output_mode='int',
    output_sequence_length=max_target_length,
)

# Both layers reuse the vocabulary learned by `vectorization_layer`; fetch it once.
shared_vocabulary = vectorization_layer.get_vocabulary()
input_vectorization_layer.set_vocabulary(shared_vocabulary)
target_vectorization_layer.set_vocabulary(shared_vocabulary)

In [8]:
# Display the vocabulary size and the first eight vocabulary entries (special tokens included).
vectorization_layer.vocabulary_size(), vectorization_layer.get_vocabulary(include_special_tokens=True)[0:8]

(15000, ['', '[UNK]', 'the', 'to', 'of', 'a', 'and', 'in'])

In [9]:
# Sanity check: the first token id of '[start]' must differ from that of 'start',
# i.e. the equality below must be False. It guards against the standardization
# stripping the square brackets from the marker tokens.
assert not vectorization_layer(['[start]'])[0] == vectorization_layer(['start'])[0]

In [10]:
"""
vectorization_layer(['This is a pen', 'I am a software engineer'])
#vectorization_layer(['This is a pen', 'I am a software engineer']).row_lengths().shape[0]
# 2
rows = vectorization_layer(['This is a pen', 'I am a software engineer']).row_lengths().shape[0]
vectorization_layer(['This is a pen', 'I am a software engineer']).to_tensor(shape=(rows, 10))
# .to_tensor()

RaggedTensor.to_tensor can make 0-filled Tensor
"""
def prepare_dataset(x):
    """Turn a batch of raw examples into model inputs and targets.

    :param x: batched dataset element with string fields 'article' and 'highlights'
    :return: ((encoder input, decoder input), decoder output); the decoder
        input and output are the same padded token sequence shifted by one
        position against each other
    """
    encoder_input = input_vectorization_layer(x['article'])

    # Wrap the highlights in the start/end markers before tokenizing.
    wrapped_highlights = tf.strings.join(['[start] ', x['highlights'], ' [end]'])
    ragged_tokens = vectorization_layer(wrapped_highlights)

    # Zero-pad the ragged rows to a fixed width of summarized_text_size + 2.
    batch_rows = ragged_tokens.row_lengths().shape[0]
    padded_width = summarized_text_size + 1 + 1
    padded_tokens = ragged_tokens.to_tensor(shape=(batch_rows, padded_width))

    # Teacher forcing: drop the last column for the decoder input and the first
    # column for the target; both are summarized_text_size + 1 wide.
    decoder_input = padded_tokens[:, :-1]
    decoder_output = padded_tokens[:, 1:]
    return (encoder_input, decoder_input), decoder_output

def filter_by_length(row):
    """Keep an example only if its highlights have an acceptable token count.

    :param row: dataset element with a string field 'highlights'
    :return: boolean tensor, true when the number of highlight tokens lies in
        [min_summarized_text_size, summarized_text_size]
    """
    token_count = tf.size(vectorization_layer(row['highlights']))
    return tf.logical_and(
        min_summarized_text_size <= token_count,
        token_count <= summarized_text_size,
    )

def _to_model_batches(dataset):
    """Filter a split by highlight length, batch it and tokenize each batch."""
    return (
        dataset
        .filter(filter_by_length)
        .batch(BATCH_SIZE)
        .map(prepare_dataset, num_parallel_calls=tf.data.AUTOTUNE)
    )

preprocessed_train_dataset = _to_model_batches(train_dataset)
preprocessed_validation_dataset = _to_model_batches(validation_dataset)
preprocessed_test_dataset = _to_model_batches(test_dataset)

In [11]:
# Show one raw example, then the shapes and leading slices of one preprocessed batch.
for entry in train_dataset.take(1):
    raw_article = entry['article'].numpy()
    raw_highlights = entry['highlights'].numpy()
    print('article: ', raw_article)
    print('highlights: ', raw_highlights)

for entry in preprocessed_train_dataset.take(1):
    (encoder_batch, decoder_in_batch), decoder_out_batch = entry
    print('encoder input: ', encoder_batch.shape, encoder_batch[:10])
    print('decoder input: ', decoder_in_batch.shape, decoder_in_batch[:10])
    print('decoder output: ', decoder_out_batch.shape, decoder_out_batch[:10])

article:  b'It\'s official: U.S. President Barack Obama wants lawmakers to weigh in on whether to use military force in Syria. Obama sent a letter to the heads of the House and Senate on Saturday night, hours after announcing that he believes military action against Syrian targets is the right step to take over the alleged use of chemical weapons. The proposed legislation from Obama asks Congress to approve the use of military force "to deter, disrupt, prevent and degrade the potential for future uses of chemical weapons or other weapons of mass destruction." It\'s a step that is set to turn an international crisis into a fierce domestic political battle. There are key questions looming over the debate: What did U.N. weapons inspectors find in Syria? What happens if Congress votes no? And how will the Syrian government react? In a televised address from the White House Rose Garden earlier Saturday, the president said he would take his case to Congress, not because he has to -- but beca

In [12]:
@saving.register_keras_serializable()
class TransformerEncoderDecoderModel(keras.Model):
    """
    Transformer encoder-decoder model.

    The encoder side is a token-and-position embedding followed by
    `num_encoders` TransformerEncoder layers. The decoder side is a
    token-and-position embedding followed by `num_decoders` TransformerDecoder
    layers that attend to the encoder output, followed by a softmax Dense layer
    over the decoder vocabulary.

    The two tokenizers are only stored on the model and written to its config;
    `call` expects inputs that are already tokenized.
    """
    def __init__(
        self,
        encoder_tokenizer,
        decoder_tokenizer,
        max_input_length,
        max_target_length,
        embedding_dim,
        encoder_vocabulary_size,
        decoder_vocabulary_size,
        num_encoders,
        num_decoders,
        encoder_num_heads,
        encoder_transformer_intermediate_dim,
        decoder_num_heads,
        decoder_transformer_intermediate_dim,
        **kwargs):
        """
        :param encoder_tokenizer: TextVectorization layer for the encoder text (stored, not used in call)
        :param decoder_tokenizer: TextVectorization layer for the decoder text (stored, not used in call)
        :param max_input_length: sequence length of the encoder position embedding
        :param max_target_length: sequence length of the decoder position embedding
        :param embedding_dim: embedding size shared by encoder and decoder
        :param encoder_vocabulary_size: vocabulary size of the encoder embedding
        :param decoder_vocabulary_size: vocabulary size of the decoder embedding and output layer
        :param num_encoders: number of TransformerEncoder layers
        :param num_decoders: number of TransformerDecoder layers
        :param encoder_num_heads: attention heads per encoder layer
        :param encoder_transformer_intermediate_dim: intermediate_dim of each encoder layer
        :param decoder_num_heads: attention heads per decoder layer
        :param decoder_transformer_intermediate_dim: intermediate_dim of each decoder layer
        :param kwargs: forwarded to keras.Model (e.g. name)
        """
        super().__init__(**kwargs)

        self.encoder_tokenizer = encoder_tokenizer
        self.decoder_tokenizer = decoder_tokenizer

        # Remember the per-layer hyperparameters so that get_config() does not
        # have to read them from the first layer, which fails when the stack
        # is empty (num_encoders == 0 or num_decoders == 0).
        self._encoder_num_heads = encoder_num_heads
        self._encoder_transformer_intermediate_dim = encoder_transformer_intermediate_dim
        self._decoder_num_heads = decoder_num_heads
        self._decoder_transformer_intermediate_dim = decoder_transformer_intermediate_dim

        self.encoders = []
        self.decoders = []

        # Encoder
        self.encoder_embedding = keras_nlp.layers.TokenAndPositionEmbedding(
            vocabulary_size=encoder_vocabulary_size,
            sequence_length=max_input_length,
            embedding_dim=embedding_dim,
            mask_zero=True,
            name="encoder_embedding",
        )
        for i in range(num_encoders):
            self.encoders.append(
                keras_nlp.layers.TransformerEncoder(
                    num_heads=encoder_num_heads,
                    intermediate_dim=encoder_transformer_intermediate_dim,
                    name=f"transformer_encoder_{i}",
                )
            )

        # Decoder
        self.decoder_embedding = keras_nlp.layers.TokenAndPositionEmbedding(
            vocabulary_size=decoder_vocabulary_size,
            sequence_length=max_target_length,
            embedding_dim=embedding_dim,
            mask_zero=True,
            name="decoder_embedding",
        )
        for i in range(num_decoders):
            self.decoders.append(
                keras_nlp.layers.TransformerDecoder(
                    num_heads=decoder_num_heads,
                    intermediate_dim=decoder_transformer_intermediate_dim,
                    name=f"transformer_decoder_{i}",
                )
            )
        # Output layer: a softmax distribution over the decoder vocabulary.
        self.dense = keras.layers.Dense(
            decoder_vocabulary_size,
            activation="softmax"
        )

    def call(self, inputs):
        """
        Run the encoder and decoder stacks.

        :param inputs: pair whose first item is the tokenized encoder input and
            whose second item is the tokenized decoder input
        :return: output of the softmax Dense layer applied to the decoder output
        """
        encoded = self.encoder_embedding(inputs[0])
        for encoder in self.encoders:
            encoded = encoder(inputs=encoded)

        decoded = self.decoder_embedding(inputs[1])
        for decoder in self.decoders:
            decoded = decoder(
                decoder_sequence=decoded,
                encoder_sequence=encoded,
                use_causal_mask=True,
            )

        output = self.dense(decoded)
        return output

    def get_config(self):
        """
        Return the base config extended with every constructor argument.

        The tokenizers are stored as their own layer configs; from_config()
        rebuilds them.
        """
        config = super().get_config().copy()
        config.update({
            "encoder_tokenizer": self.encoder_tokenizer.get_config(),
            "decoder_tokenizer": self.decoder_tokenizer.get_config(),
            "max_input_length": self.encoder_embedding.sequence_length,
            "max_target_length": self.decoder_embedding.sequence_length,
            "embedding_dim": self.encoder_embedding.embedding_dim,
            "encoder_vocabulary_size": self.encoder_embedding.vocabulary_size,
            "decoder_vocabulary_size": self.decoder_embedding.vocabulary_size,
            "num_encoders": len(self.encoders),
            "num_decoders": len(self.decoders),
            "encoder_num_heads": self._encoder_num_heads,
            "encoder_transformer_intermediate_dim": self._encoder_transformer_intermediate_dim,
            "decoder_num_heads": self._decoder_num_heads,
            "decoder_transformer_intermediate_dim": self._decoder_transformer_intermediate_dim,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """
        Rebuild the model from a config produced by get_config().

        :param config: config dict; it is not modified
        :return: new TransformerEncoderDecoderModel instance
        """
        # Work on a shallow copy so the caller's dict keeps its tokenizer entries.
        config = dict(config)
        encoder_tokenizer_config = config.pop("encoder_tokenizer")
        decoder_tokenizer_config = config.pop("decoder_tokenizer")
        encoder_tokenizer = keras.layers.TextVectorization.from_config(encoder_tokenizer_config)
        decoder_tokenizer = keras.layers.TextVectorization.from_config(decoder_tokenizer_config)
        return cls(
            encoder_tokenizer=encoder_tokenizer,
            decoder_tokenizer=decoder_tokenizer,
            **config
        )

In [13]:
# Exponentially decaying learning rate: starts at 0.001 and is multiplied by
# 0.99 every 20 optimizer steps.
learning_rate = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=20,
    decay_rate=0.99,
)
if platform.system() == "Darwin" and platform.processor() == "arm":
    # Apple Silicon Macs show the following warning:
    #   WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam`
    #   runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead,
    #   located at `tf.keras.optimizers.legacy.Adam`
    # The warning names Adam; this notebook trains with RMSprop, so the legacy
    # RMSprop optimizer is used on this platform.
    optimizer = keras.optimizers.legacy.RMSprop(learning_rate=learning_rate)
else:
    optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)

# Sparse categorical cross-entropy over the softmax output of the model.
# NOTE(review): reduction=NONE leaves the loss un-reduced — confirm this is
# what is wanted when the loss is handed to model.compile / model.fit.
loss_fn = keras.losses.SparseCategoricalCrossentropy(
    reduction=keras.losses.Reduction.NONE
)
# Build the encoder-decoder model from the notebook-level hyperparameters.
# Encoder and decoder share the same vocabulary size, head count and
# intermediate dimension.
model = TransformerEncoderDecoderModel(
    encoder_tokenizer=input_vectorization_layer,
    decoder_tokenizer=target_vectorization_layer,
    max_input_length=max_input_length,
    max_target_length=max_target_length,
    embedding_dim=EMBEDDING_DIM,
    encoder_vocabulary_size=VOCAB_SIZE,
    decoder_vocabulary_size=VOCAB_SIZE,
    num_encoders=NUM_ENCODERS,
    num_decoders=NUM_DECODERS,
    encoder_num_heads=NUM_HEADS,
    encoder_transformer_intermediate_dim=INTERMIDIATE_DIM,
    decoder_num_heads=NUM_HEADS,
    decoder_transformer_intermediate_dim=INTERMIDIATE_DIM,
    name="transformer_text_summarization_model",
)
# Note
# When the dataset is large and the dimension is small,
# the learning rate of Adam needed to be smaller.
model.compile(
    optimizer=optimizer,
    loss=loss_fn,
    metrics=[
        keras.metrics.SparseCategoricalAccuracy()
        # 'accuracy', # This should not be used.
        # "loss", # This does not need to be specified.
        # keras_nlp.metrics.RougeL()
    ]
)

In [14]:
# Train on the preprocessed training split and validate on the preprocessed
# validation split after every epoch; the returned History object is kept in `history`.
history = model.fit(
    preprocessed_train_dataset,
    validation_data=preprocessed_validation_dataset,
    # Generally, 100 to 200 epochs are used for generative models.
    # Because this is a prototype, the number is intentionally small.
    epochs=NUM_EPOCHS,
    # steps_per_epoch=2,
)
# model.summary()

Epoch 1/10
      7/Unknown - 34s 4s/step - loss: 1.7010 - sparse_categorical_accuracy: 0.0326

2024-06-18 17:42:48.323083: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14723218912504078356
2024-06-18 17:42:48.323107: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 3169183304579104278
2024-06-18 17:42:48.323127: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 16362726997968918606
2024-06-18 17:42:48.323134: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 10467125803559141714
2024-06-18 17:42:48.323137: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14119534312805664449
2024-06-18 17:42:48.323144: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 10065104762816455936
2024-06-18 17:42:48.323148: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous rec

Epoch 2/10


2024-06-18 17:42:55.399125: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 1840523209888531552
2024-06-18 17:42:55.399140: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 17048533278248386395
2024-06-18 17:42:55.399147: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8392551989560379414
2024-06-18 17:42:55.399154: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 9675240249405842931
2024-06-18 17:42:55.399158: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 5982160123268073974
2024-06-18 17:42:55.399160: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 392086375942743717
2024-06-18 17:42:55.399163: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv ite

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Save the trained model to a .keras file.
# NOTE(review): the file name says "classification" although this notebook
# trains a summarization model; the cell that reloads the model uses the same
# name, so both must be renamed together.
model.save('text_classification.keras')

In [16]:
# Evaluate on the preprocessed test split.
# Despite its name, `test_loss` holds more than the loss: the recorded output
# is a two-element list, presumably [loss, sparse categorical accuracy] given
# the single metric passed to compile().
test_loss = model.evaluate(
    preprocessed_test_dataset,
)
test_loss



2024-06-18 17:49:03.371320: I tensorflow/core/framework/local_rendezvous.cc:425] Local rendezvous send item cancelled. Key hash: 2226400670515421360
2024-06-18 17:49:03.371338: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8991942974715753332
2024-06-18 17:49:03.371359: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 6079130253509059758
2024-06-18 17:49:03.371367: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 1840523209888531552
2024-06-18 17:49:03.371371: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 8392551989560379414
2024-06-18 17:49:03.371375: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv item cancelled. Key hash: 14903777881495472864
2024-06-18 17:49:03.371379: I tensorflow/core/framework/local_rendezvous.cc:421] Local rendezvous recv it

[1.2944235801696777, 0.07090103626251221]

In [17]:
def summarize(model, encoder_tokenizer, decoder_tokenizer, text):
    """
    Summarize text by greedy decoding, one token per step.

    Decoding starts from the `[start]` token and stops when `[end]` is
    predicted or after `max_target_length` (module-level) steps.

    :param model: model called with [encoder token ids, decoder token ids]
        that returns per-position scores over the decoder vocabulary
    :param encoder_tokenizer: layer that turns a list of strings into encoder token ids
    :param decoder_tokenizer: layer that provides the decoder vocabulary and
        the ids of the `[start]` / `[end]` tokens
    :param text: original text
    :return: summarized text: the decoded vocabulary tokens joined by single
        spaces, beginning with the `[start]` token and ending with `[end]`
        when it was generated
    """
    table = decoder_tokenizer.get_vocabulary()
    input_sequence = encoder_tokenizer([text])

    start_token = decoder_tokenizer('[start]')[0].numpy()
    end_token = decoder_tokenizer('[end]')[0].numpy()

    # Zero-padded decoder input, filled in place as tokens are generated
    # instead of being rebuilt with convert + concat on every step.
    decoder_buffer = np.zeros((1, max_target_length), dtype="int64")
    decoder_buffer[0, 0] = start_token

    decoded_sentence = [start_token]
    for i in range(max_target_length):
        # Call the model directly: Model.predict() is meant for batch
        # processing and is slow when invoked once per decoding step.
        predictions = model(
            [input_sequence, tf.convert_to_tensor(decoder_buffer)],
            training=False,
        )
        # Greedy choice: most likely token at the current position.
        predicted_token = int(np.argmax(predictions[0, i, :].numpy()))
        decoded_sentence.append(predicted_token)
        if predicted_token == end_token:
            break
        # Feed the prediction back as the next decoder input position.
        if i + 1 < max_target_length:
            decoder_buffer[0, i + 1] = predicted_token

    return " ".join(table[token] for token in decoded_sentence)


# Reload the model saved earlier and take the tokenizers from the reloaded instance.
# NOTE(review): the summarize() calls below pass the in-memory `model`, not
# `loaded_model`; only the tokenizers come from the reloaded model. Confirm
# whether the reloaded model was meant to be used for inference as well.
loaded_model = keras.models.load_model('text_classification.keras')
encoder_tokenizer = loaded_model.encoder_tokenizer
decoder_tokenizer = loaded_model.decoder_tokenizer

# Sample
# sample_text = "Giant pig fell into the swimming pool at his home in Ringwood, Hampshire. It took the efforts of a team of firefighters to winch him out of the water. A wayward horse also had to be rescued from a swimming pool in Sussex."
# print("Original:", sample_text)
# print("Summary:", summarize(sample_text))
sample_text = """
(CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third gold in Moscow as he anchored Jamaica to victory in the men\'s 4x100m relay. The fastest man in the world charged clear of United States rival Justin Gatlin as the Jamaican quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and Bolt won in 37.36 seconds. The U.S finished second in 37.56 seconds with Canada taking the bronze after Britain were disqualified for a faulty handover. The 26-year-old Bolt has now collected eight gold medals at world championships, equaling the record held by American trio Carl Lewis, Michael Johnson and Allyson Felix, not to mention the small matter of six Olympic titles. The relay triumph followed individual successes in the 100 and 200 meters in the Russian capital. "I\'m proud of myself and I\'ll continue to work to dominate for as long as possible," Bolt said, having previously expressed his intention to carry on until the 2016 Rio Olympics. Victory was never seriously in doubt once he got the baton safely in hand from Ashmeade, while Gatlin and the United States third leg runner Rakieem Salaam had problems. Gatlin strayed out of his lane as he struggled to get full control of their baton and was never able to get on terms with Bolt. Earlier, Jamaica\'s women underlined their dominance in the sprint events by winning the 4x100m relay gold, anchored by Shelly-Ann Fraser-Pryce, who like Bolt was completing a triple. Their quartet recorded a championship record of 41.29 seconds, well clear of France, who crossed the line in second place in 42.73 seconds. Defending champions, the United States, were initially back in the bronze medal position after losing time on the second handover between Alexandria Anderson and English Gardner, but promoted to silver when France were subsequently disqualified for an illegal handover. The British quartet, who were initially fourth, were promoted to the bronze which eluded their men\'s team. 
Fraser-Pryce, like Bolt aged 26, became the first woman to achieve three golds in the 100-200 and the relay. In other final action on the last day of the championships, France\'s Teddy Tamgho became the third man to leap over 18m in the triple jump, exceeding the mark by four centimeters to take gold. Germany\'s Christina Obergfoll finally took gold at global level in the women\'s javelin after five previous silvers, while Kenya\'s Asbel Kiprop easily won a tactical men\'s 1500m final. Kiprop\'s compatriot Eunice Jepkoech Sum was a surprise winner of the women\'s 800m. Bolt\'s final dash for golden glory brought the eight-day championship to a rousing finale, but while the hosts topped the medal table from the United States there was criticism of the poor attendances in the Luzhniki Stadium. There was further concern when their pole vault gold medalist Yelena Isinbayeva made controversial remarks in support of Russia\'s new laws, which make "the propagandizing of non-traditional sexual relations among minors" a criminal offense. She later attempted to clarify her comments, but there were renewed calls by gay rights groups for a boycott of the 2014 Winter Games in Sochi, the next major sports event in Russia.
"""
print("Original:", sample_text)
print("Summary:", summarize(model, encoder_tokenizer, decoder_tokenizer, sample_text))

sample_text = """
Vice President Dick Cheney will serve as acting president briefly Saturday while President Bush is anesthetized for a routine colonoscopy, White House spokesman Tony Snow said Friday. Bush is scheduled to have the medical procedure, expected to take about 2 1/2 hours, at the presidential retreat at Camp David, Maryland, Snow said. Bush's last colonoscopy was in June 2002, and no abnormalities were found, Snow said. The president's doctor had recommended a repeat procedure in about five years. The procedure will be supervised by Dr. Richard Tubb and conducted by a multidisciplinary team from the National Naval Medical Center in Bethesda, Maryland, Snow said. A colonoscopy is the most sensitive test for colon cancer, rectal cancer and polyps, small clumps of cells that can become cancerous, according to the Mayo Clinic. Small polyps may be removed during the procedure. Snow said that was the case when Bush had colonoscopies before becoming president. Snow himself is undergoing chemotherapy for cancer that began in his colon and spread to his liver. Snow told reporters he had a chemo session scheduled later Friday. Watch Snow talk about Bush's procedure and his own colon cancer » . "The president wants to encourage everybody to use surveillance," Snow said. The American Cancer Society recommends that people without high-risk factors or symptoms begin getting screened for signs of colorectal cancer at age 50. E-mail to a friend .
"""
print("Original:", sample_text)
print("Summary:", summarize(model, encoder_tokenizer, decoder_tokenizer, sample_text))

# Two short hand-written inputs, summarized one after the other.
for sample_text in (
    "There are two chickens in the garden.",
    "Two chickens fell into the swimming pool in the garden.",
):
    print("Original:", sample_text)
    print("Summary:", summarize(model, encoder_tokenizer, decoder_tokenizer, sample_text))


Original: 
(CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third gold in Moscow as he anchored Jamaica to victory in the men's 4x100m relay. The fastest man in the world charged clear of United States rival Justin Gatlin as the Jamaican quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and Bolt won in 37.36 seconds. The U.S finished second in 37.56 seconds with Canada taking the bronze after Britain were disqualified for a faulty handover. The 26-year-old Bolt has now collected eight gold medals at world championships, equaling the record held by American trio Carl Lewis, Michael Johnson and Allyson Felix, not to mention the small matter of six Olympic titles. The relay triumph followed individual successes in the 100 and 200 meters in the Russian capital. "I'm proud of myself and I'll continue to work to dominate for as long as possible," Bolt said, having previously expressed his intention to carry on until the 2016 Rio Olympics. Victory was n