In [1]:
# !wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
# !tar -xf aclImdb_v1.tar.gz

In [2]:
!pip install keras_nlp



In [3]:
import platform

import numpy as np

import tensorflow as tf
import keras
import keras_nlp

Using TensorFlow backend


In [4]:
# Sometimes, the learning with tensorflow-metal does not converge.
# @see
# - https://forums.developer.apple.com/forums/thread/736187
# - https://forums.developer.apple.com/forums/thread/701056
# - https://forums.developer.apple.com/forums/thread/742157
# Therefore, it might be best to execute learning without metal and execute learning again in the last tuning phase.
# if platform.system() == "Darwin" and platform.processor() == "arm":
#     print("This is running on M1/M2 mac.")
#     tf.config.set_visible_devices([], 'GPU')
# else:
#     print("This is not running on M1/M2 mac.")

***

In [5]:
SEQUENCE_LENGTH = 50
MAX_TOKENS = 15000
EMBEDDING_DIM = 256
INTERMIDIATE_DIM = 2048
NUM_HEADS = 2
LEARNING_RATE = 2e-6 #  changed from 2e-5

In [6]:
dataset = keras.utils.text_dataset_from_directory(
    directory="aclImdb",
    label_mode=None,
    batch_size=256
)
dataset = dataset.map(
    lambda x: tf.strings.regex_replace(x, "<br />", " ")
)
dataset

Found 100006 files belonging to 1 classes.


2024-05-28 23:13:23.159805: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-05-28 23:13:23.159857: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-05-28 23:13:23.159866: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-05-28 23:13:23.159927: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-28 23:13:23.159974: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


<_MapDataset element_spec=TensorSpec(shape=(None,), dtype=tf.string, name=None)>

In [7]:
text_vectorization = keras.layers.TextVectorization(
    max_tokens=MAX_TOKENS,
    output_mode="int",
    output_sequence_length=SEQUENCE_LENGTH,
)
text_vectorization.adapt(dataset)

2024-05-28 23:13:23.328780: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


In [8]:
def prepare_lm_dataset(text_batch):
    vectorized_sequences = text_vectorization(text_batch)
    x = vectorized_sequences[:, :-1]
    y = vectorized_sequences[:, 1:]
    return x, y

lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)

In [9]:
if platform.system() == "Darwin" and platform.processor() == "arm":
    """
    Apple Silicon mac shows tht following warning.
    WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs,
    please use the legacy Keras optimizer instead,
    located at `tf.keras.optimizers.legacy.Adam`
    Therefore, keras.optimizers.legacy.Adam is used.
    """
    optimizer = keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE)
else:
    optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
  
inputs = keras.Input(shape=(None,), dtype="int64")
x = keras_nlp.layers.TokenAndPositionEmbedding(
    vocabulary_size=MAX_TOKENS,
    sequence_length=SEQUENCE_LENGTH,
    embedding_dim=EMBEDDING_DIM,
)(inputs)
x = keras_nlp.layers.TransformerDecoder(
    intermediate_dim=INTERMIDIATE_DIM,
    num_heads=NUM_HEADS
)(x, x)
outputs = keras.layers.Dense(
    MAX_TOKENS,
    activation="softmax"
)(x)
model = keras.Model(inputs, outputs)
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 token_and_position_embeddi  (None, None, 256)            3852800   ['input_1[0][0]']             
 ng (TokenAndPositionEmbedd                                                                       
 ing)                                                                                             
                                                                                                  
 transformer_decoder (Trans  (None, None, 256)            1578752   ['token_and_position_embedding
 formerDecoder)                                                     [0][0]',                  

In [10]:
tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))

def sample_next(predictions, temperature=1.0):
    predictions = np.asarray(predictions).astype("float64")
    predictions = np.log(predictions) / temperature
    exp_preds = np.exp(predictions)
    predictions = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, predictions, 1)
    return np.argmax(probas)

class TextGenerator(keras.callbacks.Callback):
    def __init__(
            self,
            prompt,
            generate_length,
            model_input_length,
            temperatures=(1.,),
            print_freq=1):
        self.prompt = prompt
        self.generate_length = generate_length
        self.model_input_length = model_input_length
        self.temperatures = temperatures
        self.print_freq = print_freq
  
    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.print_freq != 0:
            return
        for temperature in self.temperatures:
            sentence = self.prompt
            for i in range(self.generate_length):
                tokenized_sentence = text_vectorization([sentence])
                predictions = self.model(tokenized_sentence)
                next_token = sample_next(predictions[0, i, :])
                sampled_token = tokens_index[next_token]
                sentence += " " + sampled_token
            print(f"\nTemperature {temperature}: {sentence}")

prompt = "This movie" 
text_gen_callback = TextGenerator(
    prompt,
    generate_length=50,
    model_input_length=SEQUENCE_LENGTH,
    temperatures=(0., 0.2, 0.5, 0.7, 1., 1.5)
)

class EpochModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
    def __init__(
        self,
        filepath,
        frequency=1,
        monitor='val_loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        options=None,
        **kwargs):
        super(EpochModelCheckpoint, self).__init__(
            filepath,
            monitor,
            verbose,
            save_best_only,
            save_weights_only,
            mode,
            "epoch",
            options
        )
        self.epochs_since_last_save = 0
        self.frequency = frequency

    def on_epoch_end(self, epoch, logs=None):
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save % self.frequency == 0:
            self._save_model(epoch=epoch, batch=None, logs=logs)

    def on_train_batch_end(self, batch, logs=None):
        pass

model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath='example_transformer_next_word_prediction.keras',
    monitor='loss',
    mode='min',
    save_best_only=False,
)

In [11]:
model.fit(
    lm_dataset,
    epochs=200,
    callbacks=[
        model_checkpoint_callback,
        text_gen_callback,
    ]
)

Epoch 1/200
Temperature 0.0: This movie summers prayer celebration iraqi baddies black greatness humiliating wai rutger massive limp student flow ariel midway forum stern snap paths teasing demonstration disappointed manuel commanding bam advise spider passengers tragedy repulsed prose jordan handheld hatred abbey graduation neighborhood angel pauly lackluster realization bathtub shouldve trains moll dumbed since forster cal

Temperature 0.2: This movie wanting rocked shallow define trainer attractive northwest paresh cracks fulllength brow ursula gaming 2007 jagger culp porter action elevate mocked marry hybrid partying chan gale consequently company shaun disgusting undertaker counterpart walters re scale hasnt documentaries painfully truthful tag regrets lie dying bearable lip wherein scare embarrassing worldly motivated dangling

Temperature 0.5: This movie moreau net redgrave yen mcdonald inheritance junkies crusades coburn isabelle lottery heavenly zone transparent woven truthful

<keras.src.callbacks.History at 0x1759702b0>

In [12]:
model.save('last_example_transformer_next_word_prediction.keras')