# Generative model

In [1]:
import numpy as np 
def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution) / temperature
    distribution = np.exp(distribution)
    return distribution / np.sum(distribution)

In [2]:
import tensorflow as tf 
from tensorflow import keras
tf.TF_ENABLE_ONEDNN_OPTS=0
dataset = keras.utils.text_dataset_from_directory(
    directory="aclImdb", label_mode=None, batch_size=256)
dataset = dataset.map(lambda x: tf.strings.regex_replace(x, "<br />", " "))

2024-11-27 12:03:58.938832: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-27 12:03:59.027954: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732701839.063509    6152 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732701839.073777    6152 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-27 12:03:59.156848: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

Found 100006 files.


W0000 00:00:1732701841.829185    6152 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [3]:
from tensorflow.keras import layers

keras.config.disable_traceback_filtering() 
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):  
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(                          
            input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim)              
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim
  
    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions            def compute_mask(self, inputs, mask=None):
        return tf.math.not_equal(inputs, 0)    return inputs != 0
 
    def get_config(self):
        config = super().get_config()
        config.update({
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        })
        return config


class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True                     
  
    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    def get_causal_attention_mask(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")                           
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))    
        mult = tf.concat(                                               
            [tf.expand_dims(batch_size, -1),                            
                tf.constant([1, 1], dtype=tf.int32)], axis=0)              
        return tf.tile(mask, mult)          
    
    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        padding_mask = None       
        if mask is not None:                                       
            padding_mask = tf.cast(                                
                mask[:, tf.newaxis, :], dtype="int32")             
            padding_mask = tf.minimum(padding_mask, causal_mask)   
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)                            
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,                           
        )
        attention_output_2 = self.layernorm_2(
            attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)

In [4]:
from tensorflow.keras.layers import TextVectorization
  
sequence_length = 100 
vocab_size = 15000                            
text_vectorization = TextVectorization(
    max_tokens=vocab_size,                
    output_mode="int",                        
    output_sequence_length=sequence_length,   
)
text_vectorization.adapt(dataset)

2024-11-27 12:04:13.550988: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [5]:
def prepare_lm_dataset(text_batch):
    vectorized_sequences = text_vectorization(text_batch)    
    x = vectorized_sequences[:, :-1]                         
    y = vectorized_sequences[:, 1:]                          
    return x, y
  
lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)

In [7]:
embed_dim = 256 
latent_dim = 2048 
num_heads = 2 
  
inputs = keras.Input(shape=(None,), dtype="int64")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)
x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, x)
outputs = layers.Dense(vocab_size, activation="softmax")(x)       
model = keras.Model(inputs, outputs)
model.compile(loss="sparse_categorical_crossentropy", optimizer="rmsprop")

In [11]:
tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))    
  
def sample_next(predictions, temperature=1.0):                         
    predictions = np.asarray(predictions).astype("float64")
    predictions = np.log(predictions) / temperature
    exp_preds = np.exp(predictions)
    predictions = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, predictions, 1)
    return np.argmax(probas)
  
class TextGenerator(keras.callbacks.Callback):
    def __init__(self,
                 prompt,                                               
                 generate_length,                                      
                 model_input_length,
                 temperatures=(1.,),                                   
                 print_freq=1):
        self.prompt = prompt
        self.generate_length = generate_length
        self.model_input_length = model_input_length
        self.temperatures = temperatures
        self.print_freq = print_freq
  
    def on_epoch_end(self, epoch, logs=None):
        if (epoch + 1) % self.print_freq != 0:
            return
        for temperature in self.temperatures:
            print("== Generating with temperature", temperature)
            sentence = self.prompt                                     
            for i in range(self.generate_length):
                tokenized_sentence = text_vectorization([sentence])    
                predictions = self.model(tokenized_sentence)           
                next_token = sample_next(predictions[0, i, :])         
                sampled_token = tokens_index[next_token]               
                sentence += " " + sampled_token                        
            print(sentence)
  
prompt = "This movie"

from keras.callbacks import ModelCheckpoint

callbacks = [
    ModelCheckpoint(filepath="text_gen.keras", save_best_only=True, monitor="loss"),
    TextGenerator(prompt, generate_length=50,
    model_input_length=sequence_length,
    temperatures=(0.2, 0.5, 0.7, 1., 1.5)),
]     

In [12]:
model = keras.models.load_model("text_gen.keras", custom_objects={"PositionalEmbedding": PositionalEmbedding, "TransformerDecoder": TransformerDecoder})

In [18]:
sentence = "This horror movie was"                                  
for i in range(25):
    tokenized_sentence = text_vectorization([sentence])
    predictions = model(tokenized_sentence)  
    next_token = sample_next(predictions[0, i, :])         
    sampled_token = tokens_index[next_token]               
    sentence += " " + sampled_token                        
print(sentence)


This horror movie was movie movie was definitely was was was was was is was picked disappointed totally not  on scared stupid william keller the was was he


In [9]:
model.fit(lm_dataset, epochs=10, callbacks=callbacks)

Epoch 1/10




[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - loss: 6.3881== Generating with temperature 0.2
This movie is when born war in this the film rental festival ever ive be always written shot we it see was those turned beautiful out and at tv the series chance it to far have too been much a right 10 through crap the should perry inside show brand everything g
== Generating with temperature 0.5
This movie is was threatened all by let the the things hour have but been the filmed other in big his surviving life fantastic because back she in bitter special 19th effects queen [UNK] concorde having as laughable is usually wrong satisfying with satire years male commandments product people that if fourteen
== Generating with temperature 0.7
This movie is is the agreeing meantime although shows its good claudette way has quite written fact but that something their fun time if he you didnt ask describe if any you genuinely must device see stupid the acting actors youve b

<keras.src.callbacks.history.History at 0x788da6fd7950>