In [26]:
import numpy as np
import json
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

In [27]:
# Model and training hyperparameters (everything a reader might tune).
VOCAB_SIZE = 50000  # max_tokens for the vectoriser; also embedding/softmax size
MAX_LEN = 128  # tokens per model input (the vectoriser emits MAX_LEN + 1)
EMBEDDING_DIM = 512
KEY_DIM = 512
N_HEADS = 4
FEED_FORWARD_DIM = 512
SEED = 42
BATCH_SIZE = 256
EPOCHS = 15

# SEED used to be defined but never applied. Seed Python's `random`, NumPy and
# TensorFlow together so weight initialisation, dataset shuffling, dropout and
# the np.random-based text sampling are repeatable from a fresh kernel
# (some GPU kernels may still be non-deterministic).
tf.keras.utils.set_random_seed(SEED)

In [28]:
# Read the corpus: every line of the file becomes one entry in `recipes`
# (the trailing newline of each line is kept).
with open("recipes.txt", mode="r", encoding="utf-8") as f:
    recipes = f.readlines()

In [29]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround each punctuation mark and newline in ``s`` with single spaces.

    Args:
        s: Text to process.

    Returns:
        The text with every character of ``string.punctuation`` (and every
        newline) separated from its neighbours by one space, and with runs of
        spaces collapsed to a single space.
    """
    # Escape the punctuation before placing it in the character class.
    # Unescaped, the backslash in string.punctuation merely escaped the `]`
    # that follows it, so a literal backslash was never matched or padded.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


# Apply the punctuation padding to every entry of `recipes`.
text_data = list(map(pad_punctuation, recipes))

In [30]:
# Convert to a Tensorflow Dataset.
# Shuffle individual recipes *before* batching: shuffling after .batch() only
# reorders whole batches, so every epoch would train on identical batch
# contents. The corpus is already in memory, so the shuffle buffer spans all
# of it for a uniform shuffle (max(..., 1) keeps the buffer size valid).
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .shuffle(buffer_size=max(len(text_data), 1), seed=SEED)
    .batch(BATCH_SIZE)
)

In [31]:
# Create a vectorisation layer: lower-casing is the only standardisation,
# tokens are mapped to integer ids, and each sequence is padded/truncated to
# MAX_LEN + 1 ids so it can later be split into an input and a shifted target.
vectorize_layer = layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    standardize="lower",
    output_sequence_length=MAX_LEN + 1,
    output_mode="int",
)

In [32]:
# Adapt the layer to the training set
# adapt() builds the vocabulary from the batches of text_ds; get_vocabulary()
# then returns the learned tokens as a list, kept in `vocab` so token ids can
# be mapped back to words by the text generator.
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

In [33]:
# Create the training set of recipes and the same text shifted by one word
def prepare_inputs(text):
    """Turn a batch of raw strings into ``(inputs, targets)`` token-id tensors.

    The targets are the inputs moved one position to the left, so position
    ``t`` of the input is paired with token ``t + 1`` as its label.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, axis=-1))
    return token_ids[:, :-1], token_ids[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [34]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a per-example causal mask of shape ``(batch_size, n_dest, n_src)``.

    Entry ``[b, i, j]`` is true (cast to ``dtype``) when
    ``i >= j - n_src + n_dest``, i.e. destination position ``i`` may attend to
    source position ``j``. The same mask is tiled for every batch element.
    """
    dest_idx = tf.range(n_dest)[:, None]
    src_idx = tf.range(n_src)
    allowed = dest_idx >= src_idx - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile multiples are [batch_size, 1, 1]: repeat along the batch axis only.
    tile_multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, tile_multiples)

class TransformerBlock(layers.Layer):
    """Single transformer block with causally masked self-attention.

    The block applies multi-head self-attention, dropout, a residual connection
    and layer normalisation, followed by a two-layer feed-forward network with
    the same dropout / residual / normalisation pattern.

    Args:
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        embed_dim: Width of the block's input and output features.
        ff_dim: Width of the hidden feed-forward layer.
        dropout_rate: Dropout rate applied after attention and after the
            feed-forward network.
        **kwargs: Standard ``layers.Layer`` arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them is required for
            ``from_config`` to rebuild the layer, because ``get_config()``
            includes those base-layer keys.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.2, **kwargs
    ):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block and return ``(block_output, attention_scores)``."""
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Self-attention: query and value are both `inputs`, and the causal
        # mask stops each position from attending to later positions.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        max_len: Number of distinct positions the position embedding supports.
        vocab_size: Number of distinct token ids the token embedding supports.
        embed_dim: Size of each embedding vector.
        **kwargs: Standard ``layers.Layer`` arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them is required for
            ``from_config`` to rebuild the layer, because ``get_config()``
            includes those base-layer keys.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Positions are 0 .. (length of the last axis of x) - 1.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        # The position embeddings broadcast across the leading (batch) axis.
        return x + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

In [35]:
# Assemble the model: token ids -> token + position embeddings -> one
# transformer block -> per-position softmax over the vocabulary.
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# The attention scores are exposed as a second model output so they are
# returned by predict(); their loss slot is None, so only the token
# predictions contribute to the training loss.
model = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
model.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])

In [36]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 512)         25665536  
 ng_1 (TokenAndPositionEmbe                                      
 dding)                                                          
                                                                 
 transformer_block_1 (Trans  ((None, None, 512),       4728320   
 formerBlock)                 (None, 4, None, None))             
                                                                 
 dense_5 (Dense)             (None, None, 50000)       25650000  
                                                                 
Total params: 56043856 (213.79 MB)
Trainable params: 56043856 (213.79 MB)
Non-trainable params: 0 (0.00 Byte)
_______________

In [37]:
class TextGenerator(callbacks.Callback):
    """Callback that samples text from the model and prints it after each epoch.

    Args:
        index_to_word: Sequence mapping a token id (its position) to the token
            string, e.g. the vectoriser's vocabulary list.
        top_k: Stored on the instance but not used by the sampling code; kept
            so existing callers that pass it continue to work.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialise the Callback base class so its own attributes exist
        # before Keras attaches the model.
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }
        self.top_k = top_k

    def sample_from(self, probs, temperature):
        """Sample one index from ``probs`` after temperature scaling.

        Args:
            probs: 1-D array of non-negative probabilities.
            temperature: Positive scaling factor; values below 1 sharpen the
                distribution, values above 1 flatten it.

        Returns:
            ``(sampled_index, rescaled_probs)``.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")
        # Work in float64 so the renormalised probabilities sum to 1 within
        # the tolerance np.random.choice enforces.
        probs = np.asarray(probs, dtype=np.float64) ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one sampled token at a time and print it.

        Generation stops once the sequence holds ``max_tokens`` tokens or
        token id 0 has been sampled.

        Args:
            start_prompt: Whitespace-separated prompt; words missing from the
                vocabulary are mapped to id 1.
            max_tokens: Upper bound on the total sequence length, prompt
                included.
            temperature: Sampling temperature passed to ``sample_from``.

        Returns:
            A list with one dict per generated token holding the prompt so far
            (``"prompt"``), the rescaled next-token probabilities
            (``"word_probs"``) and the attention scores of the last position
            for every head (``"atts"``).

        Raises:
            ValueError: If ``start_prompt`` contains no tokens.
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        if not start_tokens:
            # An empty sequence has no last position to predict from.
            raise ValueError("start_prompt must contain at least one token")
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # y[0][-1] is the next-token distribution at the last position.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample generated from the prompt "recipe for" after each epoch."""
        self.generate("recipe for", max_tokens=MAX_LEN, temperature=0.7)

In [38]:
# Create a model save checkpoint (weights only, written once per epoch)
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs to ./logs for TensorBoard
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Build the text-generation callback; it receives the vocabulary list to map
# between token ids and words (no prompt is tokenised at this point)
text_generator = TextGenerator(vocab)

In [39]:
# Train for EPOCHS epochs; the callbacks save the weights, write TensorBoard
# logs and print a sampled recipe at the end of every epoch.
model.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/15
generated text:
recipe for chocolate chip cookies | preheat oven to 350 degrees f . beat butter and sugars together until creamy . add in eggs , one at a time , beating well after each addition . sift flour , baking soda and salt . drop dough by rounded teaspoons onto greased cookie sheets . bake 10 - 12 minutes or until edges are light golden brown . cool on wire racks . 

Epoch 2/15
generated text:
recipe for baked chicken with cream sauce | in a small saucepan , bring the water to a boil , add the chicken , salt , pepper , onion , and garlic powder ; cook until tender , about 5 minutes . drain and set aside . in a large skillet , cook the chicken in the oil until no longer pink . add the peppers , garlic and ginger ; cook for 1 minute . add the tomatoes , tomatoes , chili powder and salt ; stir to combine . bring to a boil ; reduce heat and simmer , covered , for 30 minutes . place a small amount of the sauce in a saucepan , and cook for 5 minutes . stir in the chicken

E

<keras.src.callbacks.History at 0x7f8900365f10>

In [40]:
# Save the final model
# NOTE(review): this saves the whole model under ./models/model, not just the
# weights; reloading it presumably needs the custom layer classes from this
# notebook passed via custom_objects — confirm on the loading side.
model.save("./models/model")

INFO:tensorflow:Assets written to: ./models/model/assets


INFO:tensorflow:Assets written to: ./models/model/assets


In [41]:
# Sample a recipe continuing this prompt. `info` receives the per-token records
# returned by generate(): prompt so far, word probabilities, attention scores.
info = text_generator.generate(
    "recipe for vanilla whipped cream parfaits with berries |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for vanilla whipped cream parfaits with berries | combine 2 tablespoons of the sugar , 1 / 2 cup of the water in a medium saucepan and bring to a boil . reduce heat to medium - low and continue to cook until the mixture registers 160 degrees f , about 25 minutes . meanwhile , bring a large pot of lightly salted water to a boil ; add the angel food to the bottom of the saucepan ; remove from the heat and let cool . place the berries and citrus juices in a small , heavy saucepan and bring to a boil . reduce the heat to low and simmer for 5 minutes , or until it reaches the temperature of an instant - read



In [42]:
# Sample a recipe continuing this prompt; `info` is overwritten with the
# per-token records returned by generate().
info = text_generator.generate(
    "recipe for chocolate chip cookies |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for chocolate chip cookies | preheat oven to 350 degrees f ( 175 degrees c ) . cream together margarine , cream cheese , sugar , and vanilla . add flour , salt and baking soda and stir until blended . add chocolate chips . drop by rounded teaspoonfuls onto ungreased cookie sheets . bake 8 to 10 minutes in preheated oven . cool on baking sheets for about 2 minutes before removing to wire racks to cool completely . 



In [43]:
# Sample a recipe continuing this prompt; `info` is overwritten with the
# per-token records returned by generate().
info = text_generator.generate(
    "recipe for chicken noodle soup |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for chicken noodle soup | place the chicken in a large dutch oven . add the chicken stock , water , and spices . bring to a boil . skim off the foam . reduce the heat , cover , and simmer for 1 1 / 2 hours . remove the chicken from the bones , pull the chicken out of the bones and discard them . return the broth to the pot . bring to a boil . reduce the heat to low . simmer , uncovered , for 30 minutes . strain and skim off any foam that rises to the surface . discard the bay leaf . skim off any fat from the broth . add the chicken back into the pot . add



In [44]:
# Sample a recipe continuing this prompt; `info` is overwritten with the
# per-token records returned by generate().
info = text_generator.generate(
    "recipe for sugar cookies |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for sugar cookies | preheat oven to 375f grease baking sheets . stir together flour , baking powder , baking soda , cinnamon , nutmeg , and cloves . in a large bowl , using a wooden spoon , stir in chocolate chips until smooth , 2 to 3 minutes . drop rounded tablespoonfuls of dough onto prepared baking sheets , spacing cookies 2 inches apart . bake cookies until edges begin to brown , 5 to 6 minutes . cool on baking sheets for 1 minute , watching closely so cookies don ' t move them around to cooling racks . cookies will harden as they cool . 



In [45]:
# Sample a recipe continuing this prompt; `info` is overwritten with the
# per-token records returned by generate().
info = text_generator.generate(
    "recipe for grilled cheese |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for grilled cheese | in a medium saucepan , combine the cottage cheese , butter , lemon juice , and mustard , and pepper . heat over medium heat until butter is melted . pour in the cheese mixture . stir until it is well combined . cover the pan and let it melt . spread the mixture over the cheese layer . sprinkle with the remaining cheese . cover and refrigerate overnight . remove the pan from the refrigerator and let it come to room temperature . preheat the oven to 350 degrees f . spread the onions on a rimmed baking sheet . bake for 1 hour , until they are crispy and crispy , about 20 minutes . 



In [46]:
# Sample a recipe continuing this prompt; `info` is overwritten with the
# per-token records returned by generate().
info = text_generator.generate(
    "recipe for chicken breast |", max_tokens=MAX_LEN, temperature=0.7
)


generated text:
recipe for chicken breast | preheat oven to 350 degrees . grease a 9x13 inch baking pan or pyrex dish . in a large bowl , combine the soup , sour cream , and parmesan cheese . mix until blended . add chicken and mix well . pour into prepared baking dish . season with salt and pepper . sprinkle cheese over top . bake 45 minutes to an hour , or until cheese is melted . 

