In [None]:
import numpy as np
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

In [2]:
# --- Tokenisation ---
VOCAB_SIZE = 50_000  # vocabulary cap for the vectoriser and width of the output layer
MAX_LEN = 128  # tokens per training sequence (the vectoriser keeps MAX_LEN + 1)

# --- Model architecture ---
EMBEDDING_DIM = 256  # size of the token / position embeddings
N_HEADS = 4  # attention heads in the transformer block
KEY_DIM = 256  # per-head key dimension passed to MultiHeadAttention
FEED_FORWARD_DIM = 256  # hidden width of the block's feed-forward layer

# --- Training ---
BATCH_SIZE = 256
EPOCHS = 5
SEED = 42  # NOTE(review): not referenced by any of the visible cells

In [3]:
# Read the corpus: every line of recipes.txt becomes one training example
# (line endings are kept as read).
with open("recipes.txt", mode="r", encoding="utf-8") as recipe_file:
    recipes = recipe_file.readlines()

In [4]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround every punctuation character and newline with single spaces.

    This lets a whitespace tokenizer treat each punctuation mark as its own
    token. Runs of spaces produced by the padding (or already present in the
    input) are collapsed to a single space.

    Args:
        s: The text to pad.

    Returns:
        The padded text.
    """
    # re.escape is required: string.punctuation contains `\`, `]`, `^` and `-`,
    # which are special inside a character class. Unescaped, the `\]` pair is
    # read as an escaped `]` and the backslash itself is never matched.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


# Apply the punctuation padding to every recipe line.
text_data = list(map(pad_punctuation, recipes))

In [None]:
# Wrap the padded recipe strings in a tf.data pipeline.
# Note the order: batches of BATCH_SIZE are formed first, and the shuffle
# (buffer of 1000 elements) is then applied to the batched dataset.
recipe_strings_ds = tf.data.Dataset.from_tensor_slices(text_data)
text_ds = recipe_strings_ds.batch(BATCH_SIZE).shuffle(1000)

In [6]:
# Build the text-to-integer vectorisation layer.
# One token more than MAX_LEN is kept per recipe so that the sequence can be
# split into inputs and next-token targets of MAX_LEN tokens each.
vectorizer_options = dict(
    standardize="lower",
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)
vectorize_layer = layers.TextVectorization(**vectorizer_options)

In [7]:
# Adapt the layer to the training set (the batched, padded recipe text in text_ds)
vectorize_layer.adapt(text_ds)
# List of vocabulary tokens; TextGenerator uses positions in this list as token ids.
# NOTE(review): the generator treats id 0 as "stop" and id 1 as "unknown word" —
# presumably the layer's padding and out-of-vocabulary entries; confirm.
vocab = vectorize_layer.get_vocabulary()

In [8]:
# Build (input, target) pairs: the target is the input shifted left by one token
def prepare_inputs(text):
    """Vectorise a batch of recipe strings into next-token training pairs.

    Args:
        text: A batch of strings taken from ``text_ds``.

    Returns:
        A tuple ``(x, y)`` sliced from the same token matrix: ``x`` drops the
        last token of every row and ``y`` drops the first, so ``y`` holds the
        token that follows each position of ``x``.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, axis=-1))
    return token_ids[:, :-1], token_ids[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [9]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal mask replicated once per batch element.

    Entry ``[b, i, j]`` is true (cast to ``dtype``) when
    ``i >= j - n_src + n_dest``; with ``n_dest == n_src`` that is ``i >= j``.

    Args:
        batch_size: Scalar number of copies of the mask to produce.
        n_dest: Number of destination (query) positions.
        n_src: Number of source (key) positions.
        dtype: Dtype the boolean mask is cast to.

    Returns:
        A tensor of shape ``[batch_size, n_dest, n_src]``.
    """
    dest_idx = tf.range(n_dest)[:, tf.newaxis]
    src_idx = tf.range(n_src)
    allowed = dest_idx >= src_idx - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile only along the leading (batch) axis.
    multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, multiples)

class TransformerBlock(layers.Layer):
    """Causal self-attention followed by a two-layer feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalisation.

    Args:
        num_heads: Number of attention heads.
        key_dim: Per-head key dimension passed to ``MultiHeadAttention``.
        embed_dim: Width of the block's output; the residual additions require
            the input to have the same width.
        ff_dim: Width of the hidden feed-forward layer.
        dropout_rate: Dropout rate used after attention and after the
            feed-forward network.
        **kwargs: Standard base-layer options (``name``, ``dtype``,
            ``trainable``, ...) forwarded to ``layers.Layer``.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.2, **kwargs
    ):
        # get_config() includes the base-layer entries, so the constructor has
        # to accept them for from_config(get_config()) to round-trip.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply the block to a batch of embedded sequences.

        Args:
            inputs: Tensor whose first two axes are (batch, sequence).

        Returns:
            A tuple ``(output, attention_scores)``: the block output and the
            attention scores returned by the attention layer.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Self-attention: the sequence attends to itself under a causal mask.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the constructor arguments merged into the base-layer config."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        max_len: Number of rows in the position-embedding table; input
            sequences look up positions ``0 .. seq_len - 1`` in it.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Standard base-layer options (``name``, ``dtype``,
            ``trainable``, ...) forwarded to ``layers.Layer``.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        # get_config() includes the base-layer entries, so the constructor has
        # to accept them for from_config(get_config()) to round-trip.
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids and add the embedding of each position.

        Args:
            x: Integer token ids; the last axis is the sequence axis.

        Returns:
            Token embeddings plus position embeddings (the position term is
            broadcast over the leading axes).
        """
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments merged into the base-layer config."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

In [10]:
# Instantiate the layers first (same order as they are applied), then wire them up.
inputs = layers.Input(shape=(None,), dtype=tf.int32)
embedding_layer = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)
transformer_block = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)
output_layer = layers.Dense(VOCAB_SIZE, activation="softmax")

x = embedding_layer(inputs)
x, attention_scores = transformer_block(x)
outputs = output_layer(x)

# The model returns the next-token distribution and the attention scores;
# only the first output has a loss attached.
model = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
model.compile(
    optimizer="adam",
    loss=[losses.SparseCategoricalCrossentropy(), None],
)

In [11]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 256)         12832768  
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_block (Transfo  ((None, None, 256),       1184512   
 rmerBlock)                   (None, 4, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 50000)       12850000  
                                                                 
Total params: 26867280 (102.49 MB)
Trainable params: 26867280 (102.49 MB)
Non-trainable params: 0 (0.00 Byte)
_________________

In [12]:
class TextGenerator(callbacks.Callback):
    """Keras callback that samples text from the model being trained.

    A sample starting with "recipe for" is printed at the end of every epoch.
    ``generate`` can also be called directly once the callback has been
    attached to a model (it reads ``self.model``).

    Args:
        index_to_word: Sequence of vocabulary tokens; a token's position in
            the sequence is used as its integer id.
        top_k: Stored on the instance but not used by the sampling code.
    """

    def __init__(self, index_to_word, top_k=10):
        # Let the Keras base class initialise its own attributes.
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }
        # Previously accepted and silently discarded; kept for compatibility.
        self.top_k = top_k

    def sample_from(self, probs, temperature):
        """Draw one token id from ``probs`` after temperature scaling.

        Args:
            probs: 1-D array of token probabilities.
            temperature: Positive scaling factor; each probability is raised
                to ``1 / temperature`` and the result is renormalised.

        Returns:
            A tuple ``(token_id, scaled_probs)``.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError(
                f"temperature must be > 0, got {temperature!r}"
            )
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one sampled token at a time and print it.

        Sampling stops once the sequence holds ``max_tokens`` tokens or token
        id 0 is drawn. Prompt words missing from the vocabulary are mapped to
        id 1.

        Args:
            start_prompt: Whitespace-separated prompt text.
            max_tokens: Upper bound on the total sequence length, prompt
                included.
            temperature: Sampling temperature passed to ``sample_from``.

        Returns:
            A list with one dict per generated token holding the prompt so
            far (``"prompt"``), the scaled distribution it was drawn from
            (``"word_probs"``) and the attention scores of the last position
            (``"atts"``).

        Raises:
            ValueError: If ``start_prompt`` contains no tokens.
        """
        start_tokens = [
            self.word_to_index.get(word, 1) for word in start_prompt.split()
        ]
        if not start_tokens:
            # The model needs at least one position to predict from.
            raise ValueError("start_prompt must contain at least one token")
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the distribution at the last position is sampled.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample recipe after every training epoch."""
        self.generate("recipe for", max_tokens=MAX_LEN, temperature=0.7)

In [13]:
# Create a model save checkpoint (weights only, written every epoch)
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs for TensorBoard to ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Callback that prints a generated sample at the end of each epoch;
# it is given the vocabulary so it can map between words and token ids
text_generator = TextGenerator(vocab)

In [14]:
# Train for EPOCHS epochs with three callbacks: weights checkpoint,
# TensorBoard logging, and a generated sample recipe printed after each epoch.
model.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5


2023-12-09 18:05:05.245251: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-09 18:05:08.728199: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f883d5e4ca0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-09 18:05:08.728222: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A5000, Compute Capability 8.6
2023-12-09 18:05:08.731640: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-12-09 18:05:09.004562: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8906
I0000 00:00:1702163109.046433 3777595 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


generated text:
recipe for vegetable stock | blend the stock , vinegar , and olive oil in a large pot . bring a large pot of water to a boil . add the onion and cook for 5 minutes , and add the carrots , zucchini , and cook for a further two minutes . add the tomatoes and cook for another 10 minutes . add the onion and garlic and cook for a further 5 minutes . add the stock and cook for another 10 minutes . add the stock , passata , wine , salt , and pepper . bring to a boil and then reduce the heat to medium - low . simmer for 20 - 25 minutes . stir in the stock . stir in the

Epoch 2/5
generated text:
recipe for pickled eggs | start by using about 1 / 2 of the salt and pepper the eggs . then gradually add to the potatoes and boil for about 10 minutes . while the eggs are cooking , you will need to activate the eggs . add the eggs and the eggs , and beat until they are well combined . the eggs will be thick . ( you can use a little less vinegar ) and 1 / 4 cup of water in a pan and se

<keras.src.callbacks.History at 0x7f89801bf2d0>

In [15]:
# Save the final model to ./models/model (the weights-only checkpoints
# are written separately by the checkpoint callback during training)
model.save("./models/model")

INFO:tensorflow:Assets written to: ./models/model/assets


INFO:tensorflow:Assets written to: ./models/model/assets


In [16]:
# Sample a recipe for a dessert prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for vanilla whipped cream parfaits with berries |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for vanilla whipped cream parfaits with berries | prepare pudding according to directions on package . in a medium saucepan , combine milk , vanilla and sugar ; cook over medium heat , stirring constantly , until sugar is dissolved . remove from heat ; stir in vanilla extract and 6 tablespoons of the whipped cream . place half of the whipped cream mixture in the center of each half . gently fold in half and half , scoop half of the mixture into a parfait glass . repeat layers until all of the half - half and half . repeat layers . top with half the remaining whipped cream and remaining whipped cream . let stand for at least 20 minutes . 



In [17]:
# Sample a recipe for a cookie prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for chocolate chip cookies |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for chocolate chip cookies | cream butter and sugars with an electric mixer . add egg and vanilla . add sifted flour and baking soda , vanilla , and salt . mix well . add oats and chocolate chips . mix in chips . drop by teaspoonfuls onto ungreased cookie sheet . bake at 350f for 10 - 12 minutes . cool on wire rack . 



In [18]:
# Sample a recipe for a soup prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for chicken noodle soup |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for chicken noodle soup | saute chicken in the oil until soft and translucent . add the chicken stock . cook until the chicken is just cooked through ( 8 to 10 minutes ) . add the remaining ingredients and bring to a boil , then reduce to a simmer and cook uncovered for 30 minutes . add the chicken broth and simmer for 30 - 45 minutes until chicken is tender and slightly thickened . remove chicken and allow to cool . if you want to serve chicken salad . 



In [19]:
# Sample a recipe for a second cookie prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for sugar cookies |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for sugar cookies | preheat oven to 350 degrees f . line baking sheet with parchment paper . sift together flour , baking powder , baking soda , and salt . cream butter with a large eggs , vanilla , and sugar . beat in the eggs one at a time . stir in the vanilla . beat in the flour mixture . add the buttermilk and vanilla ; beat until smooth . stir in the flour mixture . drop by spoonfuls onto prepared baking sheet . bake for 10 to 12 minutes , or until a toothpick inserted in the center comes out clean . cool in pan on a wire rack for 5 minutes . remove from pan . sprinkle with powdered sugar . 



In [20]:
# Sample a recipe for a sandwich prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for grilled cheese |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for grilled cheese | mix all ingredients together and press onto bottom and up side of a greased grill pan . bake at 350 degrees for 15 to 20 minutes . 



In [21]:
# Sample a recipe for a main-course prompt; `info` keeps the per-token details.
info = text_generator.generate(
    start_prompt="recipe for chicken breast |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for chicken breast | place chicken breast in a large pot over medium heat , add the olive oil , season with salt , pepper , cayenne pepper and cumin . cover and cook over low heat , stirring occasionally , until the chicken is cooked through and the water is absorbed , about 15 minutes . remove from the heat . add the wine and stir to combine . cover the pan , and cook until the chicken is cooked through , about 10 minutes . stir in the wine , salt , and pepper . cover and simmer until the chicken is tender , about 20 minutes . ( this will take about 15 minutes ) . serve with lemon - lime juice , lemon

