In [6]:
import numpy as np
import json
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

In [7]:
# Hyperparameters and run configuration shared by the cells below.
VOCAB_SIZE = 15000  # max_tokens of the TextVectorization layer; also the embedding input size and softmax width
MAX_LEN = 256  # tokens per training input; rows are vectorized to MAX_LEN + 1 and then shifted by one
EMBEDDING_DIM = 256  # width of the token/position embeddings and of the Transformer block output
KEY_DIM = 256  # key_dim passed to MultiHeadAttention (size of each attention head)
N_HEADS = 4  # number of attention heads
FEED_FORWARD_DIM = 256  # units of the first (ReLU) dense layer in the Transformer block
SEED = 42  # NOTE(review): not referenced by any cell visible here - confirm whether seeding was intended
BATCH_SIZE = 64  # recipes per batch in the tf.data pipeline
EPOCHS = 5  # number of epochs passed to model.fit

In [8]:
# # Load the full dataset
# with open("full_format_recipes.json") as json_data:
#     recipes = json.load(json_data)
    
# # Filter the dataset
# filtered_data = [
#     "Recipe for " + x["title"] + " | " + " ".join(x["directions"])
#     for x in recipes
#     if "title" in x
#     and x["title"] is not None
#     and "directions" in x
#     and x["directions"] is not None
# ]

In [9]:
# Read the corpus into a list with one entry per line of the file
# (each entry keeps its trailing newline).
with open("recipes.txt", mode="r", encoding="utf-8") as recipe_file:
    recipes = recipe_file.readlines()

In [10]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround each punctuation character and newline in `s` with spaces.

    Runs of spaces produced by the padding (or already present in the
    text) are then collapsed to a single space, so that splitting on
    whitespace yields every punctuation mark as its own token.

    Args:
        s: The text to pad.

    Returns:
        The padded text. A mark at the start or end of `s` keeps the single
        leading/trailing space that the padding added.
    """
    # re.escape is required here: string.punctuation contains `\`, `]`, `^`
    # and `-`, which are special inside a character class. Left unescaped,
    # the `\]` pair is parsed as an escaped `]`, so a backslash in the text
    # was never padded.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


# Apply the punctuation padding to every recipe line
text_data = list(map(pad_punctuation, recipes))

In [11]:
# Inspect the first padded recipe
text_data[0]

'Recipe for No - Bake Nut Cookies | In a heavy 2 - quart saucepan , mix brown sugar , nuts , evaporated milk and butter or margarine . Stir over medium heat until mixture bubbles all over top . Boil and stir 5 minutes more . Take off heat . Stir in vanilla and cereal ; mix well . Using 2 teaspoons , drop and shape into 30 clusters on wax paper . Let stand until firm , about 30 minutes . \n '

In [12]:
# Convert to a Tensorflow Dataset
# NOTE: shuffle() is applied after batch(), so its buffer of 1000 holds whole
# batches; the order of the batches is shuffled, not the recipes inside them.
recipe_ds = tf.data.Dataset.from_tensor_slices(text_data)
text_ds = recipe_ds.batch(BATCH_SIZE).shuffle(1000)

2023-12-07 20:21:43.257510: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-07 20:21:43.291046: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-07 20:21:43.291235: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [13]:
# Create a vectorisation layer
vectorize_layer = layers.TextVectorization(
    standardize="lower",  # lowercase only; punctuation was already split off by pad_punctuation
    max_tokens=VOCAB_SIZE,
    output_mode="int",  # emit integer token ids
    # One extra position, so that prepare_inputs can slice an input and a
    # one-step-shifted target (each MAX_LEN long) out of the same row.
    output_sequence_length=MAX_LEN + 1,
)

In [14]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
# Vocabulary list indexed by token id; TextGenerator uses it to map ids back to words
vocab = vectorize_layer.get_vocabulary()

In [15]:
# Create the training set of recipes and the same text shifted by one word
def prepare_inputs(text):
    """Turn a batch of recipe strings into (input, target) token-id tensors.

    The batch is vectorized with `vectorize_layer`; the input drops the last
    token of every row and the target drops the first, so the target at each
    position is the token that follows the input at that position.

    Args:
        text: A batch of strings from `text_ds`.

    Returns:
        Tuple `(x, y)` of token-id tensors, each one column shorter than
        the vectorized batch.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, -1))
    model_inputs = token_ids[:, :-1]
    next_tokens = token_ids[:, 1:]
    return model_inputs, next_tokens


train_ds = text_ds.map(prepare_inputs)

In [16]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal attention mask and repeat it for every batch item.

    Entry `[b, i, j]` is true (before the cast) when
    `i >= j - n_src + n_dest`, i.e. destination position `i` may attend to
    source position `j`. With `n_dest == n_src` this means `j <= i`: a
    position sees itself and everything before it.

    Args:
        batch_size: Number of copies of the mask to produce.
        n_dest: Number of destination (query) positions.
        n_src: Number of source (key) positions.
        dtype: dtype the boolean mask is cast to.

    Returns:
        Tensor of shape `[batch_size, n_dest, n_src]` and dtype `dtype`.
    """
    dest_positions = tf.range(n_dest)[:, None]
    src_positions = tf.range(n_src)
    allowed = dest_positions >= src_positions - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile multiples [batch_size, 1, 1]: repeat along the batch axis only.
    tile_multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
        0,
    )
    return tf.tile(single_mask, tile_multiples)

class TransformerBlock(layers.Layer):
    """Single Transformer block with causal self-attention.

    Applies masked multi-head self-attention and a two-layer feed-forward
    network, each followed by dropout, a residual connection and layer
    normalization.

    Args:
        num_heads: Number of attention heads.
        key_dim: `key_dim` passed to `MultiHeadAttention`.
        embed_dim: Size of the block's input/output feature dimension.
        ff_dim: Units of the first (ReLU) feed-forward layer.
        dropout_rate: Rate used by both dropout layers.
        **kwargs: Standard Keras layer options (`name`, `trainable`,
            `dtype`, ...), forwarded to the base `Layer`.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.2, **kwargs
    ):
        # The base config returned by get_config() contains keys such as
        # `name`, `trainable` and `dtype`; accepting and forwarding them is
        # what lets from_config(get_config()) rebuild the layer.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block on `inputs`.

        Args:
            inputs: Tensor whose first two axes are read as batch and
                sequence length; it is added to the attention output, so
                its last axis must match `embed_dim`.

        Returns:
            Tuple `(output, attention_scores)`: the block output and the
            attention scores returned by `MultiHeadAttention`.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Self-attention: query and key lengths are both seq_len.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        max_len: Number of rows in the position embedding table; inputs
            longer than this have positions outside the table.
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Size of both embeddings.
        **kwargs: Standard Keras layer options (`name`, `trainable`,
            `dtype`, ...), forwarded to the base `Layer`.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        # The base config returned by get_config() contains keys such as
        # `name`, `trainable` and `dtype`; accepting and forwarding them is
        # what lets from_config(get_config()) rebuild the layer.
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in `x` and add the embedding of each position.

        Args:
            x: Integer token ids; the last axis is the sequence axis.

        Returns:
            Token embeddings plus position embeddings (the position
            embeddings are broadcast over the leading axes).
        """
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

In [17]:
# Token ids of any sequence length (the sequence axis is left as None)
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
# Softmax over the vocabulary at every position: the next-token distribution
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# The attention scores are a second model output so they can be read back at
# generation time; their loss entry is None, so only the token predictions
# contribute to the training loss.
model = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
model.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])

In [18]:
# Print the layers with their output shapes and parameter counts
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddi  (None, None, 256)         3905536   
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_block (Transfo  ((None, None, 256),       1184512   
 rmerBlock)                   (None, 4, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 15000)       3855000   
                                                                 
Total params: 8945048 (34.12 MB)
Trainable params: 8945048 (34.12 MB)
Non-trainable params: 0 (0.00 Byte)
_____________________

In [19]:
class TextGenerator(callbacks.Callback):
    """Keras callback that prints a sampled recipe at the end of every epoch.

    It can also be called directly through `generate` once the callback has
    been attached to a model (for example by passing it to `model.fit`).

    Args:
        index_to_word: Vocabulary list, indexed by token id.
        top_k: Accepted for interface compatibility and stored on the
            instance; sampling does not currently use it.
    """

    def __init__(self, index_to_word, top_k=10):
        # Let the base Callback initialise its own attributes before adding ours.
        super().__init__()
        self.index_to_word = index_to_word
        self.top_k = top_k
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample one index from `probs` after temperature scaling.

        Args:
            probs: 1-D array of probabilities.
            temperature: Non-zero scaling factor; each probability is raised
                to `1 / temperature` and the result is renormalized.

        Returns:
            Tuple `(index, scaled_probs)`.
        """
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend `start_prompt` token by token and print the result.

        The prompt is split on whitespace; words missing from the vocabulary
        are mapped to id 1. Sampling stops once the sequence holds
        `max_tokens` tokens or token id 0 has been sampled.

        Args:
            start_prompt: Text to continue.
            max_tokens: Upper bound on the total number of tokens
                (prompt included).
            temperature: Passed to `sample_from`.

        Returns:
            List with one dict per generated token, holding the prompt
            before that step (`"prompt"`), the scaled distribution it was
            sampled from (`"word_probs"`) and the attention scores of the
            last position (`"atts"`).
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # y[0][-1]: distribution predicted after the last token so far.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample recipe so training progress can be judged by eye."""
        self.generate("recipe for", max_tokens=MAX_LEN, temperature=0.7)

In [20]:
# Create a model save checkpoint
# (weights only, written to the same path at the end of every epoch)
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs for TensorBoard under ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Callback that prints a sampled recipe after each epoch; it needs the
# vocabulary to map between words and token ids
text_generator = TextGenerator(vocab)

In [21]:
# Train the model; the callbacks checkpoint the weights, log to TensorBoard
# and print a sampled recipe at the end of every epoch
model.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5


2023-12-07 20:22:25.768059: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-12-07 20:22:28.919086: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fdd49ad6610 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-07 20:22:28.919123: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A5000, Compute Capability 8.6
2023-12-07 20:22:28.922184: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-12-07 20:22:29.205742: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8906
I0000 00:00:1701998549.247509 3231399 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


generated text:
recipe for beef stroganoff | mince the beef in a pot with water , bouillon and bring to a boil . add the barley and cook until tender , about 5 minutes . while the meat is cooking , put the meat back on the heat and warm water to a boil . when the beef is done , remove it from the microwave . add the meat and add the gravy . let sit for about 20 minutes to cool . mix the meat with the flour , salt and pepper to taste . in a large skillet , melt the butter over low heat . when it has reduced , add the beef stock and garlic and cook for about 5 minutes . add the beef broth , sherry , and peas and simmer until the gravy thickens , about 5 minutes . serve over hot rice . 

Epoch 2/5
generated text:
recipe for smoked salmon with yogurt and lime | preheat the oven to 425 degrees f . in a small bowl , whisk the yogurt , lime juice , and salt . season the salmon with salt and pepper and set aside . combine the yogurt , cilantro , lime juice , lime zest and lime zest and juice i

<keras.src.callbacks.History at 0x7fde4c3cbcd0>

In [22]:
# Save the final model
# (the whole model, not just the weights that the checkpoint callback stores)
model.save("./models/new_recipe_generator")

INFO:tensorflow:Assets written to: ./models/new_recipe_generator/assets


INFO:tensorflow:Assets written to: ./models/new_recipe_generator/assets


# 3. Generate text using the Transformer

In [31]:
# Sample a recipe for this prompt; `info` holds, for every generated token,
# the word probabilities and the attention scores of the last position
info = text_generator.generate(
    "recipe for vanilla whipped cream parfaits with berries |", max_tokens=256, temperature=0.7
)


generated text:
recipe for vanilla whipped cream parfaits with berries | line large baking sheets with parchment paper or silicone baking mats . in 1 to 1 1 / 2 cups ( 750 ml ) of the 1 / 2 cups ( 125 ml ) of the sugar , and 1 / 2 cup ( 125 ml ) of the strawberry mixture . cover and refrigerate until the strawberries are ready to assemble dessert dishes . spoon 1 / 2 cup ( 175 ml ) parfait glasses and top with 2 tbsp ( 15 ml ) of the remaining 1 / 2 cup ( 125 ml ) whipped topping ( 1 / 2 cup ( 125 ml ) of the whipped topping ) crushed wafers ( 1 / 4 cup ( 125 ml ) of the strawberries ) and the remaining 1 cup ( 225 ml ) whipped topping . repeat with remaining whipped topping . top with remaining whipped topping and remaining whipped topping ; garnish with toasted pecans if desired . 



In [24]:
# Sample a recipe for this prompt (overwrites `info` with this run's per-token details)
info = text_generator.generate(
    "recipe for chocolate chip cookies |", max_tokens=256, temperature=0.7
)


generated text:
recipe for chocolate chip cookies | preheat oven to 350 & line 2 rimmed baking sheets with parchment paper . cream butter and sugar until light and fluffy . add eggs , 1 at a time , beating well after each addition . add flour and baking soda and salt , to the creamed mixture and mix well . add the flour mixture to the creamed mixture alternately with the rest of the flour mixture . beat the egg whites until they form soft peaks . fold in macadamia nuts and chocolate chips . drop the dough by rounded teaspoons onto the prepared baking sheets . bake for 12 minutes or until golden brown . let cool on the baking sheets for 30 minutes . 



In [26]:
# Sample a recipe for this prompt (overwrites `info` with this run's per-token details)
info = text_generator.generate(
    "recipe for chicken noodle soup |", max_tokens=256, temperature=0.7
)


generated text:
recipe for chicken noodle soup | combine all ingredients , except chicken . bring to boil , then lower heat . when chicken is cooked , remove chicken from liquid , drain & return to pot . add basil and lemon juice . stir well . return chicken to pot , and baste with sauce . 



In [27]:
# Sample a recipe for this prompt (overwrites `info` with this run's per-token details)
info = text_generator.generate(
    "recipe for sugar cookies |", max_tokens=256, temperature=0.7
)


generated text:
recipe for sugar cookies | preheat oven to 350 degrees f . mix flour and baking soda in large bowl . using electric mixer , cream together the butter and sugars . add eggs , one at a time , beating well after each addition . stir in vanilla . add flour mixture to creamed mixture and stir to combine . add flour , soda , baking powder and salt . combine well . set aside . beat egg whites in another large bowl until foamy . gradually add the flour mixture , beating at low speed until just combined . stir in walnuts . drop by spoonfuls on ungreased cookie sheet . bake on cookie sheet for about 5 minutes . cool on wire rack for 10 minutes . transfer to wire rack and cool completely . 



In [28]:
# Sample a recipe for this prompt (overwrites `info` with this run's per-token details)
info = text_generator.generate(
    "recipe for grilled cheese |", max_tokens=256, temperature=0.7
)


generated text:
recipe for grilled cheese | butter a shallow baking dish . in a small bowl , combine the butter , lemon zest and juice from the lemon juice . season with salt and pepper . add the butter ; toss to combine . cover and refrigerate until ready to serve . for topping , heat the oven to 400 . 



In [29]:
# Sample a recipe for this prompt (overwrites `info` with this run's per-token details)
info = text_generator.generate(
    "recipe for chicken breast |", max_tokens=256, temperature=0.7
)


generated text:
recipe for chicken breast | preheat oven to 375 degrees f . coat chicken breasts with flour , salt and pepper , dip in buttermilk , then coat with flour . in a large skillet , heat oil over medium heat until hot ( about 2 minutes ) . once oil is hot , add chicken breasts , skin side down , and cook until golden brown on both sides , about 3 to 5 minutes . transfer chicken to a baking sheet and repeat with remaining breasts . bake for about 5 minutes , until chicken is cooked through and crispy . ( can be made 1 day ahead . cool , then cover and refrigerate . bring to room temperature before using . to make the chicken , melt butter in a large skillet over medium heat . add onion and garlic and cook until softened , about 7 minutes . add carrot , celery , green pepper , thyme , and salt and pepper to taste . add chicken , toss again to coat . transfer chicken breasts , skin side up and place on baking sheet . bake for about 1 hour . meanwhile , in a small bowl , combine