<a href="https://colab.research.google.com/github/rsvolkert/Beer-Recipes/blob/main/generate_recipe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from google.colab import files

# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
# NOTE(review): this is set after `import tensorflow` above; the variable is
# conventionally exported before the import — confirm it still takes effect.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"

In [2]:
# Recipe corpus published alongside the notebook's repository.
RECIPES_URL = 'https://raw.githubusercontent.com/rsvolkert/Beer-Recipes/main/Data/recipes.csv'

recipes = pd.read_csv(RECIPES_URL)

# One raw recipe text per row, kept as a plain Python list for the tokenizer.
instructions = list(recipes['recipe'])

In [3]:
# Marker character: it is registered with the tokenizer and its id is used as
# the fill value when recipes are padded to a common length.
STOP = '#'
# NOTE(review): the five labels below are not referenced in any visible cell —
# presumably they name sections inside each recipe string; confirm or remove.
STOP_NAME = 'NAME'
STOP_STYLE = 'STYLE'
STOP_METHOD = 'METHOD'
STOP_INGREDIENTS = 'INGREDIENTS'
STOP_INSTRUCTIONS = 'INSTRUCTIONS'

In [4]:
# Character-level tokenizer: no filtering and no lower-casing, so every
# distinct character in the corpus receives its own integer id.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    char_level=True, filters='', lower=False, split=''
)

# Register the stop marker first, then the recipe texts themselves.
for corpus in ([STOP], instructions):
    tokenizer.fit_on_texts(corpus)

# Keras never assigns id 0 to a token, so the model needs one extra slot.
VOCABULARY_SIZE = 1 + len(tokenizer.word_counts)

In [5]:
# Encode every recipe as a list of integer character ids.
vectorized = tokenizer.texts_to_sequences(instructions)

# texts_to_sequences returns one list per text; unwrap it so pad_sequences
# receives the scalar fill value it expects rather than a one-element list.
stop_token_id = tokenizer.texts_to_sequences([STOP])[0][0]

# Length of the longest encoded recipe (0 when the corpus is empty).
longest_recipe = max((len(sequence) for sequence in vectorized), default=0)

# Right-pad every recipe with the stop id up to the longest recipe.
vectorized_padded_without_stops = tf.keras.preprocessing.sequence.pad_sequences(
    vectorized,
    maxlen=longest_recipe,
    padding='post',
    truncating='post',
    value=stop_token_id
)

# Pad one position further so that every recipe, including the longest one,
# ends with at least one stop id. Without an explicit maxlen this second pass
# was a no-op, because all rows already had the same length.
vectorized_padded = tf.keras.preprocessing.sequence.pad_sequences(
    vectorized_padded_without_stops,
    maxlen=longest_recipe + 1,
    padding='post',
    truncating='post',
    value=stop_token_id
)

In [6]:
# Slice the padded matrix along its first axis: each dataset element is one
# padded recipe, i.e. a 1-D vector of character ids.
dataset = tf.data.Dataset.from_tensor_slices(vectorized_padded)

In [7]:
def split_input_target(recipe):
    """Split one sequence into a next-element prediction pair.

    Args:
        recipe: any sliceable sequence (tensor, list, string, ...).

    Returns:
        A tuple ``(input, target)`` where ``input`` is ``recipe`` without its
        last element and ``target`` is ``recipe`` without its first element,
        so ``target[i]`` is the element that follows ``input[i]``.
    """
    return recipe[:-1], recipe[1:]

# Turn every recipe into an (input, target) pair shifted by one position.
targeted = dataset.map(split_input_target)

In [8]:
BATCH_SIZE = 32
SHUFFLE_SIZE = 1000

# Shuffle through a SHUFFLE_SIZE-element buffer, cut into fixed-size batches
# (the incomplete final batch is dropped), then repeat indefinitely.
train = (
    targeted
    .shuffle(SHUFFLE_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .repeat()
)

In [9]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> LSTM -> Dense character model.

    Args:
        vocab_size: number of distinct token ids; used as the embedding input
            size and as the width of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the LSTM layer.
        batch_size: fixed batch dimension declared on the input, which the
            stateful LSTM needs; the sequence length is left unspecified.

    Returns:
        An uncompiled ``tf.keras`` Sequential model. The last layer has no
        activation, so the model emits raw logits for every time step.
    """
    layers = tf.keras.layers

    return tf.keras.models.Sequential([
        layers.Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            batch_input_shape=[batch_size, None],
        ),
        layers.LSTM(
            units=rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer=tf.keras.initializers.GlorotNormal(),
        ),
        layers.Dense(vocab_size),
    ])

# Network hyper-parameters, named so they are easy to find and tune.
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# The batch size is fixed at build time, so it must match the training batches.
model = build_model(
    vocab_size=VOCABULARY_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE,
)

In [10]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    Args:
        labels: integer class ids (the true next characters).
        logits: unnormalised model outputs; softmax is applied inside the loss
            because ``from_logits=True``.

    Returns:
        The cross-entropy tensor returned by
        ``tf.keras.losses.sparse_categorical_crossentropy``.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )

LEARNING_RATE = 0.001

adam_optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# Attach the optimizer and the logits-based loss to the model.
model.compile(loss=loss, optimizer=adam_optimizer)

In [11]:
# Weights are written once per epoch under this directory.
CHECKPOINT_DIR = 'tmp/checkpoints'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# `{epoch}` is filled in by Keras, giving one checkpoint file set per epoch.
checkpoint_prefix = os.path.join(CHECKPOINT_DIR, 'ckpt_{epoch}')
chckpt_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix, save_weights_only=True
)

# Stop after 5 epochs without improvement in the training loss (no validation
# data is passed to fit) and roll back to the best weights seen.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss', patience=5, restore_best_weights=True, verbose=1
)

In [85]:
EPOCHS = 100
STEPS_PER_EPOCH = 100

# The training dataset repeats indefinitely, so STEPS_PER_EPOCH (batches per
# epoch) is what defines where one epoch ends.
training_callbacks = [chckpt_callback, early_stopping_callback]

history = model.fit(
    train,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks=training_callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Restoring model weights from the end of the best epoch.
Epoc

In [86]:
# Persist the trained model as a single HDF5 file and hand it to the browser
# through Colab's download helper.
# NOTE(review): the model was compiled with the custom `loss` function, so
# reloading this file may need `custom_objects` or `compile=False` — confirm.
MODEL_PATH = 'model.h5'

model.save(MODEL_PATH)
files.download(MODEL_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>