<a href="https://colab.research.google.com/github/rsvolkert/Beer-Recipes/blob/main/generate_recipe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [9]:
# Load the recipe table straight from the project repository.
recipes = pd.read_csv(
    'https://raw.githubusercontent.com/rsvolkert/Beer-Recipes/main/Data/recipes.csv'
)

# Plain Python list holding the values of the 'recipe' column, one entry per row.
instructions = list(recipes['recipe'])

In [10]:
# Single character used below as the padding value when sequences are padded.
STOP = '#'
# Section-marker strings. They are not referenced in the cells visible here;
# presumably they label recipe sections elsewhere -- TODO confirm usage.
STOP_NAME = 'NAME'
STOP_STYLE = 'STYLE'
STOP_METHOD = 'METHOD'
STOP_INGREDIENTS = 'INGREDIENTS'
STOP_INSTRUCTIONS = 'INSTRUCTIONS'

In [11]:
# Character-level tokenizer: nothing is filtered out and case is preserved,
# so every distinct character in the corpus gets its own index.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    char_level=True, filters='', lower=False, split=''
)

# Register the STOP character before the recipes so that it is always part
# of the vocabulary, even if it never occurs in any recipe text.
tokenizer.fit_on_texts([STOP])
tokenizer.fit_on_texts(instructions)

# One extra slot on top of the number of distinct characters seen.
VOCABULARY_SIZE = 1 + len(tokenizer.word_counts)

In [12]:
# Encode every recipe as a list of character indices.
vectorized = tokenizer.texts_to_sequences(instructions)

# Index of the STOP character as a plain scalar. `pad_sequences` expects a
# scalar `value`; the previous one-element list only worked through NumPy
# broadcasting.
stop_index = tokenizer.word_index[STOP]

# Step 1: right-pad every recipe with STOP up to the length of the longest one.
vectorized_padded_without_stops = tf.keras.preprocessing.sequence.pad_sequences(
    vectorized,
    padding='post',
    truncating='post',
    value=stop_index
)

# Step 2: extend every row by one more position so that *every* recipe,
# including the longest one, ends with at least one STOP character.
# Without an explicit `maxlen` this second call is a no-op, because all rows
# already share the same length after step 1.
vectorized_padded = tf.keras.preprocessing.sequence.pad_sequences(
    vectorized_padded_without_stops,
    maxlen=vectorized_padded_without_stops.shape[1] + 1,
    padding='post',
    truncating='post',
    value=stop_index
)

In [13]:
# Slice the padded array along its first axis into a tf.data.Dataset.
dataset = tf.data.Dataset.from_tensor_slices(vectorized_padded)

In [14]:
def split_input_target(recipe):
    """Split one sequence into a next-element prediction pair.

    Args:
        recipe: A sliceable sequence (e.g. a 1-D tensor, list or string).

    Returns:
        A tuple ``(input_text, target_text)`` where ``input_text`` is the
        sequence without its last element and ``target_text`` is the sequence
        without its first element, i.e. the input shifted left by one.
    """
    return recipe[:-1], recipe[1:]

# Turn every sequence into an (input, target) pair shifted by one position.
targeted = dataset.map(split_input_target)

In [15]:
BATCH_SIZE = 64      # sequences per training batch
SHUFFLE_SIZE = 1000  # size of the shuffle buffer

# Shuffle, group into fixed-size batches (the incomplete last batch is
# dropped so every batch has exactly BATCH_SIZE rows), and repeat forever.
train = (
    targeted
    .shuffle(SHUFFLE_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .repeat()
)

In [16]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the Embedding -> LSTM -> Dense sequential network.

    Args:
        vocab_size: Size of the embedding input and of the Dense output layer.
        embedding_dim: Dimensionality of the embedding vectors.
        rnn_units: Number of units in the LSTM layer.
        batch_size: Fixed batch dimension declared on the first layer.

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` model. The final Dense
        layer has no activation, so its outputs are raw scores.
    """
    # The batch dimension is fixed; the sequence length is left open (None).
    embedding = tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None]
    )

    # Emits an output at every time step and keeps its state between batches.
    recurrent = tf.keras.layers.LSTM(
        units=rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer=tf.keras.initializers.GlorotNormal()
    )

    # One output score per vocabulary entry.
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.models.Sequential([embedding, recurrent, projection])

# Model hyperparameters, named so they can be tuned in one place instead of
# being buried as magic numbers in the call below.
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# Build the network with the vocabulary and batch size defined in earlier cells.
model = build_model(
    vocab_size=VOCABULARY_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
    batch_size=BATCH_SIZE
)

In [17]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    Args:
        labels: Integer class indices (the targets).
        logits: Unnormalized model outputs; ``from_logits=True`` tells Keras
            to treat them as raw scores rather than probabilities.

    Returns:
        The tensor returned by
        ``tf.keras.losses.sparse_categorical_crossentropy``.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )

# Adam optimizer with an explicit learning rate.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Attach the optimizer and the logits-based loss defined above to the model.
model.compile(optimizer=adam_optimizer, loss=loss)

In [20]:
# Stop when the training loss has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Weights-only checkpoints, one file name per epoch, under tmp/checkpoints.
os.makedirs('tmp/checkpoints', exist_ok=True)
checkpoint_prefix = os.path.join('tmp/checkpoints', 'ckpt_{epoch}')
chckpt_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix
)

In [None]:
EPOCHS = 500
# Keras epoch indices are zero-based: a fresh run must start at 0, otherwise
# the first epoch is skipped. Raise this only when resuming from a checkpoint.
INITIAL_EPOCH = 0
# `train` repeats forever, so the length of an epoch has to be given explicitly.
STEPS_PER_EPOCH = 1500

# Local file for the trained model. `model.save` writes to the file system and
# cannot upload to an HTTP(S) URL such as raw.githubusercontent.com; commit or
# upload this file to the repository separately.
MODEL_PATH = 'recipe_generation_raw.h5'

history = model.fit(
    x=train,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    initial_epoch=INITIAL_EPOCH,
    # The checkpoint callback was created above but never registered, so no
    # checkpoints were written; pass it alongside early stopping.
    callbacks=[early_stopping_callback, chckpt_callback]
)

model.save(MODEL_PATH, save_format='h5')