DIY Cereal Slogans 

Use an RNN (Recurrent Neural Network) to generate a cereal slogan! This notebook is adapted from the tutorial at: https://thepythoncode.com/article/text-generation-keras-python 

In [38]:
import tensorflow as tf
import numpy as np
import os
import pickle
import tqdm
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from string import punctuation

In [2]:
# Location of the slogan corpus. BASENAME is the file-name component of that
# path; later cells use it to name the pickled vocab files and the saved model.
FILE_PATH = './data/just_slogans.txt'
BASENAME = os.path.split(FILE_PATH)[1]

In [3]:
# Read the corpus and lower-case it. The context manager closes the file
# handle as soon as the text has been read instead of leaving it open.
with open(FILE_PATH, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()

# Remove punctuation. NOTE: string.punctuation only contains ASCII characters,
# so typographic marks such as ’ are kept and become part of the vocabulary.
text = text.translate(str.maketrans("", "", punctuation))

print(f"text length: {len(text)}")

# Vocabulary: every distinct character, sorted so the ids are deterministic.
unique_chars = sorted(set(text))

# number of unique chars
num_unique = len(unique_chars)

# char -> int
char_int = {c: i for i, c in enumerate(unique_chars)}

# int -> char
int_char = {i: c for i, c in enumerate(unique_chars)}

# Encode the whole corpus as an array of character ids.
encoded_text = np.array([char_int[c] for c in text])

text length: 5528


In [4]:
# Save both vocab mappings so the text-generation cells can reload them.
# Each `with` block flushes and closes its file once the dump has finished;
# the bare open(...) calls used before left both handles open.
with open(f"./data/{BASENAME}-char_int.pickle", "wb") as char_int_file:
    pickle.dump(char_int, char_int_file)
with open(f"./data/{BASENAME}-int_char.pickle", "wb") as int_char_file:
    pickle.dump(int_char, int_char_file)

In [5]:
# Training hyperparameters, read by name in the cells below.
subsequence_length = 100  # characters per input window fed to the model
batch_size = 128  # samples per training batch
epochs = 30  # number of epochs passed to model.fit

# Wrap the encoded corpus in a tf.data.Dataset with one character id per
# element, so it can be batched and mapped in the following cells.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

# Debugging aid (disabled): print the first ten ids next to their characters.
# for char in char_dataset.take(10):
#     print(char.numpy(), int_char[char.numpy()])


Prepare the Dataset 
- split each sequence into (input, target) samples to expand the dataset
- one-hot encode the samples

In [6]:
# Cut the character stream into fixed-length sequences. Each one is later
# split into many (input window, next character) samples; a shorter trailing
# chunk is dropped.
sequence_span = 2 * subsequence_length + 1
sequences = char_dataset.batch(sequence_span, drop_remainder=True)

# Show the first two sequences decoded back to text.
for sequence in sequences.take(2):
    print(''.join(int_char[i] for i in sequence.numpy()))

a bowl a day keeps the bullies away
a dish that’s a winner for wartime
a is for apple j is for jacks cinnamon toasty applejacks
alphabits you know you want them come and have some
always after my lucky
 charms they’re magically delicious
applejacks will not be sold to bullies
applejacks where the sweet taste of cinnamon is the winnamon
ask for them by name
because that’s the kind of mom you are
bet y


In [7]:
def split_sample(sample):
    """Expand one encoded sequence into a dataset of (input, target) pairs.

    Every pair is a window of `subsequence_length` consecutive character ids
    together with the single id that follows the window. Shown as text, with
    subsequence_length = 5 the sequence "theyre grrrreat" starts with
    ("theyr", "e"), then ("heyre", " "), and so on, sliding one character
    at a time.

    Windows start at offsets 0 up to (len(sample) - 1) // 2 - 1.

    Returns:
        A tf.data.Dataset whose elements are (input ids, target id) tuples.
    """

    def window_at(offset):
        # Single-element dataset holding the window starting at `offset`
        # and the id right after it.
        stop = offset + subsequence_length
        return tf.data.Dataset.from_tensors((sample[offset:stop], sample[stop]))

    ds = window_at(0)
    for offset in range(1, (len(sample) - 1) // 2):
        ds = ds.concatenate(window_at(offset))
    return ds

# Flatten the per-sequence datasets into one stream of (input, target) samples.
dataset = sequences.flat_map(split_sample)

In [8]:
def one_hot_samples(input_, target):
    """One-hot encode an (input, target) sample over the character vocabulary.

    Args:
        input_: integer character ids of one input window.
        target: integer id of the character that follows the window.

    Returns:
        Tuple (input one-hot, target one-hot), each with a trailing axis of
        size `num_unique`.
    """
    return tf.one_hot(input_, num_unique), tf.one_hot(target, num_unique)

# Build from `sequences` instead of re-mapping `dataset` onto itself: the old
# `dataset = dataset.map(...)` one-hot encoded already encoded data whenever
# this cell was run a second time. This form gives the same result on every run.
dataset = sequences.flat_map(split_sample).map(one_hot_samples)

In [9]:
# Sanity check: decode the first two one-hot samples back to characters.
for element in dataset.take(2):
    # argmax over the one-hot axis recovers the integer id of each character
    input_ids = np.argmax(element[0].numpy(), axis=-1)
    target_id = np.argmax(element[1].numpy())
    print("Input:", ''.join(int_char[idx] for idx in input_ids))
    print("Target:", int_char[target_id])
    print("Input shape:", element[0].shape)
    print("Target shape:", element[1].shape)
    print("="*50, "\n")

Input: a bowl a day keeps the bullies away
a dish that’s a winner for wartime
a is for apple j is for jacks
Target:  
Input shape: (100, 32)
Target shape: (32,)

Input:  bowl a day keeps the bullies away
a dish that’s a winner for wartime
a is for apple j is for jacks 
Target: c
Input shape: (100, 32)
Target shape: (32,)



In [10]:
# Repeat the samples indefinitely, shuffle them through a 1024-element buffer,
# then group them into batches of batch_size (a short final batch is dropped).
prepped_dataset = (
    dataset
    .repeat()
    .shuffle(buffer_size=1024)
    .batch(batch_size, drop_remainder=True)
)

Building the Model 
- LSTM Layers 

In [11]:
# Two stacked LSTM layers with dropout in between, followed by a softmax
# over the vocabulary that scores the next character.
model = Sequential()
model.add(LSTM(256, input_shape=(subsequence_length, num_unique), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(256))
model.add(Dense(num_unique, activation="softmax"))

# Where the trained model is written after fitting.
model_weights_path = f"results/{BASENAME}-{subsequence_length}.h5"

model.summary()
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy", "recall", "precision"],
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 256)          295936    
                                                                 
 dropout (Dropout)           (None, 100, 256)          0         
                                                                 
 lstm_1 (LSTM)               (None, 256)               525312    
                                                                 
 dense (Dense)               (None, 32)                8224      
                                                                 
Total params: 829472 (3.16 MB)
Trainable params: 829472 (3.16 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Train the Model!

In [12]:
# The training dataset repeats forever, so an "epoch" is defined here as
# this many batches.
steps_per_epoch = (len(encoded_text) - subsequence_length) // batch_size
model.fit(prepped_dataset, epochs=epochs, steps_per_epoch=steps_per_epoch)

# Write the trained model to model_weights_path.
model.save(model_weights_path)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


  saving_api.save_model(


In [24]:
# Reload the vocab mappings written earlier in this notebook. `with` closes
# each file after reading; the bare open(...) calls used before did not.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open(f"./data/{BASENAME}-char_int.pickle", "rb") as char_int_file:
    char_int = pickle.load(char_int_file)
with open(f"./data/{BASENAME}-int_char.pickle", "rb") as int_char_file:
    int_char = pickle.load(int_char_file)
vocab_size = len(char_int)

# Rebuild the same architecture that was trained so the saved weights can be
# loaded into it layer by layer.
model = Sequential([
    LSTM(256, input_shape=(subsequence_length, vocab_size), return_sequences=True),
    Dropout(0.3),
    LSTM(256),
    Dense(vocab_size, activation="softmax"),
])

# Load the weights saved at the end of training.
model.load_weights(f"results/{BASENAME}-{subsequence_length}.h5")

In [73]:
# Prime the generator with a random 90-character excerpt of the corpus.
generated = ''
last_valid_start = len(text) - 90 - 1
start_index = random.randint(0, last_valid_start)
seed = text[start_index:start_index + 90]

In [74]:
# Greedily generate n_chars characters, one at a time, starting from `seed`.
s = seed  # the seed as chosen in the previous cell, kept for the printout
n_chars = 30
generated = ""

# Work on a copy so `seed` itself is left untouched. The loop used to
# overwrite `seed`, so re-running this cell continued from wherever the
# previous run stopped instead of regenerating from the chosen seed.
# Keeping at most the last subsequence_length characters also stops the
# padding offset below from going negative (which would silently wrap around
# the array) if a longer seed is ever supplied.
window = seed[-subsequence_length:]

# generate n_chars
for i in tqdm.tqdm(range(n_chars), "Generating text"):

    # One-hot encode the current window, right-aligned in the input so that
    # a window shorter than subsequence_length is preceded by all-zero rows.
    X = np.zeros((1, subsequence_length, vocab_size))
    offset = subsequence_length - len(window)
    for t, char in enumerate(window):
        X[0, offset + t, char_int[char]] = 1

    # predict the distribution over the next character
    predicted = model.predict(X, verbose=0)[0]

    # pick the most likely character id and map it back to a character
    next_index = np.argmax(predicted)
    next_char = int_char[next_index]

    # add the character to results
    generated += next_char

    # slide the window: drop the oldest character, append the prediction
    window = window[1:] + next_char

print(f"Seed: {s}\n")
print(f"Generated Text: {generated}")

Generating text: 100%|██████████| 30/30 [00:02<00:00, 12.82it/s]

Seed: oney nut cheerios
what are you eating nutin’ honey
what’s the good word bird
what’s new li

Generated Text: me an hor taste that’s of lins





![Alt text](image.png)

Every once in a while it generates text pretty well, but sometimes it just comes out with gibberish. It most likely needs more cereal slogan data to get a better result!