# Aretha Bot
**LSTM that generates Aretha Franklin lyrics**

In [1]:
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, LSTM
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import RMSprop
import numpy as np
import random

### Input data

In [2]:
text = '''
What you want (oo)
Baby, I got (oo)
What you need (oo)
Do you know I got it? (oo)
All I'm askin' (oo)
Is for a little respect

when you come home (just a little bit)
Hey baby (just a little bit)
when you get home (just a little bit)
mister (just a little bit)

I ain't gonna do you wrong while you're gone
Ain't gonna do you wrong (oo)
'cause I don't wanna (oo)
All I'm askin' (oo)
Is for a little respect when you come home (just a little bit)
Baby (just a little bit)
when you get home (just a little bit)
Yeah (just a little bit)'''
text = text.lower().replace('\n', ' ')

print(f'corpus length: {len(text)} characters')

corpus length: 533 characters


### Constants

In [3]:
TEMPERATURE = 0.2  # vary this to raise/lower randomness
SEQ_LENGTH = 20  # input sequences in characters
STEP = 3

### Create data for predicting the next character

In [5]:
"""
CHALLENGE:

Cut text into pieces, according to the example output below:

- each string in 'sentences' is SEQ_LENGTH characters long
- each string in 'next_char' is 1 character long
- the starting characters at each STEP are used.
"""
text = 'hello world'
SEQ_LENGTH = 4

# sentences = [text[i:i+SEQ_LENGTH] for i in range(0, len(text), SEQ_LENGTH)]


assert sentences == ['hell', 'lo w', 'worl']
assert next_chars == ['o', 'o', 'd']

['hel', 'lo ', 'wor', 'ld']

### Helper Data Structures

In [None]:
# dictionary to convert characters to integers and back
chars = sorted(list(set(text)))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

nchars = len(chars)
print('distinct characters:', nchars)

### Vectorize sequences

In [None]:
X = np.zeros((nseq, SEQ_LENGTH, nchars), dtype=np.bool)
Y = np.zeros((nseq, nchars), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    Y[i, char_indices[next_chars[i]]] = 1
    
X.shape, Y.shape

### Build LSTM model

In [None]:
K.clear_session()

model = Sequential()
model.add(LSTM(64, input_shape=(SEQ_LENGTH, nchars)))
model.add(Dense(nchars))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [None]:
def sample_character(preds, characters, temperature=1.0):
    """random weighted choice of a character"""
    preds = np.asarray(preds).astype('float64')  # high precision
    preds = np.log(preds) / temperature          # apply temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)        # normalize
    probas = np.random.multinomial(1, preds, 1)  # same as random.choices
    i = np.argmax(probas)
    return characters[i]

In [None]:
def predict(sentence, model):
    """predicts the next character for a given """
    # convert sentence to numbers
    x_pred = np.zeros((1, SEQ_LENGTH, nchars))
    for t, char in enumerate(sentence):
        x_pred[0, t, char_indices[char]] = 1.

    probs = model.predict(x_pred, verbose=0)[0]
    return sample_character(probs, indices_char, TEMPERATURE)

### Warmup training

In [None]:
model.fit(X, Y, batch_size=128, epochs=20, verbose=0)

In [None]:
predict("you want a little re", model)

In [None]:
predict("ou want a little res", model)

In [None]:
predict("u want a little resp", model)

### Go Bananas

In [None]:
def on_epoch_end(epoch, logs):
    """Callback function invoked at end of each epoch. Prints generated text."""
    if epoch % 40 != 0:
        return
    
    # create a text to start predicting
    start = random.randint(0, len(text) - SEQ_LENGTH - 1)
    generated = ''
    sentence = text[start: start + SEQ_LENGTH]
    print('-' * 60)
    print(f'\nAfter Epoch: {epoch}\n \n{sentence.upper()}')

    for i in range(300):
        next_char = predict(sentence, model)
        print(next_char, end='')
        sentence = sentence[1:] + next_char  # prepare input for next round

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(X, Y, batch_size=128, epochs=200, callbacks=[print_callback], verbose=0)