# Poem RNN

Generating a poem based on Maya Angelou's Still I Rise.

In [81]:
import tensorflow as tf
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_confusion_matrix, confusion_matrix

from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Embedding
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from keras.utils import np_utils

import sys

tf.random.set_seed(1)

Load Data

In [82]:
poem_text = open('../Data/still_i_rise.txt', 'r', encoding='utf-8').read()
poem_text = poem_text.lower()

Creates a dictionary with all the possible letters/values

In [83]:
chars = sorted(list(set(poem_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [84]:
n_chars = len(poem_text)
n_vocab = len(chars)
print(f"Total Characters: ", n_chars)
print(f"Total Vocab: ", n_vocab)

Total Characters:  1252
Total Vocab:  31


In [85]:
print(char_to_int)

{'\n': 0, ' ': 1, "'": 2, ',': 3, '.': 4, '?': 5, 'a': 6, 'b': 7, 'c': 8, 'd': 9, 'e': 10, 'f': 11, 'g': 12, 'h': 13, 'i': 14, 'j': 15, 'k': 16, 'l': 17, 'm': 18, 'n': 19, 'o': 20, 'p': 21, 'r': 22, 's': 23, 't': 24, 'u': 25, 'v': 26, 'w': 27, 'x': 28, 'y': 29, '’': 30}


### This looks at a sequence of the data of length 100. It then records the character and the number corresponding to that character

In [86]:
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = poem_text[i:i + seq_length]
    seq_out = poem_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)

Reshaping for Keras

In [87]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

# The Model

In [88]:
model = Sequential()
model.add(Embedding(n_vocab, 10, input_length=seq_length))
model.add(LSTM(256, input_shape=(100, 1)))
model.add(Dropout(0.2))
model.add(Dense(31, activation='softmax'))

In [89]:
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 100, 10)           310       
_________________________________________________________________
lstm_10 (LSTM)               (None, 256)               273408    
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 31)                7967      
Total params: 281,685
Trainable params: 281,685
Non-trainable params: 0
_________________________________________________________________


In [90]:
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [47]:
history = model.fit(X,y, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [103]:
num_to_char = dict((i, c) for i, c in enumerate(chars))
start = np.random.randint(0, len(dataX)-1)
print(start)
pattern = dataX[start]
print(f"Seed:")
print(f"\"", ''.join([num_to_char[value] for value in pattern]), "\"")

948
Seed:
"  rooted in pain
i rise
i'm a black ocean, leaping and wide,
welling and swelling i bear in the tide. "


In [113]:
for i in range(100):
    x = np.reshape(pattern, (1, len(pattern), 1))
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = num_to_char[index]
    seq_in = [num_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

isni'


ttsvaccwsagg
ttsgvvva

ttsg
vva

ttsgvvacwsnisni'


ttsvaccwsagg
ttsgvvva

ttsg
vva

ttsgvva