# Train Sequential Neural Network

In [28]:
import re
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, LSTM, Lambda, Dropout

import os
import numpy as np


# load in text 
raw_text = open(os.path.join(os.getcwd(), 'data_shakespeare/shakespeare.txt')).read()
lines = [line.split() for line in raw_text.split('\n\n') if line.split()]

# remove all unnecessary characters from the text
raw_text2 = ''
for line in lines:
    obs_elem = []
    for word in line:
        word = re.sub("\d+", "", word)
        if (word == ""):
            continue
        word = re.sub(r'[^-\w\']', '', word).lower()
        raw_text2 += word + ' '
            
# create same-length strings 
length = 40
# get list of all characters used in text
chars = sorted(list(set(raw_text2)))
# map characters to their numerical value
mapping = dict((c, i) for i, c in enumerate(chars))
vocab_size = len(mapping)
print(len(raw_text2))
# tokenize a 40 length sequence and the character
# coming after it (the 40 length sequence will be 
# x and the 41st character will be y)
X = []
y = []
# using semi-redundant sequences to speed up training
for i in range(length, len(raw_text2) - 1):
    # select sequence of tokens
    seq = raw_text2[i-length:i+1]
    # store
    encoded_seq = [mapping[c] for c in list(seq)]
    X.append(np.array(encoded_seq))
    output = raw_text2[i + 1]
    encoded_seq2 = mapping[output]
    y.append(encoded_seq2)
    
# separate into input and output
X = np.array(X)
y = np.array(y)
beforepX = X
# converts x and y to binary class matrices (depending on 
# if the character is present in that sequence)
newsequences = [to_categorical(x, num_classes=vocab_size) for x in X]
X = np.array(newsequences)
y = to_categorical(y, num_classes=vocab_size)



91006


# Generate poem from Sequential Neural Network

In [None]:
def generate_poem(model, chars):
    ''' passing in a trained model and all the characters
    that the model was trained on, generates a poem based
    on model predictions '''
    # get random initial line
    start = np.random.randint(0, len(beforepX)-1)
    # dictionary converting integers to characters
    # (since we will generate predictions that are int sequences,
    # we convert them to characters)
    int_to_char = dict((i, c) for i, c in enumerate(chars))
    # get a random seed from the input data
    pattern = list(beforepX[start])
    # generate characters
    # based on the input data, generate predictions 
    poem = ''
    for k in range(700):
        # get a prediction based on the pattern for what 
        # the next character will be 
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = to_categorical(x, num_classes=vocab_size)
        prediction = model.predict(x, verbose=0)
        
        # get prediction with highest probability 
        index = np.argmax(prediction)
        result = int_to_char[index]
        seq_in = [int_to_char[value] for value in pattern]
        poem = poem + result
        
        # append that predicted pattern, remove first character 
        # from predicted pattern
        pattern.append(index)
        pattern = pattern[1:len(pattern)]
    # randomly add punctuation to poem, print poem out
    punctuation_list = [',', '.', ':', '?']
    punctuation_probs = [0.6, 0.1, 0.2, 0.1]
    poem_format = poem.split()
    for p in range(1, len(poem_format)):
        if ((p - 1) % 10 == 0):
            print(poem_format[p].capitalize() + ' ', end = '')
        else:
            if (poem_format[p] == 'i'):
                print(poem_format[p].capitalize() + ' ', end = '')
            else:
                print(poem_format[p], end = '')
            if (p % 10 == 0 and p < len(poem_format) - 1):
                print(np.random.choice(punctuation_list, p = punctuation_probs) + ' ', end = '')
                print()
            elif(p == len(poem_format) - 1):
                print('.', end = '')
            else:
                print(' ', end = '')
        
            
def summers_day_poem(model, chars):
    ''' passing in a trained model and all the characters
    that the model was trained on, generates a poem based
    on model predictions -- the input is set to be shall
    i compare thee to a summers day'''
        
    test = "shall i compare thee to a summers day tho"
    test = re.sub("\d+", " ", test)
    test = re.sub(r'[^-\w\']', ' ', test).lower()
    pattern = [mapping[c] for c in list(test)]
    int_to_char = dict((i, c) for i, c in enumerate(chars))

    # generate characters
    poem = ''
    for k in range(700):
        x = np.reshape(pattern, (1, len(pattern), 1))
        x = to_categorical(x, num_classes=vocab_size)
        prediction = model.predict(x, verbose=0)
        index = np.argmax(prediction)
        result = int_to_char[index]
        seq_in = [int_to_char[value] for value in pattern]
        poem = poem + result
        pattern.append(index)
        pattern = pattern[1:len(pattern)]

    punctuation_list = [',', '.', ':', '?']
    punctuation_probs = [0.6, 0.1, 0.2, 0.1]
    poem_format = poem.split()
    for p in range(1, len(poem_format)):
        if ((p - 1) % 10 == 0):
            print(poem_format[p].capitalize() + ' ', end = '')
        else:
            if (poem_format[p] == 'i'):
                print(poem_format[p].capitalize() + ' ', end = '')
            else:
                print(poem_format[p], end = '')
            if (p % 10 == 0 and p < len(poem_format) - 1):
                print(np.random.choice(punctuation_list, p = punctuation_probs) + ' ', end = '')
                print()
            elif(p == len(poem_format) - 1):
                print('.', end = '')
            else:
                print(' ', end = '')

# Temperature = 1.5

In [None]:
# define model
model1 = Sequential()
model1.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model1.add(Dropout(0.3))
model1.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model1.add(Dropout(0.3))

# adding temperature
temp = 1.5
model1.add(Lambda(lambda x : x /temp))
model1.add(Dense(vocab_size, activation='softmax'))

print(model1.summary())
# compile model
model1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model

model1.fit(X, y, epochs=90, verbose=2)



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 200)               184000    
_________________________________________________________________
dropout_6 (Dropout)          (None, 200)               0         
_________________________________________________________________
lambda_6 (Lambda)            (None, 200)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 29)                5829      
Total params: 189,829
Trainable params: 189,829
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/90
 - 138s - loss: 2.4598 - acc: 0.2945
Epoch 2/90
 - 114s - loss: 2.0767 - acc: 0.3870
Epoch 3/90
 - 114s - loss: 1.9420 - acc: 0.4179
Epoch 4/90
 - 118s - loss: 1.8574 - acc: 0.4417
Epoch 5/90
 - 132s - loss: 1.7921 - acc: 0.4586
Epoch 6/90
 - 131s - lo

In [None]:
generate_poem(model1, chars)

In [None]:
summers_day_poem(model1, chars)

# Temperature = 0.75

In [None]:
model2 = Sequential()
model2.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model2.add(Dropout(0.3))
model2.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model2.add(Dropout(0.3))

# adding temperature
temp = 0.75
model2.add(Lambda(lambda x : x /temp))
model2.add(Dense(vocab_size, activation='softmax'))

print(model2.summary())
# compile model
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model

model2.fit(X, y, epochs=50, verbose=2)

In [None]:
generate_poem(model2, chars)

In [None]:
summers_day_poem(model2, chars)

# Temperature = 0.25

In [None]:
model3 = Sequential()
model3.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model3.add(Dropout(0.3))
model3.add(LSTM(200, input_shape=(X.shape[1], X.shape[2])))
model3.add(Dropout(0.3))

# adding temperature
temp = 0.25
model3.add(Lambda(lambda x : x /temp))
model3.add(Dense(vocab_size, activation='softmax'))

print(model3.summary())
# compile model
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit model

model3.fit(X, y, epochs=50, verbose=2)

In [None]:
generate_poem(model3, chars)

In [None]:
summers_day_poem(model3, chars)