# Coursework

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.optimizers import RMSprop, Adam
import keras
import numpy as np
import string
import random

def char_to_one_hot(char, corpus):
    arr = np.zeros((len(corpus)))
    hot_index = corpus.index(char)
    arr[hot_index] = 1
    return arr

def one_hot_to_char(one_hot, corpus):
    best_index = np.argmax(one_hot)
    return corpus[best_index]

def from_window_to_one_hot(window, corpus):
    corpus_size = len(corpus)
    
    text = window[0]
    next_char = window[1]
    
    text_array = np.zeros((len(text), corpus_size))
    for idx, char in enumerate(text):
        text_array[idx] = char_to_one_hot(char, corpus)
    
    next_char_arr = char_to_one_hot(next_char, corpus)
    
    return text_array, next_char_arr

def get_window(text, starting_index, window_size):
    window = text[starting_index : starting_index+window_size]
    next_character = text[starting_index+window_size]
    return window, next_character

def generate_sample(model, sample_len=200, initial_window=None):
    if initial_window is None:
        #Generate random string from lowercase letters and numbers
        initial_window = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(window_size))
    
    #String holding only letters predicted by the model
    predicted_text = ""
    
    for i in range(sample_len):
        X,y = from_window_to_one_hot((initial_window,"a"), corpus)
        X = X.reshape((1,window_size, corpus_size))
        pred_char_raw = model.predict(X)
        pred_char = one_hot_to_char(pred_char_raw, corpus)

        initial_window = initial_window[1:]
        initial_window = initial_window+pred_char

        predicted_text += pred_char
    
    return predicted_text

print("Loading data...")
with open('data/all_lines.txt', 'r', encoding='utf-8', errors='ignore') as speech_file:
    text=speech_file.read()

print("Data loaded!")
print("Reducing corpus...")
characters_to_replace = [ '\n', '\r', '\t', '$']
for character in characters_to_replace:
    text = text.replace(character, "")

text = text.replace("\"", " ")
text = text.replace("  ", " ")
text = text.lower()

corpus = sorted(list(set(text)))
corpus_size = len(corpus)
print("Reduced corpus:")
print(corpus)
print("Data sample:")
print(text[500:1000])

#limit num of samples to 500000, more will cause memory issues
window_size = 50
#num_of_samples = len(text)-(window_size+1)
num_of_samples = 500000

X = np.zeros((num_of_samples, window_size, corpus_size))
y = np.zeros((num_of_samples, corpus_size))

print("Getting samples...")
for i in range(num_of_samples):
    window = get_window(text, i, window_size)
    window_X, window_y = from_window_to_one_hot(window, corpus)
    X[i] = window_X
    y[i] = window_y

print("Compiling model...")
model = Sequential()
model.add(LSTM(units=200, input_shape=(window_size, corpus_size)))
model.add(Dropout(0.2))
model.add(Dense(units=corpus_size))
model.add(Activation('softmax'))

#optimizer = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
optimizer = keras.optimizers.Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0, amsgrad=False)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

print("Training model (may take a while)...")
number_of_epochs = 30
for i in range(number_of_epochs):
    model.fit(X, y, batch_size=500, epochs=1)
    print("After", i, "epochs, model generated:")
    print(generate_sample(model))

#Remember to update me before training!
model.save('shakespeare4.h5')

Loading data...
Data loaded!
Reducing corpus...
Reduced corpus:
[' ', '!', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
Data sample:
 paces: those opposed eyes, which, like the meteors of a troubled heaven, all of one nature, of one substance bred, did lately meet in the intestine shock and furious close of civil butchery shall now, in mutual well-beseeming ranks, march all one way and be no more opposed against acquaintance, kindred and allies: the edge of war, like an ill-sheathed knife, no more shall cut his master. therefore, friends, as far as to the sepulchre of christ, whose soldier now, under whose blessed cross we ar
Getting samples...
Compiling model...
Training model (may take a while)...
Epoch 1/1
After 0 epochs, model generated:
er with the farther the fall the fall the fall the fall the fal

After 22 epochs, model generated:
uelly and the see and so henry the seas of me in the country and the see in the country and the see in the country and the see in the country and the see in the country and the see in the country and 
Epoch 1/1
After 23 epochs, model generated:
eds, and then the father was a subject for the son of heaven to the king, and therefore is the cardinal of warwick, and somerset and the counterfeiting the state of the court of the face, and therefor
Epoch 1/1
After 24 epochs, model generated:
coning on the country and the soul of heart of the court of the state. the see is the soul of heart of the court of the state. the see is the soul of heart of the court of the state. the see is the so
Epoch 1/1
After 25 epochs, model generated:
ons, and then the see i shall be so fair more than the duke of york, and the see i shall be so fair more than the duke of york, and the see i shall be so fair more than the duke of york, and the see i
Epoch 1/1
After 26 epochs, mod

In [8]:
generate_sample(model, 500, )

'd and the state of heaven, and then the state of the fields of the state. the sun in the field of the field. what say you to the protector of the see. the lord of were the commonwealth of the commons to the part of the seas of france, and then the state of the fields of the state of heaven, and then the state of the fields of the state. the sun in the field of the field. what say you to the protector of the see. the lord of were the commonwealth of the commons to the part of the seas of france, '