In [6]:
import numpy as np
import keras
from keras.models import Model, Sequential, load_model
from keras.layers import Embedding, LSTM, Dense, TimeDistributed, Activation

Using TensorFlow backend.


Read data from file and split into characters. 

In [7]:
def get_data_from_file(filename):
    chars = set()

    text = ''
    with open(filename) as f:
        for line in f:
            words = line.split()
            if len(words) <= 1:
                continue

            line = line.lower()
            chars |= set(line)

            text += line
            
    return chars, text

One hot encoding definition

In [8]:
def build_encoding(chars):
    id_to_char = list(chars)
    char_to_ids = {}
    for i, c in enumerate(id_to_char):
        char_to_ids[c] = i
        
    return id_to_char, char_to_ids

In [9]:
def encode(c, char_to_ids):
    encoding = np.zeros((M, ))
    encoding[char_to_ids[c]] = 1
    return encoding

In [10]:
def encode_string(s, char_to_ids):
    encoded = np.zeros((len(s), len(char_to_ids)))
    for i, c in enumerate(s):
        encoded[i] = encode(c, char_to_ids)
    return encoded

Turning raw text into sequences of characters for training data

In [11]:
def get_encoded_sequences(char_to_ids, skip=3, seq_len=40):
    X = []
    y = []
    for i in range(0, len(text) - seq_len - 1, skip):
        sequence = []
        for j in range(i, i + seq_len):
            sequence.append(encode(text[j], char_to_ids))
        X.append(sequence)
        y.append(encode(text[i + seq_len], char_to_ids))
    X = np.array(X)
    y = np.array(y)
    return X, y

In [8]:
chars, text = get_data_from_file('data/shakespeare.txt')
id_to_char, char_to_ids = build_encoding(chars)
M = len(id_to_char)
skip, seq_len = 1, 40

X, y = get_encoded_sequences(char_to_ids, skip, seq_len)
print(X.shape, y.shape)

(94248, 40, 38) (94248, 38)


In [9]:
def LSTM_model():
    model = Sequential()
    model.add(LSTM(200, input_shape=X.shape[1:]))
    model.add(Dense(M, activation='softmax'))
    return model

In [10]:
modelLSTM = LSTM_model()
modelLSTM.compile(loss='categorical_crossentropy', optimizer='adam')

In [11]:
modelLSTM.fit(X, y, batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x25381073388>

In [41]:
modelLSTM.save('models/LSTM.h5')

In [42]:
modelLSTM2 = load_model('models/LSTM.h5')

In [13]:
seed = "shall i compare thee to a summer's day?\n"

In [40]:
print(id_to_char)
print(char_to_ids)

['m', 'r', 's', 'p', 'l', 'v', '.', 'z', '\n', 'q', 'y', 'd', 'c', 'n', '?', 'k', 'o', 'g', 'h', ' ', "'", ';', 'j', 't', 'u', 'b', '-', 'w', 'x', '!', 'i', 'a', ',', ')', 'f', ':', '(', 'e']
{'m': 0, 'r': 1, 's': 2, 'p': 3, 'l': 4, 'v': 5, '.': 6, 'z': 7, '\n': 8, 'q': 9, 'y': 10, 'd': 11, 'c': 12, 'n': 13, '?': 14, 'k': 15, 'o': 16, 'g': 17, 'h': 18, ' ': 19, "'": 20, ';': 21, 'j': 22, 't': 23, 'u': 24, 'b': 25, '-': 26, 'w': 27, 'x': 28, '!': 29, 'i': 30, 'a': 31, ',': 32, ')': 33, 'f': 34, ':': 35, '(': 36, 'e': 37}


In [14]:
def sample_from_softmax(prediction, temp=1.0):
    prediction = np.asarray(prediction).astype('float64')
    num = np.log(prediction) / temp
    num = np.exp(num)
    p = num / np.sum(num)
    return np.argmax(np.random.multinomial(1, p, 1))

In [43]:
def generate_sequence(num_chars, temp=1):
    generated = seed
    sequence = seed
    for i in range(num_chars):
        x = np.zeros((1, len(sequence), len(char_to_ids)))
        x[0] = encode_string(sequence, char_to_ids)
        prediction = modelLSTM2.predict(x)
        next_char_id = sample_from_softmax(prediction[0], temp)
        next_char = id_to_char[next_char_id]
        generated += next_char
        sequence = sequence[1:] + next_char
    return generated

In [44]:
num_chars = 600
generated = generate_sequence(num_chars, temp=0.25)
print(generated)

shall i compare thee to a summer's day?
this more so, to dreds doth cruel kind,
and dountet baster sourly ougle worth
than in his did falled with thine and true,
to the dung shall in thy worth and sun,
corrain to me, so thrue love we come woe,
  to wint, of hove, which in shy hide is love,
and thence this, and thou shouldst not to deam.
that times his beauty do i quent straight,
and therefore waste in thy cheaked are dead.
simpot night by mine eye more dear.
  as the prey in thee in dead do thee,
  and this mine in of thee best thought cannotime,
to let my barest which the time that pups
crows of hearts have present upon thy name,
t


In [45]:
num_chars = 600
generated = generate_sequence(num_chars, temp=0.75)
print(generated)

shall i compare thee to a summer's day?
th' expense of shapes disgrainiag side,
  but heat the child, and nothing this edd
that i am potson brongs thence boss,
  whose some suspect of me forbor do thee,
for bending foul and other ppainting
earth, shall on me brain short-lie thee,
where awas the true, supposed of woitetion,
and somety seeming menours make be grief,
they left me doth parts of outward dwast.
when i wondrow that i sweet love his,
and do i in hand of him in a don.
than that wend'rn of mine own prese behings,
and thou taughts their thy formen alenthered,
alove's green appier thou shalt same life,
and cheeks ne'er runting 


In [46]:
num_chars = 600
generated = generate_sequence(num_chars, temp=1.5)
print(generated)

shall i compare thee to a summer's day?
th' eabs targed fors-your veriow,
if fortur breast doth rire poath of ever.
weth.
  my hours readons, where you freme with thy find,
both looks, deiting morture amesty,
but those wanting owerst love his, behble:
thou hast parts to win, presence ffomous,
o gover-knot her pyoures. shall by cart?
now which my devise in huse it sond,
i see burn of not, to lief the thing or ween,
thy i'ts to tell not wifter-despite,
cortine quite of secorain ow hate of minere,
thou natured thus i tenture hatour ad.
that meast that i have seem not love to to me.
'has vasing with mire, i never thangeryparch?
exceared
