In [1]:
def load_doc(filename, encoding=None):
    """Read a text file and return its entire contents as one string.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    encoding : str, optional
        Text encoding passed to ``open``. ``None`` (the default) keeps the
        platform default encoding; pass e.g. ``"utf-8"`` when the file
        contains non-ASCII characters and the platform default may differ.

    Returns
    -------
    str
        The full text of the file.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, "r", encoding=encoding) as file:
        return file.read()

In [3]:
# Read the whole rhyme into memory; `raw_text` keeps the original line breaks.
raw_text = load_doc("rhyme.txt")
# Echo the text as loaded so the line structure is visible.
print(raw_text)

Sing a song of sixpence,
A pocket full of rye.
Four and twenty blackbirds,
Baked in a pie.

When the pie was opened
The birds began to sing;
Wasn’t that a dainty dish,
To set before the king.

The king was in his counting house,
Counting out his money;
The queen was in the parlour,
Eating bread and honey.

The maid was in the garden,
Hanging out the clothes,
When down came a blackbird
And pecked off her nose.


In [5]:
# Splitting on any whitespace and re-joining with single spaces collapses the
# line breaks and blank lines, leaving the rhyme as one long line.
tokens = raw_text.split()
text = ' '.join(tokens)
print(text)

Sing a song of sixpence, A pocket full of rye. Four and twenty blackbirds, Baked in a pie. When the pie was opened The birds began to sing; Wasn’t that a dainty dish, To set before the king. The king was in his counting house, Counting out his money; The queen was in the parlour, Eating bread and honey. The maid was in the garden, Hanging out the clothes, When down came a blackbird And pecked off her nose.


In [7]:
# Number of context characters per training example; each window also carries
# one extra trailing character that later serves as the prediction target.
length = 10

# Slide a (length + 1)-character window over `text`, one character at a time.
sequences = [
    text[start:start + length + 1]
    for start in range(len(text) - length)
]

print('Total length of sequences is {}'.format(len(sequences)))

Total length of sequences is 399


In [18]:
# Assign every distinct character an integer id, in sorted character order.
# NOTE(review): the vocabulary is taken from `raw_text`, not the normalised
# `text`, so it also contains '\n' even though the training windows are cut
# from `text`, where line breaks have been replaced by spaces.
char_map = dict(
    (char, index) for index, char in enumerate(sorted(set(raw_text)))
)
print(char_map)

{'c': 16, 'S': 11, 'r': 30, 'm': 25, ' ': 1, 'q': 29, 'p': 28, 'f': 19, 'a': 14, 'u': 33, 'F': 9, 'W': 13, 'i': 22, 'h': 21, 'l': 24, '.': 3, 'o': 27, 't': 32, ',': 2, 'd': 17, 'B': 6, 'g': 20, 'A': 5, 'b': 15, ';': 4, 'H': 10, 'E': 8, 'w': 34, 'y': 36, 'n': 26, '\n': 0, 'k': 23, '’': 37, 'T': 12, 'C': 7, 'x': 35, 'e': 18, 's': 31}


In [29]:
# Scratch experiment with str.maketrans: builds a table mapping each character
# of `s` to the character at the same position in `a`.
# NOTE(review): `trans` is not referenced by any other cell visible here.
s, a = "latish", '123456'
trans = str.maketrans(s, a)

In [25]:
# Inverse lookup: integer id -> character.
reverse = {v: k for k, v in char_map.items()}
# Display the first training window. The previous `sequences[0].translate()`
# call raised TypeError because str.translate() requires a table argument.
sequences[0]

'Sing a song'

In [32]:
# Replace every character of every window with its integer id from `char_map`,
# giving one list of ids per window.
encoded_seq = [[char_map[ch] for ch in line] for line in sequences]

In [34]:
import numpy as np
# NOTE(review): this rebinds `sequences` from a list of strings to an integer
# array, so the cell that builds `encoded_seq` from `sequences` will no longer
# work as written if it is re-run after this one.
sequences = np.array(encoded_seq)
# Every column except the last is model input ...
X = sequences[:, :-1]
# ... and the last column is the character id to predict.
y = sequences[:, -1]

In [37]:
# One class per entry in the character mapping.
vocab_size = len(char_map)
from keras.utils import to_categorical
# One-hot encode each input row separately, then stack the rows back into a
# single array. NOTE(review): X and y are overwritten in place, so this cell
# must only be run once per fresh X / y.
X = np.array([to_categorical(row, num_classes=vocab_size) for row in X])
# One-hot encode the target ids as well.
y = to_categorical(y, num_classes=vocab_size)

In [46]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

In [48]:
# Single LSTM layer followed by a softmax over the vocabulary.
model = Sequential()
# The input shape is taken from X: (characters per window, one-hot width).
model.add(LSTM(75, input_shape=(X.shape[1], X.shape[2])))
# One output unit per entry in the character mapping.
model.add(Dense(vocab_size, activation='softmax'))
# summary() prints the table itself and returns None, which is why a trailing
# "None" appears below the table in the recorded output.
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 75)                34200     
_________________________________________________________________
dense_1 (Dense)              (None, 38)                2888      
Total params: 37,088
Trainable params: 37,088
Non-trainable params: 0
_________________________________________________________________
None


In [49]:
# Configure training: cross-entropy over the one-hot targets, Adam optimiser,
# and accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer="adam",
    metrics=["accuracy"],
)
# Train for 100 epochs; verbose=2 logs one line per epoch.
model.fit(X, y, epochs=100, verbose=2)

Epoch 1/100
1s - loss: 3.6125 - acc: 0.1153
Epoch 2/100
0s - loss: 3.5081 - acc: 0.1880
Epoch 3/100
0s - loss: 3.1857 - acc: 0.1905
Epoch 4/100
0s - loss: 3.0740 - acc: 0.1905
Epoch 5/100
0s - loss: 3.0267 - acc: 0.1905
Epoch 6/100
0s - loss: 3.0005 - acc: 0.1905
Epoch 7/100
0s - loss: 2.9901 - acc: 0.1905
Epoch 8/100
0s - loss: 2.9704 - acc: 0.1905
Epoch 9/100
0s - loss: 2.9577 - acc: 0.1905
Epoch 10/100
0s - loss: 2.9424 - acc: 0.1905
Epoch 11/100
0s - loss: 2.9243 - acc: 0.1905
Epoch 12/100
0s - loss: 2.9063 - acc: 0.1905
Epoch 13/100
0s - loss: 2.8866 - acc: 0.2256
Epoch 14/100
0s - loss: 2.8639 - acc: 0.1980
Epoch 15/100
0s - loss: 2.8177 - acc: 0.2155
Epoch 16/100
0s - loss: 2.7867 - acc: 0.2080
Epoch 17/100
0s - loss: 2.7405 - acc: 0.2456
Epoch 18/100
0s - loss: 2.6941 - acc: 0.2556
Epoch 19/100
0s - loss: 2.6604 - acc: 0.2381
Epoch 20/100
0s - loss: 2.6526 - acc: 0.2556
Epoch 21/100
0s - loss: 2.5810 - acc: 0.2907
Epoch 22/100
0s - loss: 2.5429 - acc: 0.2807
Epoch 23/100
0s - l

<keras.callbacks.History at 0x16e3adb3f98>

In [50]:
# Persist the trained network so it can be reloaded without retraining.
model.save('char_level_lstm.h5')
from pickle import dump
# Persist the character -> id mapping next to the model. The context manager
# makes sure the file is flushed and closed; the previous bare open() call
# left the handle open.
with open("mapping.pkl", "wb") as mapping_file:
    dump(char_map, mapping_file)

In [55]:
from keras.preprocessing.sequence import pad_sequences

def generate_seq(model, mapping, n_chars, seed_text, seq_len):
    """Extend ``seed_text`` by ``n_chars`` characters predicted by ``model``.

    At every step the last ``seq_len`` characters of the text generated so far
    are one-hot encoded, fed to the model, and the highest-scoring character
    is appended.

    Parameters
    ----------
    model : object
        Must provide ``predict(x, verbose=0)`` that accepts an array of shape
        ``(1, seq_len, len(mapping))`` and returns per-class scores of shape
        ``(1, len(mapping))``.
    mapping : dict
        Character -> integer id. Ids are used as one-hot positions.
    n_chars : int
        Number of characters to generate.
    seed_text : str
        Starting text. Every character must be a key of ``mapping``.
    seq_len : int
        Number of context characters fed to the model at each step.

    Returns
    -------
    str
        ``seed_text`` followed by the ``n_chars`` generated characters.

    Raises
    ------
    KeyError
        If the text contains a character that is not in ``mapping``.
    """
    reverse_map = {index: char for char, index in mapping.items()}
    vocab_size = len(mapping)
    in_text = seed_text

    for _ in range(n_chars):
        # Only the trailing seq_len characters ever reach the model, so there
        # is no need to re-encode the whole (growing) text on every step.
        window = [mapping[char] for char in in_text[-seq_len:]]
        # Left-pad short inputs with id 0, matching the default behaviour of
        # Keras' pad_sequences that this code used previously.
        window = [0] * (seq_len - len(window)) + window

        # Build the (1, seq_len, vocab_size) one-hot input directly. The old
        # to_categorical + reshape(1, shape[0], shape[1]) sequence was only
        # correct on Keras versions whose to_categorical flattens its input.
        encoded = np.zeros((1, seq_len, vocab_size), dtype="float32")
        encoded[0, np.arange(seq_len), window] = 1.0

        # argmax over predict() replaces predict_classes(), which newer Keras
        # releases no longer provide.
        scores = model.predict(encoded, verbose=0)
        y_hat = int(np.argmax(scores[0]))

        in_text += reverse_map[y_hat]

    return in_text

In [57]:
# Continue the rhyme for 30 characters from a 10-character seed; seq_len
# matches the 10-character context the model was trained on.
print(generate_seq(model, char_map, n_chars=30, seed_text="Sing a son", seq_len=10))

Sing a song of sixpence, A pocket full o
