In [49]:
import numpy as np
from keras.utils import to_categorical
from pickle import dump
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from pickle import load
from keras.preprocessing.sequence import pad_sequences

In [2]:
def load_doc(filename):
    with open(filename, 'r') as f:
        text = f.read()
        
    f.close()
    return text
        

In [3]:
raw_text = load_doc('rhyme.txt')

In [4]:
raw_text

"Sing a song of sixpence,\nA pocket full of rye.\nFour and twenty blackbirds,\nBaked in a pie.\n\nWhen the pie was opened\nThe birds began to sing;\nWasn't that a dainty dish,\nTo set before the king.\n\nThe king was in his counting house,\nCounting out his money;\nThe queen was in the parlour,\nEating bread and honey.\n\nThe maid was in the garden,\nHanging out the clothes,\nWhen down came a blackbird\nAnd pecked off her nose.\n"

In [5]:
tokens = raw_text.split()
raw_text = ' '.join(tokens)

In [6]:
length = 10
sequences = list()

for i in range(length, len(raw_text)):
    seq = raw_text[i-length : i+1]
    
    sequences.append(seq)

print('Total sequences %d' %len(sequences))

Total sequences 399


In [7]:
## Saving sequences

def save_doc(lines, filename):
    data = '\n'.join(sequences)
    file = open(filename, 'w')
    file.write(data)
    file.close

In [8]:
save_doc(sequences, 'char_sequences.txt')

In [9]:
in_filename = 'char_sequences.txt'

raw_text = load_doc(in_filename)

lines = raw_text.split('\n')

In [10]:
lines

['Sing a song',
 'ing a song ',
 'ng a song o',
 'g a song of',
 ' a song of ',
 'a song of s',
 ' song of si',
 'song of six',
 'ong of sixp',
 'ng of sixpe',
 'g of sixpen',
 ' of sixpenc',
 'of sixpence',
 'f sixpence,',
 ' sixpence, ',
 'sixpence, A',
 'ixpence, A ',
 'xpence, A p',
 'pence, A po',
 'ence, A poc',
 'nce, A pock',
 'ce, A pocke',
 'e, A pocket',
 ', A pocket ',
 ' A pocket f',
 'A pocket fu',
 ' pocket ful',
 'pocket full',
 'ocket full ',
 'cket full o',
 'ket full of',
 'et full of ',
 't full of r',
 ' full of ry',
 'full of rye',
 'ull of rye.',
 'll of rye. ',
 'l of rye. F',
 ' of rye. Fo',
 'of rye. Fou',
 'f rye. Four',
 ' rye. Four ',
 'rye. Four a',
 'ye. Four an',
 'e. Four and',
 '. Four and ',
 ' Four and t',
 'Four and tw',
 'our and twe',
 'ur and twen',
 'r and twent',
 ' and twenty',
 'and twenty ',
 'nd twenty b',
 'd twenty bl',
 ' twenty bla',
 'twenty blac',
 'wenty black',
 'enty blackb',
 'nty blackbi',
 'ty blackbir',
 'y blackbird',
 ' black

In [11]:
chars = sorted(list(set(raw_text)))

In [12]:
len(chars)

38

In [13]:
mapping = dict((c, i) for i, c in enumerate(chars))

In [14]:
mapping

{'\n': 0,
 ' ': 1,
 "'": 2,
 ',': 3,
 '.': 4,
 ';': 5,
 'A': 6,
 'B': 7,
 'C': 8,
 'E': 9,
 'F': 10,
 'H': 11,
 'S': 12,
 'T': 13,
 'W': 14,
 'a': 15,
 'b': 16,
 'c': 17,
 'd': 18,
 'e': 19,
 'f': 20,
 'g': 21,
 'h': 22,
 'i': 23,
 'k': 24,
 'l': 25,
 'm': 26,
 'n': 27,
 'o': 28,
 'p': 29,
 'q': 30,
 'r': 31,
 's': 32,
 't': 33,
 'u': 34,
 'w': 35,
 'x': 36,
 'y': 37}

In [15]:
sequences = list()

for line in lines:
    print(line)
    encoded_seq = [mapping[char] for char in line]
    
    sequences.append(encoded_seq)

Sing a song
ing a song 
ng a song o
g a song of
 a song of 
a song of s
 song of si
song of six
ong of sixp
ng of sixpe
g of sixpen
 of sixpenc
of sixpence
f sixpence,
 sixpence, 
sixpence, A
ixpence, A 
xpence, A p
pence, A po
ence, A poc
nce, A pock
ce, A pocke
e, A pocket
, A pocket 
 A pocket f
A pocket fu
 pocket ful
pocket full
ocket full 
cket full o
ket full of
et full of 
t full of r
 full of ry
full of rye
ull of rye.
ll of rye. 
l of rye. F
 of rye. Fo
of rye. Fou
f rye. Four
 rye. Four 
rye. Four a
ye. Four an
e. Four and
. Four and 
 Four and t
Four and tw
our and twe
ur and twen
r and twent
 and twenty
and twenty 
nd twenty b
d twenty bl
 twenty bla
twenty blac
wenty black
enty blackb
nty blackbi
ty blackbir
y blackbird
 blackbirds
blackbirds,
lackbirds, 
ackbirds, B
ckbirds, Ba
kbirds, Bak
birds, Bake
irds, Baked
rds, Baked 
ds, Baked i
s, Baked in
, Baked in 
 Baked in a
Baked in a 
aked in a p
ked in a pi
ed in a pie
d in a pie.
 in a pie. 
in a pie. W
n a pie. Wh
 a p

In [16]:
sequences

[[12, 23, 27, 21, 1, 15, 1, 32, 28, 27, 21],
 [23, 27, 21, 1, 15, 1, 32, 28, 27, 21, 1],
 [27, 21, 1, 15, 1, 32, 28, 27, 21, 1, 28],
 [21, 1, 15, 1, 32, 28, 27, 21, 1, 28, 20],
 [1, 15, 1, 32, 28, 27, 21, 1, 28, 20, 1],
 [15, 1, 32, 28, 27, 21, 1, 28, 20, 1, 32],
 [1, 32, 28, 27, 21, 1, 28, 20, 1, 32, 23],
 [32, 28, 27, 21, 1, 28, 20, 1, 32, 23, 36],
 [28, 27, 21, 1, 28, 20, 1, 32, 23, 36, 29],
 [27, 21, 1, 28, 20, 1, 32, 23, 36, 29, 19],
 [21, 1, 28, 20, 1, 32, 23, 36, 29, 19, 27],
 [1, 28, 20, 1, 32, 23, 36, 29, 19, 27, 17],
 [28, 20, 1, 32, 23, 36, 29, 19, 27, 17, 19],
 [20, 1, 32, 23, 36, 29, 19, 27, 17, 19, 3],
 [1, 32, 23, 36, 29, 19, 27, 17, 19, 3, 1],
 [32, 23, 36, 29, 19, 27, 17, 19, 3, 1, 6],
 [23, 36, 29, 19, 27, 17, 19, 3, 1, 6, 1],
 [36, 29, 19, 27, 17, 19, 3, 1, 6, 1, 29],
 [29, 19, 27, 17, 19, 3, 1, 6, 1, 29, 28],
 [19, 27, 17, 19, 3, 1, 6, 1, 29, 28, 17],
 [27, 17, 19, 3, 1, 6, 1, 29, 28, 17, 24],
 [17, 19, 3, 1, 6, 1, 29, 28, 17, 24, 19],
 [19, 3, 1, 6, 1, 29, 28, 17, 

In [17]:
sequences = np.array(sequences)

X, y = sequences[:,:-1], sequences[:, -1]

In [18]:
X

array([[12, 23, 27, ..., 32, 28, 27],
       [23, 27, 21, ..., 28, 27, 21],
       [27, 21,  1, ..., 27, 21,  1],
       ...,
       [28, 20, 20, ...,  1, 27, 28],
       [20, 20,  1, ..., 27, 28, 32],
       [20,  1, 22, ..., 28, 32, 19]])

In [19]:
vocab_size = len(mapping)

In [20]:
vocab_size

38

In [21]:
sequences = [to_categorical(x, num_classes= vocab_size) for x in X]
X = np.array(sequences)
y = to_categorical(y, num_classes= vocab_size )

In [22]:
X.shape

(399, 10, 38)

In [33]:
def define_model(X):
    model = Sequential()
    model.add(LSTM(75, input_shape = (X.shape[1], X.shape[2])))
    
    model.add(Dense(vocab_size, activation = 'softmax'))
    
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics=['accuracy'])
    
    model.summary()
    
    return model

In [35]:
model = define_model(X)
model.fit(X, y, epochs = 100, verbose = 2)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 75)                34200     
_________________________________________________________________
dense_3 (Dense)              (None, 38)                2888      
Total params: 37,088
Trainable params: 37,088
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
 - 1s - loss: 3.6158 - acc: 0.1128
Epoch 2/100
 - 0s - loss: 3.5148 - acc: 0.1905
Epoch 3/100
 - 0s - loss: 3.2560 - acc: 0.1905
Epoch 4/100
 - 0s - loss: 3.0715 - acc: 0.1905
Epoch 5/100
 - 0s - loss: 3.0245 - acc: 0.1905
Epoch 6/100
 - 0s - loss: 3.0013 - acc: 0.1905
Epoch 7/100
 - 0s - loss: 2.9806 - acc: 0.1905
Epoch 8/100
 - 0s - loss: 2.9732 - acc: 0.1905
Epoch 9/100
 - 0s - loss: 2.9581 - acc: 0.1905
Epoch 10/100
 - 0s - loss: 2.9438 - acc: 0.1905
Epoch 11/100
 - 0s - loss: 2.9226 - acc: 0.1905
Epoch 12/100
 -

<keras.callbacks.History at 0x1f0f66ba358>

In [36]:
model.save('model.h5')

In [37]:
dump(mapping, open('mapping.pkl', 'wb'))

In [38]:
model = load_model('model.h5')

In [40]:
print(model)

<keras.engine.sequential.Sequential object at 0x000001F0F920EB38>


In [43]:
mapping = load(open('mapping.pkl', 'rb'))

In [None]:
# encoded = [mapping[char] for char in in_text]
# encoded = to_categorical(encoded, num_classes=len(mapping))
# encoded.reshape(1, encoded.shape[0], encoded.shape[1])
# yhat = model.predict_classes(encoed, verbose =0)


# out_char = ''
# for char, index in mapping.items():
#     if index == yhat:
#         out_char = char
#         break

In [63]:
def generate_seq(model, mapping, seq_length, seed_text, n_chars):
    in_text = seed_text
    
    for _ in range(n_chars):
        encoded = [mapping[char] for char in in_text]
        
        encoded = pad_sequences([encoded], maxlen= seq_length, truncating = 'pre')
        encoded = to_categorical(encoded, num_classes=len(mapping))
        #print(encoded.shape)
        #encoded = encoded.reshape(1, encoded.shape[0], encoded.shape[1])
        yhat = model.predict_classes(encoded, verbose =0)
        
        out_char = ''
        for char, index in mapping.items():
            if index == yhat:
                out_char = char
                print(out_char)
                break
                
        in_text += out_char
    
    return in_text
        

In [64]:
print(generate_seq(model, mapping, 10, 'Sing a son', 20))

g
 
o
f
 
s
i
x
p
e
n
c
e
,
 
A
 
p
o
c
Sing a song of sixpence, A poc
