In [1]:
#http://adventuresinmachinelearning.com/keras-lstm-tutorial/
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.models import model_from_json
from keras.models import load_model
import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Model Setting - char matrix

In [2]:
import io
# Read the whole haiku corpus into memory; the context manager guarantees the
# file handle is closed even if reading fails.
filename = "haikus.txt"
with io.open(filename, encoding='utf-8') as corpus_file:
    raw_text = corpus_file.read()
# All training windows are cut from the lowercased text.
text = raw_text.lower()
# Sorted vocabulary of distinct characters.
# NOTE(review): the vocabulary is built from `raw_text` (before lowercasing) while
# the training data uses `text`; kept as-is so indices stay compatible with any
# previously saved mapping/model -- confirm this is intended.
chars = sorted(set(raw_text))

In [3]:
# Show only the beginning of the corpus: printing the full text floods the
# notebook output and bloats the saved file.
print(text[:500])

fishing boats
colors of
the rainbow

ash wednesday
trying to remember
my dream

japanese quake
this godless spring
tsunami

snowy morn
pouring another cup
of black coffee

shortest day
flames dance
in the oven

haze
half the horse hidden
behind the house

low sun
the lady in red
on high heels

advent
the passing stranger
farts

tarn
a bubble in
the ice

snowflakes
new asphalt
in the holes

crystal night
gusts of rain
outside

rain
the sound of a horse galloping
through leaves

winter stars
suddenly a whiff
of perfume

hungry
half of the moon
hidden

rain
another leaf
down

sharia
the sound of one hand
clapping

the sound of geese
drowned by the sound of the train
this morning

autumn sun
my shadow over
tombstones

fly fishing
the sound of the wind
in the reel

december
a long shadow
joins another

end of path
snowflakes melting
on the pond

morning frost
she leaves
first

evening walk
smell of tar between
pines

dachau
a blue sky above
the chimneys

deep autumn
the apple colder
in the 

In [4]:
# Lookup tables between characters and their integer ids (position in `chars`).
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [5]:
# Slide a window of `maxlen` characters over the text, `step` characters at a time.
maxlen = 20
step = 1
window_starts = range(0, len(text) - maxlen, step)
# X: each window of `maxlen` consecutive characters
sentences = [text[start:start + maxlen] for start in window_starts]
# y: the single character that follows each window
next_chars = [text[start + maxlen] for start in window_starts]
print('no. of sequences:', len(sentences))

no. of sequences: 224544


In [6]:
# Peek at a few training windows, with newlines flattened to spaces for display.
for window in sentences[10:15]:
    flattened = ' '.join(window.split('\n'))
    print(flattened)

ats colors of the ra
ts colors of the rai
s colors of the rain
 colors of the rainb
colors of the rainbo


In [7]:
# Target characters that follow the windows sentences[10:15].
for following_char in next_chars[10:15]:
    print(following_char)

i
n
b
o
w


In [8]:
# Example (window, target) training pairs from the start of the corpus.
for idx in range(5):
    window, target = sentences[idx], next_chars[idx]
    print('sentence:', window)
    print('next char:', target)

sentence: fishing boats
colors
next char:  
sentence: ishing boats
colors 
next char: o
sentence: shing boats
colors o
next char: f
sentence: hing boats
colors of
next char: 

sentence: ing boats
colors of

next char: t


In [9]:
#vectorizing into matrix - one hot encoding of the characters
# X has shape (len(sentences), maxlen, len(chars)); y has shape (len(sentences), len(chars)).
# The builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    # Set one cell per timestep: the column of the character at position t.
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    # Target: the one-hot row of the character that follows the window.
    y[i, char_indices[next_chars[i]]] = 1
X.shape

(224544, 20, 64)

## Model Setting - GRU architecture

In [10]:
# Two stacked GRU layers with dropout, followed by a softmax over the vocabulary.
# (Extra LSTM(512, return_sequences=True) + Dropout(0.20) stages were tried
# between the two GRUs and are left out here.)
model = Sequential([
    # First recurrent layer returns the full sequence so the next GRU can consume it.
    GRU(512, input_shape=(maxlen, len(chars)), return_sequences=True),
    Dropout(0.20),
    # Second recurrent layer keeps only its final state.
    GRU(256, return_sequences=False),
    Dropout(0.20),
    # One output unit per character, normalised into probabilities.
    Dense(len(chars)),
    Activation('softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
# compile or load weights then compile depending

# The total number of parameters in a GRU layer equals 3 * (n^2 + n*m + n),
# where m is the input dimension and n is the output dimension: there are three
# sets of operations, each requiring weight matrices of these sizes.
# e.g. m = 64, n = 512 gives 3 * (262144 + 32768 + 512) = 886272.
#https://arxiv.org/ftp/arxiv/papers/1701/1701.05923.pdf

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 20, 512)           886272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 512)           0         
_________________________________________________________________
gru_2 (GRU)                  (None, 256)               590592    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
Total params: 1,493,312
Trainable params: 1,493,312
Non-trainable params: 0
_________________________________________________________________


## Model Training

In [20]:
# Training call, left commented out: the cells further down load a saved model
# ('haiku_30.h5') instead of fitting one here. Uncomment to train from scratch.
# history = model.fit(X, y, batch_size=256, epochs=10)

In [21]:
# Persistence step, left commented out: it would save the model and the
# character->index table (`char_indices`) after training.
# NOTE(review): the paths here ('drive/haiku_10.h5', 'drive/mapping_10.pkl') differ
# from the files loaded later ('haiku_30.h5', 'mapping.pkl') -- confirm which
# artifacts belong together.
# # save the model to file
# model.save('drive/haiku_10.h5')

# from pickle import dump
# # save the mapping
# dump(char_indices, open('drive/mapping_10.pkl', 'wb'))
# #https://machinelearningmastery.com/develop-character-based-neural-language-model-keras/

## Loading and testing

In [11]:
from pickle import load
from keras.models import load_model
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
#https://machinelearningmastery.com/develop-character-based-neural-language-model-keras/

In [12]:
# Rebind `model` to the network stored in 'haiku_30.h5'; this replaces the
# model object built in the architecture cell above.
model = load_model('haiku_30.h5')

In [13]:
# load the mapping
# The context manager closes the file handle once unpickling is done.
# NOTE: pickle can execute arbitrary code while loading -- only open mapping
# files that come from a trusted source.
with open('mapping.pkl', 'rb') as mapping_file:
    mapping = load(mapping_file)

In [14]:
def sample(a, temperature=1.0):
    """Draw one index from a probability vector after temperature rescaling.

    Parameters
    ----------
    a : array-like of float
        Probabilities (e.g. one softmax output row). Zero entries are allowed
        and are never selected.
    temperature : float, default 1.0
        Values below 1 sharpen the distribution towards the most likely index;
        values above 1 flatten it.

    Returns
    -------
    int
        The sampled index, in ``range(len(a))``.
    """
    # Work in float64: float32 rounding can make the renormalised probabilities
    # miss the sum-to-one tolerance that np.random.choice enforces.
    probs = np.asarray(a, dtype=np.float64)
    # log(0) = -inf is the intended result for impossible indices; silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Shift by the maximum so the largest weight is exp(0) = 1; without this every
    # weight can underflow to 0 at low temperature and the division yields NaN.
    logits -= np.max(logits)
    weights = np.exp(logits)
    dist = weights / np.sum(weights)
    return np.random.choice(len(dist), p=dist)

In [15]:
import random, sys
def generate_from_model(model, diversities=(0.2, 0.5, 1.0, 1.2), max_lines=3, max_chars=120):
    """Print text generated by `model` from one random seed, once per diversity.

    Relies on the notebook globals `text`, `maxlen`, `mapping` and `sample`.

    Parameters
    ----------
    model : object with ``predict(x, verbose=0)``
        Called with a one-hot array of shape (1, maxlen, len(mapping)); the
        first row of its result is used as the next-character probabilities.
    diversities : iterable of float
        Temperatures passed to `sample`, one generation run per value.
    max_lines : int
        Generation stops once more than this many newlines were produced.
    max_chars : int
        Generation stops once more than this many characters were produced.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    # Encode and decode with the same table (`mapping`) that sizes the input,
    # instead of mixing it with a vocabulary rebuilt from the current corpus.
    index_to_char = {index: char for char, index in mapping.items()}

    # One random seed window, shared by every diversity so the runs are comparable.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index : start_index + maxlen]

    for diversity in diversities:
        print()
        print('----- diversity:', diversity)

        sentence = seed
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(sentence)

        tot_lines = 0  # newlines generated so far
        tot_chars = 0  # characters generated so far

        while tot_lines <= max_lines and tot_chars <= max_chars:
            # One-hot encode the current window.
            x = np.zeros((1, maxlen, len(mapping)))
            for t, char in enumerate(sentence):
                x[0, t, mapping[char]] = 1.

            preds = model.predict(x, verbose=0)[0]  # first (only) row of the batch
            next_index = sample(preds, diversity)
            next_char = index_to_char[next_index]

            tot_chars += 1
            if next_char == '\n':
                tot_lines += 1
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [16]:
# Generate sample text with the currently bound `model`, once per diversity setting.
generate_from_model(model)#30 epochs


----- diversity: 0.2
----- Generating with seed: "stove
november morni"
stove
november morning

sunset and stare
a flag cerepart back


----- diversity: 0.5
----- Generating with seed: "stove
november morni"
stove
november morning

santaso day
my wife buddhason silence


----- diversity: 1.0
----- Generating with seed: "stove
november morni"
stove
november morning

somewhere
into traffic


----- diversity: 1.2
----- Generating with seed: "stove
november morni"
stove
november morning

rahmailed chusid
a snow



## Productionising

In [40]:
from keras.models import load_model
from pickle import load
import numpy as np
import pandas as pd
import random, sys
import re

In [41]:
#load the model
model = load_model('haiku_30.h5')
# load the mapping (character -> index)
# The context manager closes the file handle once unpickling is done.
# NOTE: pickle can execute arbitrary code while loading -- only open mapping
# files that come from a trusted source.
with open('mapping.pkl', 'rb') as mapping_file:
    mapping = load(mapping_file)
# Inverse table (index -> character), used to decode sampled indices.
inv_map = {v: k for k, v in mapping.items()}

In [42]:
#temperature to prevent selection of top prob
def sample(a, temperature=1.0):
    """Draw one index from a probability vector after temperature rescaling.

    Parameters
    ----------
    a : array-like of float
        Probabilities (e.g. one softmax output row). Zero entries are allowed
        and are never selected.
    temperature : float, default 1.0
        Values below 1 sharpen the distribution towards the most likely index;
        values above 1 flatten it.

    Returns
    -------
    int
        The sampled index, in ``range(len(a))``.
    """
    # Work in float64: float32 rounding can make the renormalised probabilities
    # miss the sum-to-one tolerance that np.random.choice enforces.
    probs = np.asarray(a, dtype=np.float64)
    # log(0) = -inf is the intended result for impossible indices; silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Shift by the maximum so the largest weight is exp(0) = 1; without this every
    # weight can underflow to 0 at low temperature and the division yields NaN.
    logits -= np.max(logits)
    weights = np.exp(logits)
    dist = weights / np.sum(weights)
    return np.random.choice(len(dist), p=dist)

In [43]:
#spell correct
from nltk.tokenize import TweetTokenizer
from autocorrect import spell
def cleaning(sentence):
    """Spell-correct a sentence token by token.

    The sentence is split with a TweetTokenizer; every token except the
    punctuation marks ',', '!', '.' and ':' is passed through `spell`, and the
    resulting tokens are joined back together with single spaces.
    """
    untouched = (',', '!', '.', ':')
    tokens = TweetTokenizer().tokenize(sentence)
    return ' '.join(tok if tok in untouched else spell(tok) for tok in tokens)

In [64]:
# Interactive generation: seed the model with a user phrase, stream the generated
# characters, then print a spell-corrected version.
usr_input = input("Your input is: ")
# Keep letters, digits, spaces, newlines and dots only. Raw string so `\.` is a
# regex escape rather than an invalid Python string escape.
usr_input = re.sub(r'[^a-zA-Z0-9 \n\.]', '', usr_input).lower()
# Drop anything the model has no index for (e.g. digits missing from the
# training vocabulary); encoding such a character would raise KeyError.
usr_input = ''.join(ch for ch in usr_input if ch in mapping)

seq_len = 20  # window length fed to the model
# Truncate to the window length and left-pad with spaces.
# NOTE(review): assumes ' ' is a key of `mapping` -- confirm.
sentence = usr_input[:seq_len].rjust(seq_len)
generated = sentence
sys.stdout.write(generated)

tot_lines = 0  # newlines generated so far
tot_chars = 0  # characters generated so far
# Stop after more than 3 newlines or more than 120 generated characters.
while tot_lines <= 3 and tot_chars <= 120:
    # One-hot encode the current window.
    x = np.zeros((1, seq_len, len(mapping)))
    for t, char in enumerate(sentence):
        x[0, t, mapping[char]] = 1.

    preds = model.predict(x, verbose=0)[0]
    next_index = sample(preds, 0.5)
    next_char = inv_map[next_index]

    tot_chars += 1
    generated += next_char
    if next_char == '\n':
        tot_lines += 1
    # Slide the window: drop the oldest character, append the new one.
    sentence = sentence[1:] + next_char

    sys.stdout.write(next_char)
    sys.stdout.flush()
print ('')

# Collapse blank lines, then spell-correct each remaining line separately.
gen = generated.replace('\n\n','\n')
gen = gen.split('\n')
clean_gen = '\n'.join(cleaning(line) for line in gen)
print (clean_gen)


Your input is: hello morning star
  hello morning start

soft rain
butterflies butterfly

hello morning start
soft rain
butterflies butterfly



In [60]:
# Inspect the raw generated string (seed window plus sampled characters).
# NOTE(review): execution counts are out of order around this cell, so the
# displayed output may come from a different run than the cell above.
generated

'pect the job\n\njust f'

In [61]:
# Same string with newlines swapped for HTML line breaks.
generated.replace('\n','<br>')

'pect the job<br><br>just f'

## Generate for human/bot test

In [62]:
# Generate 10 samples, each seeded with a random window of the corpus.
for _ in range(10):
    start_index = random.randint(0, len(text) - maxlen - 1) #initiate random character from text sequence

    sentence = text[start_index : start_index + maxlen] #take a sentence of maxlen chars from the random start_index
    generated = sentence

    tot_lines = 0  # newlines generated so far
    tot_chars = 0  # characters generated so far
    # Stop after more than 3 newlines or more than 120 generated characters.
    while tot_lines <= 3 and tot_chars <= 120:
        # One-hot encode the current window; sized with `maxlen` to match the
        # seed slicing above rather than a separate hard-coded 20.
        x = np.zeros((1, maxlen, len(mapping)))
        for t, char in enumerate(sentence):
            x[0, t, mapping[char]] = 1.

        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, 0.5)
        next_char = inv_map[next_index]

        tot_chars += 1
        generated += next_char
        if next_char == '\n':
            tot_lines += 1
        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_char

    print ('')
    # Collapse blank lines and split into individual lines.
    gen = generated.replace('\n\n','\n').split('\n')
    # Skip the first line: it comes from the seed window, which usually starts
    # mid-word/mid-line. A distinct loop name avoids shadowing the outer loop.
    clean_gen = '\n'.join(cleaning(line) for line in gen[1:]) #remove first set of inputs due to incompleteness
    print (clean_gen)


late june crowded
to the reading tonight
moonless night


monday morning
a birthday silence
become statues


reflections
open window


another birthday
white curser reses


between the temple
monday morning
a blue plastic bag


autumn child
find lights up a slipping in the highway


lily dust pause on a cloud
a drifting dusk silence
the stone buddha


carved on the closed rest
to crows ward
first fireflies chicken


a skein of birds
twines only the time
Silenales changed
a spring morning


kitchen afternoon
the first light on in the sky
the new year
a blind man starts



In [63]:
# Spell-correct the most recent `generated` string as a single sentence
# (the tokenizer-based cleaning joins tokens with spaces, so line breaks are lost).
cleaning(generated)

'ed laundry kitchen afternoon the first light on in the sky the new year a blind man starts'