In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [5]:
import re

In [15]:
## Load the whole training corpus into memory as one string.
input_file = "sheep-data/input2.txt"
# The context manager closes the handle as soon as the text has been read,
# instead of leaving it open for the lifetime of the kernel.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used to decode the file -- confirm that matches how the corpus was saved.
with open(input_file, "r") as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

corpus length: 464826


In [17]:
## Build the character vocabulary and the two lookup tables used for
## one-hot encoding (char -> index) and decoding (index -> char).
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 84


In [18]:
## Slice the corpus into overlapping windows of `maxlen` characters, taking
## a new window every `step` characters. Each window is paired with the
## single character that follows it, which is what the model learns to predict.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 154929


In [19]:
## Vectorizing our sequences
## x[i, t, c] is True when character c sits at position t of window i;
## y[i, c] is True when character c is the one that follows window i.
## The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
## in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [20]:
## Model definition: a single 128-unit LSTM reads a one-hot encoded window of
## `maxlen` characters, and a softmax over the vocabulary scores the next one.
print('Build model...')
vocab_size = len(chars)
model = Sequential([
    LSTM(128, input_shape=(maxlen, vocab_size)),
    Dense(vocab_size),
    Activation('softmax'),
])

# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# `lr` is kept to match the Keras version this notebook was run with -- confirm.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [21]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the result. Temperatures
    below 1 sharpen the distribution, temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: Scaling factor. ``0`` means "always pick the most
            likely index" (the limit of the sharpening) and skips the
            random draw entirely.

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero below would turn every probability into NaN;
        # the well-defined limit is the greedy choice.
        return np.argmax(preds)
    # Zero probabilities legitimately map to -inf here (and back to 0 after
    # exp), so the divide-by-zero warning from log(0) is just noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the normalised result
    # unchanged mathematically but stops every exp() from underflowing to 0
    # (and the division below from becoming 0/0) at very small temperatures.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

## Epoch-end hook: print sample poetry so quality can be judged as training runs
def on_epoch_end(epoch, logs):
    """Print 400 generated characters per diversity setting.

    A single window of `maxlen` characters is picked at random from the
    corpus and used as the seed for every diversity value. The model is then
    asked repeatedly for the character following the current window, and the
    window slides forward by one character each time.

    Args:
        epoch: Epoch index as passed by the callback; only printed.
        logs: Passed by the callback; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]
    vocab_size = len(chars)

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')

        window = seed
        generated = seed
        sys.stdout.write(generated)

        for _ in range(400):
            # One-hot encode the current window: one row per position,
            # with a 1 in the column of that position's character.
            x_pred = np.zeros((1, maxlen, vocab_size))
            char_ids = [char_indices[ch] for ch in window]
            x_pred[0, np.arange(len(window)), char_ids] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            generated += next_char
            # Drop the oldest character and append the new one
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [None]:
## Training: fit on the vectorized windows. After every epoch the callback
## below prints freshly generated text so progress can be read by eye.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(
    x=x,
    y=y,
    batch_size=128,
    epochs=60,
    callbacks=[print_callback],
)

Epoch 1/60
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "eenhouse where we'll see a green heron
i"
eenhouse where we'll see a green heron
in the with the sen the word and the sear the sheep
The was the word the world the see the stree the word the mong the the shore the word the word the word the the the wark the wark the word the way the the the sen the sen the strow the word the his down the rese the sen the sang the sen the the stree the word the count the word the with the world the spree the sen the the the with the were the wor
----- diversity: 0.5
----- Generating with seed: "eenhouse where we'll see a green heron
i"
eenhouse where we'll see a green heron
is the row the were, 
The would strees the moon be the light the mine in the strow the wath shee 
Some the world where the tore the word light and the see the shalf not for the wind of the wear the more
whose the for with the lace
than the sen all the cone is cance cane not the the dinge

In single file they mogenn -snow,
And I had frinn their shord! Rusted and prinn.
The swoed him gigness, rudants down snow crifts
Above itselp nivel in palt joy when the went is
and I the folling, to knswapling for sord,
pard? Add the faces mole when three fall thriftly of welly
hull, grand the was thon in hur
girly joy, I came his sanges
along that hors, with work a piet to cometel,
Tholl stalle and bream insames st
----- diversity: 1.2
----- Generating with seed: "n they themselves
In single file they m"
n they themselves
In single file they maybing's eal
To thourol.
Godiant; to fexping that glistenpow to beet hearty,
his roonee, bund wallh oct,
Rarn Is down orropers
Stampting bundure of siketeasion fewrater for mist 'sing Chon thing:
Terh dalable dow his hine, spendshool.
Ruste winh in vife wore were
exlangward-nablues by walk Alres.urur
aak'd what my ymed from God'd 
While day, :'d lapiest. Dawsquir, so do sluli's by 
Oh bown tryfe
Epoch 5/60
  9472/154929 [>........................

In [None]:
## Here is where you can actually use the model on a given piece of text!
##
## The model's input holds exactly `maxlen` characters. The seed is validated
## up front, and only its last `maxlen` characters are fed to the model, so a
## seed of any length can be used without indexing past the input array.

seed_text = 'Under an angry moon baaaaaaaaaaaaaaaaaaaa'

# Fail early with a readable message instead of a bare KeyError in the
# middle of generation when the seed uses a character the model never saw.
unknown_chars = sorted(set(seed_text) - set(char_indices))
if unknown_chars:
    raise ValueError('seed contains characters outside the training '
                     'vocabulary: %r' % unknown_chars)

for diversity in [0.2, 0.5, 1.0, 1.2]:
    print('----- diversity:', diversity)

    generated = seed_text
    # Only the most recent `maxlen` characters fit in the model's input.
    sentence = seed_text[-maxlen:]
    print('----- Generating with seed: "' + seed_text + '"')
    sys.stdout.write(generated)

    for i in range(800):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]

        generated += next_char
        # Slide the window forward. For a full window this drops the oldest
        # character; a seed shorter than `maxlen` grows until it is full.
        sentence = (sentence + next_char)[-maxlen:]

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()