In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from keras.utils import get_file
path = get_file('nietzsche.txt',origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
data = open(path).read().lower()
print('Length of the training corpus ',len(data))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Length of the training corpus  600893


In [3]:
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0 , len(data) - maxlen , step):
    sentences.append(data[i:i+maxlen])
    next_chars.append(data[i+maxlen])

print('Number of sequences',len(sentences))

chars = sorted(list(set(data)))
print('Unique characters',len(chars))
char_indices = { char:chars.index(char) for char in chars}

print('Vectorization')
x = np.zeros((len(sentences),maxlen,len(chars)) , dtype = np.bool)
y = np.zeros((len(sentences),len(chars)) , dtype = np.bool)

for i,sentence in enumerate(sentences):
    for t,char in enumerate(sentence):
        x[i,t,char_indices[char]] = 1
    y[i,char_indices[next_chars[i]]] = 1

Number of sequences 200278
Unique characters 57
Vectorization


In [4]:
from keras.layers import Dense ,Flatten , LSTM
from keras.models import Sequential
first_model = Sequential()
first_model.add(LSTM(128,input_shape = (maxlen,len(chars))))
first_model.add(Dense(len(chars),activation = 'softmax'))

In [5]:
first_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               95232     
_________________________________________________________________
dense (Dense)                (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


In [6]:
first_model.compile(loss = 'categorical_crossentropy' , optimizer = 'rmsprop' , metrics = ['acc'])

In [7]:
def sample(preds , temperature = 1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds)/temperature
    exp_preds = np.exp(preds)
    preds = exp_preds/np.sum(exp_preds)
    probas = np.random.multinomial(1,preds,1)
    return np.argmax(probas)

In [8]:
import random , sys

for epoch in range(1,60):
    print('epoch',epoch)
    first_model.fit(x,y,batch_size = 128 , epochs = 1)
    start_index = random.randint(0,len(data) - maxlen - 1)
    generated_text = data[start_index:start_index+maxlen]
    print(' Generated text ',generated_text)
    for temperature in [0.2,0.5,1.0,1.2]:
        print(' Temperature ',temperature)
        sys.stdout.write(generated_text)
        for i in range(400):
            sampled = np.zeros((1,maxlen,len(chars)))
            for t,char in enumerate(generated_text):
                sampled[0,t,char_indices[char]] = 1
            preds = first_model.predict(sampled,verbose = 0)[0]
            next_index = sample(preds,temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
            

epoch 1
 Generated text  verything in the domain of ethic is evolved, changeable,
tot
 Temperature  0.2
verything in the domain of ethic is evolved, changeable,
tot he the the the the the the here the the the serere the the the here the the the and and and and the her and in the the the the the the the the the the here and wher the the the here the the the the the the the the the and and the here the the heres and the the the the whe the here the here the here the the the sere the hand the the the presting of the here the the hered and and and and the here t
 Temperature  0.5
resting of the here the the hered and and and and the here the herict art on the mound as in the hece the for the hestere pore in the serone and conders for the fort ald hare nof the thes in alll the her the the the the remance at aus the he fand sat lose the the the gintine fore ther pont the he the the and the alled tar wonct dive so the conte comering  adt the ford panitinnce the hereng for whe hes pithing if t