<a href="https://colab.research.google.com/github/souravoo7/Deep-Learning/blob/master/Test_Run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 30 03:40:49 2020

@author: sourav
"""

"""
Generating Texts using a LSTM network
"""

import keras
import numpy as np
"""
Download the data
"""
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower() #read in the entire text file and convert them all to lower case
print('Corpus length:', len(text))

"""
Data Pre-processing/preparation
"""

# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen]) #store in the partially overlapping sequences of size maxlen
    next_chars.append(text[i + maxlen]) #store the target character 
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))

# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences),# i th sentence  (number of sentences)
              maxlen, #t th character (will be 60 in total)
              len(chars)), #character index (the character identifier using the dictionary)
             dtype=np.bool)
y = np.zeros((len(sentences), # i th sentence  (number of sentences)
              len(chars)), #character index (the character identifier using the dictionary)
             dtype=np.bool)

#encoding loop
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

'''
Build the network
'''
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, 
                      input_shape=(maxlen,
                                   len(chars))))
model.add(layers.Dense(len(chars), #the total output set of possible characters 
                       activation='softmax'))

optimizer = keras.optimizers.RMSprop(lr=0.01)

model.compile(loss='categorical_crossentropy', 
              optimizer=optimizer)
'''
Re-weigh the sample distribution: Math??
'''
def sample(preds, temperature=1.0):
    '''
    preds:The output from the network
    temperature:Entropy of the probablity distribution used for sampling
    Higher temperature implies more entropy, hence resulting in more unnnatural sampling
    Lower temperature willlead to more repititive structures
    With more convergence of the model one would expect more sane predictions
    '''
    preds = np.asarray(preds).astype('float64') # convert data type to floating
    #softmax temperature 
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    
    return np.argmax(probas)

'''
Training & Text Generation via LSTM
'''
import random
import sys

for epoch in range(1, 60):
    '''
    EPOCH = 60
    '''
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1) # get a random start index
    generated_text = text[start_index: start_index + maxlen] #get the corresponding maxlen sentence
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text) # print the generated sentence

        # We generate 400 characters
        for i in range(400):
            #encode the newly generate sentence
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            #use the trained model to get the next character    
            preds = model.predict(sampled, verbose=0)[0]
            #sample it using the temperature function
            next_index = sample(preds, temperature)
            next_char = chars[next_index]# get the new character

            generated_text += next_char # append the new character to the sequence
            generated_text = generated_text[1:] # move up one step to get the next char after this

            sys.stdout.write(next_char) # keep printing the following characters
            sys.stdout.flush()
        print()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893
Number of sequences: 200278
Unique characters: 57
Vectorization...
epoch 1
Epoch 1/1
--- Generating with seed: "logy. astrology presupposes that the heavenly
bodies are reg"
------ temperature: 0.2
logy. astrology presupposes that the heavenly
bodies are regelies the condicious of the condician and the this for the sensent of the conding and the sense of the deligion of the compain and the sense of the period of the germans of the period of the most deligious of the german as the person of the manious of the compare of the well of the compally in the such a period and and maning the sensent and the sense is a good the deligion of the germans of the g
------ temperature: 0.5
and the sense is a good the deligion of the germans of the good the grod like to be this for a such man to a man come dignest deligion of the cestain deep and and the deligic bolity of selies is mans and to be the nother