In [1]:
# importing libraries
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io
import requests
import re



In [2]:
# Download the plain-text edition of "War and Peace" from Project Gutenberg.
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get('https://www.gutenberg.org/files/2600/2600-0.txt', timeout=60)
# Fail loudly on an HTTP error instead of silently continuing with an error page.
r.raise_for_status()
# The file declares itself as UTF-8 and starts with a byte-order mark. Without an
# explicit encoding the body was decoded with a guessed single-byte charset, which
# turned the BOM into the characters 'ï»¿' and every accented letter into mojibake.
# 'utf-8-sig' decodes the text correctly and drops the BOM.
r.encoding = 'utf-8-sig'
only_text = r.text
# Quick sanity check: show only the first 1000 characters.
print(only_text[0:1000])

ï»¿
The Project Gutenberg EBook of War and Peace, by Leo Tolstoy

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.org


Title: War and Peace

Author: Leo Tolstoy

Translators: Louise and Aylmer Maude

Posting Date: January 10, 2009 [EBook #2600]

Last Updated: January 21, 2019

Language: English

Character set encoding: UTF-8

*** START OF THIS PROJECT GUTENBERG EBOOK WAR AND PEACE ***




An Anonymous Volunteer, and David Widger






WAR AND PEACE


By Leo Tolstoy/Tolstoi





    CONTENTS


    BOOK ONE: 1805

    CHAPTER I

    CHAPTER II

    CHAPTER III

    CHAPTER IV

    CHAPTER V

    CHAPTER VI

    CHAPTER VII

    CHAPTER VIII

    CHAPTER IX

    CHAPTER X

    CHAPTER XI

    CHAPTER XII

    CHAPTER XIII


In [3]:
# Normalise the corpus in one pass: lower-case the downloaded text, then delete
# every character that lies outside the 7-bit ASCII range (code points 0-127).
changed_text = re.sub(r'[^\x00-\x7f]', r'', only_text.lower())

In [4]:
# Number of leading characters to discard. Presumably this skips the Project
# Gutenberg header and the table of contents -- TODO confirm against the text.
HEADER_LENGTH = 8118
# NOTE: this cell is not idempotent; re-running it without re-running the
# previous cell trims another HEADER_LENGTH characters.
changed_text = changed_text[HEADER_LENGTH:]

In [5]:
# Preview only the beginning of the cleaned text; displaying the whole string
# (several million characters) would bloat the notebook output.
changed_text[:1000]



In [6]:
# Report the size of the cleaned corpus in characters.
sign_count = len(changed_text)
print('number of signs:', sign_count)

number of signs: 3247227


In [7]:
# Vocabulary: every distinct character of the cleaned text, sorted so that the
# position of each character in the list is deterministic.
signs = sorted(set(changed_text))
print('total number of characters:', len(signs))

total number of characters: 54


In [8]:
# character -> integer index (its position in the sorted vocabulary)
char_indices = {sign: index for index, sign in enumerate(signs)}
# integer index -> character (the inverse mapping)
indices_char = dict(enumerate(signs))

In [9]:
# Cut the text into overlapping training windows.
# Each window is `maxlen` characters long and consecutive windows start `step`
# characters apart, so neighbouring windows share most of their content.
maxlen = 40
step = 3
window_starts = range(0, len(changed_text) - maxlen, step)
# Input chunks: the `maxlen` characters beginning at each start position.
sentences = [changed_text[start: start + maxlen] for start in window_starts]
# Targets: the single character that immediately follows each chunk.
next_signs = [changed_text[start + maxlen] for start in window_starts]
print('number of chunks:', len(sentences))

number of chunks: 1082396


In [10]:
# One-hot encode the training data.
#   x[i, t, k] is True when character t of chunk i is vocabulary entry k
#   y[i, k]    is True when the character following chunk i is vocabulary entry k
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(signs)), dtype=bool)
y = np.zeros((len(sentences), len(signs)), dtype=bool)

for i, sentence in enumerate(sentences):
    # Mark, for every position t of the chunk, the index of the character there.
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    # Mark the index of the character that comes right after this chunk.
    y[i, char_indices[next_signs[i]]] = True

In [11]:
# Build the model: a single LSTM layer reading one-hot encoded chunks of shape
# (maxlen, len(signs)), followed by a softmax over the vocabulary that predicts
# the next character.
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(signs))))
model.add(Dense(len(signs), activation='softmax'))

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by current Keras releases.
optimizer = RMSprop(learning_rate=0.01)
# categorical_crossentropy because the targets are one-hot encoded vectors
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               93696     
_________________________________________________________________
dense (Dense)                (None, 54)                6966      
Total params: 100,662
Trainable params: 100,662
Non-trainable params: 0
_________________________________________________________________


In [12]:
def sample(preds, temperature=1.0):
    """Randomly draw one class index from ``preds`` after temperature rescaling.

    Each probability is rescaled as ``exp(log(p) / temperature)`` and the result
    is renormalised to sum to 1. Temperatures below 1 sharpen the distribution
    (the most likely class is drawn more often), temperatures above 1 flatten it.
    One index is then drawn at random from the rescaled distribution; this is a
    random draw, not simply the most probable class.

    Args:
        preds: array-like of non-negative class probabilities; expected to be
            one-dimensional (the caller passes one row of the model output).
        temperature: rescaling factor. ``0`` selects the most probable class
            deterministically (the limit of the rescaling as temperature -> 0).

    Returns:
        The index of the drawn class, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would yield NaNs; use the greedy limit instead.
        return np.argmax(preds)
    # log(0) == -inf is the desired value for impossible classes (it becomes a
    # probability of exactly 0 again below), so silence the divide warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the normalised result
    # unchanged but prevents every exp() from underflowing to 0 (a 0/0 = NaN
    # distribution) when the probabilities are small and the temperature is low.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot vector marking the drawn class.
    probas = np.random.multinomial(1, preds, 1)
    # argmax of the one-hot vector recovers the drawn index
    return np.argmax(probas)

In [13]:
def on_epoch_end(epoch, _):
    """Epoch-end hook: print text generated by the current state of the model.

    A random `maxlen`-character chunk of `changed_text` is chosen as the seed.
    For each of several temperatures the seed is printed and then extended by
    400 characters, one at a time: the current window is one-hot encoded, the
    model predicts a distribution over the vocabulary, `sample` draws the next
    character from it, and the window slides forward by one character.

    Args:
        epoch: index of the epoch that just finished (printed in the header).
        _: second callback argument; unused here.
    """
    print("\n")
    print('Generating text after Epoch: %d' % epoch)

    # One random seed chunk per epoch, shared by every temperature below.
    start_index = random.randint(0, len(changed_text) - maxlen - 1)
    seed = changed_text[start_index: start_index + maxlen]

    # Lower temperatures make the sampling more conservative, higher ones
    # make it more adventurous.
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n' + 'a) temperature:', temperature)

        # Every temperature starts again from the same seed window.
        sentence = seed
        print('b) Generating with seed:' + sentence)
        sys.stdout.write(sentence)

        for _step in range(400):
            # One-hot encode the current window as a (1, maxlen, len(signs)) batch.
            x_pred = np.zeros((1, maxlen, len(signs)))
            positions = np.arange(len(sentence))
            columns = [char_indices[char] for char in sentence]
            x_pred[0, positions, columns] = 1.

            # Predicted distribution over the vocabulary for the next character.
            preds = model.predict(x_pred, verbose=0)[0]
            # Draw the next character from that distribution.
            next_char = indices_char[sample(preds, temperature)]

            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char
            # Stream the character immediately so the text appears as it is generated.
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [14]:
# Train the model.
# The callback runs on_epoch_end after every epoch, printing sample text so the
# progress of the model can be judged by eye.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
model.fit(
    x,
    y,
    batch_size=128,
    epochs=15,
    callbacks=[print_callback],
)

Train on 1082396 samples
Epoch 1/15

Generating text after Epoch: 0

a) temperature: 0.2
b) Generating with seed:and. whats it all about?

wait a bit, 
and. whats it all about?

wait a bit, she thought the rostvs and the rostvs was speaking the struck and the battery and the returned the soldiers the same of the rostvs there was not and should have been the single of the special rest the reach and the sight of the soldiers was not and the should the reach of the rostv whom and the same on the warding the soldiers was her horses and which the connect and the room with a sight of the a

a) temperature: 0.5
b) Generating with seed:and. whats it all about?

wait a bit, 
and. whats it all about?

wait a bit, intervally and his eyes and with the part and the spaces with the position which was so what is the princess which there was not to reply and replied blushing the princess place the princess the moving and they reached his side himself the sown sat silent of prince very devo the rostvs 

with an accoming and it after as she would be to clark out of the lextle with moscow
did cogiined with snight to the dispositions of the smell was to hearfar man with themselves of her at albege and with you.

i over     ill to be getri. said evening a loud, the garden wrink, with licatorh stopped for... rostv listen and that graddel
you addred her demanded basi

a) temperature: 1.2
b) Generating with seed:bred
russian voice.

mvra kuzmnichna 
bred
russian voice.

mvra kuzmnichna and blwis you having merely, medered; that saf. it became to
shete us as i ins.
she war. the pule
shoves, the nementspe. havin rugity
vasliful
and had killing the news and gaving as all itselved to spot, because ionitation or prince andrew tolingded your ronds he levi
entrusupahie
unwell?

that miding i action viels unthe spayiorce to trughore sitting down paperly. heres, agorts of provin
Epoch 5/15

Generating text after Epoch: 4

a) temperature: 0.2
b) Generating with seed:ay, and leaning his elbows on the t

bourienne say the position of the stranger of the prince that he was the same prince andrew was a strength of the state of the stopped door of the princess who had been a strange of the emperor and the staff of the staff of the position of the soldiers was the staff of the same strength of the left of the left of the commander to his consistate of the success of the soldiers and the same said of the position o

a) temperature: 0.5
b) Generating with seed:e is so old? mademoiselle
bourienne say
e is so old? mademoiselle
bourienne say i we did not know him as they said, and because a drawing who had been should expecting his hood was signed the
new head of such sofa, for the house of the reception of the significance of the presence of the terms of the day of his finge of the positions of the sight of the part of the most he tried to be a grew talking of the position of a difference of the definite fellow, and the princess th

a) temperature: 1.0
b) Generating with seed:e is so old? made

  """


ew was not a soldier of the same that the strength of the reception of the soldier with the strength of the stand of the reception of the strength of the strength of the count was a service who was all the count and the reception of the countess and the strength of the same of the deach of the reception of the princess and the princess was a service of the princess of the regiment of the

a) temperature: 0.5
b) Generating with seed:.. but theres no bringing it
back.

p
.. but theres no bringing it
back.

prince andrew, the same and angry the handstas that in the perture
and the sound of the countess and the countess and happiness to the end of the stone of the men and the first word of the meanting and from the time to the two words in other conception of the conception of the first who had come and not to be the believed and that the correction of the declors was at the end of the
dear emperor o

a) temperature: 1.0
b) Generating with seed:.. but theres no bringing it
back.

p
.. but 

shes coffiv sinting, party and attending a france, again signifitate
toloot thinking how toxishas as frrais.


a) temperature: 1.2
b) Generating with seed:everal adjutants galloped
off, and an h
everal adjutants galloped
off, and an hug same let a blinc conversation of himself
a
presentity with the strength asked, to be unnaturors eveninas every? it so now, anyday
prrich by can  no
as
kutzov princesssifskevering from yet verefs, .

why yes, 
i might be interfect longer shrippe.

pride dancts. deat
her? he only that
reason!dakr wakhed
dicne.? he pierre beaterages something beate.

close rostes, costation veant in
Epoch 13/15

Generating text after Epoch: 12

a) temperature: 0.2
b) Generating with seed:ers, i know its hard for you, but it can
ers, i know its hard for you, but it can the hands of the same the staff of the french of the third who was a french officer was the officers of the staff of the old man who had been stood the story of the french of the third who was and she was all

<tensorflow.python.keras.callbacks.History at 0x144f5e748>