# Text generator

## Importing the libraries

In [1]:
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import RMSprop
import numpy as np
from matplotlib import pyplot as plt
import random
from silence_tensorflow import silence_tensorflow
import sys
import os

## Data preprocessing

### Getting the data

In [2]:
# Read the whole training corpus into memory as a single string.
filename = 'SV_works.txt'
# The context manager guarantees the handle is closed once the text is read,
# instead of staying open for the lifetime of the kernel.
# NOTE(review): the file is decoded with the platform default encoding;
# consider passing an explicit `encoding=` once the file's encoding is confirmed.
with open(filename) as fileobject:
    text = fileobject.read()
# `lines` is only used for the summary below; training works on `text`.
lines = text.split('\n')
print('Size of the dataset: %d' %(len(lines)))
print('Length of Corpus: %d' %(len(text)))

Size of the dataset: 107
Length of Corpus: 1996490


### Making character sequences

In [3]:
# Slice the lower-cased corpus into overlapping fixed-length windows.
# Each window of `maxlen` characters is one training input and the character
# immediately after it is the prediction target.
maxlen = 60  # characters per input window
step = 3     # offset between the starts of consecutive windows
text = text.lower()
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]

In [4]:
# Report how many training windows were extracted from the corpus.
sequence_count = len(sentences)
print('No. of sequences: ', sequence_count)

No. of sequences:  665477


In [5]:
# Vocabulary: every distinct character of the corpus, in sorted order so the
# character -> index mapping is stable between runs.
chars = sorted(set(text))

In [6]:
# Show the vocabulary size followed by the vocabulary itself.
vocab_size = len(chars)
print('No of unique characters: %d' %(vocab_size))
print(chars)

No of unique characters: 59
['\n', ' ', '!', '"', '$', "'", '(', ')', '+', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'â', 'æ', 'é', 'ê', 'ü', '—']


In [7]:
# Map each vocabulary character to its position in `chars`
# (the column used for that character in the one-hot encoding).
char_indices = {char: index for index, char in enumerate(chars)}

### Vectorizing sequences

In [8]:
# One-hot encode the training data.
#   X[i, t, c] is True when character `chars[c]` sits at position t of window i.
#   Y[i, c]    is True when character `chars[c]` follows window i.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
Y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    Y[i, char_indices[next_chars[i]]] = 1

## Model

In [16]:
# Character-level language model: one LSTM layer reads a one-hot encoded
# window of `maxlen` characters, and a softmax layer outputs a probability
# for each vocabulary character being the next one.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])
rmsprop = RMSprop(learning_rate=0.01)
model.compile(optimizer=rmsprop, loss='categorical_crossentropy')
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               96256     
_________________________________________________________________
dense_1 (Dense)              (None, 59)                7611      
Total params: 103,867
Trainable params: 103,867
Non-trainable params: 0
_________________________________________________________________


### Reweighted distribution generator

In [9]:
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after temperature reweighting.

    The distribution is reweighted as ``softmax(log(preds) / temperature)``:
    temperatures below 1 sharpen it towards the most likely entry, and
    temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Reweighting factor. ``0`` selects the most likely
            entry deterministically (the limit of the reweighting as the
            temperature approaches zero from above).

    Returns:
        The sampled index into `preds`.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero is undefined; use the greedy limit instead.
        return np.argmax(preds)
    # log(0) = -inf is the intended result for zero-probability entries
    # (they end up with weight exp(-inf) = 0), so silence that warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. This leaves the softmax unchanged but
    # stops every exp() from underflowing to 0 at small temperatures, which
    # would otherwise yield 0/0 = NaN probabilities.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [17]:
# Train for 40 epochs, one `fit` call per epoch, so that the model can be
# checkpointed and sampled between epochs.
loss_values = []  # training loss of each completed epoch
for epoch in range(1, 41):
    print("\nEpoch No: %d\n" %(epoch))
    history = model.fit(X, Y, batch_size=128, epochs=1)
    # `fit` ran a single epoch, so its loss history holds exactly one value.
    loss_values.append(history.history['loss'][0])

    # Generating samples costs 4 x 400 single-step predictions, so it is done
    # (together with the checkpoint) only on every fifth epoch -- the only
    # epochs whose samples are displayed.
    if epoch % 5 != 0:
        continue

    # One checkpoint per sampled epoch.
    model.save('text_gen_model.h5')

    # Pick a random window of the corpus as the seed for all temperatures.
    start_index = random.randint(0, len(text) - maxlen - 1)
    start_text = text[start_index: start_index + maxlen]
    print('\n\t\t Generating text with seed: \"'+start_text+'\"\n')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        generated_text = start_text
        print('\n\t\t Temperature: ',temperature)
        print(generated_text, end='')
        for i in range(400):
            # One-hot encode the current window of `maxlen` characters.
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: append the new character, drop the oldest.
            generated_text += next_char
            generated_text = generated_text[1:]
            print(next_char, end='')
        print()


Epoch No: 1






Epoch No: 2






Epoch No: 3






Epoch No: 4






Epoch No: 5


		 Generating text with seed: "ciety there are stages of development. behind this epic ther"


		 Temperature:  0.2
ciety there are stages of development. behind this epic there is a man become the propers of the same suppose the self. the more and of the self? the more than the self. the possible and the self? when i am the same things and the same things and the state of the senses and the same possible and the same things and the same things and the same things the world is the possible and the same things and the same time and the more and the self. the world is the

		 Temperature:  0.5
ciety there are stages of development. behind this epic there is and the soul is perception on the same different the things that it is and there is not fine more material wants to real of the man is in one of this world in the moon, and the real of the godand the fact of the problems that we general thought into

KeyboardInterrupt: 