# Importing Libraries

In [53]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Activation
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import datasets, layers, models

import warnings
warnings.filterwarnings("ignore")

# Importing Dataset

In [54]:
PATH_DATASET = "dataset/shakespeare.txt"

In [55]:
text = open(PATH_DATASET, 'rb').read().decode(encoding='utf-8')
print(f'Length of text: {len(text)} Charcaters')

Length of text: 1115394 Charcaters


In [56]:
text[:250]

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you know Caius Marcius is chief enemy to the people.\n'

In [57]:
vocab = sorted(set(text))
print(f'Unique Characters are {len(vocab)}')

Unique Characters are 65


# Process the Text

### Vectorize the text

In [58]:
char_to_index = {u: i for i, u in enumerate(vocab)}
char_to_index

{'\n': 0,
 ' ': 1,
 '!': 2,
 '$': 3,
 '&': 4,
 "'": 5,
 ',': 6,
 '-': 7,
 '.': 8,
 '3': 9,
 ':': 10,
 ';': 11,
 '?': 12,
 'A': 13,
 'B': 14,
 'C': 15,
 'D': 16,
 'E': 17,
 'F': 18,
 'G': 19,
 'H': 20,
 'I': 21,
 'J': 22,
 'K': 23,
 'L': 24,
 'M': 25,
 'N': 26,
 'O': 27,
 'P': 28,
 'Q': 29,
 'R': 30,
 'S': 31,
 'T': 32,
 'U': 33,
 'V': 34,
 'W': 35,
 'X': 36,
 'Y': 37,
 'Z': 38,
 'a': 39,
 'b': 40,
 'c': 41,
 'd': 42,
 'e': 43,
 'f': 44,
 'g': 45,
 'h': 46,
 'i': 47,
 'j': 48,
 'k': 49,
 'l': 50,
 'm': 51,
 'n': 52,
 'o': 53,
 'p': 54,
 'q': 55,
 'r': 56,
 's': 57,
 't': 58,
 'u': 59,
 'v': 60,
 'w': 61,
 'x': 62,
 'y': 63,
 'z': 64}

In [59]:
index_to_char = np.array(vocab)
index_to_char

array(['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
       'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
      dtype='<U1')

In [60]:
text_as_int = np.array([char_to_index[c] for c in text])
text_as_int

array([18, 47, 56, ..., 45,  8,  0])

### Mapping of First 30 words

In [61]:
print(f'{text[:30]} ------ charcaters mapped to integar ----> {text_as_int[:30]}')

First Citizen:
Before we proce ------ charcaters mapped to integar ----> [18 47 56 57 58  1 15 47 58 47 64 43 52 10  0 14 43 44 53 56 43  1 61 43
  1 54 56 53 41 43]


# Creating Training Exampels and Targets

In [62]:
SEQ_LENGTH = 40
STEP_SIZE = 3

sentences = []
next_char = []

In [63]:
for i in range(0, len(text) - SEQ_LENGTH, STEP_SIZE):
    sentences.append(text[i : i + SEQ_LENGTH])
    next_char.append(text[i + SEQ_LENGTH])

In [64]:
x = np.zeros((len(sentences), SEQ_LENGTH, len(vocab)))
y = np.zeros((len(sentences), len(vocab)))

for i, satz in enumerate(sentences):
    for t, char in enumerate(satz):
        x[i, t, char_to_index[char]] = 1
    y[i, char_to_index[next_char[i]]] = 1

# Model Training

### Creating Batches

In [65]:
model = Sequential()

In [66]:
model.add(LSTM(128, input_shape=(SEQ_LENGTH, len(vocab))))
model.add(Dense(len(vocab)))
model.add(Activation('softmax'))

In [67]:
model.compile('adam', loss="categorical_crossentropy")
model.summary()

In [68]:
epochs_per_example = int(len(text)/SEQ_LENGTH)

In [69]:
model.fit(x, y, batch_size=256, epochs=epochs_per_example)

Epoch 1/27884
[1m1453/1453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 22ms/step - loss: 3.3680
Epoch 2/27884
[1m1453/1453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 22ms/step - loss: 3.3188
Epoch 3/27884
[1m1453/1453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 22ms/step - loss: 3.3164
Epoch 4/27884
[1m 127/1453[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m29s[0m 22ms/step - loss: 3.3270

KeyboardInterrupt: 

# Making Predictions

In [None]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random

def generate_text(length, temperature):
    start_index = random.randint(0, len(text) - SEQ_LENGTH - 1)
    generated = ""
    sentence = text[start_index : start_index + SEQ_LENGTH]
    generated += sentence
    for i in range(length):
        x_predictions = np.zeros((1, SEQ_LENGTH, len(vocab)))
        for t, char in enumerate(sentence):
            x_predictions[0, t, char_to_index[char]] = 1

        predictions = model.predict(x_predictions, verbose=0)[0]
        next_index = sample(predictions, temperature)
        next_character = index_to_char[next_index]

        generated += next_character
        sentence = sentence[1:] + next_character
    return generated

In [None]:
print(generate_text(300, 0.2))

 promise of his sister, and what else,
T                            i             s     r   t  e                 e     e  e           t   e                            e             ee    e     e e  t   ee                             e e                                    e      t                                          e e        e      


In [None]:
print(generate_text(300, 0.4))

oo,
And not till then.

HORTENSIO:

KATH  rsenhesita e erohhno  r   nee  e e  t thn  e eanawlri      ee   e  e

  it ed      s   n oeo   e o t   ns n  o e e I     o  ei h tnrnh el  e e rt 
 
 dse  n
   o hi  
    S he nr Leir    il oreo   ho   hoh   rsot  
eh a roestg     el eh o   
  tot  srosee   de 
  e  eeee  e  y
ds  i  o   . e r th 


# Saving the Model

In [None]:
model.save('model/textgeneration.h5')