In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
import pandas as pd
import numpy as np

In [56]:
# Read the corpus as a list of lines. Plain read mode is sufficient: nothing is
# written back, and 'r+' would needlessly fail on a read-only file.
with open('shakespeare_final.txt', 'r', encoding='utf8') as fp:
    poem_data = fp.readlines()

In [57]:
# Re-join the lines and split on the '----' separator line so that each list
# element holds one poem.
# NOTE: this rebinds poem_data from a list of lines to a list of poems, so the
# cell is not idempotent; re-run the file-reading cell before re-running it.
poem_data=''.join(poem_data).split('----\n')
# Preview only the first entries instead of echoing the whole corpus.
poem_data[:2]

["From fairest creatures we desire increase,\nThat thereby beauty's rose might never die,\nBut as the riper should by time decease,\nHis tender heir might bear his memory:\nBut thou contracted to thine own bright eyes,\nFeed'st thy light's flame with self-substantial fuel,\nMaking a famine where abundance lies,\nThy self thy foe, to thy sweet self too cruel:\nThou that art now the world's fresh ornament,\nAnd only herald to the gaudy spring,\nWithin thine own bud buriest thy content,\nAnd tender churl mak'st waste in niggarding:\n  Pity the world, or else this glutton be,\n  To eat the world's due, by the grave and thee.\n\n",
 "When forty winters shall besiege thy brow,\nAnd dig deep trenches in thy beauty's field,\nThy youth's proud livery so gazed on now,\nWill be a tatter'd weed of small worth held: \nThen being asked, where all thy beauty lies,\nWhere all the treasure of thy lusty days; \nTo say, within thine own deep sunken eyes,\nWere an all-eating shame, and thriftless praise.\

In [58]:
# Number of entries produced by splitting the corpus on the separator.
len(poem_data)

689

In [59]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
from keras.layers import Bidirectional

# set seeds for reproducibility
import tensorflow as tf
from numpy.random import seed
tf.compat.v1.set_random_seed(2)
seed(1)

import pandas as pd
import numpy as np
import string, os 

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

In [60]:
# Module-level tokenizer shared by the sequence-building and text-generation
# functions. Passing `filters` replaces the Keras default filter set, so only
# the three characters in this string are stripped; punctuation and newlines
# stay attached to the tokens.
# NOTE(review): the filter string looks like a UTF-8 byte-order mark decoded
# as Latin-1 — confirm that this is the intent.
tokenizer = Tokenizer(filters='ï»¿')

def get_sequence_of_tokens(data):
    """Fit the module-level ``tokenizer`` on ``data`` and build n-gram prefixes.

    Every text in ``data`` is encoded to a list of token ids, and each prefix
    of that list with at least two tokens becomes one training sequence.

    Args:
        data: iterable of texts to tokenize.

    Returns:
        A tuple ``(input_sequences, total_words)``: the list of prefix
        sequences, and the vocabulary size (number of indexed words plus one).
    """
    # Learn the vocabulary first; the extra 1 accounts for index 0, which the
    # word index never assigns to a word.
    tokenizer.fit_on_texts(data)
    total_words = len(tokenizer.word_index) + 1

    # Encode all texts, then emit every prefix of length 2..len(encoded).
    input_sequences = [
        encoded[:end]
        for encoded in tokenizer.texts_to_sequences(data)
        for end in range(2, len(encoded) + 1)
    ]
    return input_sequences, total_words

# Build the n-gram training sequences and the vocabulary size from the poems,
# then preview the first ten sequences.
inp_sequences, total_words = get_sequence_of_tokens(poem_data)
inp_sequences[:10]

[[30, 579],
 [30, 579, 580],
 [30, 579, 580, 107],
 [30, 579, 580, 107, 239],
 [30, 579, 580, 107, 239, 2966],
 [30, 579, 580, 107, 239, 2966, 2967],
 [30, 579, 580, 107, 239, 2966, 2967, 191],
 [30, 579, 580, 107, 239, 2966, 2967, 191, 510],
 [30, 579, 580, 107, 239, 2966, 2967, 191, 510, 113],
 [30, 579, 580, 107, 239, 2966, 2967, 191, 510, 113, 92]]

In [61]:
def generate_padded_sequences(input_sequences):
    """Left-pad the sequences to equal length and split them into inputs/targets.

    The last token of each padded sequence is the prediction target; everything
    before it is the model input. Targets are one-hot encoded using the
    module-level ``total_words`` as the number of classes.

    Args:
        input_sequences: list of token-id lists of varying length.

    Returns:
        A tuple ``(predictors, label, max_sequence_len)``.
    """
    max_sequence_len = max(map(len, input_sequences))
    padded = np.array(
        pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
    )

    # All columns but the last feed the model; the last column is the target.
    predictors = padded[:, :-1]
    last_tokens = padded[:, -1]
    label = ku.to_categorical(last_tokens, num_classes=total_words)
    return predictors, label, max_sequence_len

# Turn the n-gram sequences into padded model inputs and one-hot targets.
predictors, label, max_sequence_len = generate_padded_sequences(inp_sequences)

(345,)

In [77]:
def create_model(max_sequence_len, total_words, embedding_dim=10,
                 lstm_units=256, dropout_rate=0.2):
    """Build and compile the stacked bidirectional-LSTM next-word model.

    Architecture: Embedding -> BiLSTM (returning sequences) -> Dropout ->
    BiLSTM -> Dropout -> softmax Dense over the vocabulary.

    Args:
        max_sequence_len: length of the padded training sequences, including
            the target token; the model input length is one less.
        total_words: vocabulary size, used for the embedding input dimension
            and the width of the output layer.
        embedding_dim: size of the word embedding vectors.
        lstm_units: number of units per direction in each LSTM layer.
        dropout_rate: dropout fraction applied after each recurrent layer.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy loss, Adam
        optimizer).
    """
    input_len = max_sequence_len - 1
    model = Sequential()

    # Input embedding layer; it alone defines the input shape of the network.
    model.add(Embedding(total_words, embedding_dim, input_length=input_len))

    # Recurrent stack. No input_shape is given here: the layers infer their
    # input shape from the embedding output, and a hard-coded shape on a
    # non-first layer would only be misleading.
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(lstm_units)))
    model.add(Dropout(dropout_rate))

    # Output layer: one probability per vocabulary entry.
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

# Instantiate the model for this corpus and print its layer/parameter summary.
model = create_model(max_sequence_len, total_words)
model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 345, 10)           121300    
_________________________________________________________________
bidirectional_12 (Bidirectio (None, 345, 512)          546816    
_________________________________________________________________
dropout_12 (Dropout)         (None, 345, 512)          0         
_________________________________________________________________
bidirectional_13 (Bidirectio (None, 512)               1574912   
_________________________________________________________________
dropout_13 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 12130)             6222690   
Total params: 8,465,718
Trainable params: 8,465,718
Non-trainable params: 0
____________________________________________

In [98]:
# Train for 10 epochs on the padded n-gram prefixes and their one-hot targets.
# NOTE(review): no validation data or callbacks are passed, so the imported
# EarlyStopping is unused — confirm whether that is intended.
model.fit(predictors,label,epochs=10,verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f79694c80b8>

In [80]:
from tqdm import tqdm

In [99]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Greedily extend ``seed_text`` by ``next_words`` predicted tokens.

    On every step the current text is tokenized with the module-level
    ``tokenizer``, left-padded to the model's input length, and the most
    probable next token is appended to the text.

    Args:
        seed_text: starting text.
        next_words: number of tokens to generate.
        model: trained model returning one probability per vocabulary index.
        max_sequence_len: padded sequence length used during training
            (the model input length is ``max_sequence_len - 1``).

    Returns:
        The extended text passed through ``str.title()``. Note that
        ``str.title()`` also capitalizes the letter following an apostrophe.
    """
    for _ in tqdm(range(next_words)):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')

        # Sequential.predict_classes() was removed in TensorFlow 2.6; taking the
        # argmax over the softmax output is the equivalent supported form.
        probabilities = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(probabilities, axis=-1)[0])

        # Direct reverse lookup instead of scanning word_index on every step;
        # an index without a word (e.g. padding index 0) yields an empty string.
        output_word = tokenizer.index_word.get(predicted, "")
        seed_text += " "+output_word
    return seed_text.title()

In [114]:
# Starting text handed to the generator.
seed_text="love"

In [115]:
# Generate 100 tokens from the seed; print() is used so that newlines embedded
# in the generated text are rendered as line breaks.
print(generate_text(seed_text,100,model,max_sequence_len))

100%|██████████| 100/100 [00:05<00:00, 17.07it/s]

Bhasad My Heart Is Strengthen'D, Though More Weak In Seeming;
I Love Not Less, Though Less The Show Appear;
That Love Is Merchandiz'D, Whose Rich Esteeming,
The Owner'S Tongue Doth Publish Every Where.
Our Love Was New, And Then But In The Spring,
When I Was Wont To Greet It With My Lays;
As Philomel In Summer'S Front Doth Sing,
And Stops Her Pipe In Growth Of Riper Days:
Not That The Summer Is Less Pleasant Now
Than When Her Mournful Hymns Did Hush The Night,
But That Wild Music Burthens Every Bough,
And Sweets Grown Common Lose Their Dear Delight.
 Therefore Like Her, I Sometime Hold My Tongue:
 Because I Would





In [106]:
# Persist the trained model to a directory with this name (relative to the
# current working directory).
model.save('shakespeare_bi_400_ud_loss_0.53')

INFO:tensorflow:Assets written to: shakespeare_bi_400_ud_loss_0.53/assets


In [107]:
# Pack the saved-model directory into a single zip archive.
# NOTE(review): assumes the working directory is /content, because the model
# was saved with a relative path — confirm.
!zip -r /content/shakespeare_bi_400_ud_053.zip /content/shakespeare_bi_400_ud_loss_0.53

  adding: content/shakespeare_bi_400_ud_loss_0.53/ (stored 0%)
  adding: content/shakespeare_bi_400_ud_loss_0.53/saved_model.pb (deflated 91%)
  adding: content/shakespeare_bi_400_ud_loss_0.53/assets/ (stored 0%)
  adding: content/shakespeare_bi_400_ud_loss_0.53/variables/ (stored 0%)
  adding: content/shakespeare_bi_400_ud_loss_0.53/variables/variables.data-00000-of-00001 (deflated 6%)
  adding: content/shakespeare_bi_400_ud_loss_0.53/variables/variables.index (deflated 69%)
