In [32]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
import re
import string

In [33]:
with open('lyrics/Bob Dylan.txt','r') as f:
    text=f.read()

text


'[Verse 1]How many roads must a man walk down before you call him a man?How many seas must the white dove sail before she sleeps in the sand?Yes, and how many times must the cannonballs fly before they\'re forever banned?[Refrain]The answer, my friend, is blowin\' in the windThe answer is blowin\' in the wind[Verse 2]Yes, and how many years can a mountain exist before it is washed to the sea?Yes, and how many years can some people exist before they\'re allowed to be free?Yes, and how many times can a man turn his head and pretend that he just doesn\'t see?[Refrain]The answer, my friend, is blowin\' in the windThe answer is blowin\' in the wind[Verse 3]Yes, and how many times must a man look up before he can see the sky?Yes, and how many ears must one man have before he can hear people cry?Yes, and how many deaths will it take till he knows that too many people have died?[Refrain]The answer, my friend, is blowin\' in the windThe answer is blowin\' in the wind\n\n[Verse 1]Come gather \'r

In [34]:
def data_clean_round1(text):
                                                    # lower case text
    text = re.sub('<[^>]*>', ' ', text)
                                    
    text = re.sub('\[.*?\]', ' ', text)                                 # remove square brackets
    # Create punctuation set without forward slash
    punct = ''.join(char for char in string.punctuation if char != '\\')
    text = re.sub('[%s]' % re.escape(punct), '', text)                  # remove punctuation except forward slash
    # Replace forward slash with /n
    
    text = re.sub(r'([a-z])([A-Z])', r'\1/n\2', text)     
    text = re.sub('the','',text)
    text = text.lower()                                 # convert slash to /n
    return text.strip()


In [35]:
def data_clean_round2(text):
    text = re.sub('  ', ' ', text)
    text = re.sub('   ', ' ', text)
    return text


In [36]:
data=data_clean_round1(text)
data


'how many roads must a man walk down before you call him a man/nhow many seas must  white dove sail before she sleeps in  sand/nyes and how many times must  cannonballs fly before yre forever banned the answer my friend is blowin in  wind/nthe answer is blowin in  wind yes and how many years can a mountain exist before it is washed to  sea/nyes and how many years can some people exist before yre allowed to be free/nyes and how many times can a man turn his head and pretend that he just doesnt see the answer my friend is blowin in  wind/nthe answer is blowin in  wind yes and how many times must a man look up before he can see  sky/nyes and how many ears must one man have before he can hear people cry/nyes and how many deaths will it take till he knows that too many people have died the answer my friend is blowin in  wind/nthe answer is blowin in  wind\n\n come gar round people wherever you roam/nand admit that  waters around you have grown/nand accept it that soon youll be drenched to  

In [37]:
data=data_clean_round2(data)
data

'how many roads must a man walk down before you call him a man/nhow many seas must white dove sail before she sleeps in sand/nyes and how many times must cannonballs fly before yre forever banned the answer my friend is blowin in wind/nthe answer is blowin in wind yes and how many years can a mountain exist before it is washed to sea/nyes and how many years can some people exist before yre allowed to be free/nyes and how many times can a man turn his head and pretend that he just doesnt see the answer my friend is blowin in wind/nthe answer is blowin in wind yes and how many times must a man look up before he can see sky/nyes and how many ears must one man have before he can hear people cry/nyes and how many deaths will it take till he knows that too many people have died the answer my friend is blowin in wind/nthe answer is blowin in wind\n\n come gar round people wherever you roam/nand admit that waters around you have grown/nand accept it that soon youll be drenched to bone/nif your

In [38]:
corpus=data.split('/n')


In [39]:
tokenizer=Tokenizer()
tokenizer.fit_on_texts(corpus)

totalWords=len(tokenizer.word_index) + 1



In [40]:
input_seq = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        ngram_seq = token_list[:i+1]
        input_seq.append(ngram_seq)

In [41]:
max_seq_len = max([len(x) for x in input_seq])
input_seq = np.array(pad_sequences(input_seq, maxlen=max_seq_len, padding='pre'))

In [42]:
xs, labels = input_seq[:,:-1], input_seq[:,-1]


ys = tf.keras.utils.to_categorical(labels, num_classes=totalWords)


In [43]:
model = Sequential([Embedding(totalWords, 64, input_length=max_seq_len-1),
                   Bidirectional(LSTM(100, return_sequences=True)),
                   Dropout(0.2),
                   Bidirectional(LSTM(80)),
                   Dense(totalWords//2, activation='relu'),
                   Dense(totalWords, activation='softmax')
                   ])

model.compile(loss='sparse_categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 203, 64)           345792    
                                                                 
 bidirectional_4 (Bidirectio  (None, 203, 200)         132000    
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 203, 200)          0         
                                                                 
 bidirectional_5 (Bidirectio  (None, 160)              179840    
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 2701)              434861    
                                                                 
 dense_5 (Dense)             (None, 5403)             

In [44]:
cp_path='weights/dyla_nweights.h5'

In [45]:
cp=tf.keras.callbacks.ModelCheckpoint(cp_path,save_freq=5,verbose=1)

In [46]:
try:
    model.load_weights(cp_path)
except OSError:
    print("No File : Scratch Start")

No File : Scratch Start


In [47]:
history=model.fit(xs,
                  labels,
                  epochs=20,
                  verbose=1,
                  callbacks=[cp]
                  )

Epoch 1/20
   4/1212 [..............................] - ETA: 1:38 - loss: 8.5916 - accuracy: 0.0078   
Epoch 1: saving model to weights\dyla_nweights.h5
   9/1212 [..............................] - ETA: 2:40 - loss: 8.5120 - accuracy: 0.0312
Epoch 1: saving model to weights\dyla_nweights.h5
  14/1212 [..............................] - ETA: 2:56 - loss: 8.2491 - accuracy: 0.0312
Epoch 1: saving model to weights\dyla_nweights.h5
  19/1212 [..............................] - ETA: 3:03 - loss: 8.0115 - accuracy: 0.0280
Epoch 1: saving model to weights\dyla_nweights.h5
  24/1212 [..............................] - ETA: 3:05 - loss: 7.8835 - accuracy: 0.0221
Epoch 1: saving model to weights\dyla_nweights.h5
  29/1212 [..............................] - ETA: 3:08 - loss: 7.8390 - accuracy: 0.0205
Epoch 1: saving model to weights\dyla_nweights.h5
  34/1212 [..............................] - ETA: 3:09 - loss: 7.7350 - accuracy: 0.0248
Epoch 1: saving model to weights\dyla_nweights.h5
  39/1212 [..

In [48]:
model.save('newmodel.h5')

In [50]:
from tensorflow.keras.models import load_model
loadedModel=load_model('newmodel.h5')

In [52]:
seed_text=input("Give Seed")
next_words=20
for _ in range(next_words):
    token_list=tokenizer.texts_to_sequences([seed_text])[0]
    token_list=pad_sequences([token_list],maxlen=max_seq_len-1,padding='pre')
    predicted=loadedModel.predict(token_list,verbose=0)
    predicted_index = np.argmax(predicted)
    output_word=""
    for word , index in tokenizer.word_index.items():
        if index == predicted_index:
            output_word = word
            break
        
    seed_text += " " + output_word

print(seed_text)

miss misery man looking everywhere for a fort and start buyin street and jimmy throughout time too dumb tickin like to catch
