In [None]:
# importing the libraries
import tensorflow
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np 

In [None]:
# preparing the data
tokenizer = Tokenizer()

# Read the lyrics through a context manager so the file handle is closed
# as soon as the text has been loaded (the bare open().read() leaked it).
with open('rock.txt') as lyrics_file:
    data = lyrics_file.read()
corpus = data.lower().split("\n")

# Build the vocabulary from the lyric lines. The +1 leaves room for index 0,
# which pad_sequences uses as the padding value (Keras word indices start at 1).
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# create input sequences using list of tokens:
# every prefix of a line with at least two tokens becomes one training sample.
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# pad sequences on the left so that the last column is always the word to predict
max_sequence_len = max(len(sequence) for sequence in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# create predictors and label: all tokens but the last are the input,
# the last token is the target
predictors, label = input_sequences[:, :-1], input_sequences[:, -1]

# one-hot encode the target word over the whole vocabulary
label = ku.to_categorical(label, num_classes=total_words)

In [None]:
# model
# Next-word classifier: embedding -> bidirectional LSTM -> dropout -> LSTM
# -> dense (relu, L2-regularised) -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# Dense `units` must be an integer; `total_words/2` is a float in Python 3,
# so use floor division (same value the float was truncated to before).
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 151, 100)          352300    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 151, 300)          301200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 151, 300)          0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               160400    
_________________________________________________________________
dense_2 (Dense)              (None, 1761)              177861    
_________________________________________________________________
dense_3 (Dense)              (None, 3523)              6207526   
Total params: 7,199,287
Trainable params: 7,199,287
Non-trainable params: 0
____________________________________________

In [None]:
#  training the model
# Fit on the padded n-gram prefixes for 10 epochs; `history` keeps the
# object returned by fit(). (The statement must start at column 0: a leading
# space at top level is an unexpected indent.)
history = model.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Persist the freshly trained `model` to 'my_model22.h5' (relative path, so it
# lands in the current working directory).
model.save('my_model22.h5')

In [None]:
# Colab-only: mount the user's Google Drive under /content/drive so that files
# stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# loading and retraining the model
# `model22` is read from a directory on the mounted Drive.
# NOTE(review): this path differs from the 'my_model22.h5' file written by the
# earlier model.save() cell — confirm which checkpoint is expected here.
from tensorflow import keras
model22 = keras.models.load_model('/content/drive/MyDrive/nlp')

In [None]:
# Further training of the reloaded `model22` on the same predictors/labels for
# 10 epochs; `history` is overwritten with this run's result.
history = model22.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Another training pass on `model22`, this time for 20 epochs; `history` is
# overwritten again, so earlier runs' results are no longer reachable.
history = model22.fit(predictors, label, epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Final training pass on `model22` (10 epochs) using the same data as before;
# `history` now refers to this run only.
history = model22.fit(predictors, label, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# collecting word stoppers from lyrics
# Re-read the lyric lines (lower-cased, one entry per line). The context
# manager closes the file handle instead of leaking it.
with open('rock.txt') as lyrics_file:
    data = lyrics_file.read()
corpus = data.lower().split("\n")
def lastWord(string):
    """Return the last whitespace-delimited word of ``string``.

    Unlike a backwards scan for a space character, this also handles lines
    that consist of a single word (no space to find) and lines that end in
    trailing whitespace.

    Parameters
    ----------
    string : str
        One line of text.

    Returns
    -------
    str
        The final word of the line, or ``""`` when the line is empty or
        contains only whitespace.
    """
    words = string.split()
    return words[-1] if words else ""
# Last word of every lyric line, in corpus order (duplicates included).
stopwords = [lastWord(line) for line in corpus]

# De-duplicate while keeping first-seen order. String results are lower-cased;
# any non-string result is passed through unchanged, which is what the former
# bare `except:` achieved implicitly while also hiding every other error.
improved_stopwords = list(dict.fromkeys(
    word.lower() if isinstance(word, str) else word
    for word in stopwords
))


In [None]:
# Hand-written list of common English function words. generate() consults it
# when printing: a word found here is printed followed by a line break.
stopwordss = [ "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself", "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" ]
# generating new lyrics
def generate(x, next_words=50):
    """Extend the seed lyric ``x`` one predicted word at a time and print it.

    Each step tokenises the text produced so far, left-pads it to the model's
    input length, asks ``model22`` for the most probable next word and appends
    that word to the text.

    Parameters
    ----------
    x : str
        Seed text to continue.
    next_words : int, optional
        Number of words to append to the seed (default 50).

    Returns
    -------
    None
        The result is printed, not returned. Words found in ``stopwordss``
        are followed by a line break, every other word by a single space.
    """
    seed_text = x

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        # predicting with the trained model.
        # Sequential.predict_classes() no longer exists in current TensorFlow
        # releases; taking the argmax of the softmax output is the equivalent.
        probabilities = model22.predict(token_list, verbose=0)
        predicted = int(np.argmax(probabilities, axis=-1)[0])
        # index_word is the tokenizer's index -> word mapping; an index with
        # no word (e.g. the padding index 0) yields an empty string, matching
        # the previous linear search that found nothing.
        output_word = tokenizer.index_word.get(predicted, "")
        seed_text += " " + output_word

    # Print the lyric, breaking the line after each stop word.
    for word in seed_text.split(' '):
        if word in stopwordss:
            print(word)
        else:
            print(word, end=" ")
# Ask the user for a seed lyric on stdin and print the generated continuation.
inn=input(' TYPE YOUR STARTING LYRIC')
generate(inn)



  

 TYPE YOUR STARTING LYRICi hate you




i
hate you
they
got the
doors it's
the
one that
i
can't have
real love it
wasn't enough with
you
all
without me
next to
you
i
keep all
that
we've
been
low now it's
finally sinkin' in
a
great love like a
little kid with
glasses in
a
twin sized bed 

In [None]:
# saving the model
# `!mkdir NLP` creates the directory relative to the current working directory.
# NOTE(review): the save below uses the absolute path /content/NLP — these only
# coincide when the notebook runs from /content; confirm.
!mkdir NLP
model22.save('/content/NLP')


# Zip the saved-model directory and trigger a browser download (Colab only).
!zip -r /content/file.zip /content/NLP
from google.colab import files
files.download("/content/file.zip")



INFO:tensorflow:Assets written to: /content/NLP/assets


INFO:tensorflow:Assets written to: /content/NLP/assets


  adding: content/NLP/ (stored 0%)
  adding: content/NLP/keras_metadata.pb (deflated 91%)
  adding: content/NLP/assets/ (stored 0%)
  adding: content/NLP/variables/ (stored 0%)
  adding: content/NLP/variables/variables.data-00000-of-00001 (deflated 57%)
  adding: content/NLP/variables/variables.index (deflated 67%)
  adding: content/NLP/saved_model.pb (deflated 89%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>