# Import Required model

In [65]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding,LSTM,Dense,Bidirectional,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.optimizers import Adam
import numpy as np

# Input Data and Preprocessing

In [66]:
with open("The Chainsmokers – Closer Lyrics.txt",'r', encoding = 'utf-8') as f:
    data = f.read()

In [67]:
data

'Hey, I was doing just fine before I met you\nI drink too much and that’s an issue\nBut I’m OK\nHey, you tell your friends it was nice to meet them\nBut I hope I never see them\nAgain\n\nI know it breaks your heart\nMoved to the city in a broke-down car\nAnd four years, no calls\nNow you’re looking pretty in a hotel bar\nAnd I, I, I, I, I can’t stop\nNo, I, I, I, I, I can’t stop\n\nSo, baby, pull me closer\nIn the back seat of your Rover\nThat I know you can’t afford\nBite that tattoo on your shoulder\nPull the sheets right off the corner\nOf that mattress that you stole\nFrom your roommate back in Boulder\nWe ain’t ever getting older\n\nWe ain’t ever getting older\nWe ain’t ever getting older\n\nYou look as good as the day I met you\nI forget just why I left you,\nI was insane\nStay and play that Blink-182 song\nThat we beat to death in Tucson\nOK\n\nI know it breaks your heart\nMoved to the city in a broke-down car\nAnd four years, no call\nNow I’m looking pretty in a hotel bar\nAnd 

In [68]:
corpus = data.lower().split('\n')

In [69]:
corpus

['hey, i was doing just fine before i met you',
 'i drink too much and that’s an issue',
 'but i’m ok',
 'hey, you tell your friends it was nice to meet them',
 'but i hope i never see them',
 'again',
 '',
 'i know it breaks your heart',
 'moved to the city in a broke-down car',
 'and four years, no calls',
 'now you’re looking pretty in a hotel bar',
 'and i, i, i, i, i can’t stop',
 'no, i, i, i, i, i can’t stop',
 '',
 'so, baby, pull me closer',
 'in the back seat of your rover',
 'that i know you can’t afford',
 'bite that tattoo on your shoulder',
 'pull the sheets right off the corner',
 'of that mattress that you stole',
 'from your roommate back in boulder',
 'we ain’t ever getting older',
 '',
 'we ain’t ever getting older',
 'we ain’t ever getting older',
 '',
 'you look as good as the day i met you',
 'i forget just why i left you,',
 'i was insane',
 'stay and play that blink-182 song',
 'that we beat to death in tucson',
 'ok',
 '',
 'i know it breaks your heart',
 'move

In [70]:
mytokenizer = Tokenizer()

In [71]:
mytokenizer.fit_on_texts(corpus)

In [72]:
mytokenizer.word_index

{'i': 1,
 'we': 2,
 'ain’t': 3,
 'ever': 4,
 'getting': 5,
 'older': 6,
 'that': 7,
 'your': 8,
 'the': 9,
 'you': 10,
 'in': 11,
 'no': 12,
 'can’t': 13,
 'and': 14,
 'pull': 15,
 'back': 16,
 'of': 17,
 'know': 18,
 'to': 19,
 'a': 20,
 'stop': 21,
 'was': 22,
 'it': 23,
 'so': 24,
 'baby': 25,
 'me': 26,
 'closer': 27,
 'seat': 28,
 'rover': 29,
 'afford': 30,
 'bite': 31,
 'tattoo': 32,
 'on': 33,
 'shoulder': 34,
 'sheets': 35,
 'right': 36,
 'off': 37,
 'corner': 38,
 'mattress': 39,
 'stole': 40,
 'from': 41,
 'roommate': 42,
 'boulder': 43,
 'hey': 44,
 'just': 45,
 'met': 46,
 'but': 47,
 'i’m': 48,
 'ok': 49,
 'them': 50,
 'breaks': 51,
 'heart': 52,
 'moved': 53,
 'city': 54,
 'broke': 55,
 'down': 56,
 'car': 57,
 'four': 58,
 'years': 59,
 'now': 60,
 'looking': 61,
 'pretty': 62,
 'hotel': 63,
 'bar': 64,
 'as': 65,
 'doing': 66,
 'fine': 67,
 'before': 68,
 'drink': 69,
 'too': 70,
 'much': 71,
 'that’s': 72,
 'an': 73,
 'issue': 74,
 'tell': 75,
 'friends': 76,
 'nice':

In [91]:
total_words = len(mytokenizer.word_index)+1

In [74]:
total_words

100

In [84]:
my_input_sequences = []

for line in corpus:
    print(line)
    token_list = mytokenizer.texts_to_sequences([line])[0]
    print(token_list)
    for i in range(1,len(token_list)):
        n_gram_sequnece = token_list[:i+1]
        my_input_sequences.append(n_gram_sequnece)

hey, i was doing just fine before i met you
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10]
i drink too much and that’s an issue
[1, 69, 70, 71, 14, 72, 73, 74]
but i’m ok
[47, 48, 49]
hey, you tell your friends it was nice to meet them
[44, 10, 75, 8, 76, 23, 22, 77, 19, 78, 50]
but i hope i never see them
[47, 1, 79, 1, 80, 81, 50]
again
[82]

[]
i know it breaks your heart
[1, 18, 23, 51, 8, 52]
moved to the city in a broke-down car
[53, 19, 9, 54, 11, 20, 55, 56, 57]
and four years, no calls
[14, 58, 59, 12, 83]
now you’re looking pretty in a hotel bar
[60, 84, 61, 62, 11, 20, 63, 64]
and i, i, i, i, i can’t stop
[14, 1, 1, 1, 1, 1, 13, 21]
no, i, i, i, i, i can’t stop
[12, 1, 1, 1, 1, 1, 13, 21]

[]
so, baby, pull me closer
[24, 25, 15, 26, 27]
in the back seat of your rover
[11, 9, 16, 28, 17, 8, 29]
that i know you can’t afford
[7, 1, 18, 10, 13, 30]
bite that tattoo on your shoulder
[31, 7, 32, 33, 8, 34]
pull the sheets right off the corner
[15, 9, 35, 36, 37, 9, 38]
of that mattress t

In [85]:
my_input_sequences

[[44, 1],
 [44, 1, 22],
 [44, 1, 22, 66],
 [44, 1, 22, 66, 45],
 [44, 1, 22, 66, 45, 67],
 [44, 1, 22, 66, 45, 67, 68],
 [44, 1, 22, 66, 45, 67, 68, 1],
 [44, 1, 22, 66, 45, 67, 68, 1, 46],
 [44, 1, 22, 66, 45, 67, 68, 1, 46, 10],
 [1, 69],
 [1, 69, 70],
 [1, 69, 70, 71],
 [1, 69, 70, 71, 14],
 [1, 69, 70, 71, 14, 72],
 [1, 69, 70, 71, 14, 72, 73],
 [1, 69, 70, 71, 14, 72, 73, 74],
 [47, 48],
 [47, 48, 49],
 [44, 10],
 [44, 10, 75],
 [44, 10, 75, 8],
 [44, 10, 75, 8, 76],
 [44, 10, 75, 8, 76, 23],
 [44, 10, 75, 8, 76, 23, 22],
 [44, 10, 75, 8, 76, 23, 22, 77],
 [44, 10, 75, 8, 76, 23, 22, 77, 19],
 [44, 10, 75, 8, 76, 23, 22, 77, 19, 78],
 [44, 10, 75, 8, 76, 23, 22, 77, 19, 78, 50],
 [47, 1],
 [47, 1, 79],
 [47, 1, 79, 1],
 [47, 1, 79, 1, 80],
 [47, 1, 79, 1, 80, 81],
 [47, 1, 79, 1, 80, 81, 50],
 [1, 18],
 [1, 18, 23],
 [1, 18, 23, 51],
 [1, 18, 23, 51, 8],
 [1, 18, 23, 51, 8, 52],
 [53, 19],
 [53, 19, 9],
 [53, 19, 9, 54],
 [53, 19, 9, 54, 11],
 [53, 19, 9, 54, 11, 20],
 [53, 19, 9,

In [86]:
max_sequences_len = max([len(seq) for seq in my_input_sequences])
input_sequences = np.array(pad_sequences(my_input_sequences, maxlen=max_sequences_len,padding = 'pre'))

In [87]:
input_sequences.shape

(319, 11)

In [88]:
X = input_sequences[:,:-1]

In [89]:
y = input_sequences[:,-1]

In [92]:
y=np.array(tf.keras.utils.to_categorical(y,num_classes=total_words))

In [98]:
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequences_len-1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))
print(model.summary())

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=100, verbose=1)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 10, 100)           10100     
                                                                 
 bidirectional (Bidirectiona  (None, 300)              301200    
 l)                                                              
                                                                 
 dense_1 (Dense)             (None, 101)               30401     
                                                                 
Total params: 341,701
Trainable params: 341,701
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epo

Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x271a2637c10>

In [99]:
input_text = "hey i was doing just fine"
predict_next_words= 30

for _ in range(predict_next_words):
    token_list = mytokenizer.texts_to_sequences([input_text])[0]
    print(token_list)
    token_list = pad_sequences([token_list], maxlen=max_sequences_len-1, padding='pre')
    predicted = np.argmax(model.predict(token_list), axis=-1)
    output_word = ""
    for word, index in mytokenizer.word_index.items():
        if index == predicted:
            output_word = word
            break
    input_text += " " + output_word

print(input_text)

[44, 1, 22, 66, 45, 67]
[44, 1, 22, 66, 45, 67, 68]
[44, 1, 22, 66, 45, 67, 68, 1]
[44, 1, 22, 66, 45, 67, 68, 1, 46]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68, 19]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68, 19, 84]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68, 19, 84, 50]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68, 19, 84, 50, 57]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 68, 19, 84, 50, 57, 57]
[44, 1, 22, 66, 45, 67, 68, 1, 46, 10, 50, 50, 50, 50, 50, 74, 

In [None]:
i