In [1]:
import numpy as np
import pandas as pd
import sys 
from keras.models import Sequential
from keras.layers import LSTM, Activation, Flatten, Dropout, Dense, Embedding, TimeDistributed
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [2]:
# Read the lyrics table from disk and preview its first rows.
# The preview shows one row per lyric line, with album/track/year metadata.
dataset = pd.read_csv('tsl.csv', encoding='latin1')
dataset.head(5)

Unnamed: 0,artist,album,track_title,track_n,lyric,line,year
0,Taylor Swift,Taylor Swift,Tim McGraw,1,He said the way my blue eyes shined,1,2006
1,Taylor Swift,Taylor Swift,Tim McGraw,1,Put those Georgia stars to shame that night,2,2006
2,Taylor Swift,Taylor Swift,Tim McGraw,1,"I said, ""That's a lie""",3,2006
3,Taylor Swift,Taylor Swift,Tim McGraw,1,Just a boy in a Chevy truck,4,2006
4,Taylor Swift,Taylor Swift,Tim McGraw,1,That had a tendency of gettin' stuck,5,2006


In [3]:
def processFirstLine(lyrics, songID, songName, row):
    """Open a new song entry from the given lyric row.

    Appends one element to each accumulator list (in place):
      * lyrics   -- the row's lyric text followed by a newline,
      * songID   -- ``year * 100 + track_n`` of the row,
      * songName -- the row's track title.

    Parameters
    ----------
    lyrics, songID, songName : list
        Accumulators that receive the new entry.
    row : mapping
        Must provide the keys 'lyric', 'year', 'track_n' and 'track_title'.

    Returns
    -------
    tuple
        The same three list objects, ``(lyrics, songID, songName)``.
    """
    opening_line = row['lyric'] + '\n'
    lyrics.append(opening_line)

    song_key = row['year'] * 100 + row['track_n']
    songID.append(song_key)

    title = row['track_title']
    songName.append(title)

    return lyrics, songID, songName
# Accumulators filled by the loop below: one element per song, holding the
# joined lyrics text, the numeric song ID and the song title respectively.
lyrics = []
songID = []
songName = []
# Key (album, track number) of the song currently being assembled.
# It stays None until the first row is read, so the first row always opens a
# song regardless of its track number (the previous version assumed the file
# started at track 1 and raised IndexError otherwise).
current_song = None
# Walk the line-level rows in file order and join consecutive lines that
# belong to the same song.
for index, row in dataset.iterrows():
    # Including the album in the key keeps two adjacent songs that happen to
    # share a track number from being merged into one entry.
    song_key = (row['album'], row['track_n'])
    if song_key != current_song:
        # A new song starts on this row: open a fresh entry in every list.
        lyrics, songID, songName = processFirstLine(lyrics, songID, songName, row)
        current_song = song_key
    else:
        # Still inside the same song: extend the most recent entry. Using
        # [-1] avoids keeping a separate index in sync with the lists.
        lyrics[-1] += row['lyric'] + '\n'

In [4]:
# Song-level table built from the three parallel per-song lists.
lyrics_data = pd.DataFrame(
    dict(songID=songID, songName=songName, lyrics=lyrics)
)

In [5]:
# Dump every song's lyrics to a plain-text corpus file; each entry is
# followed by one extra newline.
with open('lyricsText.txt', 'w', encoding="utf-8") as corpus_file:
    corpus_file.writelines('%s\n' % song_text for song_text in lyrics)

In [6]:
# Read the whole corpus back and lower-case it.
# The context manager closes the file handle as soon as the text is read;
# the bare open(...).read() used before left the handle open.
textFileName = 'lyricsText.txt'
with open(textFileName, encoding = 'UTF-8') as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

In [7]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Lookup table: integer index -> character.
int_chars = {position: symbol for position, symbol in enumerate(chars)}
# Lookup table: character -> integer index.
chars_int = {symbol: position for position, symbol in enumerate(chars)}

In [8]:
# Corpus length in characters and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)


In [9]:
# Length (in characters) of every input window.
seq_len = 100
# One window per start offset; the character right after each window is its
# prediction target, so the last usable offset is n_chars - seq_len - 1.
window_starts = range(n_chars - seq_len)
# Samples: each window encoded as a list of character indices.
data_X = [
    [chars_int[symbol] for symbol in raw_text[begin:begin + seq_len]]
    for begin in window_starts
]
# Targets: index of the character that follows each window.
data_y = [chars_int[raw_text[begin + seq_len]] for begin in window_starts]
n_patterns = len(data_X)
print( 'Total Patterns : ', n_patterns)

Total Patterns :  173598


In [10]:
# Arrange the encoded windows as a 3-D array of shape
# (n_patterns, seq_len, 1): one feature per time step.
X = np.reshape(data_X , (n_patterns, seq_len, 1))
# Scale the character indices (0 .. n_vocab - 1) into the range [0, 1).
X = X/ float(n_vocab)
# One-hot encode the targets. The width is pinned to the vocabulary size:
# without num_classes the width is inferred from the largest index present
# in data_y, which would silently shrink the output layer if the last
# vocabulary character never occurs as a target.
y = np_utils.to_categorical(data_y, num_classes=n_vocab)

In [11]:
# Depth of the recurrent stack.
LSTM_layer_num = 4
# Units per LSTM layer, one entry per layer (all 256).
layer_size = [256] * LSTM_layer_num

In [12]:
# Empty sequential container; the layers are added one by one in the cells below.
model = Sequential()

In [13]:
# First recurrent layer. It declares the per-sample input shape
# (time steps, features) taken from X and emits the full output sequence.
model.add(
    LSTM(
        units=layer_size[0],
        input_shape=X.shape[1:],
        return_sequences=True,
    )
)

In [14]:
# Add the remaining recurrent layers (the first one is already in place).
# Every layer returns its full output sequence.
for layer_idx in range(1, LSTM_layer_num):
    hidden_units = layer_size[layer_idx]
    model.add(LSTM(units=hidden_units, return_sequences=True))

In [15]:
# Collapse the (time steps, units) output of the last LSTM into one flat
# vector per sample so the dense layer can consume it.
model.add(Flatten())

In [16]:
# Output head: one unit per one-hot target column, turned into a probability
# distribution by a separate softmax activation layer.
n_outputs = y.shape[1]
model.add(Dense(units=n_outputs))
model.add(Activation(activation='softmax'))
# Categorical cross-entropy matches the one-hot targets; Adam is the optimizer.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')

In [17]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 256)          264192    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100, 256)          525312    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 256)          525312    
_________________________________________________________________
lstm_3 (LSTM)                (None, 100, 256)          525312    
_________________________________________________________________
flatten (Flatten)            (None, 25600)             0         
_________________________________________________________________
dense (Dense)                (None, 58)                1484858   
_________________________________________________________________
activation (Activation)      (None, 58)                0

In [18]:
# File-name template for saved weights; the epoch number and the training
# loss are substituted into it on every save.
checkpoint_name = 'Weights-LSTM-improvement-{epoch:03d}-{loss:.5f}-bigger.hdf5'
# Save only when the monitored training loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath=checkpoint_name,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [20]:
# Training configuration, kept in one place; every key is a keyword
# argument of model.fit.
model_params = dict(
    epochs=15,
    batch_size=128,
    callbacks=callbacks_list,
    verbose=1,
    validation_split=0.2,
    validation_data=None,
    shuffle=True,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
)
# Train the network; the returned History object is the cell's output.
model.fit(X, y, **model_params)

Epoch 1/15
Epoch 00001: loss improved from 2.71296 to 2.51822, saving model to Weights-LSTM-improvement-001-2.51822-bigger.hdf5
Epoch 2/15
Epoch 00002: loss improved from 2.51822 to 2.23025, saving model to Weights-LSTM-improvement-002-2.23025-bigger.hdf5
Epoch 3/15
Epoch 00003: loss improved from 2.23025 to 1.86221, saving model to Weights-LSTM-improvement-003-1.86221-bigger.hdf5
Epoch 4/15
Epoch 00004: loss improved from 1.86221 to 1.47906, saving model to Weights-LSTM-improvement-004-1.47906-bigger.hdf5
Epoch 5/15
Epoch 00005: loss improved from 1.47906 to 1.13481, saving model to Weights-LSTM-improvement-005-1.13481-bigger.hdf5
Epoch 6/15
Epoch 00006: loss improved from 1.13481 to 0.83476, saving model to Weights-LSTM-improvement-006-0.83476-bigger.hdf5
Epoch 7/15
Epoch 00007: loss improved from 0.83476 to 0.58913, saving model to Weights-LSTM-improvement-007-0.58913-bigger.hdf5
Epoch 8/15
Epoch 00008: loss improved from 0.58913 to 0.39696, saving model to Weights-LSTM-improvement-

<tensorflow.python.keras.callbacks.History at 0x7fead0c51080>

In [22]:
# Restore trained weights from a checkpoint file.
# NOTE(review): this is a hard-coded absolute path to one specific
# checkpoint -- confirm the file exists in the current environment.
wights_file = '/content/Weights-LSTM-improvement-015-0.10778-bigger.hdf5'
model.load_weights(filepath=wights_file)
# Re-compile with the same loss and optimizer used for training.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')

In [24]:
# Pick a random training window to use as the seed text.
# np.random.randint excludes its upper bound, so len(data_X) (not
# len(data_X) - 1) is needed for the last window to be selectable too.
start = np.random.randint(0, len(data_X))
# Work on a copy: the window is extended below, and appending to the list
# stored in data_X would corrupt that training sample.
pattern = list(data_X[start])
print('Seed : ')
print("\"",''.join([int_chars[value] for value in pattern]), "\"\n")
# How many characters to generate.
generated_characters = 500
# Generate characters one at a time, feeding each prediction back in.
for i in range(generated_characters):
    # Shape and scale the current window exactly like the training input.
    x = np.reshape(pattern, ( 1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x,verbose = 0)
    # Greedy decoding: take the most probable next character.
    index = int(np.argmax(prediction))
    result = int_chars[index]
    sys.stdout.write(result)
    # Slide the window: add the new character and drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print('\nDone')

Seed : 
" d i got that red lip classic thing that you like
and when we go crashing down, we come back everytim "

e
cause we never go out of style, we never go out of style

looking at it now
it all seems so simple
we were lying on the stouch
i remembe
that took a polaroid of us
then discovered it ereais of doon
eienies to keav no the meget, eoe io my miae
the say, yea,
yeshou  you siool them cowe to a fay mookes, tteat on a cho ond rntt
iete 
nohane au lorn oa inmemanf
i dan't halp it onsk tp i knoe syartsiiggt ailcy i can't fes fetan miss you worln
ort oosking himl
it's worle i know you
cue iimoing and yo
Done
