In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
#working directory
%cd '/content/drive/MyDrive/lyrics_generator/'

/content/drive/MyDrive/lyrics_generator


### Introduction

In this article, we delve into the fascinating world of lyrics generation using Long Short-Term Memory (LSTM) modeling techniques, focusing specifically on [Taylor Swift's](https://www.kaggle.com/datasets/ishikajohari/taylor-swift-all-lyrics-30-albums) lyrical repertoire.

This dataset has the lyrics of almost all, if not all, of Taylor Swift's songs from 46 albums. The lyrics are in a textual format (.txt) for maximum user flexibility. Additionally, the dataset includes cover art for each of these albums.

LSTM, a type of recurrent neural network (RNN), has gained immense popularity in natural language processing tasks, including text generation. It possesses a unique ability to capture and learn patterns in sequential data, making it an ideal candidate for creating realistic and artistically appealing lyrics.

By harnessing the power of LSTM, we can embark on a journey to understand the underlying structure and themes of Taylor Swift's lyrics. Our goal is to not only generate lyrics that mimic her distinctive style but also gain insights into her creative process and storytelling techniques.

In [3]:
#load necessary modules
import os
import re
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [25]:
def preprocess(text):
    punctuation_pad = '!?.,:-;'
    # end of verse tokenization
    content = re.sub(r'\n\n',r"<EOV>", text)
    # end of sentence
    content = re.sub(r'\n', r"<EOS>", content)
    content = re.sub(r'\|.*?\|','',content)
    content = content.translate(str.maketrans({key: ' {0} '.format(key) for key in punctuation_pad}))
    content = re.sub(' +', ' ', content)
    content = re.split(r'(<EOS>)|(<EOV>)',content)
    updated_content = ['' if element is None else element for element in content]
    return ' '.join(updated_content)

In [26]:
# Step 1: Train-Test Split
lyrics_dir = "../lyrics_generator/Data/"
# all lyrics together as list
all_lyrics = []
for filename in os.listdir(lyrics_dir):
    with open(os.path.join(lyrics_dir, filename), "r") as f:
        lyrics = f.read()
        process_lyrics = preprocess(lyrics)
        all_lyrics.append(process_lyrics)

In [27]:
len(all_lyrics)

500

In [28]:
#Perform train-test split
train_lyrics, test_lyrics = train_test_split(all_lyrics, test_size=0.2, random_state=42)

In [29]:
train_lyrics[0]

"Intro  <EOV> Verse 1 <EOS>  Once upon a time , a few mistakes ago <EOS>  I was in your sights , you got me alone <EOS>  You found me , you found me <EOS>  You found me - e - e - e - e <EOS>  I guess you didn't care , and I guess I liked that <EOS>  And when I fell hard , you took a step back <EOS>  Without me , without me <EOS>  Without me - e - e - e - e <EOS>  Pre - Chorus <EOS>  And he's long gone when he's next to me <EOS>  And I realize the blame is on me  <EOV> Chorus <EOS>  'Cause I knew you were trouble when you walked in <EOS>  So shame on me now <EOS>  Flew me to places I'd never been <EOS>  'Til you put me down , oh <EOS>  I knew you were trouble when you walked in <EOS>  So shame on me now <EOS>  Flew me to places I'd never been <EOS>  Now , I'm lying on the cold , hard ground  <EOV> Post - Chorus <EOS>  Oh , oh - oh <EOS>  Trouble , trouble , trouble <EOS>  Oh , oh - oh <EOS>  Trouble , trouble , trouble  <EOV> Verse 2 <EOS>  No apologies , he'll never see you cry <EOS>  

In [16]:
# modules related to generating the dataset and training model
import torch
import pandas as pd
from collections import Counter
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

In [30]:
class LyricsDataset():
    def __init__(self,text, sequence_length=4):
        self.text = text
        self.sequence_length = sequence_length
        self.words = self.load_words()
        self.uniq_words = self.get_uniq_words()
        self.index_to_word = {index: word for index, word in enumerate(self.uniq_words)}
        self.word_to_index = {word: index for index, word in enumerate(self.uniq_words)}
        # add unknown tokens to the word to index
        self.word_to_index['<UNK>'] = len(self.word_to_index)
        self.words_indexes = [self.word_to_index[w] for w in self.words]
        
    def load_words(self):
        total_words = []
        text = self.text
        for i in range(len(text)):
            for sentence in text[i].splitlines():
                for word in sentence.split(' '):
                    total_words.append(word)
        word_counts = Counter(total_words)
        filtered_words = [word if word_counts[word] >= 1  else '<UNK>' for word in total_words]
        return filtered_words        
        
    def get_uniq_words(self):
        word_counts = Counter(self.words)
        word_counts['<UNK>'] = 0  # Add the <UNK> token with count 0
        return sorted(word_counts, key=word_counts.get, reverse=True)
        
    def __len__(self):
        return len(self.words_indexes) - self.sequence_length
    
    def __getitem__(self, index):
        sequence = torch.tensor(self.words_indexes[index:index+self.sequence_length])
        sequence[sequence >= len(self.uniq_words)] = self.word_to_index['<UNK>']
        target = torch.tensor(self.words_indexes[index+1:index+self.sequence_length+1])
        target[target >= len(self.uniq_words)] = self.word_to_index['<UNK>']
        return torch.tensor(sequence), torch.tensor(target)

In [32]:
# create LyricsDataset
seq_length = 6
train_data = LyricsDataset(train_lyrics, sequence_length=seq_length)
test_data = LyricsDataset(test_lyrics, sequence_length=seq_length)

In [None]:
#data_lyrics = LyricsDataset(all_lyrics, sequence_length=seq_length)

In [33]:
batch_size = 2
train_loader = DataLoader(train_data, batch_size=batch_size)
test_loader = DataLoader(test_data,batch_size=batch_size)

In [34]:
device =torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [35]:
#develop LSTM model
class LSTMmodel(nn.Module):
    def __init__(self, dataset):
        super(LSTMmodel, self).__init__()
        self.lstm_size = 128
        self.embedding_dim = 128
        self.num_layers = 3
        n_vocab = len(dataset.uniq_words)
        self.embedding = nn.Embedding(
            num_embeddings=n_vocab,
            embedding_dim=self.embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=self.lstm_size,
            hidden_size=self.lstm_size,
            num_layers=self.num_layers,
            dropout=0.2,
        )
        self.fc = nn.Linear(self.lstm_size, n_vocab)
    def forward(self, x, prev_state):
        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.fc(output)
        return logits, state
    def init_state(self, sequence_length):
        return (torch.zeros(self.num_layers, sequence_length, self.lstm_size),
                torch.zeros(self.num_layers, sequence_length, self.lstm_size))

In [36]:
model = LSTMmodel(train_data)
model.to(device)

LSTMmodel(
  (embedding): Embedding(11100, 128)
  (lstm): LSTM(128, 128, num_layers=3, dropout=0.2)
  (fc): Linear(in_features=128, out_features=11100, bias=True)
)

In [37]:
#loss function
criterion = nn.CrossEntropyLoss()
#optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [38]:
def evaluate(dataloader, model):
    val_loss = 0
    count = 0
    with torch.no_grad():
        for batch, (x, y) in tqdm(enumerate(dataloader)):
            x = x.to(device)
            y = y.to(device)
            # Initialize states for each batch
            state_h, state_c = model.init_state(x.size(1))  
            state_h = state_h.to(device)
            state_c = state_c.to(device)
            y_pred, (state_h, state_c) = model(x, (state_h, state_c))
            loss = criterion(y_pred.transpose(1, 2), y)
            val_loss += loss.item()
            count += 1
    avg_val_loss = val_loss / count
    #print({'loss': avg_val_loss})
    return avg_val_loss

In [39]:
def train( model, dataloader,test_loader,epochs=10):
    loss = 0
    for epoch in range(epochs):
      model.train()
      state_h, state_c = model.init_state(seq_length)
      state_h = torch.tensor(state_h).to(device)
      state_c = torch.tensor(state_c).to(device)
      count = 0
      for batch, (x, y) in tqdm(enumerate(dataloader)):
          optimizer.zero_grad()
          x = x.to(device)
          y = y.to(device)
          y_pred, (state_h, state_c) = model(x, (state_h, state_c))
          loss = criterion(y_pred.transpose(1, 2), y)
          state_h = state_h.detach()
          state_c = state_c.detach()
          loss.backward()
          optimizer.step()
          loss += loss.item()
          count += 1
      avg_loss = loss.item()/count
      # valuation
      model.eval()
      val_loss = evaluate(test_loader,model)
      print(f"epoch: {epoch}, train loss: {avg_loss}, val loss: {avg_loss}")

In [40]:
train(model,train_loader,test_loader)

  state_h = torch.tensor(state_h).to(device)
  state_c = torch.tensor(state_c).to(device)
  return torch.tensor(sequence), torch.tensor(target)
127504it [07:16, 292.12it/s]
30429it [00:31, 965.91it/s] 


epoch: 0, train loss: 7.265320026466158e-05, val loss: 7.265320026466158e-05


127504it [07:11, 295.46it/s]
30429it [00:32, 945.16it/s]


epoch: 1, train loss: 6.882309509738803e-05, val loss: 6.882309509738803e-05


127504it [07:12, 294.64it/s]
30429it [00:31, 973.64it/s] 


epoch: 2, train loss: 8.192787446854838e-05, val loss: 8.192787446854838e-05


127504it [07:12, 294.80it/s]
30429it [00:33, 911.63it/s]


epoch: 3, train loss: 7.29978735286269e-05, val loss: 7.29978735286269e-05


127504it [07:16, 291.81it/s]
30429it [00:31, 966.05it/s] 


epoch: 4, train loss: 7.762153050818426e-05, val loss: 7.762153050818426e-05


127504it [07:16, 292.09it/s]
30429it [00:31, 962.67it/s]


epoch: 5, train loss: 8.1432547828367e-05, val loss: 8.1432547828367e-05


127504it [07:11, 295.15it/s]
30429it [00:31, 977.29it/s]


epoch: 6, train loss: 9.182626950664523e-05, val loss: 9.182626950664523e-05


127504it [07:10, 296.49it/s]
30429it [00:31, 961.54it/s] 


epoch: 7, train loss: 8.408507793367635e-05, val loss: 8.408507793367635e-05


127504it [07:09, 296.83it/s]
30429it [00:31, 981.14it/s] 


epoch: 8, train loss: 6.881857744078817e-05, val loss: 6.881857744078817e-05


127504it [07:08, 297.58it/s]
30429it [00:30, 982.54it/s]

epoch: 9, train loss: 6.083664862390653e-05, val loss: 6.083664862390653e-05





### Save the model for future use

In [41]:
import torch
import torch.nn as nn

# Assuming you have defined and trained your LSTM model
lstm_model = LSTMmodel(train_data)

# Specify the file path to save the model
model_path = '/content/drive/MyDrive/lyrics_generator/models/lstm_modelv2.pth'

# Save the model
torch.save(lstm_model.state_dict(), model_path)


### Load the model for evaluation

In [None]:
import torch
import torch.nn as nn

In [42]:
# Specify the file path of the saved model
model_path = '/content/drive/MyDrive/lyrics_generator/models/lstm_modelv2.pth'

# Load the model
model = LSTMmodel(train_data)
model.load_state_dict(torch.load(model_path))
model.to(device)
model.eval()  # Set the model to evaluation mode


LSTMmodel(
  (embedding): Embedding(11100, 128)
  (lstm): LSTM(128, 128, num_layers=3, dropout=0.2)
  (fc): Linear(in_features=128, out_features=11100, bias=True)
)

In [43]:
import random

In [44]:
def predict(train_data, model, prompt, next_words=400):
    model.eval()
    prompt = prompt.replace('\n', ' <EOS>')
    words = prompt.split(' ')

    with torch.no_grad():
        state_h, state_c = model.init_state(len(words))
        state_h, state_c = state_h.to(device), state_c.to(device)

        for i in range(next_words):
            if words[i] not in train_data.word_to_index:
                words[i] = '<UNK>'

            x = torch.tensor([[train_data.word_to_index[w] for w in words[i:]]]).to(device)
            y_pred, (state_h, state_c) = model(x, (state_h, state_c))
            last_word_logits = y_pred[0][-1]
            p = torch.nn.functional.softmax(last_word_logits, dim=0).detach().cpu().numpy()

            try:
                word_index = np.random.choice(len(last_word_logits), p=p)
                words.append(train_data.index_to_word[word_index])
            except KeyError:
                words.append('<UNK>')

        generated_text = ' '.join(words)
        generated_text += prompt
        generated_text = adjust_text_format(generated_text)
        generated_text = adjust_verses(generated_text)
        return generated_text


def adjust_text_format(text):
    text = text.replace('<EOV>', '\n\n')
    text = text.replace('<EOS>', '\n')
    lines = text.splitlines()
    formatted_lyrics = []

    for i, line in enumerate(lines):
        if len(line) > 10:
            formatted_lines = split_line(line, max_words_per_line=random.randint(3, 6))
            formatted_lyrics.extend(formatted_lines)
        else:
            formatted_lyrics.append(line)

        if (i + 1) % 5 == 0:
            formatted_lyrics.append('\n\n')

    formatted_lyrics = '\n'.join(formatted_lyrics)
    return formatted_lyrics


def split_line(line, max_words_per_line):
    words = line.split()
    lines = []
    current_line = []

    for word in words:
        if len(current_line) < max_words_per_line:
            current_line.append(word)
        else:
            lines.append(' '.join(current_line))
            current_line = [word]

    if current_line:
        lines.append(' '.join(current_line))

    return lines


def adjust_verses(text):
    lines = text.splitlines()
    formatted_lyrics = ''

    for i, line in enumerate(lines):
        formatted_lyrics += '\n' + line
        if (i + 1) % 5 == 0:
            formatted_lyrics += '\n\n'

    return formatted_lyrics


In [45]:
prompt = "Like the war of words I shouted\n in my sleep And you passed right by\n"
generated_text = predict(train_data, model, prompt)

In [46]:
print(generated_text)


Like the war of words I
shouted
in my sleep And
you passed right by
curl Sleepwalking Steve


Off lupines sharply
scuffling amplifie Briskly
Titans pointed pressed
Breathing speechesI careif
mend boiled It


Emperor Promise gowns
sparkin' spending Cars
Representing soundtrack 238
paced Comin' meam
R Be Grab


week goodbye HUNDRED
turns estates owest
prison Wait judges
bags JAY bear
scholar backseat Dawn


fight shewed quarrelling
white commended Honester
home spoon White
461 cuts Thick
1966 pliant Woman


"awesome" creepin' Prohibido
Vert excuse need
Whose omni gleam
Lookin likeEmbed hug
Sollicitavit fact wait


followeth Entreated mornin'
favourable wearing Chicks
your pleasanter hurt
four Youth keeper
Tokens Stock faking


E Tying staircase
suspected it'slike rule
Dan guilty ruled
Blonde empire 24
broom 111 fingers


Yellow bump band
Clandestino boar's Lucilius
Mixt conduit eating
hussye worst Monuments
freckle stale Tove


chanced bong Proper
Agamemnon metheglin attack'd
seeming Co

In [48]:
df = {'original_lyrics':[],'gen_lyrics':[]}
for txt in random.sample(train_lyrics, k=10):
  txt_split = txt.split('<EOS>')
  prompt = str('<EOS>'.join(txt_split[:3]))
  #print(prompt)
  gen_text = predict(train_data,model,prompt)
  df['original_lyrics'].append(txt)
  df['gen_lyrics'].append(gen_text)


1 ContributorYou Cant Dance With Me LyricsCouldnt get a job , probably have to be a grad student <EOS>  All this college shit has got my swag ruin <EOS>  People think I sound like Eminem 
Verse 1 <EOS>  Friends break up , friends get married <EOS>  Strangers get born , strangers get buried 
Trke eviri <EOS>  Once upon a time , a few mistakes ago <EOS>  I was in your sights , you got me alone 
Intro <EOS>  I remember  <EOV> Verse 1 <EOS>  Good girl , sad boy 
Verse 1 <EOS>  When the dinner is cold and the chatter gets old <EOS>  You ask for the tab 
Verse 1 <EOS>  Fatefully <EOS>  I tried to pick my battles 'til the battle picked me 
Verse 1 <EOS>  Keep your helmet , keep your life , son <EOS>  Just a flesh wound , here's your rifle 
Intro <EOS>  Meet me at midnight  <EOV> Verse 1 <EOS>  Staring at the ceiling with you 
Verse 1 <EOS>  I took a chance , I took a shot <EOS>  And you might think I'm bulletproof , but I'm not 
Verse 1 : Taylor Swift <EOS>  Break my soul in two looking for y

In [49]:
df_gen = pd.DataFrame(df)
df_gen

Unnamed: 0,original_lyrics,gen_lyrics
0,1 ContributorYou Cant Dance With Me LyricsCoul...,\n1 ContributorYou Cant\nDance With Me\nLyrics...
1,"Verse 1 <EOS> Friends break up , friends get ...","\nVerse 1 \nFriends break up\n, friends get\nm..."
2,"Trke eviri <EOS> Once upon a time , a few mis...","\nTrke eviri\nOnce upon a time\n, a few mistak..."
3,Intro <EOS> I remember <EOV> Verse 1 <EOS> ...,"\nIntro \nI remember\n\n Verse 1 \nGood girl ,..."
4,Verse 1 <EOS> When the dinner is cold and the...,\nVerse 1 \nWhen the dinner is\ncold and the c...
5,Verse 1 <EOS> Fatefully <EOS> I tried to pic...,\nVerse 1 \nFatefully\nI tried to pick my\nbat...
6,"Verse 1 <EOS> Keep your helmet , keep your li...","\nVerse 1 \nKeep your helmet , keep\nyour life..."
7,Intro <EOS> Meet me at midnight <EOV> Verse ...,\nIntro \nMeet me at midnight\n\n Verse 1 \nSt...
8,"Verse 1 <EOS> I took a chance , I took a shot...","\nVerse 1 \nI took a\nchance , I\ntook a shot\..."
9,Verse 1 : Taylor Swift <EOS> Break my soul in...,\nVerse 1 : Taylor\nSwift\nBreak my soul\nin t...


In [50]:
#Using BLEU score to compare the real sentences with the generated ones
import statistics
from nltk.translate.bleu_score import sentence_bleu

In [60]:
def reverse(text):
  content = re.sub(r'<EOS>',' ',text)
  content = re.sub(r'<EOS>',' ',content)
  content = re.sub(r'\n',' ', content)
  return content



In [61]:
df_gen_clean = df_gen.applymap(reverse)

In [62]:
scores=[]

for i in range(df_gen_clean.shape[0]):
  reference = df_gen_clean['original_lyrics'][i]
  candidate = df_gen_clean['gen_lyrics'][i]
  scores.append(sentence_bleu(reference, candidate))

statistics.mean(scores)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


6.36959445327723e-232

### Conclusions

- We haven't used any word embeddings.
- If we use embeddings like Glove or BERT, the perfomance can be improved.
- In conclusion, improving the BLEU score on the Taylor Swift Lyrics dataset using LSTM models requires exploring different model architectures, optimizing hyperparameters, augmenting the data, leveraging ensemble methods, experimenting with alternative sequence generation approaches, and considering fine-tuning of pre-trained models. 
- These strategies can collectively contribute to enhancing the model's understanding of the lyrics and generating more accurate and coherent outputs.