In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import string

class LyricsGenerator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, device):
        super(LyricsGenerator, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = 1  # Set the batch size to 1
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)  # Set batch_first=True
        self.fc = nn.Linear(hidden_size, output_size)
        self.device = device

    def forward(self, input_seq, hidden):
        batch_size = input_seq.size(0)  # Get the batch size
        embedded = self.embedding(input_seq)
        hidden = self.init_hidden(batch_size)  # Initialize the hidden state with the updated batch size
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = (weight.new(1, batch_size, self.hidden_size).zero_().to(self.device),
                  weight.new(1, batch_size, self.hidden_size).zero_().to(self.device))
        return hidden


class LyricsDataset(Dataset):
    def __init__(self, lyrics_list):
        self.lyrics_list = lyrics_list
        self.word_to_index = {}
        self.index_to_word = {}

        self._create_vocab()

    def _create_vocab(self):
        words = []
        for line in self.lyrics_list:
            words.extend(line.split())

        words = list(set(words))
        self.word_to_index = {word: i for i, word in enumerate(words)}
        self.index_to_word = {i: word for i, word in enumerate(words)}

    def __len__(self):
        return len(self.lyrics_list)

    def __getitem__(self, index):
        line = self.lyrics_list[index]
        sequence = [self.word_to_index[word] for word in line.split()]
        input_seq = torch.tensor(sequence[:-1]).long()  # Convert input sequence to LongTensor
        target_seq = torch.tensor(sequence[1:]).long()  # Convert target sequence to LongTensor
        return input_seq, target_seq

file_path = "/Users/nisshanth/KanyeCover/Kanye West Lyrics.txt"

try:
    with open(file_path, "r") as file:
        # Read the first 400 lines of the file contents
        file_contents = [line.strip() for line in file.readlines()[:400] if line.strip()]

        # Convert the text to lowercase and remove punctuation/special characters/parentheses
        file_contents = [
            line.lower()
            .translate(str.maketrans("", "", string.punctuation))
            .replace("(", "")
            .replace(")", "")
            for line in file_contents
            if "[" not in line and "]" not in line
        ]

        # Create the lyrics dataset
        dataset = LyricsDataset(file_contents)

        # Print the lyrics set
        print("Lyrics Set:")
        for lyric in dataset.word_to_index:
            print(lyric)

        # Print the dataset size
        print("Dataset Size:", len(dataset))

        # Print a sample input-target pair
        input_seq, target_seq = dataset[0]
        print("Sample Input Sequence:", input_seq)
        print("Sample Target Sequence:", target_seq)

except FileNotFoundError:
    print("File not found. Please check the file path.")
    



Lyrics Set:
awake
said
ever
cannot
ill
away
wanted
plannin
really
before
flag
power
keepin
wake
spazzin
vipers
are
gas
crash
mothers
bitter
comes
pierre
tools
jesus
its
ye
pie
reveal
job
hell
keef
thought
couldnt
feel
picture
sanctuary
dont
though
may
excitebike
pour
gave
no
live
written
dancin
judas
woah
come
longer
em
oohooh
stars
with
conversation
kiss
doing
began
deity
slave
at
350s
down
survived
mean
life
told
forget
not
tryin
want
okay
favor
bearing
pure
good
wrestlin
perfect
them
all
morning
bike
heart
sun
wore
water
abraham
hallelujah
statues
needin
after
youth
head
new
become
portions
juice
dude
weapons
hide
use
praising
follow
say
wooooh
glory
breath
tyler
833
millisecond
prayed
level
loves
hand
me
lie
woke
couldve
safe
have
wealth
give
side
every
lay
cant
adam
forbes
hair
jezebel
mike
garage
raise
everything
bust
shalt
be
thing
being
on
drivin
worship
lows
composure
strong
chasin
word
only
i’m
john
drank
dad
culture
people
like
high
find
been
of
tubin
gotta
hands
lemonade
tr

In [7]:
# Define the hyperparameters
input_size = len(dataset.word_to_index)
hidden_size = 128
output_size = len(dataset.word_to_index)
num_epochs = 10
batch_size = 32
learning_rate = 0.01

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create the model instance
model = LyricsGenerator(input_size, hidden_size, output_size, device)


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create the data loader
# Create the data loader with the custom collate function
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda batch: (pad_sequence([item[0] for item in batch], batch_first=True), pad_sequence([item[1] for item in batch], batch_first=True)))

In [8]:
import torch

# Check if CUDA is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for epoch in range(num_epochs):
    for batch_idx, (input_seq, target_seq) in enumerate(data_loader):
        # Set gradients to zero
        optimizer.zero_grad()

        # Initialize hidden state
        batch_size = input_seq.size(0)  # Get the batch size
        hidden = (torch.zeros(1, batch_size, hidden_size).to(device),  # Initialize the hidden state with zeros
                  torch.zeros(1, batch_size, hidden_size).to(device))  # Initialize the cell state with zeros

        # Adjust hidden state size if batch size changes
        if hidden[0].size(1) != batch_size:
            hidden = (hidden[0][:, :batch_size, :], hidden[1][:, :batch_size, :])

        # Forward pass
        output, hidden = model(input_seq, hidden)
        loss = criterion(output.view(-1, output_size), target_seq.view(-1))

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Detach hidden state to prevent gradient accumulation
        hidden = tuple([h.detach() for h in hidden])

        # Print the loss every few batches
        if batch_idx % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(data_loader)}], Loss: {loss.item()}")



Epoch [1/10], Batch [1/9], Loss: 6.340527057647705
Epoch [2/10], Batch [1/9], Loss: 1.798841118812561
Epoch [3/10], Batch [1/9], Loss: 1.886411190032959
Epoch [4/10], Batch [1/9], Loss: 1.0222667455673218
Epoch [5/10], Batch [1/9], Loss: 0.9230043292045593
Epoch [6/10], Batch [1/9], Loss: 0.8644943237304688
Epoch [7/10], Batch [1/9], Loss: 0.4975454807281494
Epoch [8/10], Batch [1/9], Loss: 0.28289297223091125
Epoch [9/10], Batch [1/9], Loss: 0.20439575612545013
Epoch [10/10], Batch [1/9], Loss: 0.1822577565908432


In [10]:
# Generate lyrics
start_word = "we"  # Starting word for generation
num_words = 150  # Number of words to generate

# Set the model to evaluation mode
model.eval()

# Convert the starting word to index
start_index = dataset.word_to_index[start_word]
input_word = torch.tensor([[start_index]])  # Add an extra dimension for batch size

# Generate the lyrics
generated_lyrics = [start_word]
hidden = None
with torch.no_grad():
    for _ in range(num_words):
        output, hidden = model(input_word, hidden)
        probabilities = nn.functional.softmax(output, dim=2).squeeze(0)
        predicted_index = torch.multinomial(probabilities, 1).item()
        predicted_word = dataset.index_to_word[predicted_index]

        generated_lyrics.append(predicted_word)
        input_word = torch.tensor([[predicted_index]])  # Add an extra dimension for batch size

# Print the generated lyrics
generated_lyrics = " ".join(generated_lyrics)
print("Generated Lyrics:")
print(generated_lyrics)

# Save the model
torch.save(model.state_dict(), "lyrics_generator.pth")


Generated Lyrics:
we have breath works the strength is lying with judas kiss every time conversation will twice that breath the greatest artist restin with us sing til the selfies them indoctrinate hallelujah hallelujah hallelujah hallelujah on la way for a newborn told me in movie truth twice yeah well right fight down for vipers to whom hour til the leaves’ll tribe the week start as water my life gon praising the 350s he he cover ours now you gon what your loves of the powers had chasin felt thats why i was yeah are water down at dancin you on the army heavens up for every minute the strong the irs want gates awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake awake
