In [35]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import model_utils as mu
from gensim.models import Word2Vec
import numpy as np
import torch
import torch.nn as nn
from tqdm.autonotebook import tqdm
import nn_model as nnm
from sklearn.preprocessing import LabelEncoder

NUM_SEQUENCES_PER_BATCH = 512

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [36]:
# Load the pre-trained model
embedder = nnm.Embedder('all-MiniLM-L6-v2', device=device)

In [37]:
#load sentences
similar_song_lyrics= pd.read_csv("data/kat_data_similar_songs.csv") 
similar_song_lyrics.head()


Unnamed: 0,title,artist,lyrics,similar_track,similar_artist,similar_song
0,Cuéntale,Ivy Queen,4 ContributorsDile LyricsQuien te va a querer ...,Dile,Ivy Queen,Dile by Ivy Queen
1,Cuéntale,Ivy Queen,3 ContributorsCansada LyricsWelcome to the dra...,Cansada,Ivy Queen,Cansada by Ivy Queen
2,Cuéntale,Ivy Queen,11 ContributorsTranslationsEnglishDeutschElla ...,Ella Me Levantó,Daddy Yankee,Ella Me Levantó by Daddy Yankee
3,Cuéntale,Ivy Queen,"12 ContributorsDale Don Dale Lyrics[Letra de ""...",Dale Don Dale,Don Omar,Dale Don Dale by Don Omar
4,Cuéntale,Ivy Queen,"8 ContributorsSexy Robótica Lyrics[Letra de ""S...",Sexy Robotica,Don Omar,Sexy Robotica by Don Omar


In [46]:
lyrics, song_title = similar_song_lyrics["lyrics"].tolist(), similar_song_lyrics["similar_song"].tolist()
processed_lyrics = []
for lyric in lyrics:
    processed_lyrics.append(nnm.preprocess_sentence(lyric))

In [47]:
#Create word embeddings 
embeddings = embedder(processed_lyrics, batch_size=NUM_SEQUENCES_PER_BATCH)
print(embeddings.shape)


torch.Size([7047, 384])


In [48]:
label_encoder = LabelEncoder()

# Fit and transform the labels to integers
y_encoded = label_encoder.fit_transform(song_title) # Label Encoder does transform values into unique

# Convert to a PyTorch tensor
y_tensor = torch.tensor(y_encoded, dtype=torch.int64).to(device)
print(y_tensor[:5])

tensor([ 734,  487,  863,  655, 2482], device='cuda:0')


In [49]:
model, test_dataloader = nnm.full_pipeline(x=embeddings, y=y_tensor, vocab_size = len(set(y_encoded)), batch_size=NUM_SEQUENCES_PER_BATCH, embedding_size=embeddings.shape[1], hidden_units=128, epochs=1000, device=device, early_stop_threshold=1e-3)
print(embeddings.shape)  # Should be [num_samples, embedding_dim]

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch: 0, Loss: 8.0998

Epoch: 1, Loss: 8.0538

Epoch: 2, Loss: 7.9156

Epoch: 3, Loss: 7.7421

Epoch: 4, Loss: 7.4864

Epoch: 5, Loss: 7.3772

Epoch: 6, Loss: 7.2313

Epoch: 7, Loss: 7.1641

Epoch: 8, Loss: 7.0233

Epoch: 9, Loss: 6.9744

Epoch: 10, Loss: 6.7750

Epoch: 11, Loss: 6.8615

Epoch: 12, Loss: 6.7391

Epoch: 13, Loss: 6.6516

Epoch: 14, Loss: 6.6607

Epoch: 15, Loss: 6.6465

Epoch: 16, Loss: 6.5600

Epoch: 17, Loss: 6.5449

Epoch: 18, Loss: 6.3477

Epoch: 19, Loss: 6.4030

Epoch: 20, Loss: 6.1487

Epoch: 21, Loss: 6.2760

Epoch: 22, Loss: 6.1372

Epoch: 23, Loss: 6.0826

Epoch: 24, Loss: 5.9381

Epoch: 25, Loss: 5.9043

Epoch: 26, Loss: 5.7778

Epoch: 27, Loss: 5.6751

Epoch: 28, Loss: 5.5072

Epoch: 29, Loss: 5.4910

Epoch: 30, Loss: 5.3431

Epoch: 31, Loss: 5.3892

Epoch: 32, Loss: 5.1819

Epoch: 33, Loss: 5.0657

Epoch: 34, Loss: 4.9475

Epoch: 35, Loss: 4.7576

Epoch: 36, Loss: 4.7535

Epoch: 37, Loss: 4.6559

Epoch: 38, Loss: 4.4713

Epoch: 39, Loss: 4.4464

Epoch: 40,

In [50]:
# Evaluate the model on the test set
accuracy = nnm.evaluate_model(model, test_dataloader)
# Save the trained model
torch.save(model.state_dict(), "model/trained_ffnn_model.pth")
print("Model saved as 'trained_ffnn_model.pth'")
# Load the model for future use
print(accuracy)

Accuracy on test set: 68.72% (969/1410)
Model saved as 'trained_ffnn_model.pth'
0.6872340425531915


In [56]:
def generate(model, embedder, input_lyrics) -> str:
    # YOUR CODE HERE
	# Preprocess lyrics
    processed_lyrics = mu.preprocess_sentence(lyric)

    # Encode lyrics
    encoded_lyrics = embedder.encode(processed_lyrics, batch_size=NUM_SEQUENCES_PER_BATCH, convert_to_tensor=True)
    
    # Setting model to evaluation mode turns off Dropout and BatchNorm making the predictions deterministic
    model.eval()  # Set the model to evaluation mode if you haven't already
    
    with torch.no_grad(): # Speeds up inference and reduces memory usage by not having to calcualte gradients
        logits = model(encoded_lyrics) # Forward pass on the model
        probability = nn.functional.softmax(logits, dim=1) # Normalize z scores to probability
        predicted_idx = torch.multinomial(probability, num_samples=1).item()

        #predicted_idx = probability.argmax(dim=1).item() # Retrieve int value
		
	# Transform index to natural-language token
    predicted_token = model.embedding_layer.index_to_token[predicted_idx] 
    
    return predicted_token

In [57]:
import lyricsgenius
song_title = "Yonaguni"
artist_name = "Bad Bunny"

genius = lyricsgenius.Genius("Z_wiD32yFUiTd_bFET7Xo4UkbdCvZZJm7ViomZFISpf4wpr_4nvblXiuMaemGED8")
queried_song = genius.search_song(song_title, artist_name)

if queried_song:
    lyrics = queried_song.lyrics
    processed_lyrics = nnm.preprocess_sentence(lyric)
    encoded_lyrics = embedder(processed_lyrics, batch_size=NUM_SEQUENCES_PER_BATCH)
    encoded_lyrics.unsqueeze_(0) # Add a batch dimension

    model.eval()

    with torch.no_grad(): # Speeds up inference and reduces memory usage by not having to calcualte gradients
        logits = model(encoded_lyrics) # Forward pass on the model
        probability = nn.functional.softmax(logits, dim=1) # Normalize z scores to probability
        predicted_idx = torch.multinomial(probability, num_samples=10).cpu()

    print(label_encoder.inverse_transform([predicted_idx][0]))


else:
    print(f"Song '{song_title}' by '{artist_name}' not found.")


Searching for "Yonaguni" by Bad Bunny...
Done.


ValueError: y should be a 1d array, got an array of shape (1, 10) instead.

In [None]:
probability.cpu()[0][predicted_idx[0]]

tensor([9.8920e-01, 9.4986e-04, 2.7583e-05, 2.6933e-03, 1.0099e-03, 5.0452e-04,
        2.3555e-03, 5.3235e-05, 1.6600e-04, 2.3182e-05])

In [None]:
probability.shape

torch.Size([1, 3321])

In [58]:
print(predicted_idx[0])
print(label_encoder.inverse_transform([predicted_idx][0].squeeze(0)))

tensor([2221,  988,  950, 1522, 1094, 3098,  168,   13,  256, 2170])
['Poker Face by Lady Gaga' 'Fever by Vybz Kartel' 'Falsetto by The-Dream'
 'Kitchen by SZA' 'Girls & Boys by Blur' 'Whatever You Like by T.I.'
 'Anaconda by Nicki Minaj' '1st of tha Month by Bone Thugs-N-Harmony'
 "Backin' It Up (feat. Cardi B) by Pardison Fontaine"
 'Pass the Salt (feat. Vince Staples) by Joy Crookes']


In [69]:
print(next(iter(test_dataloader))[0].shape)
flatten = nn.Flatten()
a = flatten(next(iter(test_dataloader))[0])
print(a.size())

torch.Size([512, 384])
torch.Size([512, 384])


In [68]:
next(iter(test_dataloader))[0].shape

torch.Size([512, 384])

In [24]:
similar_song_lyrics["artist" == "Lady Gaga"]

KeyError: False

In [26]:
similar_song_lyrics.cols

AttributeError: 'DataFrame' object has no attribute 'cols'

In [24]:
encoded_lyrics.shape

torch.Size([384])

In [35]:
bad_bunny_songs = similar_song_lyrics[similar_song_lyrics['artist'] == 'Bad Bunny']
print(bad_bunny_songs[['title', 'similar_track']])

                 title            similar_track
3355          Yonaguni              Moscow Mule
3356          Yonaguni                     Vete
3357          Yonaguni                  Holanda
3358          Yonaguni                  DILUVIO
3359          Yonaguni                   Normal
...                ...                      ...
7006  Tití Me Preguntó                 PROVENZA
7007  Tití Me Preguntó  Feliz Cumpleaños Ferxxo
7008  Tití Me Preguntó                 LA FALDA
7009  Tití Me Preguntó                Tú Con Él
7010  Tití Me Preguntó   CHORRITO PA LAS ANIMAS

[380 rows x 2 columns]


In [30]:
similar_song_lyrics.head()

Unnamed: 0,title,artist,lyrics,similar_track,similar_artist
0,Cuéntale,Ivy Queen,4 ContributorsDile LyricsQuien te va a querer ...,Dile,Ivy Queen
1,Cuéntale,Ivy Queen,3 ContributorsCansada LyricsWelcome to the dra...,Cansada,Ivy Queen
2,Cuéntale,Ivy Queen,11 ContributorsTranslationsEnglishDeutschElla ...,Ella Me Levantó,Daddy Yankee
3,Cuéntale,Ivy Queen,"12 ContributorsDale Don Dale Lyrics[Letra de ""...",Dale Don Dale,Don Omar
4,Cuéntale,Ivy Queen,"8 ContributorsSexy Robótica Lyrics[Letra de ""S...",Sexy Robotica,Don Omar
