# Supervised Feed-Forward Neural Network Analysis

## Instructions

Make sure that the "trained_ffnn_model.pth" is saved in the following directory "model/trained_ffnn_model.pth", and that "kat_data_similar_songs.csv" is stored in the directory "data/kat_data_similar_songs.csv".

In [62]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from gensim.models import Word2Vec
import numpy as np
import torch
import torch.nn as nn
from tqdm.autonotebook import tqdm
import nn_model as nnm
from sklearn.preprocessing import LabelEncoder
import lyricsgenius


NUM_SEQUENCES_PER_BATCH = 1024

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [53]:
# Load the pre-trained model
embedder = nnm.Embedder('all-MiniLM-L6-v2', device=device)

In [54]:
#load sentences
similar_song_lyrics= pd.read_csv("data/kat_data_similar_songs.csv") 
similar_song_lyrics.head()

Unnamed: 0,title,artist,song,similar_lyrics,similar_track,similar_artist
0,Cuéntale,Ivy Queen,Cuéntale by Ivy Queen,4 ContributorsDile LyricsQuien te va a querer ...,Dile,Ivy Queen
1,Cuéntale,Ivy Queen,Cuéntale by Ivy Queen,3 ContributorsCansada LyricsWelcome to the dra...,Cansada,Ivy Queen
2,Cuéntale,Ivy Queen,Cuéntale by Ivy Queen,11 ContributorsTranslationsEnglishDeutschElla ...,Ella Me Levantó,Daddy Yankee
3,Cuéntale,Ivy Queen,Cuéntale by Ivy Queen,"12 ContributorsDale Don Dale Lyrics[Letra de ""...",Dale Don Dale,Don Omar
4,Cuéntale,Ivy Queen,Cuéntale by Ivy Queen,"8 ContributorsSexy Robótica Lyrics[Letra de ""S...",Sexy Robotica,Don Omar


In [55]:
lyrics, song_title = similar_song_lyrics["similar_lyrics"].tolist(), similar_song_lyrics["title"].tolist()
processed_lyrics = []
for lyric in lyrics:
    processed_lyrics.append(nnm.preprocess_sentence(lyric))

In [56]:
#Create word embeddings 
embeddings = embedder(processed_lyrics, batch_size=NUM_SEQUENCES_PER_BATCH)
print(embeddings.shape)


torch.Size([7047, 384])


In [57]:
# LabelEncoder allows to create the one-hot encoding of the labels
label_encoder = LabelEncoder()

# Fit and transform the labels to integers
y_encoded = label_encoder.fit_transform(song_title) # Label Encoder does transform values into unique

# Convert to a PyTorch tensor
y_tensor = torch.tensor(y_encoded, dtype=torch.int64).to(device)
print(y_tensor[:5])

tensor([141, 141, 141, 141, 141], device='cuda:0')


In [60]:
# Full training pipeline
#model, test_dataloader = nnm.full_pipeline(x=embeddings, y=y_tensor, vocab_size = len(set(y_encoded)), batch_size=NUM_SEQUENCES_PER_BATCH, embedding_size=embeddings.shape[1], hidden_units=1024
#                                          , epochs=500, device=device, early_stop_threshold=1e-3)

# Save the model
#torch.save(model.state_dict(), "model/trained_ffnn_model.pth")

# Initialize the model to load the weights
model = nnm.FFNN(embedding_size=embeddings.shape[1], hidden_units=1024, vocab_size=len(set(y_encoded)), device=device)

# Load the model weights
model.load_state_dict(torch.load("model/trained_ffnn_model.pth"))

# Construct the test dataloader
x_train, x_test, y_train, y_test = nnm.split_dataset(embeddings, y_tensor, device)
_, test_dataloader = nnm.create_dataloaders(x_train, x_test,y_train,y_test, NUM_SEQUENCES_PER_BATCH)

In [61]:
# Evaluate the model on the test set
accuracy, precision, recall, f1 = nnm.evaluate_model(model, test_dataloader)

Accuracy: 18.94% (267/1410)
Precision: 0.1559
Recall: 0.1894
F1 Score: 0.1594


## Human Evaluation Section

In [64]:
song_title = "Fire Burning"
artist_name = "Sean Kingston"

genius = lyricsgenius.Genius("Z_wiD32yFUiTd_bFET7Xo4UkbdCvZZJm7ViomZFISpf4wpr_4nvblXiuMaemGED8")
queried_song = genius.search_song(song_title, artist_name)

if queried_song:
    lyrics = queried_song.lyrics
    processed_lyrics = nnm.preprocess_sentence(lyric)
    encoded_lyrics = embedder(processed_lyrics, batch_size=NUM_SEQUENCES_PER_BATCH)
    encoded_lyrics.unsqueeze_(0) # Add a batch dimension

    model.eval()

    with torch.no_grad(): # Speeds up inference and reduces memory usage by not having to calcualte gradients
        logits = model(encoded_lyrics) # Forward pass on the model
        probability = nn.functional.softmax(logits, dim=1) # Normalize z scores to probability
        predicted_idx = torch.multinomial(probability, num_samples=8).cpu()[0].squeeze(0) # Get the top 10 predictions
        predicted_songs = label_encoder.inverse_transform(predicted_idx)

else:
    print(f"Song '{song_title}' by '{artist_name}' not found.")


Searching for "Fire Burning" by Sean Kingston...
Done.


Labels that are not similar to: Fire Burning by Sean Kingston <br>
* 252 -  'Honey' by Kehlani
* 11 - 505 by Arctic Monkeys
* 13 - 8TEEN by Khalid
* 21 - American Requiem by Beyonce

Labels somewhat similar to : Fire Burning by Sean Kingston <br>
* 336 - Low  by Flo Rida
* 431 - Promiscuous
* 294 - Just Dance

In [65]:
potential_indices = [336, 431, 252]
name_indices = ["Low by Flo Rida", "Promiscuous by Nelly Furtado", "Honey by Kehlani"]

# Get the corresponding labels
for index in potential_indices:
    value = probability[0, index]
    label = label_encoder.inverse_transform([index])[0]
    print(f"Probability for '{label}' is {value} ")

Probability for 'Low' is 5.129538112669252e-05 
Probability for 'Promiscuous' is 0.00024644029326736927 
Probability for 'Honey' is 3.826822648989037e-06 
