In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

In [None]:
# Pretrained sentence-embedding checkpoint; every encode() call in this
# notebook (corpus and queries) goes through this one model instance.
MODEL_NAME = 'all-MiniLM-L12-v2'
model = SentenceTransformer(MODEL_NAME)

In [None]:
# Song dataset, read from a path relative to the notebook's working directory.
# Later cells read its 'title', 'artist' and 'lyrics' columns.
DATASET_PATH = "data/small_dataset.csv"
sds = pd.read_csv(DATASET_PATH)

In [None]:
# Embed every lyric with the sentence-transformer model.
# encode() is documented to take a string or a list of strings, so hand it a
# plain list rather than the pandas Series: integer indexing on a Series is
# label-based and would not be positional if the frame had a non-default index.
embeddings = model.encode(sds['lyrics'].tolist())

In [None]:
# Show the embedding matrix; NumPy abbreviates large arrays with "..." in the repr.
embeddings

array([[-1.31038964e-01,  1.82835609e-02, -3.78568582e-02, ...,
         6.01930224e-05,  2.52264198e-02, -1.38734495e-02],
       [-5.06527498e-02,  3.69102024e-02,  1.98155567e-02, ...,
         4.73509207e-02,  1.69067178e-02,  1.62009802e-02],
       [-5.76839410e-02,  4.75641787e-02,  1.90364628e-03, ...,
         8.02092180e-02,  5.36910538e-03, -5.62414564e-02],
       ...,
       [-5.58314333e-03,  1.10310338e-01, -3.39260958e-02, ...,
         7.67268389e-02,  3.38497083e-03, -3.25751267e-02],
       [-4.95762657e-03, -5.17929643e-02,  1.16702043e-01, ...,
         4.69979048e-02, -5.37207872e-02,  1.70569289e-02],
       [ 3.40427319e-03,  6.75700605e-02,  6.94579706e-02, ...,
        -4.65298779e-02,  3.38704437e-02,  1.26633956e-03]], dtype=float32)

In [None]:
# Store one embedding vector per row: splitting the 2-D array into a list of
# 1-D rows lets pandas keep each vector as a single cell of the new column.
sds['embeddings'] = [row_vector for row_vector in embeddings]

In [None]:
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two 1-D vectors.

    Parameters
    ----------
    v1, v2 : array-like
        Vectors of equal length.

    Returns
    -------
    float
        ``dot(v1, v2) / (||v1|| * ||v2||)``. If either vector has zero norm
        the cosine is undefined; 0.0 is returned instead of letting the 0/0
        division produce NaN (and a RuntimeWarning) that would leak into any
        ranking built on these scores.
    """
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return 0.0
    return np.dot(v1, v2) / norm_product

In [None]:
def relevance_scores(query_embed, embeddings = None):
    """Score a query embedding against a collection of embedding vectors.

    Parameters
    ----------
    query_embed : array-like
        Embedding vector of the query.
    embeddings : iterable of array-like, optional
        Vectors to compare the query against. When omitted, the global
        ``sds['embeddings']`` column is used, which is what the
        single-argument form of this function has always scored.

    Returns
    -------
    pandas.Series
        Cosine similarity between the query and each vector, in input order.
        If ``embeddings`` is a Series its index is carried over, so assigning
        the result back onto the source frame lines up row by row even when
        that frame does not have a default 0..n-1 index. Any other iterable
        yields a default integer index.
    """
    if embeddings is None:
        embeddings = sds['embeddings']
    scores = [cosine_similarity(query_embed, emb) for emb in embeddings]
    # Column assignment in pandas aligns on index labels, so keep the labels
    # of the source column rather than inventing a fresh RangeIndex.
    index = embeddings.index if isinstance(embeddings, pd.Series) else None
    return pd.Series(scores, index=index)

In [None]:
def semantic_search(query_sentence, df = sds, return_top = False):
    """Rank the rows of ``df`` by semantic similarity to a free-text query.

    Parameters
    ----------
    query_sentence : str
        Text to search for; it is embedded with the global ``model``.
    df : pandas.DataFrame, optional
        Frame to search. It must have an 'embeddings' column holding one
        vector per row plus 'title', 'artist' and 'lyrics' columns.
        Defaults to the global ``sds`` frame.
    return_top : bool, optional
        If false (default), return the full ranking. If true, return only the
        lyrics of the best-matching row.

    Returns
    -------
    pandas.DataFrame or object
        With ``return_top`` false: the 'title', 'artist', 'lyrics' and
        'scores' columns, sorted by descending score. With ``return_top``
        true: the 'lyrics' value of the highest-scoring row.

    Notes
    -----
    Scores are computed from the ``df`` that was passed in, not from the
    global ``sds``, so searching a filtered or re-indexed frame scores the
    right rows. The input frame is left untouched: the 'scores' column is
    added to a copy, so repeated searches do not overwrite state on the
    shared default frame.
    """
    query_embed = model.encode(query_sentence)
    # A plain list is assigned positionally, so the scores cannot be
    # misaligned by whatever index df happens to carry.
    scores = [cosine_similarity(query_embed, emb) for emb in df['embeddings']]
    # assign() returns a new frame instead of writing a column into df.
    ranked = df.assign(scores = scores).sort_values(by = 'scores', ascending = False)
    if return_top:
        return ranked.iloc[0]['lyrics']
    return ranked[['title', 'artist', 'lyrics', 'scores']]

In [None]:
# Rank every song against a free-text query; the trailing expression is the
# cell's displayed result (the ranked table).
query = "i'm pleased you are doing well after we left each other"
semantic_search(query)

Unnamed: 0,title,artist,lyrics,scores
1313,​happier,Olivia Rodrigo,\nWe broke up a month ago\nYour friends are mi...,0.401395
694,Let Me Love You,Ariana Grande,\nI just broke up with my ex\nNow I'm out here...,0.395182
1375,​good 4 u,Olivia Rodrigo,"\n(Ah)\n\n\nWell, good for you, I guess you mo...",0.389521
821,Scared to Be Lonely,Martin Garrix & Dua Lipa,\nIt was great at the very start\nHands on eac...,0.388319
954,Praying,Kesha,"\nWell, you almost had me fooled\nTold me that...",0.357559
...,...,...,...,...
504,The Blacker the Berry,Kendrick Lamar,"\nEverything black, I don't want black (They w...",-0.054732
1158,​you should see me in a crown,Billie Eilish,"\nBite my tongue, bide my time\nWearing a warn...",-0.055469
23,Through the Wire,Kanye West,\n\n\nLast October Grammy-nominated producer K...,-0.062410
532,Institutionalized,Kendrick Lamar,\n\n\nWhat money got to do with it\nWhen I don...,-0.067968


In [None]:
# Fetch only the best match; print() is used so the newlines embedded in the
# lyrics are rendered as line breaks rather than shown as escaped "\n".
top_lyrics = semantic_search(
    "i'm pleased you are doing well after we left each other",
    return_top = True,
)
print(top_lyrics)


We broke up a month ago
Your friends are mine, you know I know
You've moved on, found someone new
One more girl who brings out the better in you
And I thought my heart was detached
From all the sunlight of our past
But she's so sweet, she's so pretty
Does she mean you forgot about me?


Oh, I hope you're happy
But not like how you were with me
I'm selfish, I know, I can't let you go
So find someone great, but don't find no one better
I hope you're happy, but don't be happier


And do you tell her she's the most beautiful girl you've ever seen?
An eternal love bullshit you know you'll never mean
Remember when I believed
You meant it when you said it first to me?
And now I'm pickin' her apart
Like cuttin' her down will make you miss my wretched heart
But she's beautiful, she looks kind
She probably gives you butterflies

I hope you're happy
But not like how you were with me
I'm selfish, I know, I can't let you go
So find someone great, but don't find no one better
I hope you're happy
I 