
# importing libraries

In [12]:
import pandas as pd
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# loading csv
### Downloaded dataset from here : https://www.kaggle.com/datasets/notshrirang/spotify-million-song-dataset

In [45]:
df = pd.read_csv("spotify_millsongdata.csv")

In [46]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [47]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [48]:
df.shape

(57650, 4)

In [49]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

# We don't need link column so we will drop it

In [50]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [51]:
df.head(10)

Unnamed: 0,artist,song,text
0,Puff Daddy,Hate Me Now (Feat. Nas),[Puff] Escobar season has returned... \r\n[Na...
1,Nina Simone,Laziest Gal In Town,"It's not 'cause I wouldn't, \r\nIt's not 'cau..."
2,Britney Spears,Abroad,Let me see your passport \r\nPlease take off ...
3,Townes Van Zandt,Lover's Lullaby,Dreams that have flown down the hall \r\nTear...
4,Point Of Grace,He's The Best Thing,He's a light unto my pathway \r\nHe's a lamp ...
5,Great Big Sea,The River Driver,I was just the age of sixteen \r\nWhen I firs...
6,Howard Jones,Show Me,We got the same beat runnin' round \r\nThroug...
7,Ella Fitzgerald,Bewitched,After one whole quart of brandy \r\nLike a da...
8,Alice Cooper,Dangerous Tonight,Take another bite \r\nIt'll be alright \r\nW...
9,Kinks,Black Messiah,Everybody got the right to speak their mind \...


In [52]:
df['text'][0]

"[Puff] Escobar season has returned...  \r\n[Nas]  \r\nIt's been a long time, been a long time comin  \r\nLooks like the death of me now  \r\nBut you know, there's no turning back now  \r\nThis is what makes me - this is what I am  \r\n[Puff] Feel me? Let's go  \r\n[Base Chorus: Puff Daddy]  \r\nYou can hate me now.. but I won't stop now..  \r\nCause I can't stop now.. you can hate me now..  \r\nBut I won't stop now.. cause I can't stop now..  \r\nYou can hate me now.. you can hate me NOWWWW..  \r\n[1st Chorus: add Nas after each pause]  \r\nQ.B., real niggas, Bravehearts, c'mon  \r\n[1st Chorus: after Nas add Puff Daddy ad lib]  \r\nWell you hate me I'm gon' hate you too  \r\nIt's as simple as that  \r\nDie motherfucker die motherfucker die  \r\nYou don't give a fuck I don't give a fuck  \r\nGo down any way you want it to go down (do it now)  \r\nWeak, jealous motherfuckers (do it now, do it now)  \r\nFuck y'all!  \r\n[Verse 1]  \r\nDon't hate me, hate the money I see, clothes that I 

In [44]:
df = df.sample(3000)

In [53]:
df.shape

(5000, 3)

# Preprocessing

In [54]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

# Applying stemming

In [55]:
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [56]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [57]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [58]:
similarity[0]

array([1.        , 0.03043772, 0.01741327, ..., 0.02938947, 0.02187938,
       0.03653265])

In [61]:
df[df['song'] == 'Abroad']

Unnamed: 0,artist,song,text
2,Britney Spears,Abroad,let me see your passport pleas take off your g...


In [62]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [63]:
recommendation('Abroad')

["Goin' Home To Rock",
 'You Might Need A Man',
 'One Last Time',
 'Break',
 'Let Me Know',
 'All You Need Is Love',
 'Let Me Be The First To Know',
 'The Three Day Man',
 'Gospel Changes',
 'Need Some Love',
 'Paper Friends',
 'Final Eyes',
 'Get It Right',
 'Francine',
 'Every Second Someone Breaks A Heart',
 'I Want Your Love',
 'Do You Know?',
 'They Said You Needed Me',
 'I Am God',
 'Listen Like Thieves']