## Music Recommender

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.columns

Index(['artist', 'song', 'link', 'text'], dtype='object')

In [7]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [18]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [19]:
df.head(10)

Unnamed: 0,artist,song,text
0,Electric Light Orchestra,Yours Truly 2095,"i sent a messag to anoth time , but as the day..."
1,Marilyn Manson,The Love Song,`` i got a crush on a pretti pistol should i t...
2,Van Morrison,Hello Josephine,"hello josephin , a how do you do ? do you reme..."
3,Vanessa Williams,Happiness,i do n't believ what i 'm hearin ' you ca n't ...
4,P!nk,Love Is Such A Crazy Thing,i never thought i 'd find someon but you came ...
5,Ray Boltz,This Is America,"thi is america word by ray boltz , music by st..."
6,Ramones,I Love You,when i look in your eye i see word i ca n't de...
7,Hank Snow,I've Done At Least One Thing,when i think of the dream i 've let slip throu...
8,Kate Bush,Candle In The Wind,goodby norma jean though i never knew you at a...
9,Verve,This Could Be My Moment,i 'm the onli man who ever realli thought that...


In [20]:
df['text'][0]

"i sent a messag to anoth time , but as the day unwind , thi i just ca n't believ , i send a note across anoth plane , mayb it 's all a game , but thi i just ca n't conceiv i drive the veri latest hover car , i do n't know where you are but i miss you so much till then , i met someon who look a lot like you , she doe the thing you do , but she is an ibm she 's onli program to be veri nice , but she 's as cold as ice , whenev i get too near , she tell me that she like me veri much , but when i tri to touch , she make it all too clear . she is the latest in technolog , almost mytholog , but she ha a heart of stone , she ha an iq of 1 , 001 , she ha a jump suit on , and she 's also a telephon . choru is that what you want - is it what you realli want . i realiz that it must seem to strang , that time ha rearrang , but time ha the final word she know i think of you , she read my mind , she tri to be unkind , she know noth of your world although her memori bank overflow no one would ever kn

#### Text Preprocessing

In [13]:
import nltk
from nltk.stem.porter import PorterStemmer

In [21]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [22]:
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [23]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [24]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [25]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [26]:
similarity[0]

array([1.        , 0.01162097, 0.01000039, ..., 0.031839  , 0.06974158,
       0.04957247])

In [28]:
# Select a random song from the DataFrame
random_song = df.sample(n=1).iloc[0]
song_name = random_song['song']
print(f"Randomly selected song: {song_name}")

Randomly selected song: Shout


In [29]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [30]:
recommendation(song_name)

['Shout',
 'Shouting In The Evening',
 'Turn It Loose',
 'Shout Bamalama',
 'Shout Shout',
 'Shout For Joy',
 'A Secret Place',
 "I'm Talking About You",
 'Love Is Not The Enemy',
 'Blues For Meister',
 "It Don't Come Easy",
 'Love You Out Loud',
 'Madhouse',
 'Last American Exit',
 'Gates Of The West',
 'Slow Burn',
 'Unmarked',
 'The New Stone Age',
 'Number One',
 'Orange Colored Sky']

In [31]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))