## Music Recommandation System Using Spotify Million Song Dataset
- Dataset link :- https://www.kaggle.com/datasets/notshrirang/spotify-million-song-dataset

### Importing required libraries

In [84]:
import numpy as np 
import pandas as pd
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Importing Dataset

In [85]:
music = pd.read_csv("songdata.csv")
music.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [86]:
music.shape

(57650, 4)

In [87]:
music = music.sample(5000).reset_index().drop('index', axis=1)

In [88]:
music

Unnamed: 0,artist,song,link,text
0,Engelbert Humperdinck,Portofino,/e/engelbert+humperdinck/portofino_20504141.html,"PORTOFINO \nWRITERS MARK SPIRO, JACK WHITE \..."
1,Ella Fitzgerald,Bewitched,/e/ella+fitzgerald/bewitched_20045684.html,After one whole quart of brandy \nLike a dais...
2,Hank Williams,My Son Calls Another Man Daddy,/h/hank+williams/my+son+calls+another+man+dadd...,Tonight my head is bowed in sorrow \nI can't ...
3,Steely Dan,Your Gold Teeth,/s/steely+dan/your+gold+teeth_20130101.html,Got a feeling I've been here before \nWatchin...
4,Bonnie Raitt,Meet Me Half Way,/b/bonnie+raitt/meet+me+half+way_20022704.html,"You come home late from work, baby \nYou wond..."
...,...,...,...,...
4995,Reba Mcentire,But Why,/r/reba+mcentire/but+why_20849162.html,I can open doors all by myself \nAnd I'm stro...
4996,Bruce Springsteen,Jack Of All Trades,/b/bruce+springsteen/jack+of+all+trades_209997...,"I'll mow your lawn, clean the leaves out your ..."
4997,Moody Blues,Vintage Wine,/m/moody+blues/vintage+wine_20095862.html,I remember the taste of the vintage wine \nFr...
4998,Chaka Khan,I Hear Music,/c/chaka+khan/i+hear+music_20028852.html,I hear music \nMighty fine music \nThe murmu...


### Data Preprocessing

In [89]:
music.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  5000 non-null   object
 1   song    5000 non-null   object
 2   link    5000 non-null   object
 3   text    5000 non-null   object
dtypes: object(4)
memory usage: 156.4+ KB


In [90]:
music.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [91]:
music.drop('link', axis=1, inplace=True)

In [92]:
music.sample()

Unnamed: 0,artist,song,text
1626,Meat Loaf,I'll Kill You If You Don't Come Back,From the minute I turned and laid my eyes on y...


In [93]:
music.duplicated().any()

False

In [94]:
music['text'][0]

"PORTOFINO  \nWRITERS MARK SPIRO, JACK WHITE  \n  \nLong ago I used to know where love was found, A place in the sun  \nLong before this heart of mine had broken down You were the one  \nBut now you're a lonely memory My heartache will never end  \nThough all of those nights are far away I wish I were with you again.\nPortofino I still remember the sun in your hair  \nPortofino mariachi music that danced in the air  \nPortofino I want you back in my arms  \n'Cause every time I close my eyes I'm holding you in the dark  \nIn Portofino where you broke my heart.  \nI think of your face and still it can take my breath away After so long  \nAnd though I'll be lonely living for only yesterday I'll get along  \nThough now you're a faded memory My heartache will never end  \nThis unforgotten melody keeps playing over and over again  \nPortofino I still remember the sun in your hair  \nPortofino mariachi music that danced in the air  \nPortofino I want you back in my arms  \n'Cause every time I

In [95]:
music['text'] = music['text'].str.lower().replace(r'\n', '', regex=True)

In [96]:
music.head()

Unnamed: 0,artist,song,text
0,Engelbert Humperdinck,Portofino,"portofino writers mark spiro, jack white l..."
1,Ella Fitzgerald,Bewitched,"after one whole quart of brandy like a daisy,..."
2,Hank Williams,My Son Calls Another Man Daddy,tonight my head is bowed in sorrow i can't ke...
3,Steely Dan,Your Gold Teeth,got a feeling i've been here before watching ...
4,Bonnie Raitt,Meet Me Half Way,"you come home late from work, baby you wonder..."


In [97]:
music['tags'] = music['artist'] + " "+ music['text']

In [98]:
music = music[['artist', 'song', 'tags']]

In [99]:
music.head()

Unnamed: 0,artist,song,tags
0,Engelbert Humperdinck,Portofino,Engelbert Humperdinck portofino writers mark ...
1,Ella Fitzgerald,Bewitched,Ella Fitzgerald after one whole quart of brand...
2,Hank Williams,My Son Calls Another Man Daddy,Hank Williams tonight my head is bowed in sorr...
3,Steely Dan,Your Gold Teeth,Steely Dan got a feeling i've been here before...
4,Bonnie Raitt,Meet Me Half Way,"Bonnie Raitt you come home late from work, bab..."


In [100]:
ps = PorterStemmer()

def stem(text):
    y = []
    for i in text.split():
        y.append(ps.stem(i))

    return " ".join(y) 

In [101]:
music['tags'] = music['tags'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  music['tags'] = music['tags'].apply(stem)


In [102]:
cv = CountVectorizer(max_features = 5000, stop_words = 'english')
vectors = cv.fit_transform(music['tags']).toarray()

In [103]:
cv.get_feature_names_out()

array(['000', '10', '12', ..., 'zone', 'zoom', 'zz'], dtype=object)

In [104]:
similarity = cosine_similarity(vectors)

In [105]:
def Recommend(music1):
    music_index = music[music['song'] == music1].index[0]
    distances = similarity[music_index]
    music_list = sorted(list(enumerate(distances)), reverse = True, key = lambda x:x[1])[1:6]

    for i in music_list:
        print(music.iloc[i[0]].song)

In [106]:
music.iloc[48]['song']

'Nine Times Blue'

In [107]:
Recommend(music.iloc[48]['song'])

Who Will The Next Fool Be
We're In Love
We Didn't Know
Love Is
Headlines


In [108]:
import pickle
pickle.dump(music.to_dict(), open('music_dict.pkl','wb'))
pickle.dump(similarity,open('similarity.pkl','wb'))