### Importing the packages

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
df = pd.read_csv(r"D:\Data Mining\music data set\spotify_millsongdata.csv")

In [5]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [6]:
df.tail()

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [7]:
df.describe().round(2)

Unnamed: 0,artist,song,link,text
count,57650,57650,57650,57650
unique,643,44824,57650,57494
top,Donna Summer,Have Yourself A Merry Little Christmas,/z/zwan/heartsong_20148991.html,I just came back from a lovely trip along the ...
freq,191,35,1,6


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [9]:
df.isna().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [10]:
df.duplicated().sum()

np.int64(0)

In [11]:
df.shape

(57650, 4)

In [12]:
df = df.sample(20000).drop('link', axis=1).reset_index(drop = True)

In [13]:
df

Unnamed: 0,artist,song,text
0,Wanda Jackson,No Place To Go But Home,They turned out the lights friends have said t...
1,Tragically Hip,Let's Stay Engaged,It might be late \r\nBut it won't be early \...
2,Alice Cooper,Little By Little,Black leather gloves and your lipstick shines ...
3,Natalie Grant,Wonderful Life,Beautiful is in the sky \r\nWatch it slowly ...
4,Nine Inch Nails,All Time Low,Go \r\nI've heard all I need to know \r\nYou...
...,...,...,...
19995,John Legend,"Pride, In The Name Of Love",One man come in the name of love \r\nOne man ...
19996,Iron Maiden,Lightning Strikes Twice,I feel the breeze on my face in expectance \r...
19997,Nirvana,Oh Me,If I had to lose a mile \r\nIf I had to touch...
19998,Justin Bieber,Go The Distance,I have often dreamed \r\nOf a far off place ...


### Text Preprocessing

In [14]:
df['text'][0]

"They turned out the lights friends have said there goodnight  \r\nNow there's no place to go but home  \r\nI've had a beautiful evening for awhile I've forgot you were gone  \r\nNow you're back on my mind how I hate closing time  \r\nFor there's no place to go but home  \r\nNo place to go but home and nobody home when I get there  \r\nThey turned out the lights friends have said their goodnight  \r\nNow there's no place to go but home  \r\n  \r\n[ ac.guitar - steel ]  \r\n  \r\nI drive through this ghost of the city and I've never felt so alone  \r\nIt's the world's time to sleep and my time to weep  \r\nFor there's no place to go but home for there's no place to go but home\r\n\r\n"

In [15]:
data = df.copy()

In [16]:
df['text'] = df['text'].str.lower().replace(r'^a-zA-Z0-9',' ').replace(r'\n',' ',regex = True) #we can also use \W\s for regular expression

In [17]:
df

Unnamed: 0,artist,song,text
0,Wanda Jackson,No Place To Go But Home,they turned out the lights friends have said t...
1,Tragically Hip,Let's Stay Engaged,it might be late \r but it won't be early \r...
2,Alice Cooper,Little By Little,black leather gloves and your lipstick shines ...
3,Natalie Grant,Wonderful Life,beautiful is in the sky \r watch it slowly \...
4,Nine Inch Nails,All Time Low,go \r i've heard all i need to know \r your ...
...,...,...,...
19995,John Legend,"Pride, In The Name Of Love",one man come in the name of love \r one man c...
19996,Iron Maiden,Lightning Strikes Twice,i feel the breeze on my face in expectance \r...
19997,Nirvana,Oh Me,if i had to lose a mile \r if i had to touch ...
19998,Justin Bieber,Go The Distance,i have often dreamed \r of a far off place \...


In [18]:
stemmer = PorterStemmer()

In [29]:
def tokens(txt):
    tokens = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in tokens]
    return " ".join(a)

In [30]:
tokens("you are beautiful,beauty")

'you are beauti , beauti'

In [31]:
df['text'].apply(lambda x : tokens(x))

0        they turn out the light friend have said there...
1        it might be late but it wo n't be earli you go...
2        black leather glove and your lipstick shine br...
3        beauti is in the sky watch it slowli all our t...
4        go i 've heard all i need to know your voic in...
                               ...                        
19995    one man come in the name of love one man come ...
19996    i feel the breez on my face in expect not veri...
19997    if i had to lose a mile if i had to touch feel...
19998    i have often dream of a far off place where a ...
19999    hey , good lookin ' what you got cookin ' ? ho...
Name: text, Length: 20000, dtype: object

In [32]:
df

Unnamed: 0,artist,song,text
0,Wanda Jackson,No Place To Go But Home,they turned out the lights friends have said t...
1,Tragically Hip,Let's Stay Engaged,it might be late \r but it won't be early \r...
2,Alice Cooper,Little By Little,black leather gloves and your lipstick shines ...
3,Natalie Grant,Wonderful Life,beautiful is in the sky \r watch it slowly \...
4,Nine Inch Nails,All Time Low,go \r i've heard all i need to know \r your ...
...,...,...,...
19995,John Legend,"Pride, In The Name Of Love",one man come in the name of love \r one man c...
19996,Iron Maiden,Lightning Strikes Twice,i feel the breeze on my face in expectance \r...
19997,Nirvana,Oh Me,if i had to lose a mile \r if i had to touch ...
19998,Justin Bieber,Go The Distance,i have often dreamed \r of a far off place \...


In [33]:
tfid =  TfidfVectorizer(analyzer = "word", stop_words = "english")

In [34]:
matrix = tfid.fit_transform(df['text'])

In [35]:
similar = cosine_similarity(matrix)

In [36]:
similar[0]

array([1.        , 0.01814001, 0.        , ..., 0.        , 0.02839795,
       0.0031637 ], shape=(20000,))

In [38]:
df[df['song']=='Go The Distance'].index[0]

np.int64(2642)

### Recommender Function

In [42]:
def recommender(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse= True, key = lambda x:x[1])
    song=[]
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [43]:
recommender("Go The Distance")

['Go The Distance', 'The Distance', 'Long Distance', 'From A Distance']

In [44]:
import pickle

In [45]:
pickle.dump(similar,open("similarity","wb"))

In [46]:
pickle.dump(df,open("df","wb"))