In [1]:
import numpy as np
import pandas as pd

In [2]:
song_df=pd.read_csv('spotify_millsongdata.csv')
song_df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [3]:
song_df.shape

(57650, 4)

In [4]:
song_df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [5]:
song_df=song_df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [6]:
song_df.head()

Unnamed: 0,artist,song,text
0,Our Lady Peace,Sleeping In,"Souls, although where we meet. This \r\nHose ..."
1,Norah Jones,Little Room,"You and me, and \r\nMe and you \r\nIn my lit..."
2,The White Stripes,Wasting My Time,And if I'm wasting my time \r\nThen nothing c...
3,George Strait,If The Whole World Was A Honky Tonk,"If the whole world was a honky-tonk, \r\nAnd ..."
4,King Diamond,So Sad,"I am sitting in the Dark, I'm with my Love \r..."


In [7]:
song_df['text'][0]

"Souls, although where we meet. This  \r\nHose a lazy Morphine.  \r\nBut I, I'm stretching to see over  \r\nYour shoulders and past your priest  \r\nAnd paper cups and paper shoes  \r\nGive backs to me, but I see right  \r\nthrough  \r\nAnd I know why you overslept  \r\nSo gray, gray, slow rain.  \r\n  \r\nI'm happier than you.  \r\nAnd I'm too high to follow through  \r\nHome, like the bedpan he needs.  \r\nAnd the hose, that's not supposed to be  \r\nBut I, I'm stretching to see over your  \r\nFlowers and Time magazines  \r\nNow I believe in what you do  \r\nThe pain will cease  \r\nWell I know why you overslept  \r\nTo be home, to be\r\n\r\n"

In [8]:
song_df.shape

(5000, 3)

# Text Preprocessing

In [9]:
song_df['text']=song_df['text'].str.lower().replace(r'^a-zA-Z0-9', ' ').replace(r'\n', ' ', regex= True)

In [10]:
song_df.tail()

Unnamed: 0,artist,song,text
4995,Kiss,In Your Face,i'll never cut my hair \r i'll never shed my ...
4996,Point Of Grace,I'll Be Believing,when i'm walking the straight and narrow \r s...
4997,Who,Cousin Kevin,"we're on our own, cousin, \r all alone, cousi..."
4998,Air Supply,I Don't Want To Lose You,(graham russell) \r i called on you today \r...
4999,Isley Brothers,Love The One You're With,if you're down and confused \r and you don't ...


In [11]:
import nltk
from nltk.stem.porter import PorterStemmer

In [12]:
porter = PorterStemmer()

In [13]:
def word(txt):
    word= nltk.word_tokenize(txt)
    A= [porter.stem(w) for w in word]
    return " ".join(A)

In [14]:
word('you are beautiful')

'you are beauti'

In [15]:
song_df['text'].apply(lambda x: word(x))

0       soul , although where we meet . thi hose a laz...
1       you and me , and me and you in my littl room t...
2       and if i 'm wast my time then noth could be be...
3       if the whole world wa a honky-tonk , and it re...
4       i am sit in the dark , i 'm with my love we ar...
                              ...                        
4995    i 'll never cut my hair i 'll never shed my sk...
4996    when i 'm walk the straight and narrow sometim...
4997    we 're on our own , cousin , all alon , cousin...
4998    ( graham russel ) i call on you today i know i...
4999    if you 're down and confus and you do n't reme...
Name: text, Length: 5000, dtype: object

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
tfid=TfidfVectorizer(analyzer='word', stop_words='english')

In [18]:
Matrix=tfid.fit_transform(song_df['text'])

In [19]:
Similar=cosine_similarity(Matrix)

In [20]:
Similar[0]

array([1.        , 0.01032029, 0.00348442, ..., 0.02423274, 0.03051967,
       0.00503546])

In [22]:
song_df[song_df['song']=='So Sad'].index[0]

4

# Recommender System

In [31]:
def Recommender(song_name):
    IDX=song_df[song_df['song']==song_name].index[0]
    distance=sorted(list(enumerate(Similar[IDX])), reverse=True, key= lambda x:x[1])
    song= []
    for song_id in distance[1:21]:
        song.append(song_df.iloc[song_id[0]].song)
    return song

In [32]:
Recommender("So Sad")

['Butterfly Blue',
 'Goodbye, My Love',
 'Goodbye Sadness',
 'If We Wait',
 'Remember Love',
 'Love Is All',
 'Never Say Goodbye',
 "Goodbye's",
 'Say Goodbye To Hollywood',
 'Christmas In The Trenches',
 'Here It Is',
 'Immortality',
 'Die Die My Darling',
 'Goodbye',
 'Goodbye',
 'Until Sun Comes Up',
 'Butterfly',
 'Wait',
 'But You Know I Love You',
 "It's Just The Way"]

In [33]:
import pickle 

In [34]:
pickle.dump(Similar, open("Similarity", "wb"))

In [35]:
pickle.dump(song_df, open("song_df", "wb"))