In [38]:
import numpy as np
import pandas as pd


In [39]:
df = pd.read_csv('spotify_millsongdata.csv')
df.head(3)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...


In [40]:
df.shape

(57650, 4)

In [41]:
df=df.sample(n=5000).drop('link',axis=1).reset_index(drop=True)

In [42]:
df['song'][0]

'Angel Come Home'

In [43]:
df['text'][0]

"Oo ooo ooo ooo ooo  \r\nNow the puzzle's laid out on my table  \r\nThe pieces don't fit  \r\nI move from chair to chair  \r\nBut that empty one's there where she used to sit  \r\nI'm sitting here going out of my mind  \r\n'Cause she's gone away  \r\nShe couldn't wait, now I  \r\nI'm afraid it's too late  \r\nI'm waitin' till my angel comes home  \r\nIt's been hell bein' here alone  \r\nI'll be in heaven when my angel comes home  \r\nAngel, angel come home  \r\nIf you see her tell my angel come home  \r\nOo oo oo angel come home  \r\nIf you see her tell my angel to come home  \r\nDoes she remember  \r\nThat I can't forget  \r\nDoes she know I regret  \r\nI never told her how I  \r\nI wanted to hold her  \r\nI just let her go  \r\nLonely nights thinkin'  \r\nIt's starting to sink in  \r\nWe had a way to go  \r\nWe'll grow closer together  \r\nBy being apart  \r\nWhen my angel comes home  \r\nIt's been hell bein' here alone  \r\nGod tell me, why has she flown  \r\nSo far away  \r\nI pray

In [44]:
df['text'] = df['text'] .str.lower().replace(r'[^\w\s]','').replace(r'\n' ,'',regex=True)

In [45]:
df['text'][0]

"oo ooo ooo ooo ooo  \rnow the puzzle's laid out on my table  \rthe pieces don't fit  \ri move from chair to chair  \rbut that empty one's there where she used to sit  \ri'm sitting here going out of my mind  \r'cause she's gone away  \rshe couldn't wait, now i  \ri'm afraid it's too late  \ri'm waitin' till my angel comes home  \rit's been hell bein' here alone  \ri'll be in heaven when my angel comes home  \rangel, angel come home  \rif you see her tell my angel come home  \roo oo oo angel come home  \rif you see her tell my angel to come home  \rdoes she remember  \rthat i can't forget  \rdoes she know i regret  \ri never told her how i  \ri wanted to hold her  \ri just let her go  \rlonely nights thinkin'  \rit's starting to sink in  \rwe had a way to go  \rwe'll grow closer together  \rby being apart  \rwhen my angel comes home  \rit's been hell bein' here alone  \rgod tell me, why has she flown  \rso far away  \ri pray she won't stay another day  \ri'll be in heaven when my angel

In [46]:
import nltk
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()


def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [ps.stem(w) for w in tokens]
        
    return " ".join(stemming)

In [47]:
tokenization('this is my word loving loved')

'thi is my word love love'

In [48]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [49]:
df['text']

0       oo ooo ooo ooo ooo now the puzzl 's laid out o...
1       i 'm send you some money i wish it could be mo...
2       i get up now do n't tri to chang me , i am wha...
3       let me tell you about thi girl i know first gl...
4       do n't give me no more goddamn pain babi , the...
                              ...                        
4995    and what will happen in the morn when the worl...
4996    fire in her eye ? burn ? perfum of the sky ? f...
4997    love you wa easi thought you 'd never leav me ...
4998    ca n't you see , she do n't want you around no...
4999    who 's gon na dri your cryin ' eye ? who 's go...
Name: text, Length: 5000, dtype: object

In [50]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [51]:
tfid = TfidfVectorizer(stop_words='english') 
matrix = tfid.fit_transform(df['text'])

In [52]:
matrix.shape

(5000, 17286)

In [53]:
similarity = cosine_similarity(matrix)

In [54]:
similarity[0]

array([1.        , 0.02455225, 0.02326554, ..., 0.00582593, 0.01250799,
       0.00540252])

In [56]:
df['song'][0]

'Angel Come Home'

In [57]:
df[df['song'] == "Angel Come Home"]

Unnamed: 0,artist,song,text
0,Beach Boys,Angel Come Home,oo ooo ooo ooo ooo now the puzzl 's laid out o...


In [58]:
def recommendation(song):
    idx= df[df['song']== song].index[0]
    distances= sorted(list(enumerate(similarity[idx])),reverse=False, key=lambda x:x[1])
    
    songs  = []
    for i in distances[1:21]:
        songs.append(df.iloc[i[0]].song)
    return songs

In [59]:
recommendation("Angel Come Home")

['Keep It In Motion',
 'All Apologies',
 'Jet Pilot',
 'So The Story Goes',
 'Komm, Gib Mir Deine Hand',
 'Ayoko Na Sana',
 'Toy Story',
 'Se Te Ne Vai (Si Tu Te Vas)',
 'Bohemienne (Esmeralda)',
 'I Love',
 'Idiot Music',
 'For Liquorice John',
 'Radios In Motion',
 'Paano',
 'Hura-hura Huru-hara',
 "Ode Le'eli",
 'Jewels',
 'Holy Mountains',
 'O Christmas Tree',
 'Gin-Iro No Yume']

In [60]:
import pickle
pickle.dump(similarity, open("similarity.pkl", "wb"))
pickle.dump(df, open("df.pkl", "wb"))

