In [144]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [145]:
s_data = pd.read_csv('spotify_millsongdata.csv')
s_data.head(3)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...


In [146]:
s_data.shape

(57650, 4)

In [147]:
s_data.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [148]:
s_data.describe()

Unnamed: 0,artist,song,link,text
count,57650,57650,57650,57650
unique,643,44824,57650,57494
top,Donna Summer,Have Yourself A Merry Little Christmas,/a/abba/ahes+my+kind+of+girl_20598417.html,I just came back from a lovely trip along the ...
freq,191,35,1,6


In [149]:
s_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [150]:
s_data = s_data.sample(5000).drop('link',axis=1).reset_index(drop=True)

In [151]:
s_data.tail(3)

Unnamed: 0,artist,song,text
4997,Bing Crosby,A Faded Summer Love,You left today but you didn't say goodbye \r\...
4998,ABBA,Lay All Your Love On Me,I wasn't jealous before we met \r\nNow every ...
4999,The Killers,Are We Human,"I did my best to notice, when the call came do..."


In [152]:
s_data['text'][576]

"Sit alone, waiting on the morning  \r\nWoman leaving her whole life behind  \r\ntrain rolling on, taking mama's baby home  \r\nno one knows what's going through her mind  \r\n  \r\nJust another love song I'm singing  \r\nAnd you know people sing them all of the time  \r\nJust another lonesome guitar ringing  \r\nThe only difference is this one is mine  \r\n  \r\nFreedom, Lord what a funny word  \r\nWe search for it just like some kind of fool  \r\nWoman leaving home, man sit's there all alone  \r\nLittle child is paying all the dues\r\n\r\n"

In [153]:
df = s_data.sample(5000)
df.shape

(5000, 3)

# text-cleaning and text processing

In [154]:
df['text'] = df['text'].str.lower().replace(r'^\w\s',' ').replace(r'^\n',' ',regex=True)
df.head(3)

Unnamed: 0,artist,song,text
2726,Frank Zappa,Diptheria Blues,"(m. o. i) \r\n \r\n[tully gymnasium, florida..."
2310,Train,I'm Not Waiting In Line,it's clear to see that you're down to earth \...
1304,Slayer,Point,i'm the one that brings you war \r\ninfiltrat...


In [155]:
import nltk
from nltk.stem.porter import PorterStemmer

In [156]:
stemmer = PorterStemmer()

In [157]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [158]:
token('khadee is handsome')

'khade is handsom'

In [159]:
df['text'].apply(lambda x:token(x))

2726    ( m. o. i ) [ tulli gymnasium , florida state ...
2310    it 's clear to see that you 're down to earth ...
1304    i 'm the one that bring you war infiltr unguar...
964     i got a girlfriend onli she do n't know it yet...
4562    doe she love me , with all her heart should i ...
                              ...                        
3162    there 's someth in the way she move , or look ...
762     it do n't hurt anymor all my teardrop are dri ...
3688    mama , whi did you do it ? mama , who drove yo...
1836    vers 1 : is thi the end is thi all that 's lef...
1425    i take you out all over town but you alway sta...
Name: text, Length: 5000, dtype: object

In [160]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [161]:
tf = TfidfVectorizer(analyzer='word',stop_words='english')
m = tf.fit_transform(df['text'])

In [162]:
c = cosine_similarity(m)

In [163]:
c[0]

array([1.        , 0.01774789, 0.0064825 , ..., 0.02672314, 0.01577754,
       0.01363156])

In [165]:
df[df['song'] == 'Diptheria Blues'].index[0]

2726

In [180]:
def recommender(sel_song):
    idx = df[df['song'] == sel_song].index[0]
    so = sorted(list(enumerate(c[idx])),reverse=True,key = lambda x:x[1])
    song = []
    for w in so[1:31]:
        song.append(df.iloc[w[0]].song +'/n')
    return song

In [181]:
recommender('Diptheria Blues')

['The Art Of Letting Go/n',
 'Still The One/n',
 'More Than Words/n',
 'If You Love Me/n',
 'Love You So/n',
 'Who Do You Love?/n',
 'For You To Love/n',
 'Love Is All Around/n',
 'The Ghost Psalm/n',
 "Promise Me You'll Try/n",
 'Out Of Control/n',
 'All You Need Is Love/n',
 'Anyone Who Had A Heart/n',
 'I Love You Goodbye/n',
 'Our Love/n',
 'Broken Promises/n',
 'I Would Die For You/n',
 "Don't Turn Around/n",
 'Love/n',
 'Little Town Flirt/n',
 'Endless Love/n',
 'Love Me Like You Do/n',
 'Love Is On The Way/n',
 'What Is Love?/n',
 'Face To Face/n',
 'L-O-V-E/n',
 'Love/n',
 'You Mean More To Me/n',
 'I Want To Know What Love Is/n',
 'White Sun/n']

In [182]:
import pickle


In [185]:
pickle.dump(c,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))

In [186]:
pickle.load(open('df.pkl','rb'))

Unnamed: 0,artist,song,text
2726,Frank Zappa,Diptheria Blues,"(m. o. i) \r\n \r\n[tully gymnasium, florida..."
2310,Train,I'm Not Waiting In Line,it's clear to see that you're down to earth \...
1304,Slayer,Point,i'm the one that brings you war \r\ninfiltrat...
964,Zebrahead,Anthem,i got a girlfriend \r\nonly she don't know it...
4562,Kenny Loggins,A Lover's Question,"does she love me, with all her heart \r\nshou..."
...,...,...,...
3162,James Taylor,Something In The Way She Moves,"there's something in the way she moves, \r\no..."
762,Hank Snow,I Don't Hurt Anymore,it don't hurt anymore \r\nall my teardrops ar...
3688,Morrissey,Mama Lay Softly On The Riverbed,"mama, why did you do it? \r\nmama, who drove ..."
1836,Indiana Bible College,Void,verse 1: \r\n \r\nis this the end is this al...
