In [2]:
import pandas as pd
df = pd.read_csv(r"spotify_millsongdata.csv")
df_sample = df.sample(frac = 0.1)

In [3]:
df_sample.head(5)

Unnamed: 0,artist,song,link,text
28193,Conway Twitty,Fever,/c/conway+twitty/fever_20869855.html,"Never know how much I love you, \r\nNever kno..."
15116,Otis Redding,New Year's Resolution,/o/otis+redding/new+years+resolution_20288645....,I hope it's not too late \r\nJust to say that...
27976,Clash,Cheapskates,/c/clash/cheapskates_20031804.html,I have been a washer up \r\nAn' he has been a...
7582,"Harry Connick, Jr.",It's Beginning To Look A Lot Like Christmas,/h/harry+connick+jr/its+beginning+to+look+a+lo...,It's beginning to look \r\nA lot like Christm...
34509,Grateful Dead,I'll Take A Melody,/g/grateful+dead/ill+take+a+melody_20806557.html,I've seen the rain pouring down \r\nThe sky w...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df_sample.shape

(5765, 4)

In [6]:
df_sample.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df_sample = df_sample.drop('link',axis = 1).reset_index(drop=True)

In [8]:
df_sample.head(5)

Unnamed: 0,artist,song,text
0,Conway Twitty,Fever,"Never know how much I love you, \r\nNever kno..."
1,Otis Redding,New Year's Resolution,I hope it's not too late \r\nJust to say that...
2,Clash,Cheapskates,I have been a washer up \r\nAn' he has been a...
3,"Harry Connick, Jr.",It's Beginning To Look A Lot Like Christmas,It's beginning to look \r\nA lot like Christm...
4,Grateful Dead,I'll Take A Melody,I've seen the rain pouring down \r\nThe sky w...


In [9]:
df_sample['text'][0]

"Never know how much I love you,  \r\nNever know how much I care  \r\nWhen you put your arms around me,  \r\nI get a fever that's so hard to bear.  \r\n  \r\nYou give me fever - when you kiss me,  \r\nFever when you hold me tight  \r\nFever - in the morning,  \r\nFever all through the night.  \r\n  \r\nSun lights up the daytime,  \r\nMoon lights up the night  \r\nI light up when you call my name,  \r\nBecause you know I'm gonna treat you right  \r\n  \r\nYou give me fever - when you kiss me,  \r\nFever when you hold me tight  \r\nFever - in the morning,  \r\nFever all through the night.  \r\n  \r\nNow listen to me baby  \r\nAnd hear every word I say  \r\nNo one can love you like I do  \r\n'Cause they don't know how to love you my way  \r\n  \r\nYou give me fever - when you kiss me,  \r\nFever when you hold me tight  \r\nFever - in the morning,  \r\nFever all through the night.  \r\n  \r\nWell, now you've heard my story,  \r\nHere's the point that I have made:  \r\nChicks were born to g

In [10]:
df_sample.shape

(5765, 3)

In [11]:
#text cleaning
df_sample['text'] = df_sample['text'].str.lower().replace(r'^\w\s',' ').replace(r'\n',' ',regex = True)

In [12]:
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91727\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [13]:
stemmer = PorterStemmer()

In [14]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [15]:
df_sample['text'].apply(lambda x: token(x))

0       never know how much i love you , never know ho...
1       i hope it 's not too late just to say that i '...
2       i have been a washer up an ' he ha been a scru...
3       it 's begin to look a lot like christma everyw...
4       i 've seen the rain pour down the sky wa gray ...
                              ...                        
5760    i want to see word by ray boltz , music by ste...
5761    is thi the end of the begin ? or the begin of ...
5762    if i had my druther i 'd go fish find myself a...
5763    across the border they turn water into wine so...
5764    i walk thi road so veri long ago to show the w...
Name: text, Length: 5765, dtype: object

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [17]:
tfid = TfidfVectorizer(analyzer='word',stop_words='english')

In [18]:
matrix = tfid.fit_transform(df_sample['text'])

In [19]:
similar = cosine_similarity(matrix)

In [20]:
similar[0]

array([1.        , 0.02779891, 0.00839435, ..., 0.01461337, 0.02255859,
       0.01815544])

In [21]:
df_sample.tail(5)

Unnamed: 0,artist,song,text
5760,Ray Boltz,I Want To See,"i want to see \r words by ray boltz, music by..."
5761,Black Sabbath,End Of The Beginning,is this the end of the beginning? \r or the b...
5762,John Denver,Druthers,if i had my druthers i'd go fishing \r find m...
5763,Bon Jovi,Dry County,across the border they turn water into wine \...
5764,Michael W. Smith,I'm Waiting For You,i walked this road \r so very long ago \r to...


In [22]:
df[df['song'] == 'The Last Time'].index[0]

1335

Recommender Function

Recommender Function

In [23]:
def recommender(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])),reverse = True,key = lambda x:x[1])
    song = []
    for s_id in distance[1:21]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [24]:
recommender("The Last Time")

['Dear God',
 'Sweet Leilani',
 'Horribly Hazardous Heffalumps',
 'Under My Skin',
 'Bang-Bang',
 'River',
 'At Last',
 'Love For Sale',
 'If I Close My Eyes',
 'The Message In The Middle Of The Bottom',
 "Ain't It A Shame",
 'Come Around',
 'Syndication',
 'A Hunting We Will Go',
 'Girls! Girls! Girls!',
 'Spin The Wheel',
 'Happy Song',
 'Mirror',
 "Baby Don't Get Hooked On Me",
 'The Things You Said']

In [25]:
import pickle

In [26]:
pickle.dump(similar, open("similarity","wb"))

In [27]:
pickle.dump(df,open("df","wb"))