In [35]:
import pandas as pd
import numpy as np

In [36]:
df = pd.read_csv("spotify_millsongdata.csv")

In [37]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [38]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [39]:
df.shape

(57650, 4)

In [40]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [41]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [42]:
df.head(10)

Unnamed: 0,artist,song,text
0,Toto,Rockmaker,"Don't know quite what to say, haven't seen you..."
1,Judy Garland,I Left My Heart In San Francisco,I left my heart \r\nIn San Francisco \r\nHig...
2,Chicago,All Roads Lead To You,Headed north on a one way trip \r\nI never th...
3,Religious Music,Be Thou My Vision,"Be Thou my vision, O Lord of my heart \r\nNau..."
4,Kanye West,Lift Off,(All engines running) \r\nWe gon' take it to ...
5,Snoop Dogg,C-Walkin,"G shit, LBC shit, remember how it used to be? ..."
6,Tim McGraw,You Had To Be There,"He sat down, picked up the phone \r\nAnd said..."
7,The Beatles,"Martha, My Dear","Martha, my dear \r\nThough I spend my days in..."
8,Lionel Richie,Don't Wanna Lose You,"Times are hard, my spirit's weak, \r\nEv'ryth..."
9,Westlife,Everybody Knows,It's never too hard to find the words \r\nThe...


In [43]:
df['text'][0]

"Don't know quite what to say, haven't seen you in seven ages  \r\nCan you tell that it's me, or is my picture on torn out pages?  \r\n  \r\n[Chorus:]  \r\n  \r\nAnd you think it's real, but it's just another future deal  \r\nAnd you know you're right, 'cause you've waited for it all night  \r\nThen you find yourself, sitting on a little shelf  \r\n  \r\nDon't know quite what to do, I've been acting my life in phases  \r\nIs it really true that they're keeping us locked in cages?  \r\n  \r\nRock maker\r\n\r\n"

In [44]:
# df = df.sample(5000)

In [45]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [46]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [47]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [49]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [50]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [51]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [52]:
similarity[0]

array([1.        , 0.01721311, 0.03357092, ..., 0.07727118, 0.08062379,
       0.05667018])

In [53]:
df[df['song'] == 'Crying Over You']

Unnamed: 0,artist,song,text
897,ABBA,Crying Over You,i 'm waitin ' for you babi i 'm sit all alon i...


In [54]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [56]:
recommendation('Crying Over You')

['Crying Time',
 "Don't Come Cryin' To Me",
 "Cryin' In One Eye",
 'Blue Me',
 'Moment Of Forever',
 'Blue River',
 'Vagabond Of The Western World',
 'How Majestic Is Thy Name',
 'Blue Christmas',
 'Work Me, Lord',
 'Lord, In Your Love',
 'George Jackson',
 'Blue Morning, Blue Day',
 'Here I Am Lord',
 '2:10 Train',
 "I Know (You Don't Love Me)",
 "Fixin' To Die",
 'Baby Blue',
 'Cold Chill',
 'Blue']

In [57]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))