In [18]:
import pandas as pd
import numpy as np

In [19]:
# Load the song dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('spotify_millsongdata.csv')

In [44]:
# Preview the first ten rows.
# NOTE(review): this cell was executed as In [44], i.e. after the sampling
# cell (In [23]), so the recorded output shows the sampled frame without the
# `link` column rather than the freshly loaded CSV.
df.head(10)

Unnamed: 0,artist,song,text
0,Yngwie Malmsteen,Time Will Tell,I've never killed another man \r\nI've never ...
1,Wanda Jackson,Let Me Talk To You,"Just one more dance, just one more chance so I..."
2,Lady Gaga,Starstruck,Groove. slam. work it back. filter that. baby ...
3,Ramones,Out Of Time,You don't know what's going on \r\nYou've bee...
4,Rihanna,Where Have You Been,"I've been everywhere, man \r\nLooking for som..."
5,Iggy Pop,Search And Destroy,I'm a street walking cheetah \r\nWith a heart...
6,Zao,To Think Of You Is To Treasure An Absent Memory,When you shut your eyes and fell asleep \r\nD...
7,Misfits,Nike A Go Go,Go \r\nIt's a missile girl \r\nIn a missile ...
8,Indigo Girls,Fly Away,Fly away little bird \r\nAny place in this op...
9,Miley Cyrus,Who Owns My Heart,R \r\nO \r\nC \r\nK \r\nMafia \r\n \r\nC...


In [21]:
# (rows, columns) of the frame as loaded from the CSV.
df.shape

(57650, 4)

In [22]:
# Count missing values in each column.
df.isna().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [23]:
# Work on a 10,000-row random subset without the `link` column.
# A fixed random_state (42 is an arbitrary seed) makes the subset, and
# everything derived from it below, reproducible across kernel restarts.
# NOTE: this cell rebinds `df` and drops `link`, so it can only be run once
# per load of the CSV; re-run the read_csv cell first if it must be repeated.
df = df.sample(10000, random_state=42).drop(columns='link').reset_index(drop=True)

In [24]:
# First five rows of the sampled frame.
df.head()

Unnamed: 0,artist,song,text
0,Yngwie Malmsteen,Time Will Tell,I've never killed another man \r\nI've never ...
1,Wanda Jackson,Let Me Talk To You,"Just one more dance, just one more chance so I..."
2,Lady Gaga,Starstruck,Groove. slam. work it back. filter that. baby ...
3,Ramones,Out Of Time,You don't know what's going on \r\nYou've bee...
4,Rihanna,Where Have You Been,"I've been everywhere, man \r\nLooking for som..."


In [46]:
# Last five rows of the sampled frame.
df.tail()

Unnamed: 0,artist,song,text
9995,Dave Matthews Band,Lying In The Hands of God,Maybe I'll be your solider \r\nGladly I'll do...
9996,Tom Lehrer,The Folk Song Army,"We are the folk song army, \r\nEvery one of u..."
9997,Marilyn Manson,The Speed Of Pain,They slit our throats \r\nLike we were flower...
9998,Mary Black,If I Gave My Heart To You,If I gave my heart to you \r\nWould you promi...
9999,Indigo Girls,Shed Your Skin,115 \r\nYou are 17 \r\nItchy trigger \r\nVe...


In [25]:
# Full lyrics string of the row labelled 0.
df.loc[0, 'text']

"I've never killed another man  \r\nI've never lived the master plan  \r\nI can only pray that we'll survive  \r\n  \r\nWith every sin and whitened line,  \r\nI see the future slipping by  \r\nThe time is nearing, what have we done  \r\n  \r\nIs there a reason? Someone explain?  \r\nAnd in the end will we pay?  \r\n  \r\nTime will Tell  \r\nI will  \r\n  \r\nThe hands of time we can't turn back  \r\nHave we all slipped through the cracks  \r\nLord have mercy, forgive us all  \r\n  \r\nIs there a reason? Someone explain?  \r\nAnd in the end will we pay?  \r\n  \r\nTime will Tell  \r\nI will  \r\n  \r\nThere's no use looking back  \r\nEverything's turning black  \r\nTime's running out for us all\r\n\r\n"

In [26]:
# (rows, columns) after sampling and dropping `link`.
df.shape

(10000, 3)

In [28]:
#data preprocessing
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    """Tokenize ``txt`` and return its stemmed tokens joined by single spaces.

    Parameters
    ----------
    txt : str
        Raw text to normalise.

    Returns
    -------
    str
        Each token produced by ``nltk.word_tokenize`` passed through the
        module-level ``stemmer`` and joined with ``" "``.

    Notes
    -----
    ``nltk.word_tokenize`` needs the NLTK tokenizer data to be installed;
    see the NLTK documentation if a ``LookupError`` is raised.
    """
    tokens = nltk.word_tokenize(txt)
    stemmed = [stemmer.stem(w) for w in tokens]
    # Fixed: the original returned the misspelled name `stemmming`, which
    # raised NameError on every call.
    return " ".join(stemmed)

In [31]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [32]:
# Build TF-IDF vectors from the raw lyrics (English stop words removed).
# NOTE(review): `tokenization` is defined in an earlier cell but is never
# applied to df['text'], so no stemming takes place here — confirm whether
# that is intended.
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
# Pairwise cosine similarity between every pair of rows. The result is a
# square array with one row/column per song; with the 10,000 sampled rows
# that is 10,000 x 10,000 values, so expect a large memory footprint.
similarity = cosine_similarity(matrix)

In [33]:
# Similarity scores of row 0 against every row (the first entry is row 0 against itself).
similarity[0]

array([1.        , 0.0103545 , 0.00662779, ..., 0.01420802, 0.        ,
       0.03004366])

In [34]:
# Look up rows by exact title; an empty frame means the title is not in the sample.
df.loc[df['song'].eq('Crazy World')]

Unnamed: 0,artist,song,text


In [42]:
def recommendation(song_df, top_n=4):
    """Return the titles of the songs most similar to ``song_df``.

    Parameters
    ----------
    song_df : str
        Exact title to look up in ``df['song']``. If several rows share the
        title, the first one is used as the query.
    top_n : int, default 4
        Number of titles to return.

    Returns
    -------
    list of str
        Up to ``top_n`` titles ordered by decreasing similarity to the query
        row. The query row itself is never included; other rows that happen
        to share its title may be.

    Raises
    ------
    IndexError
        If no row of ``df`` has the requested title.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Use the row *position*, not the index label: `similarity` is aligned
    # with row order, and the lookups below are positional as well.
    positions = (df['song'] == song_df).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"song {song_df!r} not found in df['song']")
    idx = int(positions[0])

    scores = similarity[idx]
    # Exclude the query row explicitly. The original skipped the first sorted
    # entry instead, which returns the query itself whenever another row ties
    # with it (or edges past it through rounding) at the top score.
    candidates = [pos for pos in range(len(scores)) if pos != idx]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    titles = df['song']
    return [titles.iloc[pos] for pos in candidates[:top_n]]

In [43]:
# Example query.
# NOTE(review): the recorded output lists 'Starstruck' itself as the first
# recommendation, i.e. the query title is not filtered out of the results.
recommendation('Starstruck')

['Starstruck', 'Blow By Blow', 'Blow It All Away', 'Baby']

In [45]:
import pickle
# Persist the similarity matrix and the sampled frame for later reuse.
# `with` closes each file handle (flushing it to disk) as soon as the dump
# finishes; the original left both handles open.
with open('similarity.pkl', 'wb') as fh:
    pickle.dump(similarity, fh)
with open('df.pkl', 'wb') as fh:
    pickle.dump(df, fh)