In [1]:
import pandas as pd


In [2]:
data= pd.read_csv('./spotify_millsongdata.csv')


In [3]:
data.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
data.shape

(57650, 4)

In [5]:
data.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [6]:
data= data.drop('link', axis=1).reset_index(drop=True)
data.head(10)


Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,"Well, you hoot and you holler and you make me ..."
6,ABBA,Cassandra,Down in the street they're all singing and sho...
7,ABBA,Chiquitita,"Chiquitita, tell me what's wrong \r\nYou're e..."
8,ABBA,Crazy World,I was out with the morning sun \r\nCouldn't s...
9,ABBA,Crying Over You,I'm waitin' for you baby \r\nI'm sitting all ...


In [7]:
data=data.sample(20000).reset_index(drop=True)
data.shape

(20000, 3)

In [8]:
data.head(10)

Unnamed: 0,artist,song,text
0,Nazareth,Helpless,There is a town in north Ontario \r\nDream co...
1,Neil Sedaka,King Of Clowns,"Here I come, the king of clowns, \r\nAs I hid..."
2,Ziggy Marley,Generation,Many generation have passed away \r\nFighting...
3,Fabolous,Baby Featmike Shorey,(feat. Mike Shorey) \r\n \r\n[Intro] \r\nFa...
4,Queensryche,The Lady Wore Black,On a lonely walk this morning \r\nA light mis...
5,Ace Of Base,Que Sera,They call to her from across the street \r\nB...
6,Phish,Taste,I'm up and I can take what you give \r\nAnd I...
7,Oasis,"It's Good, To Be Free",Head like a rock spinning round and round \r\...
8,Natalie Cole,A Tisket A Tasket,A tisket a tasket \r\nA brown and yellow bask...
9,Yello,Call It Love,For all these years \r\nI've been rushing and...


 Data Preprocessing


In [9]:
data['text']= data['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n\r', ' ', regex=True)


In [10]:
import nltk
from nltk.stem.porter import PorterStemmer
nltk.download('punkt')
stemmer= PorterStemmer()

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/priteshdube/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [11]:
def tokenization(txt):
  tokens= nltk.word_tokenize(txt)
  stemming= [stemmer.stem(w) for w in tokens]
  return " ".join(stemming)


In [12]:
data['text']= data['text'].apply(lambda x: tokenization(x))

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
Tfid= TfidfVectorizer(analyzer='word', stop_words='english')
matrix= Tfid.fit_transform(data['text'])
similarity= cosine_similarity(matrix)

In [15]:
similarity[9]

array([0.        , 0.0189806 , 0.02246813, ..., 0.00474411, 0.05757122,
       0.08653298])

In [19]:
data[data['song']=='Helpless'].index[0]

data.head(10)

Unnamed: 0,artist,song,text
0,Nazareth,Helpless,there is a town in north ontario dream comfort...
1,Neil Sedaka,King Of Clowns,"here i come , the king of clown , as i hide be..."
2,Ziggy Marley,Generation,mani gener have pass away fight for the same c...
3,Fabolous,Baby Featmike Shorey,( feat . mike shorey ) [ intro ] fab ... [ 5x ...
4,Queensryche,The Lady Wore Black,on a lone walk thi morn a light mist in the ai...
5,Ace Of Base,Que Sera,they call to her from across the street but th...
6,Phish,Taste,i 'm up and i can take what you give and i 'm ...
7,Oasis,"It's Good, To Be Free",head like a rock spin round and round i found ...
8,Natalie Cole,A Tisket A Tasket,a tisket a tasket a brown and yellow basket we...
9,Yello,Call It Love,for all these year i 've been rush and run awa...


Recommender

In [20]:
def recommend(song):
  idx= data[data['song']==song].index[0]
  distance= sorted(list(enumerate(similarity[idx])), reverse=True, key=lambda x:x[1])
  song_list=[]
  for s in distance[1:11]:
    song_list.append(data.iloc[s[0]].song)
  return song_list

In [22]:
recommend('Helpless')

['Helpless',
 'Helpless',
 'Breakaway',
 'An Inch An Hour',
 "Latimer's Mercy",
 'Wither',
 'Deeper',
 'Murder City',
 "You Don't Know What Love Is",
 "Here's My Heart"]

In [23]:
import pickle


In [24]:
pickle.dump(similarity, open('similarity.pkl', 'wb'))


In [26]:
pickle.dump(data, open('data.pkl', 'wb'))