In [136]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [137]:
data=pd.read_csv('spotify_millsongdata.csv')

In [138]:
data

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...
...,...,...,...,...
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...


In [139]:
data.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [140]:
data=data.sample(20000).drop('link', axis=1).reset_index(drop=True)

In [141]:
data.head()

Unnamed: 0,artist,song,text
0,ZZ Top,Chevrolet,In a flat Forty-One with my strat on the door ...
1,Usher,Moving Mountains,It's like whatever I do \r\nOh \r\nJust can'...
2,HIM,Temple Of Love,With the fire from the fireworks up above me ...
3,Yngwie Malmsteen,Teaser,Getting dressed for the masquerade \r\nTo be ...
4,Horrible Histories,Flame,[Verse 1] \r\nGreeks: \r\nIn 776BC Olympics ...


### Text Processing

In [142]:
data['text']=data['text'].str.lower().replace(r'^\w\s','').replace(r'\n','',regex=True) # '^\w\s' for lower case

In [143]:
stemmer=PorterStemmer()

In [144]:
stemmer.stem('beautiful')

'beauti'

In [145]:
def token(text):
    token=nltk.word_tokenize(text)
    a=[stemmer.stem(w) for w in token]
    return " ".join(a)

In [146]:
token('you are gorgeous,beautiful')

'you are gorgeou , beauti'

In [147]:
data['text'].apply(lambda x:token(x))

0        in a flat forty-on with my strat on the door w...
1        it 's like whatev i do oh just ca n't get thro...
2        with the fire from the firework up abov me wit...
3        get dress for the masquerad to be the main att...
4        [ vers 1 ] greek : in 776bc olymp were begun g...
                               ...                        
19995    they say love is cruel , they say love is rath...
19996    no-o-o , i do n't want her so i 'll forget her...
19997    the letter stop in a minor key a christma card...
19998    you bite through the big wall , the big wall b...
19999    what 's it cost you , darl ? what 's it cost y...
Name: text, Length: 20000, dtype: object

In [148]:
tfid=TfidfVectorizer(stop_words='english')

In [149]:
matrix=tfid.fit_transform(data['text'])

In [150]:
similarity=cosine_similarity(matrix)

In [151]:
similarity[0]

array([1.        , 0.02397214, 0.00501164, ..., 0.00465666, 0.00763239,
       0.00550807])

In [156]:
data[data['song'] == 'Money']

Unnamed: 0,artist,song,text
2908,Extreme,Money,"hallelujah \r \rnow i lay, i lay me down to ..."
5403,Yes,Money,"money high, money low \rmoney come, money go...."


In [153]:
def recommendation(song_df):
    idx = data[data['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(data.iloc[m_id[0]].song)
        
    return songs

In [155]:
recommendation('Money')

['Money Money Money Shouts',
 'Money Makes Her Smile',
 'Money Good',
 "Love Don't Mean A Thing",
 'Money Talks',
 'Free Money',
 "All You've Got Is Money",
 'Money Back Guarantee',
 'Money',
 'The Big Money',
 "Make That Money (Scrooge's Song)",
 'Mind On My Money',
 'Man With The Money',
 'When The Money Runs Out',
 'Jokes On You',
 'Take The Money And Run',
 'Love Of Money',
 'Bet Money',
 'Rich Woman',
 'Cash Money']

In [158]:
pickle.dump(similarity,open('similarity.pkl','wb'))

In [159]:
pickle.dump(data,open('data.pkl','wb'))