building a recommendation music model based on the lyrics

In [1]:

import pandas as pd

In [44]:
df=pd.read_csv("spotify_millsongdata.csv")

In [45]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [46]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [47]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [48]:
df=df.sample(5000).drop('link',axis=1).reset_index(drop=True)

In [49]:
df.head(5)

Unnamed: 0,artist,song,text
0,Weird Al Yankovic,The Saga Begins,A long long time ago \r\nIn a galaxy far away...
1,Miley Cyrus,Evil Is But A Shadow,[Verse 1] \r\nEvil is but a shadow \r\nThat ...
2,Def Leppard,Stand Up,I got to know right now \r\nIt's got to be th...
3,Jimmy Buffett,Manana,Jimmy Buffett \r\nShe said I can't go back to...
4,Ray Boltz,Isaiah 53,Isaiah 53 \r\nWords and music by Ray Boltz an...


In [51]:
df['text']=df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex=True)

In [52]:
df.head(5)

Unnamed: 0,artist,song,text
0,Weird Al Yankovic,The Saga Begins,a long long time ago \r in a galaxy far away ...
1,Miley Cyrus,Evil Is But A Shadow,[verse 1] \r evil is but a shadow \r that al...
2,Def Leppard,Stand Up,i got to know right now \r it's got to be thi...
3,Jimmy Buffett,Manana,jimmy buffett \r she said i can't go back to ...
4,Ray Boltz,Isaiah 53,isaiah 53 \r words and music by ray boltz and...


In [53]:
df.tail(5)

Unnamed: 0,artist,song,text
4995,Il Divo,Close Every Door,"close every door to me, \r hide all the world..."
4996,Olivia Newton-John,Just A Little Too Much,sometimes i think i love you \r just a little...
4997,Avril Lavigne,Make Up,"layin' on the couch just, \r hangin' with my ..."
4998,Otis Redding,Chained And Bound,"darling now, you made me change my mind \r ca..."
4999,Indigo Girls,Happy Joyous Hanukkah,how many nights for hanukah? \r happy joyous ...


In [54]:
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Utilisateur\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [55]:
stemmer=PorterStemmer()

In [56]:
def tokenization(txt):
  tokens=nltk.word_tokenize(txt)
  stemming=[stemmer.stem(w) for w in tokens]
  return " ".join(stemming)

In [57]:
tokenization("you are beautiful,beauty")

'you are beauti , beauti'

In [58]:
df["text"].apply(lambda x:tokenization(x))

0       a long long time ago in a galaxi far away nabo...
1       [ vers 1 ] evil is but a shadow that alway acc...
2       i got to know right now it 's got to be thi ti...
3       jimmi buffett she said i ca n't go back to ame...
4       isaiah 53 word and music by ray boltz and stev...
                              ...                        
4995    close everi door to me , hide all the world fr...
4996    sometim i think i love you just a littl too mu...
4997    layin ' on the couch just , hangin ' with my b...
4998    darl now , you made me chang my mind can go no...
4999    how mani night for hanukah ? happi joyou hanuk...
Name: text, Length: 5000, dtype: object

In [59]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [60]:
tfidvector=TfidfVectorizer(analyzer='word',stop_words='english')

In [61]:
matrix = tfidvector.fit_transform(df['text'])

In [62]:
similar=cosine_similarity(matrix)

In [63]:
similar[0]

array([1.        , 0.01047827, 0.02068453, ..., 0.01850979, 0.01994907,
       0.00222042])

In [67]:

df[df['song']=='Close Every Door'].index[0]

np.int64(4995)

In [68]:
def recommender(song_name):
  idx=df[df['song']==song_name].index[0]
  distance=sorted(list(enumerate(similar[idx])),reverse=True,key=lambda x:x[1])
  song=[]
  for s_id in distance[1:11]:
    song.append(df.iloc[s_id[0]].song)
  return song

In [69]:
recommender("Close Every Door")

['Would I Lie To You',
 'I Want To Make The World Turn Around',
 'The World We Live In',
 'Nothing Comes Close',
 'When I Close My Eyes',
 'No More Roger',
 'So Far Away',
 'Under The Same Sun',
 "I Can't Live With You",
 'Caught In The Balance']