### Importing the packages

In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
df = pd.read_csv(r"D:\Data Mining\music data set\spotify_millsongdata.csv")

In [4]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [5]:
df.tail()

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [6]:
df.describe().round(2)

Unnamed: 0,artist,song,link,text
count,57650,57650,57650,57650
unique,643,44824,57650,57494
top,Donna Summer,Have Yourself A Merry Little Christmas,/a/abba/ahes+my+kind+of+girl_20598417.html,I just came back from a lovely trip along the ...
freq,191,35,1,6


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [8]:
df.isna().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [9]:
df.duplicated().sum()

0

In [10]:
df.shape

(57650, 4)

In [11]:
df = df.drop('link', axis=1).reset_index(drop = True)

In [12]:
df

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
...,...,...,...
57645,Ziggy Marley,Good Old Days,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,northern star \r\nam i frightened \r\nwhere ...


### Text Preprocessing

In [14]:
df['text'][0]

"Look at her face, it's a wonderful face  \r\nAnd it means something special to me  \r\nLook at the way that she smiles when she sees me  \r\nHow lucky can one fellow be?  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?  \r\n  \r\nAnd when we go for a walk in the park  \r\nAnd she holds me and squeezes my hand  \r\nWe'll go on walking for hours and talking  \r\nAbout all the things that we plan  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?\r\n\r\n"

In [15]:
data = df.copy()

In [16]:
df['text'] = df['text'].str.lower().replace(r'^a-zA-Z0-9',' ').replace(r'\n',' ',regex = True) #we can also use \W\s for regular expression

In [40]:
df

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"look at her face, it's a wonderful face \r an..."
1,ABBA,"Andante, Andante","take it easy with me, please \r touch me gent..."
2,ABBA,As Good As New,i'll never know why i had to go \r why i had ...
3,ABBA,Bang,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,making somebody happy is a question of give an...
...,...,...,...
57645,Ziggy Marley,Good Old Days,irie days come on play \r let the angels fly ...
57646,Ziggy Marley,Hand To Mouth,power to the workers \r more power \r power ...
57647,Zwan,Come With Me,all you need \r is something i'll believe \r...
57648,Zwan,Desire,northern star \r am i frightened \r where ca...


In [41]:
stemmer = PorterStemmer()

In [62]:
def tokens(txt):
    tokens = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in txt]
    return " ".join(a)

In [66]:
tokens("you are beautiful,beauty")

'y o u   a r e   b e a u t i f u l , b e a u t y'

In [72]:
df['text'].apply(lambda x : token(x))

0         
1         
2         
3         
4         
        ..
57645     
57646     
57647     
57648     
57649     
Name: text, Length: 57650, dtype: object

In [74]:
df

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"look at her face, it's a wonderful face \r an..."
1,ABBA,"Andante, Andante","take it easy with me, please \r touch me gent..."
2,ABBA,As Good As New,i'll never know why i had to go \r why i had ...
3,ABBA,Bang,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,making somebody happy is a question of give an...
...,...,...,...
57645,Ziggy Marley,Good Old Days,irie days come on play \r let the angels fly ...
57646,Ziggy Marley,Hand To Mouth,power to the workers \r more power \r power ...
57647,Zwan,Come With Me,all you need \r is something i'll believe \r...
57648,Zwan,Desire,northern star \r am i frightened \r where ca...


In [78]:
tfid =  TfidfVectorizer(analyzer = "word", stop_words = "english")

In [82]:
matrix = tfid.fit_transform(df['text'])

In [86]:
similar = cosine_similarity(matrix)

MemoryError: Unable to allocate 22.8 GiB for an array with shape (3065353040,) and data type int64

In [None]:
similar[0]

In [91]:
df[df['song']=='Waiting for the man'].index[0]

IndexError: index 0 is out of bounds for axis 0 with size 0

### Recommender Function

In [94]:
def recommender(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similer[idx])), reverse= True, key = lambda x:x[1])
    song=[]
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [96]:
recommender("Waiting for the man")

IndexError: index 0 is out of bounds for axis 0 with size 0

In [98]:
import pickle

In [104]:
pickle.dump(similar,open("similarity","wb"))

NameError: name 'similar' is not defined

In [None]:
pickle.dump(df,open("df","wb"))