### Importing the Libraries:

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

### Importing the Dataset:

In [2]:
df = pd.read_csv("D:/Datasets/csv/Music Recommender/songdata.csv")
df.head(3)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...


In [3]:
df.shape

(57650, 4)

In [4]:
df = df.sample(n=5000).drop('link', axis=1).reset_index(drop=True)

In [5]:
df.shape

(5000, 3)

In [6]:
df

Unnamed: 0,artist,song,text
0,Kelly Family,Another World,Everybody thought this was \n \nGoing to be ...
1,Lady Gaga,Government Hooker,I can be good (If you just wanna be bad) \nI ...
2,Michael Bolton,Night And Day,"Night and day, you are the one \nOnly you 'ne..."
3,Beach Boys,Celebrate The News,Hello \nMy luck was so bad \nI thought I use...
4,Snoop Dogg,Don't Let Go,"Uh. \nYea. \n(Don't Let Go) \nLife, life \..."
...,...,...,...
4995,Billie Holiday,A Fine Romance,"A fine romance, with no kisses \nA fine roman..."
4996,Marillion,Mad,Tell me I'm mad \nHow should I know? \nTell ...
4997,Hillsong United,Glory,Verse 1: \nGreat is the Lord \nGod almighty ...
4998,Kid Rock,What I Learned Out On The Road,Headin' for the south lands \nLookin' for som...


### Cleaning:

In [7]:
df['song'][0]

'Another World'

In [8]:
df['text'][0]

"Everybody thought this was  \n  \nGoing to be one more freakshow  \n  \nEverybody thought this was going  \n  \nTo be one more of those jokes  \n  \n  \n  \nChorus:  \n  \nIt's another world  \n  \nIt's a wild world  \n  \nIt's another world  \n  \nIt's a wild world  \n  \n  \n  \nMy generation's not going to see  \n  \nYear two thousand fifteen  \n  \nI live out at sea to save  \n  \nMy life from the H.I.V.  \n  \n  \n  \nChorus  \n  \n  \nNo one knows what's going on  \n  \nCause the world is on fire  \n  \nThe word is out the people are  \n  \nDown and we're on the run  \n  \nIs it justice from heaven,  \n  \nOr sent from hell  \n  \nThe world is on fire babe  \n  \nSomething is going on  \n  \nAnd I'm really scared  \n  \n  \n  \nChorus  \n  \n  \nEverybody thought this was  \n  \nGoing to be one more freakshow  \n  \nEverybody thought this was going  \n  \nTo be one more of those jokes  \n  \n  \n  \nRub our face into the dirt  \n  \nTil the fire is over  \n  \nI've been to the e

In [9]:
df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ',regex=True)

In [10]:
df['text'][0]

"everybody thought this was      going to be one more freakshow      everybody thought this was going      to be one more of those jokes            chorus:      it's another world      it's a wild world      it's another world      it's a wild world            my generation's not going to see      year two thousand fifteen      i live out at sea to save      my life from the h.i.v.            chorus         no one knows what's going on      cause the world is on fire      the word is out the people are      down and we're on the run      is it justice from heaven,      or sent from hell      the world is on fire babe      something is going on      and i'm really scared            chorus         everybody thought this was      going to be one more freakshow      everybody thought this was going      to be one more of those jokes            rub our face into the dirt      til the fire is over      i've been to the edge and      there i stood 'n' looked down      living in a world      w

### Applying Stemming:

In [11]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [12]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [13]:
df['text']

0       everybodi thought thi wa go to be one more fre...
1       i can be good ( if you just wan na be bad ) i ...
2       night and day , you are the one onli you 'neat...
3       hello my luck wa so bad i thought i use up all...
4       uh . yea . ( do n't let go ) life , life ( do ...
                              ...                        
4995    a fine romanc , with no kiss a fine romanc , m...
4996    tell me i 'm mad how should i know ? tell me i...
4997    vers 1 : great is the lord god almighti great ...
4998    headin ' for the south land lookin ' for some ...
4999    walk down that lonesom road all by yourself do...
Name: text, Length: 5000, dtype: object

### Applying Vectorisation and Cosine similarity (ML):

In [14]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [15]:
similarity[0]

array([1.        , 0.00175226, 0.01257176, ..., 0.03261308, 0.0310632 ,
       0.01782194])

In [16]:
sorted(list(enumerate(similarity[0])),reverse=False,key=lambda x:x[1])

[(12, 0.0),
 (24, 0.0),
 (26, 0.0),
 (60, 0.0),
 (109, 0.0),
 (142, 0.0),
 (152, 0.0),
 (200, 0.0),
 (216, 0.0),
 (221, 0.0),
 (229, 0.0),
 (253, 0.0),
 (260, 0.0),
 (261, 0.0),
 (304, 0.0),
 (334, 0.0),
 (339, 0.0),
 (357, 0.0),
 (361, 0.0),
 (418, 0.0),
 (420, 0.0),
 (444, 0.0),
 (454, 0.0),
 (485, 0.0),
 (495, 0.0),
 (511, 0.0),
 (563, 0.0),
 (584, 0.0),
 (608, 0.0),
 (610, 0.0),
 (640, 0.0),
 (662, 0.0),
 (716, 0.0),
 (720, 0.0),
 (857, 0.0),
 (866, 0.0),
 (889, 0.0),
 (908, 0.0),
 (927, 0.0),
 (991, 0.0),
 (999, 0.0),
 (1037, 0.0),
 (1053, 0.0),
 (1057, 0.0),
 (1098, 0.0),
 (1121, 0.0),
 (1161, 0.0),
 (1178, 0.0),
 (1187, 0.0),
 (1226, 0.0),
 (1252, 0.0),
 (1285, 0.0),
 (1323, 0.0),
 (1333, 0.0),
 (1348, 0.0),
 (1353, 0.0),
 (1425, 0.0),
 (1491, 0.0),
 (1497, 0.0),
 (1545, 0.0),
 (1554, 0.0),
 (1567, 0.0),
 (1601, 0.0),
 (1628, 0.0),
 (1639, 0.0),
 (1667, 0.0),
 (1678, 0.0),
 (1711, 0.0),
 (1766, 0.0),
 (1779, 0.0),
 (1781, 0.0),
 (1793, 0.0),
 (1795, 0.0),
 (1800, 0.0),
 (1821, 0

### Defining the function to search the song:

In [17]:
def recommendation(song):
    idx = df[df['song'] == song].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:11]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [19]:
recommendation('Another World')

["I'm Living In Two Worlds",
 'The World We Live In',
 "There's A World",
 'Cold Sky',
 'My World',
 'Before I Met You',
 'Lies',
 'Out Of This World',
 'It Must Be Hell',
 'Made In Heaven']

### Saving the files:

In [20]:
import pickle
pickle.dump(df.to_dict(),open('songs_dict.pkl','wb'))
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))