In [86]:
# Importing Libraries.
import pandas as pd

In [87]:
# Loading the csv file.
df = pd.read_csv("spotify_millsongdata.csv")

In [88]:
# Checking the no.of songs(Row) & the features(columns)d. 
# (rows * columns)
df.shape

(57650, 4)

In [89]:
# The first 06 list of songs.
df.head(6)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,/a/abba/burning+my+bridges_20003011.html,"Well, you hoot and you holler and you make me ..."


In [90]:
# The last 06 list of songs.
df.tail(6)

Unnamed: 0,artist,song,link,text
57644,Ziggy Marley,Generation,/z/ziggy+marley/generation_20531171.html,Many generation have passed away \r\nFighting...
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [91]:
# Checking for the null value from the data-set.
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [92]:
# Dropping the Link column from the data-set.
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [38]:
# Dataset after dropping Link column.
df.head(5)

Unnamed: 0,artist,song,text
0,Uriah Heep,Magic Lantern,When you look inside \r\nMy magic lantern \r...
1,Judas Priest,Dragonaut,Welcome to my world of steel \r\nMaster of my...
2,Elton John,Chloe,How come you're so understanding \r\nWhen I t...
3,Ian Hunter,You Nearly Did Me In,See lonely shadows - silver needles \r\nAband...
4,Pearl Jam,Beast Of Burden,I will be the least of your burdens \r\nMy ba...


In [39]:
# Checking the lyrics(text) of the first song.
df['text'][0]

"When you look inside  \r\nMy magic lantern  \r\nAll the world is yet unseen  \r\nTry to gaze into  \r\nThe yellow glowing  \r\nTo get a reflection of me  \r\nIn a land of make believe and magic  \r\nYou can be just anything you feel  \r\nYou think that what you're in is a dream  \r\nBut know that the lantern is viewing  \r\nView the magic lantern  \r\nTake a trip around the world  \r\nView the magic lantern  \r\nThere's a thousand  \r\nMysteries unfurled  \r\nAnd I know one of them  \r\nI remember your face  \r\nCan't remember the place  \r\nHere on our own  \r\nLiving alone  \r\nYou can dream that  \r\nYou're a man of power  \r\nReady to be happy with your gold  \r\nWithout love your mind is going sour  \r\nMoney's no good when you're old  \r\nView the magic lantern  \r\nYou can have the sky above  \r\nYeah, the magic lantern  \r\nYou'll buy everything but love  \r\nEverything but love  \r\n\r\n"

In [119]:
# Taking the random 5000 songs sample.
# df = df.sample(5000)

In [58]:
# Shape of the sample dataset.
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing.

In [60]:
df['text'] = df['text'].str.lower().replace(r'^\w\s','').replace(r'\n','',regex=True)

In [128]:
df.tail(5)

Unnamed: 0,artist,song,text
327,Wyclef Jean,Baby Daddy,right now if you rais some children that do n'...
1532,Red Hot Chili Peppers,Fire,"well alright , now dig thi ! you do n't care f..."
2110,Pat Benatar,Wuthering Heights,"out on the wiley , windi moor , we 'd roll and..."
4804,Hank Williams,Ready To Go Home,there 's come a day when the world shall melt ...
1974,Roy Orbison,A Mansion On The Hill,tonight down here in the valley i 'm lonesom a...


In [127]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [96]:
token("you are beautiful, beauty.")

'you are beauti , beauti .'

In [129]:
# Applying the token function to the whole sample data-set.
df['text'] = df['text'].apply(lambda x: token(x))

In [130]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [131]:
tfid = TfidfVectorizer(analyzer='word', stop_words='english')

In [132]:
matrix = tfid.fit_transform(df['text'])

In [133]:
similarity = cosine_similarity(matrix)

In [134]:
similarity[0]

array([1.        , 0.01851219, 0.01638098, ..., 0.02207278, 0.03432506,
       0.01471907])

In [138]:
df[df['song'] =='Fire'].index[0]

4238

Recommendation Function

In [144]:
def recommendation(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similarity[idx])), reverse = True, key = lambda x:x[1])
    song = []
    
    for s_id in distance[1:9]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [145]:
recommendation("Ready To Go Home")

['Come Breathe',
 'I Like It Like That',
 'Breath You Take',
 'Let The Good Times Roll',
 "Don't Hold Your Breath",
 'Full Circle',
 "Don't Let Me Be The Last To Know",
 'Bringing Me Down']

In [147]:
import pickle
pickle.dump(similarity, open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))