In [17]:
import pandas as pd

In [18]:
df = pd.read_csv("spotify_millsongdata.csv")
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


# Text Cleaning

In [19]:
df["text"] = df["text"].str.lower()
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face \r\na..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please \r\ntouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go \r\nwhy i had...
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...


In [23]:
df["text"] = df["text"].replace(r"^\w\s", " ").replace(r"\n", " ", regex=True)
df

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face \r an..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please \r touch me gent..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go \r why i had ...
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...
...,...,...,...,...
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,irie days come on play \r let the angels fly ...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,power to the workers \r more power \r power ...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r is something i'll believe \r...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r am i frightened \r where ca...


In [24]:
df["text"][0]

"look at her face, it's a wonderful face  \r and it means something special to me  \r look at the way that she smiles when she sees me  \r how lucky can one fellow be?  \r   \r she's just my kind of girl, she makes me feel fine  \r who could ever believe that she could be mine?  \r she's just my kind of girl, without her i'm blue  \r and if she ever leaves me what could i do, what could i do?  \r   \r and when we go for a walk in the park  \r and she holds me and squeezes my hand  \r we'll go on walking for hours and talking  \r about all the things that we plan  \r   \r she's just my kind of girl, she makes me feel fine  \r who could ever believe that she could be mine?  \r she's just my kind of girl, without her i'm blue  \r and if she ever leaves me what could i do, what could i do?\r \r "

# Sample for easier calculation (temporary)

In [25]:
df1 = df.sample(1000).drop('link', axis=1).reset_index(drop=True)
df1.head()

Unnamed: 0,artist,song,text
0,Boney M.,Happy Song,"we are down, we are crazy fools everyday at sc..."
1,Cheap Trick,This Time You Got It,yeah \r yeah \r \r if i knew what i wish i...
2,Journey,Knowing That You Love Me,i still see the look in your eyes \r the nigh...
3,Unwritten Law,Cailin,well it seemed like yesterday when the world w...
4,Doobie Brothers,Song To See You Through,sometimes i can't see past a day \r i know i'...


# Tokenization

In [36]:
# pip install nltk
import nltk
from nltk.stem.porter import PorterStemmer


import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\90543\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [37]:
stemmer = PorterStemmer()

def tokenization(text):
    tokens = nltk.word_tokenize(text)
    word_list = [stemmer.stem(i) for i in tokens]
    
    return " ".join(word_list)

In [38]:
# As an example
# import random
# index = random.choice(df1["text"].index)
# print("Artist:", df1["artist"][index], "\t\tSong:", df1["song"][index])
# tokenization(df1["text"][index])

In [44]:
temp_data = df1["text"].apply(lambda x: tokenization(x))

# Vectorizing

In [45]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [46]:
tfid = TfidfVectorizer(analyzer="word", stop_words="english")

In [47]:
sparse_matrix = tfid.fit_transform(temp_data)

In [49]:
similarity = cosine_similarity(sparse_matrix)

In [50]:
df[df['song'] == 'Crying Over You']

Unnamed: 0,artist,song,link,text
9,ABBA,Crying Over You,/a/abba/crying+over+you_20177611.html,i'm waitin' for you baby \r i'm sitting all a...
19891,UB40,Crying Over You,/u/ub40/crying+over+you_20141696.html,crying over you in the morning \r crying over...


In [60]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:10]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [61]:
song = input("Give me a song: ")

print("Discovery suggestions songs for you:")

for i in recommendation(song):
    print(i)

Discovery suggestions songs for you:
Put A Little Love In Your Heart
Now And Then
Miss The Mississippi And You
Quiet Night Of Quiet Stars
Sogno
As Good As New
The Hand That Feeds
When All Is Said And Done
(The System Of) Doctor Tarr And Professor Fether
