In [16]:
import pandas as pd

In [17]:
df = pd.read_csv("spotify_millsongdata.csv")

In [18]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [19]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [20]:
df.shape

(57650, 4)

In [21]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [22]:
df = df.sample(20000).drop('link', axis=1).reset_index(drop=True)

In [23]:
df.head(10)

Unnamed: 0,artist,song,text
0,Tom T. Hall,Lost In Florida,(Lost in Florida lost in Florida) \r\nHere's ...
1,Roxette,Call Of The Wild,"I know her, \r\nAnd every notion I get from h..."
2,Christina Aguilera,Around The World,Tonight I want to get so wild and exotic \r\n...
3,Natalie Cole,This Will Be,"Ooh, ooh, ooh, ooh, \r\nOh, oh, \r\nOoh, oh,..."
4,Rush,Marc Live,[Marc Live] \r\nWoo! Ohhh.. Yeah.. Sink in \...
5,Vince Gill,Way Back Home,A little girl was crying for her mama and her ...
6,John Prine,Saddle In The Rain,"I wish, I hope, I wonder \r\nWhere you're at ..."
7,Lou Reed,Images,I think images are worth repeating \r\nImages...
8,Point Of Grace,Jesus Will Still Be There,"Things change, plans fail \r\nYou look for lo..."
9,Diana Ross,Can't It Wait Until Tomorrow,I know you're standing there \r\nTrying to sa...


In [24]:
df['text'][0]

"(Lost in Florida lost in Florida)  \r\nHere's the ocean and here's the sand but I don't know where I am  \r\nHere's a seashell and here's a fish here's a turtle and here's a wish  \r\nIf you find me take me home if they don't want me leave me alone  \r\nIf you find out they forgot me take me back to ole Sopchoppy  \r\n(Lost in Florida lost in Florida)  \r\nHere's an oyster and here's a clam but I don't know where I am  \r\nHere's a gator and here's a bear but I don't worry and I don't care  \r\nIf you find me take me home...  \r\n(Lost in Florida lost in Florida)  \r\nHere's the skyline and here's the land but I don't know where I am  \r\nCall me Bobby or call me Paul but don't call me north at all  \r\n(Lost in Florida lost in Florida lost in Florida lost in Florida)\r\n\r\n"

In [25]:
df.shape

(20000, 3)

Text Cleaning /Text Preprocessing

In [26]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [27]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)


In [28]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [30]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [31]:
similarity[0]


array([1.        , 0.01697062, 0.00217914, ..., 0.00762414, 0.01180381,
       0.00650188])

In [32]:
df[df['song'] == 'Here And Now']

Unnamed: 0,artist,song,text
17016,Jose Mari Chan,Here And Now,you and i may never be the perfect pair the ki...
17078,'n Sync,Here And Now,here and now is the way i live unafraid of wha...
19713,Great Big Sea,Here And Now,the sun must set to rise the light will leav y...


In [33]:
# def recommendation(song_df):
#     idx = df[df['song'] == song_df].index[0]
#     distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
#     songs = []
#     for m_id in distances[1:21]:
#         songs.append(df.iloc[m_id[0]].song)
        
#     return songs


In [34]:
def recommendation(song_df):
    # Check if there are any matching rows
    matching_rows = df[df['song'] == song_df]
    
    if matching_rows.empty:
        return "No matching song found."
    
    idx = matching_rows.index[0]
    distances = sorted(list(enumerate(similarity[idx])), reverse=True, key=lambda x: x[1])
    
    songs = []
    for m_id in distances[1:21]:  # Get the top 20 similar songs
        songs.append(df.iloc[m_id[0]].song)
        
    return songs


In [35]:
recommendation('Here And Now')

["Say You'll Never Go",
 "Just Can't Say",
 'Days Like These',
 'You Were Meant For Me',
 'The One That You Love',
 'Change',
 'Easy Living',
 'Fool Til The End',
 "Darlin'",
 'How Could You Leave',
 "That's When I'll Give Up",
 'Back To Your Heart',
 'I Like The Way',
 "Let's Stay Together Tonight",
 'Moments',
 "I'll Be There For You",
 "I'll Never Give You Up",
 'Everytime',
 "Nothing's Gonna Change My Love For You",
 'Live For Today']

In [36]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))
