In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the song/lyrics table from a CSV expected in the notebook's working directory.
data = pd.read_csv('spotify_millsongdata.csv')
# Print column names, non-null counts and dtypes as a quick sanity check of the load.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57650 entries, 0 to 57649
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   artist  57650 non-null  object
 1   song    57650 non-null  object
 2   link    57650 non-null  object
 3   text    57650 non-null  object
dtypes: object(4)
memory usage: 1.8+ MB


In [3]:
# The `link` column is not used by anything below, so discard it.
# NOTE: re-running this cell without reloading `data` raises KeyError, because
# the column is already gone.
data = data.drop(columns=['link'])

In [4]:
# Normalise the lyrics: every whitespace character (newline, carriage return,
# tab, ...), comma and period becomes a single plain space.
#
# `regex=True` is spelled out and the pattern is a raw string on purpose:
#   * the default of `Series.str.replace` changed from regex=True to
#     regex=False in pandas 2.0, so without the argument '\s' is either a
#     whitespace class or a literal backslash-s depending on the installed
#     version;
#   * '\s' in a non-raw string is an invalid escape sequence and triggers a
#     warning on recent Python versions.
# Inside a character class '.' is a literal period, so it cannot accidentally
# match every character.
data['text'] = data['text'].str.replace(r'[\s,.]', ' ', regex=True)

# Remove the spaces inside artist names (literal match, no regex needed);
# presumably so that a multi-word artist ends up as a single token once it is
# appended to the lyrics.
data['artist'] = data['artist'].str.replace(' ', '', regex=False)

In [5]:
# Display the frame after cleaning to eyeball the result.
data

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,Look at her face it's a wonderful face And...
1,ABBA,"Andante, Andante",Take it easy with me please Touch me gentl...
2,ABBA,As Good As New,I'll never know why I had to go Why I had t...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
...,...,...,...
57645,ZiggyMarley,Good Old Days,Irie days come on play Let the angels fly l...
57646,ZiggyMarley,Hand To Mouth,Power to the workers More power Power to...
57647,Zwan,Come With Me,all you need is something i'll believe f...
57648,Zwan,Desire,northern star am i frightened where can ...


In [6]:
# `tags` is the text handed to the vectorizer: the lyrics followed by the
# artist name, all lower-cased.
data['tags'] = (data['text'] + ' ' + data['artist']).str.lower()
data

Unnamed: 0,artist,song,text,tags
0,ABBA,Ahe's My Kind Of Girl,Look at her face it's a wonderful face And...,look at her face it's a wonderful face and...
1,ABBA,"Andante, Andante",Take it easy with me please Touch me gentl...,take it easy with me please touch me gentl...
2,ABBA,As Good As New,I'll never know why I had to go Why I had t...,i'll never know why i had to go why i had t...
3,ABBA,Bang,Making somebody happy is a question of give an...,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...,making somebody happy is a question of give an...
...,...,...,...,...
57645,ZiggyMarley,Good Old Days,Irie days come on play Let the angels fly l...,irie days come on play let the angels fly l...
57646,ZiggyMarley,Hand To Mouth,Power to the workers More power Power to...,power to the workers more power power to...
57647,Zwan,Come With Me,all you need is something i'll believe f...,all you need is something i'll believe f...
57648,Zwan,Desire,northern star am i frightened where can ...,northern star am i frightened where can ...


In [7]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words settings (integer values are absolute document counts):
#   * min_df=1000      - keep only terms that occur in at least 1000 songs;
#   * max_df=200000    - upper bound on document frequency; it exceeds the
#                        number of rows in `data`, so it filters nothing;
#   * max_features=2000 - cap on vocabulary size, applied after the two
#                        frequency filters.
vectorizer = CountVectorizer(
    max_features=2000,
    min_df=1000,
    max_df=200000,
)

# fit_transform yields a sparse count matrix; .toarray() turns it into a dense
# (n_songs, n_terms) NumPy array, which costs considerably more memory.
vector = vectorizer.fit_transform(data['tags']).toarray()

vector

array([[ 1,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ..., 16,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ...,  5,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  1]], dtype=int64)

In [8]:
# (number of songs, number of vocabulary terms kept by the vectorizer)
vector.shape

(57650, 740)

In [9]:
# Vocabulary terms the vectorizer kept; entry i names column i of `vector`.
vectorizer.get_feature_names_out()


array(['about', 'above', 'across', 'afraid', 'after', 'again', 'against',
       'ago', 'ah', 'ain', 'air', 'alive', 'all', 'almost', 'alone',
       'along', 'already', 'alright', 'always', 'am', 'an', 'and',
       'angel', 'angels', 'another', 'answer', 'any', 'anymore', 'anyone',
       'anything', 'apart', 'are', 'arms', 'around', 'as', 'ask', 'ass',
       'at', 'away', 'babe', 'baby', 'back', 'bad', 'be', 'beat',
       'beautiful', 'because', 'bed', 'been', 'before', 'behind', 'being',
       'believe', 'belong', 'beside', 'best', 'better', 'between', 'big',
       'bit', 'bitch', 'black', 'blame', 'blind', 'blood', 'blow', 'blue',
       'blues', 'body', 'born', 'both', 'bout', 'boy', 'boys', 'brain',
       'break', 'breath', 'breathe', 'bridge', 'bright', 'bring', 'broke',
       'broken', 'brother', 'burn', 'burning', 'but', 'buy', 'by', 'call',
       'called', 'calling', 'came', 'can', 'cannot', 'car', 'care',
       'carry', 'catch', 'caught', 'cause', 'chance', 'change'

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity is a dense square matrix, so only the first
# N_SIMILARITY_ROWS songs are compared (presumably to keep memory manageable:
# 20000 x 20000 float64 values is already about 3.2 GB).
# Consequence: `sim` knows nothing about songs at row 20000 or later.
N_SIMILARITY_ROWS = 20000
sim = cosine_similarity(vector[:N_SIMILARITY_ROWS])


In [11]:
# Similarity of the first song to every song in the slice; the leading 1.0 is
# the song compared with itself.
sim[0]

array([1.        , 0.31218611, 0.2576226 , ..., 0.19829203, 0.16822504,
       0.18307715])

In [12]:
def recommend_music(music, top_n=5):
    """Print the titles of the songs most similar to ``music``.

    Relies on the notebook globals ``data`` (frame with a ``song`` column) and
    ``sim`` (square similarity matrix whose row/column ``i`` corresponds to
    row ``i`` of ``data``). ``sim`` may cover only the first ``len(sim)`` rows
    of ``data``; only those rows can be queried or recommended.

    Parameters
    ----------
    music : str
        Exact song title to look up. If several rows share the title, the
        first one is used.
    top_n : int, default 5
        Number of recommendations to print.

    Raises
    ------
    IndexError
        If no row among those covered by ``sim`` has the requested title.
    """
    # Search only the rows that `sim` actually covers; a match further down
    # in `data` would index past the end of the similarity matrix.
    covered_titles = data['song'].iloc[:len(sim)]
    hits = (covered_titles == music).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(
            f"song {music!r} not found among the first {len(sim)} rows "
            "covered by the similarity matrix"
        )
    index = int(hits[0])

    distances = sim[index]
    # Highest similarity first; the sort is stable, so ties keep row order.
    ranked = sorted(range(len(distances)), key=lambda pos: distances[pos], reverse=True)
    # Drop the query song by position instead of assuming it sorts first:
    # another song with identical lyrics ties at similarity 1.0 and may
    # precede it, which would otherwise recommend the query to itself.
    picks = [pos for pos in ranked if pos != index][:top_n]

    for pos in picks:
        print(data['song'].iloc[pos])

In [16]:
# Show the first 20000 rows, i.e. the same slice that was passed to
# cosine_similarity when `sim` was built.
data[:20000]

Unnamed: 0,artist,song,text,tags
0,ABBA,Ahe's My Kind Of Girl,Look at her face it's a wonderful face And...,look at her face it's a wonderful face and...
1,ABBA,"Andante, Andante",Take it easy with me please Touch me gentl...,take it easy with me please touch me gentl...
2,ABBA,As Good As New,I'll never know why I had to go Why I had t...,i'll never know why i had to go why i had t...
3,ABBA,Bang,Making somebody happy is a question of give an...,making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...,making somebody happy is a question of give an...
...,...,...,...,...
19995,Ufo,Shake It About,Little girl you're so fine With that body ...,little girl you're so fine with that body ...
19996,Ufo,Time On My Hands,[Chorus] Long long year I've sat in this p...,[chorus] long long year i've sat in this p...
19997,Ufo,Young Blood,Young blood where you going now Young blood...,young blood where you going now young blood...
19998,UglyKidJoe,12 Cents,Watching waiting You know it shouldn't tak...,watching waiting you know it shouldn't tak...


In [17]:
# Example query: print the songs ranked most similar to '12 Cents'.
recommend_music('12 Cents')

The Best Day
One Flight Down
Day Like Today
Long Way Home
All I Need To Know
