In [3]:
import pandas as pd
import numpy as np

In [13]:
datasetPath = "Dataset.csv"
df = pd.read_csv(datasetPath)
df = df.drop_duplicates(subset='spotify_id', keep='first')
print(df.head())

               spotify_id                   name                artists  \
0  27xkOIER6uDLKALIelHylZ  Don’t Say You Love Me                    Jin   
1  2RkZ5LkEzeHGRsmDqKwmaJ               Ordinary            Alex Warren   
2  7so0lgd0zP2Sbgs2d7a1SZ       Die With A Smile  Lady Gaga, Bruno Mars   
3  6dOtVTDdiauQNBQEDOtlAB     BIRDS OF A FEATHER          Billie Eilish   
4  0FTmksd2dxiE5e3rWyJXs6        back to friends                  sombr   

   daily_rank  daily_movement  weekly_movement country snapshot_date  \
0           1               1               20     NaN    2025-05-26   
1           2              -1               -1     NaN    2025-05-26   
2           3               0               -1     NaN    2025-05-26   
3           4               0               -1     NaN    2025-05-26   
4           5               1                0     NaN    2025-05-26   

   popularity  is_explicit  ...  key loudness mode  speechiness  acousticness  \
0          89        False  ...    

In [14]:
columns_to_drop = ["country", "daily_movement", "snapshot_date", "weekly_movement", "daily_rank", 'popularity', 'duration_ms']
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
df['is_explicit'] = df['is_explicit'].astype(int)
df['album_release_date'] = df['album_release_date'].apply(lambda x: str(x).split('-')[0])

print(df.shape)

cols_to_standardize = ['danceability', 'energy', 'key', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']
for col in cols_to_standardize:
    mean = df[col].mean()
    std = df[col].std()
    df[col] = (df[col] - mean) / std

df.head(10)


(24207, 18)


Unnamed: 0,spotify_id,name,artists,is_explicit,album_name,album_release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,27xkOIER6uDLKALIelHylZ,Don’t Say You Love Me,Jin,0,Echo,2025,0.401015,-0.538317,0.729062,0.488564,1,-0.801604,0.842754,-0.21084,-0.563225,0.976993,-0.376482,0.152543
1,2RkZ5LkEzeHGRsmDqKwmaJ,Ordinary,Alex Warren,0,"You'll Be Alright, Kid (Chapter 1)",2024,-2.17882,0.238504,-0.933425,0.280079,1,-0.540229,1.723476,-0.210789,-0.922351,-0.623311,1.639918,-2.7033
2,7so0lgd0zP2Sbgs2d7a1SZ,Die With A Smile,"Lady Gaga, Bruno Mars",0,MAYHEM,2025,-1.09972,-0.308801,0.1749,-0.22784,0,-0.790125,0.062114,-0.21084,-0.412392,-0.155462,1.275315,-2.7033
3,6dOtVTDdiauQNBQEDOtlAB,BIRDS OF A FEATHER,Billie Eilish,0,HIT ME HARD AND SOFT,2024,0.529649,-0.861992,-0.933425,-1.010537,1,-0.753921,-0.294178,0.264993,-0.477035,-0.417807,-0.627836,0.152543
4,0FTmksd2dxiE5e3rWyJXs6,back to friends,sombr,0,back to friends,2024,-1.692868,0.409169,-1.210506,1.51305,1,-0.804253,-1.09446,-0.21015,-0.650134,-1.305408,-1.06327,0.152543
5,4wJ5Qq0jBN4ajy7ouZIV1c,APT.,"ROSÉ, Bruno Mars",0,rosie,2024,0.74404,0.762269,-1.487588,0.812979,0,1.225815,-0.981542,-0.21084,1.232403,1.772773,0.954315,0.152543
6,6iOndD4OFo7GkaDypWQIou,La Plena - W Sound 05,"W Sound, Beéle, Ovy On The Drums",1,La Plena (W Sound 05),2025,1.580163,-0.061631,-0.102181,1.130669,1,0.095547,1.078948,-0.203139,-0.591955,0.548497,-0.841153,0.152543
7,4AajxCEwGEsmHmT4H1TwjY,undressed,sombr,0,undressed,2025,-0.220718,1.37431,-1.487588,1.003209,1,-0.711536,-0.546385,-0.21084,-0.721959,1.54978,-0.231984,0.152543
8,4e6TmHCC4PRUj75knNplNP,Mangu,"Fourtwnty, Charita Utami",0,Nalar,2023,-0.992525,-2.109612,0.451981,-0.550014,1,-0.859883,1.495289,-0.210817,-0.747098,-1.471559,-1.02042,0.152543
9,7tI8dRuH2Yc6RuoTjxo4dU,Who,Jimin,0,MUSE,2024,-0.092084,0.603374,-1.487588,1.048044,0,-0.787476,-1.083265,-0.21084,0.068836,1.331159,-0.230727,0.152543


In [15]:
from sklearn.metrics.pairwise import cosine_similarity

similarity_matrix = cosine_similarity(df[cols_to_standardize])


In [None]:
def recommend_songs(song_name, top_n=10):
    idx = df[df['name'].str.lower() == song_name.lower()].index
    if idx.empty:
        return "Song not found in dataset."
    
    idx = idx[0]
    
    similarity_scores = list(enumerate(similarity_matrix[idx]))
    
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    
    top_songs_idx = [i for i, score in similarity_scores[1:top_n+1]]
    
    return df.iloc[top_songs_idx][['name', 'artists']]

recommend_songs('Die With A Smile', top_n=5)


Unnamed: 0,name,artists
259491,Die With A Smile,"Lady Gaga, Bruno Mars"
1915472,Ishq Murshid (Original Score),Ahmed Jahanzeb
134757,Jueves 10,Junior H
1159394,ICE,Peso Pluma
43334,Kỵ Sĩ Và Ánh Sao,Đông Nhi


In [18]:
pip install kivy spotipy requests

Collecting kivy
  Downloading Kivy-2.3.1-cp313-cp313-win_amd64.whl.metadata (14 kB)
Collecting spotipy
  Downloading spotipy-2.25.1-py3-none-any.whl.metadata (5.1 kB)
Collecting Kivy-Garden>=0.1.4 (from kivy)
  Downloading Kivy_Garden-0.1.5-py3-none-any.whl.metadata (159 bytes)
Collecting docutils (from kivy)
  Downloading docutils-0.21.2-py3-none-any.whl.metadata (2.8 kB)
Collecting filetype (from kivy)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting kivy-deps.angle~=0.4.0 (from kivy)
  Downloading kivy_deps.angle-0.4.0-cp313-cp313-win_amd64.whl.metadata (238 bytes)
Collecting kivy-deps.sdl2~=0.8.0 (from kivy)
  Downloading kivy_deps.sdl2-0.8.0-cp313-cp313-win_amd64.whl.metadata (238 bytes)
Collecting kivy-deps.glew~=0.3.1 (from kivy)
  Downloading kivy_deps.glew-0.3.1-cp313-cp313-win_amd64.whl.metadata (237 bytes)
Collecting pypiwin32 (from kivy)
  Downloading pypiwin32-223-py3-none-any.whl.metadata (236 bytes)
Collecting redis>=3.5.3 (from spotipy)
  D


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
