In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('universal_top_spotify_songs.csv')
pd.set_option('display.max_columns', None)

In [3]:
df.head()

Unnamed: 0,spotify_id,name,artists,daily_rank,daily_movement,weekly_movement,country,snapshot_date,popularity,is_explicit,duration_ms,album_name,album_release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,7so0lgd0zP2Sbgs2d7a1SZ,Die With A Smile,"Lady Gaga, Bruno Mars",1,0,0,,2025-04-21,90,False,251667,MAYHEM,2025-03-07,0.519,0.601,6,-7.727,0,0.0317,0.289,0.0,0.126,0.498,157.964,3
1,6dOtVTDdiauQNBQEDOtlAB,BIRDS OF A FEATHER,Billie Eilish,2,0,0,,2025-04-21,99,False,210373,HIT ME HARD AND SOFT,2024-05-17,0.747,0.507,2,-10.171,1,0.0358,0.2,0.0608,0.117,0.438,104.978,4
2,2RkZ5LkEzeHGRsmDqKwmaJ,Ordinary,Alex Warren,3,0,1,,2025-04-21,88,False,186964,"You'll Be Alright, Kid (Chapter 1)",2024-09-26,0.368,0.694,2,-6.141,1,0.06,0.704,7e-06,0.055,0.391,168.115,3
3,4wJ5Qq0jBN4ajy7ouZIV1c,APT.,"ROSÉ, Bruno Mars",4,0,-1,,2025-04-21,92,False,169917,rosie,2024-12-06,0.777,0.783,0,-4.477,0,0.26,0.0283,0.0,0.355,0.939,149.027,4
4,0fK7ie6XwGxQTIkpFoWkd1,like JENNIE,JENNIE,5,2,5,,2025-04-21,95,True,123517,Ruby,2025-03-07,0.811,0.743,10,-5.321,0,0.0611,0.281,0.00671,0.0868,0.815,130.019,4


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1955484 entries, 0 to 1955483
Data columns (total 25 columns):
 #   Column              Dtype  
---  ------              -----  
 0   spotify_id          object 
 1   name                object 
 2   artists             object 
 3   daily_rank          int64  
 4   daily_movement      int64  
 5   weekly_movement     int64  
 6   country             object 
 7   snapshot_date       object 
 8   popularity          int64  
 9   is_explicit         bool   
 10  duration_ms         int64  
 11  album_name          object 
 12  album_release_date  object 
 13  danceability        float64
 14  energy              float64
 15  key                 int64  
 16  loudness            float64
 17  mode                int64  
 18  speechiness         float64
 19  acousticness        float64
 20  instrumentalness    float64
 21  liveness            float64
 22  valence             float64
 23  tempo               float64
 24  time_signature      int6

In [5]:
df.isnull().sum()

spotify_id                0
name                     30
artists                  29
daily_rank                0
daily_movement            0
weekly_movement           0
country               26757
snapshot_date             0
popularity                0
is_explicit               0
duration_ms               0
album_name              822
album_release_date      659
danceability              0
energy                    0
key                       0
loudness                  0
mode                      0
speechiness               0
acousticness              0
instrumentalness          0
liveness                  0
valence                   0
tempo                     0
time_signature            0
dtype: int64

In [6]:
# Drop rows where essential fields are missing
df.dropna(subset=['name', 'artists'], inplace=True)

# Fill optional text fields
df['album_name'] = df['album_name'].fillna("Unknown Album")
df['album_release_date'] = df['album_release_date'].fillna("Unknown")

# If country is important to your analysis:
df = df[df['country'].notnull()]

# Reset index after cleaning
df.reset_index(drop=True, inplace=True)

In [7]:
df.isnull().sum()

spotify_id            0
name                  0
artists               0
daily_rank            0
daily_movement        0
weekly_movement       0
country               0
snapshot_date         0
popularity            0
is_explicit           0
duration_ms           0
album_name            0
album_release_date    0
danceability          0
energy                0
key                   0
loudness              0
mode                  0
speechiness           0
acousticness          0
instrumentalness      0
liveness              0
valence               0
tempo                 0
time_signature        0
dtype: int64

In [8]:
# Emotion mapping function
def map_emotion(row):
    valence = row['valence']
    energy = row['energy']
    acousticness = row['acousticness']
    danceability = row['danceability']
    tempo = row['tempo']
    instrumentalness = row['instrumentalness']
    loudness = row['loudness']
    
    if valence > 0.7 and energy > 0.6:
        return 'Happy'
    elif valence < 0.3 and acousticness > 0.6:
        return 'Sad'
    elif acousticness > 0.7 and tempo < 90:
        return 'Relaxed'
    elif energy > 0.7 and valence < 0.4 and loudness > -7:
        return 'Angry'
    elif 0.4 < valence < 0.7 and tempo < 100 and danceability > 0.5:
        return 'Romantic'
    elif danceability > 0.7 and energy > 0.7 and tempo > 100:
        return 'Party'
    else:
        return 'Chill'

# Apply mapping
df['emotion'] = df.apply(map_emotion, axis=1)

In [9]:
# Save or preview result
df[['name', 'artists', 'emotion']].head()

Unnamed: 0,name,artists,emotion
0,Isaka (6am),"CIZA, Jazzworx, Thukuthela",Chill
1,Sdudla or Slender,"Shandesh, Mvzzle",Happy
2,Vuka (feat. Thukuthela),"Oscar Mbo, Jazzworx, Thukuthela",Chill
3,Vuma Dlozi Lami (feat. Ancestral Rituals),"Issa sisdoh, Ancestral Rituals",Happy
4,Ngisakuthanda,"Zee Nxumalo, TBO, PYY Logdrum King, DJ Tearz, ...",Party


In [10]:
# Recommendation function
def recommend_songs(emotion, top_n=10, country=None):
    filtered_df = df[df['emotion'] == emotion]
    
    if country:
        filtered_df = filtered_df[filtered_df['country'] == country]
    
    # Sort by popularity or daily_rank (you can switch)
    recommended = filtered_df.sort_values(by='popularity', ascending=False).head(top_n)
    
    return recommended[['name', 'artists', 'emotion', 'country', 'popularity']]

In [11]:
# Example usage
recommend_songs("Romantic", top_n=10)

Unnamed: 0,name,artists,emotion,country,popularity
1860843,Si No Estás,iñigo quintero,Romantic,SK,99
1769955,Si No Estás,iñigo quintero,Romantic,DE,99
1873000,Si No Estás,iñigo quintero,Romantic,IS,99
1798424,Si No Estás,iñigo quintero,Romantic,ES,99
1846842,Si No Estás,iñigo quintero,Romantic,PL,99
1791537,Si No Estás,iñigo quintero,Romantic,DE,99
1854619,Si No Estás,iñigo quintero,Romantic,MA,99
1791532,Si No Estás,iñigo quintero,Romantic,DK,99
1813277,Si No Estás,iñigo quintero,Romantic,CR,99
1854701,Si No Estás,iñigo quintero,Romantic,LU,99


In [12]:
emoji_emotion_map = {
    "😊": "Happy",
    "😢": "Sad",
    "😴": "Relaxed",
    "😡": "Angry",
    "🥰": "Romantic",
    "🎉": "Party",
    "😌": "Chill"
}

def recommend_from_emoji(emoji, top_n=10):
    emotion = emoji_emotion_map.get(emoji, "Chill")
    return recommend_songs(emotion, top_n)

In [13]:
recommend_from_emoji("😊", top_n=7)

Unnamed: 0,name,artists,emotion,country,popularity
1520669,greedy,Tate McRae,Happy,IT,100
1182781,i like the way you kiss me,Artemas,Happy,AE,100
1182679,i like the way you kiss me,Artemas,Happy,AT,100
1481122,greedy,Tate McRae,Happy,IS,100
1797132,greedy,Tate McRae,Happy,NL,100
1797085,greedy,Tate McRae,Happy,NO,100
1584593,greedy,Tate McRae,Happy,PH,100


In [14]:
recommend_songs("Happy", top_n=10)

Unnamed: 0,name,artists,emotion,country,popularity
1520669,greedy,Tate McRae,Happy,IT,100
1182781,i like the way you kiss me,Artemas,Happy,AE,100
1182679,i like the way you kiss me,Artemas,Happy,AT,100
1481122,greedy,Tate McRae,Happy,IS,100
1797132,greedy,Tate McRae,Happy,NL,100
1797085,greedy,Tate McRae,Happy,NO,100
1584593,greedy,Tate McRae,Happy,PH,100
1797031,greedy,Tate McRae,Happy,NZ,100
1709750,greedy,Tate McRae,Happy,US,100
1584479,greedy,Tate McRae,Happy,PL,100


In [15]:
df.head()

Unnamed: 0,spotify_id,name,artists,daily_rank,daily_movement,weekly_movement,country,snapshot_date,popularity,is_explicit,duration_ms,album_name,album_release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,emotion
0,7c5uGV9Rys18JP2570ykTu,Isaka (6am),"CIZA, Jazzworx, Thukuthela",1,0,1,ZA,2025-04-21,62,False,349910,Isaka (6am),2025-04-11,0.341,0.54,10,-9.07,0,0.0566,0.292,0.279,0.212,0.752,176.762,3,Chill
1,2Ufc72Ojfx9lR30r5srBPB,Sdudla or Slender,"Shandesh, Mvzzle",2,0,-1,ZA,2025-04-21,67,False,224187,Sdudla or Slender,2025-03-12,0.848,0.68,11,-10.33,1,0.0662,0.0915,0.000432,0.0425,0.815,127.052,4,Happy
2,24fRfn3FOM4xk02aE4wief,Vuka (feat. Thukuthela),"Oscar Mbo, Jazzworx, Thukuthela",3,0,4,ZA,2025-04-21,68,False,327451,Vuka (feat. Thukuthela),2024-12-13,0.738,0.455,4,-10.965,0,0.0466,0.175,0.198,0.105,0.524,118.006,4,Chill
3,0DGP0pRR8yz4OW2xxjIskt,Vuma Dlozi Lami (feat. Ancestral Rituals),"Issa sisdoh, Ancestral Rituals",4,0,0,ZA,2025-04-21,66,False,380238,Vuma Dlozi Lami (feat. Ancestral Rituals),2024-09-12,0.675,0.619,2,-8.775,1,0.0328,0.00155,0.0759,0.0456,0.759,117.988,4,Happy
4,2tUXE87QNnoVQZApV3uyVZ,Ngisakuthanda,"Zee Nxumalo, TBO, PYY Logdrum King, DJ Tearz, ...",5,0,0,ZA,2025-04-21,66,False,412092,Ngisakuthanda,2024-09-06,0.767,0.814,8,-5.779,0,0.0555,0.00891,0.000751,0.0668,0.546,112.998,4,Party


In [16]:
# Save the final dataset
#df.to_csv("final_music_data.csv", index=False)

In [17]:
df['country'].unique()

array(['ZA', 'VN', 'VE', 'UY', 'US', 'UA', 'TW', 'TR', 'TH', 'SV', 'SK',
       'SG', 'SE', 'SA', 'RO', 'PY', 'PT', 'PL', 'PK', 'PH', 'PE', 'PA',
       'NZ', 'NO', 'NL', 'NI', 'NG', 'MY', 'MX', 'MA', 'LV', 'LU', 'LT',
       'KZ', 'KR', 'JP', 'IT', 'IS', 'IN', 'IL', 'IE', 'ID', 'HU', 'HN',
       'HK', 'GT', 'GR', 'FR', 'FI', 'ES', 'EG', 'EE', 'EC', 'DO', 'DK',
       'DE', 'CZ', 'CR', 'CO', 'CL', 'CH', 'CA', 'BY', 'BR', 'BO', 'BG',
       'BE', 'AU', 'AT', 'AR', 'AE', 'GB'], dtype=object)