In [1]:
# WHICH 10 ARTISTS ON THE TOP CHARTS OF 2018 are closest
# to the mean in valence, loudness, energy, danceability, tempo?

In [2]:
import pandas as pd

In [3]:
# import csv
csvfile = "top2018.csv"
df = pd.read_csv(csvfile)

In [4]:
# select revelant columns
cols = ["name", "artists", "danceability", "energy", "loudness", "tempo"]
df_light = df[cols].copy()
df_light

Unnamed: 0,name,artists,danceability,energy,loudness,tempo
0,God's Plan,Drake,0.754,0.449,-9.211,77.169
1,SAD!,XXXTENTACION,0.740,0.613,-4.880,75.023
2,rockstar (feat. 21 Savage),Post Malone,0.587,0.535,-6.090,159.847
3,Psycho (feat. Ty Dolla $ign),Post Malone,0.739,0.559,-8.011,140.124
4,In My Feelings,Drake,0.835,0.626,-5.833,91.030
...,...,...,...,...,...,...
95,Rewrite The Stars,Zac Efron,0.684,0.619,-7.005,125.046
96,I Miss You (feat. Julia Michaels),Clean Bandit,0.638,0.658,-6.318,105.076
97,No Brainer,DJ Khaled,0.552,0.760,-4.706,135.702
98,Dusk Till Dawn - Radio Edit,ZAYN,0.258,0.437,-6.593,180.043


In [5]:
# calculate averages of each metric
stats = df_light.describe().transpose()
stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
danceability,100.0,0.71646,0.13107,0.258,0.6355,0.733,0.79825,0.964
energy,100.0,0.65906,0.145067,0.296,0.562,0.678,0.77225,0.909
loudness,100.0,-5.67764,1.777577,-10.109,-6.6505,-5.5665,-4.36375,-2.384
tempo,100.0,119.90418,28.795984,64.934,95.73075,120.116,140.02275,198.075


In [6]:
# calculate averages of each metric per artist, excluding outliers

# identify outliers
stats['IQR'] = stats['75%']-stats['25%']
outliers = pd.DataFrame(columns=df_light.columns)
for col in stats.index:
    iqr = stats.at[col, 'IQR']
    cutoff = iqr * 1.5
    lower = stats.at[col, '25%'] - cutoff
    upper = stats.at[col, '75%'] + cutoff
    results = df_light[(df_light[col]<lower)|(df_light[col]>upper)].copy()
    results['outlier'] = col
    outliers = outliers.append(results)


# get averages, excluding outliers
df_avg = df_light[(df_light["artists"]!="Billie Eilish") & (df_light["artists"]!="Keala Settle") & (df_light["artists"]!="ZAYN")]
danceability_avg = df_avg["danceability"].mean()
energy_avg = df_avg["energy"].mean()
loudness_avg = df_avg["loudness"].mean()
tempo_avg = df_avg["tempo"].mean()

print(danceability_avg, energy_avg, loudness_avg, tempo_avg, outliers)

# Outliers stand out especially for their unusually low value for danceability.

0.7294123711340206 0.6646288659793814 -5.606041237113403 118.59163917525771           artists  danceability  energy  loudness  \
80  Billie Eilish         0.351   0.296   -10.109   
93   Keala Settle         0.284   0.704    -7.276   
98           ZAYN         0.258   0.437    -6.593   
80  Billie Eilish         0.351   0.296   -10.109   

                           name       outlier    tempo  
80         lovely (with Khalid)  danceability  115.284  
93                   This Is Me  danceability  191.702  
98  Dusk Till Dawn - Radio Edit  danceability  180.043  
80         lovely (with Khalid)      loudness  115.284  


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [7]:
# calculate relative distances to mean for each metric and adding them to see which 10 artists are closer to the mean
df_light["dist_danceability"] = abs((df_light["danceability"] - danceability_avg)/danceability_avg)
df_light["dist_energy"] = abs((df_light["energy"] - energy_avg)/energy_avg)
df_light["dist_loudness"] = abs((df_light["loudness"] - loudness_avg)/loudness_avg)
df_light["dist_tempo"] = abs((df_light["tempo"] - tempo_avg)/tempo_avg)

# sum distances
df_light["diff"] = df_light["dist_danceability"] + df_light["dist_energy"] + df_light["dist_loudness"] + df_light["dist_tempo"]
df_light.sort_values(by=["diff"], ascending=False, inplace=True)

df_light

Unnamed: 0,name,artists,danceability,energy,loudness,tempo,dist_danceability,dist_energy,dist_loudness,dist_tempo,diff
80,lovely (with Khalid),Billie Eilish,0.351,0.296,-10.109,115.284,0.518791,0.554639,0.803233,0.027891,1.904554
38,changes,XXXTENTACION,0.669,0.308,-10.068,64.934,0.082823,0.536583,0.795920,0.452457,1.867784
98,Dusk Till Dawn - Radio Edit,ZAYN,0.258,0.437,-6.593,180.043,0.646291,0.342490,0.176053,0.518176,1.683010
93,This Is Me,Keala Settle,0.284,0.704,-7.276,191.702,0.610645,0.059238,0.297886,0.616488,1.584257
91,Yes Indeed,Lil Baby,0.964,0.346,-9.309,119.958,0.321612,0.479409,0.660530,0.011522,1.473072
...,...,...,...,...,...,...,...,...,...,...,...
39,In My Mind,Dynoro,0.694,0.770,-5.335,125.905,0.048549,0.158541,0.048348,0.061668,0.317107
72,Pray For Me (with Kendrick Lamar),The Weeknd,0.735,0.677,-4.979,100.584,0.007660,0.018614,0.111851,0.151846,0.289971
26,Love Lies (with Normani),Khalid,0.708,0.648,-5.626,143.955,0.029356,0.025020,0.003560,0.213871,0.271807
24,New Rules,Dua Lipa,0.762,0.700,-6.021,116.073,0.044677,0.053219,0.074020,0.021238,0.193154


In [8]:
# which 5 artists have a greater presence in the top chart?
# (index 6 onwards has only 2 songs on the list)
df_top5_popular = df_light.groupby("artists", as_index=False).agg({"name":"count"})
df_top5_popular = df_top5_popular.sort_values(by=["name"], ascending=False).head(5)
top5_popular = list(df_top5_popular["artists"])
top5_popular

['Post Malone', 'XXXTENTACION', 'Drake', 'Ed Sheeran', 'Marshmello']

In [9]:
# what characteristics do these artists' songs have that might make them more appealing?

stats2 = df_light[(df_light["artists"].isin(top5_popular))].describe()

print(df_light[(df_light["artists"].isin(top5_popular))])
print(stats2)

                                   name       artists  danceability  energy  \
38                              changes  XXXTENTACION         0.669   0.308   
61                              Nonstop         Drake         0.912   0.412   
28                       Jocelyn Flores  XXXTENTACION         0.872   0.391   
0                            God's Plan         Drake         0.754   0.449   
84  Perfect Duet (Ed Sheeran & Beyonc?)    Ed Sheeran         0.587   0.299   
9                               FRIENDS    Marshmello         0.626   0.880   
62       Fuck Love (feat. Trippie Redd)  XXXTENTACION         0.797   0.533   
64                              Silence    Marshmello         0.520   0.761   
12                        Nice For What         Drake         0.586   0.909   
51                              Happier    Marshmello         0.687   0.792   
29                              Perfect    Ed Sheeran         0.599   0.448   
2            rockstar (feat. 21 Savage)   Post Malon