In [25]:
import pandas as pd
import numpy as np
import altair as alt


In [26]:
# Load the song table, discard the 'Unnamed: 0' column and rename the
# 'top genre' column to 'top_genre' (no space in the name).
data = (
    pd.read_csv('top10s.csv', encoding='ISO-8859-1')
    .drop(columns='Unnamed: 0')
    .rename(columns={"top genre": "top_genre"})
)
data.head(20)

Unnamed: 0,title,artist,top_genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
0,"Hey, Soul Sister",Train,neo mellow,2010,97,89,67,-4,8,80,217,19,4,83
1,Love The Way You Lie,Eminem,detroit hip hop,2010,87,93,75,-5,52,64,263,24,23,82
2,TiK ToK,Kesha,dance pop,2010,120,84,76,-3,29,71,200,10,14,80
3,Bad Romance,Lady Gaga,dance pop,2010,119,92,70,-4,8,71,295,0,4,79
4,Just the Way You Are,Bruno Mars,pop,2010,109,84,64,-5,9,43,221,2,4,78
5,Baby,Justin Bieber,canadian pop,2010,65,86,73,-5,11,54,214,4,14,77
6,Dynamite,Taio Cruz,dance pop,2010,120,78,75,-4,4,82,203,0,9,77
7,Secrets,OneRepublic,dance pop,2010,148,76,52,-6,12,38,225,7,4,77
8,Empire State of Mind (Part II) Broken Down,Alicia Keys,hip pop,2010,93,37,48,-8,12,14,216,74,3,76
9,Only Girl (In The World),Rihanna,barbadian pop,2010,126,72,79,-4,7,61,235,13,4,73


In [27]:
# ACROSS ALL THE YEARS, WHAT ARE THE TRENDS OF ALL THE GENRES 

In [28]:
# Bar chart faceted into one column per genre: the x axis is the year and the
# bar height is the number of rows with that genre in that year.
genre_across_years = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        alt.X('year:O'),
        alt.Y('count(top_genre):Q'),
        alt.Color('year:N'),
        alt.Column('top_genre:N'),
        tooltip=['top_genre', 'count(top_genre)'],
    )
)
# Write a standalone HTML copy, then show the chart as the cell output.
genre_across_years.save('genre_across_years.html')
genre_across_years

In [29]:
# WHAT IS THE MOST POPULAR GENRE EACH YEAR 

In [30]:
# Count songs per (year, genre): rows are years, columns are genres,
# and year/genre pairs with no songs are filled with 0.
pivot = data.pivot_table(index='year', columns='top_genre', aggfunc='size', fill_value=0)

# For every year take the genre with the highest count and that count.
# idxmax/max operate on all rows at once, so no row-wise lambda returning
# tuples (and no tuple unpacking afterwards) is needed. On ties idxmax
# returns the first matching genre column.
df = pd.DataFrame({
    'Max_Genre': pivot.idxmax(axis=1),
    'Count': pivot.max(axis=1),
}).reset_index()

print(df)

   year  Max_Genre  Count
0  2010  dance pop     31
1  2011  dance pop     38
2  2012  dance pop     15
3  2013  dance pop     42
4  2014  dance pop     27
5  2015  dance pop     52
6  2016  dance pop     46
7  2017  dance pop     31
8  2018  dance pop     38
9  2019        pop      9


In [31]:
# One bar per year: height is the song count of that year's top genre,
# colour identifies which genre it is.
most_popular = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('year', axis=alt.Axis(format='')),
        alt.Y('Count'),
        alt.Color('Max_Genre'),
        tooltip=['Max_Genre', 'Count'],
    )
)

# Write a standalone HTML copy, then show the chart as the cell output.
most_popular.save('most_popular.html')
most_popular

In [32]:
# it's all dance pop so let's look at the other genres... 

# NOW WE CAN LOOK AT TRENDS OF GENRE OTHER THAN DANCE POP 

In [33]:
# Keep only the songs whose genre is something other than 'dance pop'.
no_dance_pop = data.loc[data['top_genre'].ne('dance pop')]
no_dance_pop.head()

Unnamed: 0,title,artist,top_genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
0,"Hey, Soul Sister",Train,neo mellow,2010,97,89,67,-4,8,80,217,19,4,83
1,Love The Way You Lie,Eminem,detroit hip hop,2010,87,93,75,-5,52,64,263,24,23,82
4,Just the Way You Are,Bruno Mars,pop,2010,109,84,64,-5,9,43,221,2,4,78
5,Baby,Justin Bieber,canadian pop,2010,65,86,73,-5,11,54,214,4,14,77
8,Empire State of Mind (Part II) Broken Down,Alicia Keys,hip pop,2010,93,37,48,-8,12,14,216,74,3,76


In [34]:
# Count songs per (year, genre) with 'dance pop' already filtered out;
# year/genre pairs with no songs are filled with 0.
pivot2 = no_dance_pop.pivot_table(index='year', columns='top_genre', aggfunc='size', fill_value=0)

# For every year take the remaining genre with the highest count and that
# count, using vectorised idxmax/max instead of a row-wise lambda that
# returns tuples. On ties idxmax returns the first matching genre column.
df2 = pd.DataFrame({
    'Max_Genre': pivot2.idxmax(axis=1),
    'Count': pivot2.max(axis=1),
}).reset_index()

print(df2)

   year      Max_Genre  Count
0  2010    atl hip hop      3
1  2011  barbadian pop      4
2  2012            pop      7
3  2013       boy band      6
4  2014            pop      9
5  2015   canadian pop     11
6  2016   canadian pop      7
7  2017            pop      5
8  2018            pop      9
9  2019            pop      9


In [40]:
# Same bar chart as for the overall top genre, but built from the table
# that excludes 'dance pop'.
not_dance_most_pop = (
    alt.Chart(df2)
    .mark_bar()
    .encode(
        alt.X('year', axis=alt.Axis(format='')),
        alt.Y('Count'),
        alt.Color('Max_Genre'),
        tooltip=['Max_Genre', 'Count'],
    )
)

# Write a standalone HTML copy, then show the chart as the cell output.
not_dance_most_pop.save('not_dance_most_pop.html')
not_dance_most_pop

In [41]:
# Things we can notice:
    # 2019 was the only year in the previous vis whose most popular genre was not dance pop,
        # and its top genre ('pop') is unchanged here.
        
    # Different versions of pop are still the most popular -- with the genre 'pop' showing up the most here.
    # The number of songs per year varies in this data set (it is not a fixed 50); in most years this vis
        # together with the previous one accounts for most of that year's songs.

In [42]:
# Ok so maybe it's not just genre that dictates what is popular... let's look at the song stats 

In [43]:
#2010, 2013, 2016, 2019 

In [44]:
# Average every numeric song attribute per year.
# The text columns (title, artist, top_genre) cannot be averaged, so the
# aggregation is limited to numeric columns explicitly rather than relying
# on pandas to discard them: older versions dropped them silently, newer
# versions raise instead. Sorting keeps the alphabetical column order that
# pivot_table produces.
stat_columns = sorted(data.select_dtypes(include='number').columns.drop('year'))
song_stat_pivot = data.pivot_table(index='year', values=stat_columns, aggfunc='mean')
song_stat_pivot = song_stat_pivot.reset_index()
song_stat_pivot


Unnamed: 0,year,acous,bpm,dB,dnce,dur,live,nrgy,pop,spch,val
0,2010,11.627451,122.058824,-4.901961,64.529412,229.803922,21.176471,77.901961,64.254902,8.882353,57.0
1,2011,13.339623,119.075472,-5.018868,63.641509,242.566038,20.943396,74.886792,61.867925,9.660377,53.698113
2,2012,4.857143,121.085714,-4.857143,65.714286,224.4,15.828571,75.485714,67.771429,5.8,64.171429
3,2013,10.323944,121.676056,-5.140845,62.042254,234.492958,19.71831,73.873239,63.985915,8.309859,53.183099
4,2014,17.551724,123.0,-5.775862,62.534483,224.155172,17.293103,67.775862,62.706897,8.672414,52.086207
5,2015,16.6,119.768421,-5.621053,63.663158,223.368421,18.305263,70.336842,64.568421,7.052632,52.526316
6,2016,15.875,114.325,-6.7125,63.325,220.225,17.7375,67.2375,64.1625,8.3625,45.15
7,2017,16.6,116.8,-5.615385,65.369231,222.169231,15.369231,69.169231,69.015385,9.784615,52.276923
8,2018,12.78125,114.59375,-5.671875,67.203125,217.1875,14.75,65.46875,72.4375,8.625,48.765625
9,2019,21.741935,112.451613,-5.774194,69.709677,200.645161,15.16129,64.741935,84.354839,8.129032,50.806452


In [46]:
def stat_trend_chart(stats, column, y_max=130):
    """Build a line chart of one per-year statistic.

    Parameters
    ----------
    stats : DataFrame with a 'year' column and a column named `column`.
    column : name of the statistic to plot on the y axis.
    y_max : upper bound of the y axis; the axis always starts at 0 so that
        charts placed side by side share the same scale.

    Returns
    -------
    An Altair line chart with hollow point markers and a year/value tooltip.
    """
    hollow_points = alt.OverlayMarkDef(filled=False, fill="white")
    return alt.Chart(stats).mark_line(point=hollow_points).encode(
        x=alt.X('year', axis=alt.Axis(format='')),
        y=alt.Y(column, scale=alt.Scale(domain=[0, y_max])),
        tooltip=['year', column],
    )


# One chart per statistic, all built the same way from the yearly averages.
bpm = stat_trend_chart(song_stat_pivot, 'bpm')
dnce = stat_trend_chart(song_stat_pivot, 'dnce')
nrgy = stat_trend_chart(song_stat_pivot, 'nrgy')
pop = stat_trend_chart(song_stat_pivot, 'pop')

# Save each chart as '<statistic>.html'.
for stat_name, stat_chart in (('bpm', bpm), ('dnce', dnce), ('nrgy', nrgy), ('pop', pop)):
    stat_chart.save(f'{stat_name}.html')

# Show the four charts side by side.
bpm | dnce | nrgy | pop