In [1]:
# import the necessary packages
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go

In [2]:
# Make "plotly_white" the default template so every figure created below
# picks it up without having to set a template on each plot individually.
pio.templates.default = "plotly_white"

In [4]:
# Load the chart history and prepare it for plotting.
# NOTE: read_table splits on tabs by default, so the file is expected to be
# tab-delimited even though its extension is .csv.
df = pd.read_table("date_indexed_songs.csv")

# Index the rows by parsed date; the original 'date' column is kept alongside
# the index because later cells still reference it by name.
df.index = pd.Index(df['date'], name='date').astype('datetime64[ns]')

# Map edition-specific album titles to the short names used in the plots.
# Categories absent from the mapping are passed through unchanged.
album_display_names = {
    'Taylor Swift': 'Taylor Swift',
    'Fearless Platinum Edition': 'Fearless',
    'Speak Now (Deluxe Edition)': 'Speak Now',
    'Red (Deluxe Edition)': 'Red',
    '1989 (Deluxe Edition)': '1989',
    'reputation': 'reputation',
    'lover': 'lover',
    'folklore (deluxe version)': 'folklore',
    'evermore (deluxe version)': 'evermore',
    "Fearless (Taylor's Version)": "Fearless (Taylor's Version)",
    "Red (Taylor's Version)": "Red (Taylor's Version)",
    'Midnights (3am Edition)': 'Midnights',
}
df['album_name'] = (
    df['album_name']
    .astype('category')
    .cat.rename_categories(album_display_names)
)

In [6]:
# Plot palette: one hex colour per album, picked from the album cover.
album_colors = {
    "Taylor Swift": "#4BA1C7",
    "Fearless": "#C9B079",
    "1989": "#C5b5d4",
    "Red": "#a71f1f",
    "reputation": "#000000",
    "Midnights": "#898ea8",
    "folklore": "#CBCBCB",
    "evermore": "#DA9F79",
    "Red (Taylor's Version)": "#c96464",
    "Speak Now": "#B9779E",
    "lover": "#FCF1C7",
    "Fearless (Taylor's Version)": "#745835",
}

# All main albums, in the order their traces are added to the per-album plot.
albums = [
    "Taylor Swift",
    "Fearless",
    "Speak Now",
    "Red",
    "1989",
    "reputation",
    "lover",
    "folklore",
    "evermore",
    "Fearless (Taylor's Version)",
    "Red (Taylor's Version)",
    "Midnights",
]

In [33]:
# Plot, for every album, how many chart entries it had in each time bucket.
def get_all_albums(period):
    """Draw one line per album counting its Hot 100 entries per ``period``.

    For each name in the module-level ``albums`` list, the rows of ``df``
    belonging to that album are resampled on the date index and the non-null
    ``rank`` values in each bucket are counted. Each album becomes one
    lines+markers trace coloured from ``album_colors``.

    Parameters
    ----------
    period : str
        Resampling rule forwarded to ``DataFrame.resample`` (e.g. ``'3M'``).

    Side effects
    ------------
    Writes the figure to
    "Number of songs in the billboard hot 100 by album.html" in the working
    directory and displays it. Returns ``None``.
    """
    figure = go.Figure()

    for album_title in albums:
        album_rows = df[df['album_name'] == album_title]
        entries_per_period = album_rows.resample(period).count()

        album_trace = go.Scatter(
            x=entries_per_period.index,
            y=entries_per_period['rank'],
            mode='lines+markers',
            name=album_title,
            marker_color=album_colors[album_title],
        )
        figure.add_trace(album_trace)

    figure.update_layout(
        title='Number of Songs in the Billboard Hot 100 by Album',
        xaxis_title='Date',
        yaxis_title='Number of Songs in the Billboard Hot 100',
        legend_title='Albums',
    )
    figure.write_html("Number of songs in the billboard hot 100 by album.html")
    figure.show()

In [34]:
# '3M' is a pandas offset alias: chart entries are counted in three-month buckets.
get_all_albums('3M')

In [35]:
# Show all songs from an album averaged out by a user-defined period
def get_album_songs(album, period):
    """Plot the mean Hot 100 rank of each song on ``album`` per ``period``.

    Rows of the module-level ``df`` whose ``album_name`` equals ``album`` are
    grouped by song, resampled on the date index, and the ``rank`` column is
    averaged within each bucket. Every song becomes one lines+markers trace.
    The y-axis runs from 100 (bottom) to 1 (top) so better ranks appear higher.

    Parameters
    ----------
    album : str
        Album name as it appears in ``df['album_name']`` (e.g. ``'reputation'``).
    period : str
        Resampling rule forwarded to ``resample`` (e.g. ``'W'``).

    Side effects
    ------------
    Writes the figure to "<album>: popularity over time.html" in the working
    directory and displays it. Returns ``None``.
    """
    album_rows = df[df['album_name'] == album]
    # Skip missing titles: groupby drops NaN keys, so looking one up would fail.
    songs = album_rows['song'].dropna().unique()
    # Average only 'rank'. Taking the mean of the whole frame also hits the
    # string columns, which raises TypeError on pandas >= 2.0.
    mean_rank = album_rows.groupby('song')['rank'].resample(period).mean()

    fig = go.Figure()
    for song in songs:
        song_rank = mean_rank.loc[song]  # Series indexed by period start/end date
        fig.add_trace(go.Scatter(x=song_rank.index, y=song_rank,
                                 mode='lines+markers', name=song))
    # Fixed, reversed range so rank 1 is at the top of the chart.
    fig.update_yaxes(range=[100, 1], tick0=1)
    fig.update_layout(
        title='"' + album + '": Popularity Over Time',
        yaxis_title="Billboard Hot 100 Ranking",
        xaxis_title="Date",
        legend_title="Ranked Songs In Album",
    )
    # Name the file after the requested album rather than always "reputation",
    # so plotting another album no longer overwrites the reputation export.
    # NOTE(review): ':' is not allowed in Windows filenames; kept so the
    # existing "reputation: popularity over time.html" output name is unchanged.
    fig.write_html(album + ": popularity over time.html")
    fig.show()

In [36]:
# 'W' is the pandas weekly offset alias: ranks are averaged per week for each song.
get_album_songs('reputation', 'W')

In [27]:
# get the most featured songs
# Restrict the data to the ten songs with the most chart appearances and draw
# a horizontal histogram (songs on the y-axis, appearance counts on the x-axis),
# coloured by album.
df1 = df.copy()
sorter = df1['song'].value_counts().iloc[:10].index.tolist()
# Songs outside the top ten become NaN once the categories are limited to `sorter`.
df1['song'] = df1['song'].astype("category").cat.set_categories(sorter)
# Drop only on 'song': a bare dropna() would also discard top-ten rows that
# happen to have a missing value in some unrelated column and undercount them.
df1 = df1.dropna(subset=['song'])

fig = px.histogram(df1, y='song', color='album_name', color_discrete_map=album_colors)
fig.update_layout(
    yaxis_categoryorder='total descending',
    title='Songs Featured the Most in the Billboard Hot 100',
    # y='song' puts the songs on the y-axis and the counts on the x-axis,
    # so the axis titles must follow that orientation.
    xaxis_title='Times Featured in the Billboard Hot 100',
    yaxis_title='Song',
    legend_title='Album Name')
fig.write_html("Songs Featured the Most in the Billboard Hot 100.html")
fig.show()

In [26]:
# Histogram of every chart row at rank 1, one bar per song, stacked/coloured by
# album and ordered from most to fewest occurrences.
fig = px.histogram(
    df.loc[df['rank'].eq(1)],
    x='song',
    color='album_name',
    color_discrete_map=album_colors,
)
fig.update_layout(
    xaxis_categoryorder='total descending',
    title='Songs Ranked #1 in the Billboard Hot 100',
    xaxis_title='Songs',
    yaxis_title='Number of Occurrences',
    legend_title='Album Name',
)
fig.write_html("Songs Ranked #1 in the Billboard Hot 100.html")
fig.show()

In [37]:
# Density heat map of chart rows: date on the x-axis (30 bins), rank on the
# y-axis (10 bins). The y-range is reversed so rank 1 sits at the top.
fig = (
    px.density_heatmap(
        df,
        x="date",
        y="rank",
        nbinsx=30,
        nbinsy=10,
        color_continuous_scale='Blues',
    )
    .update_yaxes(range=[100, 1], tick0=1)
    .update_layout(
        title='Popularity Over the Years',
        xaxis_title='Date',
        yaxis_title='Top 100 Billboard Ranking',
    )
)
fig.write_html("Popularity Over the Years.html")
fig.show()

In [38]:
# frequency of songs in hot 100 by album
# Build an explicit two-column frame (album_name, count) for the 11 albums with
# the most chart rows. Naming the columns here keeps the plot independent of
# how value_counts() names its result, which changed in pandas 2.0.
album_counts = (
    df['album_name']
    .value_counts()
    .iloc[:11]
    .rename_axis('album_name')
    .reset_index(name='count')
)

# Colours for this chart: start from the shared album palette so every album
# has an entry, then apply the chart-specific shades that were listed here.
bar_colors = {
    **album_colors,
    'Taylor Swift': '#4BA1C7',
    'Fearless': '#C9B079',
    '1989': '#C5b5d4',
    'Red': '#7d1818',
    'reputation': '#4d4d4d',
    'Midnights': '#898ea8',
    'folklore': '#CBCBCB',
    'evermore': '#DA9F79',
    "Red (Taylor's Version)": '#c96464'
    }

fig = px.bar(
    album_counts,
    x='count',
    y='album_name',
    orientation='h',
    # color_discrete_map only takes effect when `color` is set; without it
    # every bar is drawn in the same default colour.
    color='album_name',
    color_discrete_map=bar_colors,
    # Pin the bar order to the count-sorted order of the data.
    category_orders={'album_name': album_counts['album_name'].tolist()},
)
fig.update_layout(
    title='Count of Songs Featured in the Billboard Hot 100 by Album',
    xaxis_title='Count',
    yaxis_title='Albums',
    legend_title='Album Name')
fig.write_html("Count of Songs Featured in the Billboard Hot 100 by Album.html")
fig.show()