In [83]:
import json
import pandas as pd
import numpy as np
import datetime
from datetime import datetime as dt
import altair as alt
import dash
from dash import dcc, html, Input, Output, ClientsideFunction
alt.data_transformers.enable("vegafusion")

# Dash application object: one external stylesheet plus a viewport meta tag.
app = dash.Dash(
    __name__,
    external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'],
    meta_tags=[
        dict(name="viewport", content="width=device-width, initial-scale=1"),
    ],
)
app.title = "Spotify Song Popularity"

# Keep a module-level handle on the app's underlying server object.
server = app.server
# Suppress Dash's callback exceptions (see the Dash docs for the exact checks this disables).
app.config.suppress_callback_exceptions = True


# Read data
# Load the processed song table; index_col=0 uses the first CSV column as the row index.
df = pd.read_csv('../data/processed/spotify_songs_processed.csv', index_col=0)
# Re-type every object-dtype column as pandas' dedicated "string" dtype.
object_columns = df.select_dtypes(include=['object']).columns
for column in object_columns:
    df[column] = df[column].astype('string')
# Names of the song feature columns.
features=['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms']
# Distinct genre, subgenre and artist values, in order of first appearance in df.
genre_list = df['playlist_genre'].unique().tolist()
subgenre_list = df['playlist_subgenre'].unique().tolist()
artist_list = df['track_artist'].unique().tolist()

# Format release date
def parse_date(x):
    """Parse a release-date string with day, month, or year precision.

    The format is selected by the length of the string: 10 characters are
    parsed as ``%Y-%m-%d``, 7 as ``%Y-%m`` and 4 as ``%Y``.

    Args:
        x: Raw value to parse. Strings are parsed; values that already are
            ``datetime`` instances are returned unchanged; any other value
            (``None``, ``NaN``, ``pd.NA``, ...) is treated as missing.

    Returns:
        datetime or None: The parsed date, or ``None`` when the value is
        missing, has an unsupported length, or does not match the format
        chosen for its length.
    """
    if isinstance(x, dt):
        # Already a datetime: hand it back so applying the parser twice is harmless.
        return x
    if not isinstance(x, str):
        # Missing markers (None, NaN, pd.NA) have no len(); treat them as unparseable.
        return None

    formats_by_length = {10: "%Y-%m-%d", 7: "%Y-%m", 4: "%Y"}
    fmt = formats_by_length.get(len(x))
    if fmt is None:
        return None
    try:
        return dt.strptime(x, fmt)
    except ValueError:
        return None

# Replace the raw release-date strings with parsed datetimes (None where parsing fails).
df["track_album_release_date"] = df["track_album_release_date"].apply(parse_date)
# Column conversion: string -> datetime


def calculate_decade(date):
    """Label a timestamp with its decade, e.g. a date in 2019 becomes ``"2010s"``.

    Args:
        date: Value to label. Only ``pd.Timestamp`` instances are labelled.

    Returns:
        str or None: The decade's first year followed by ``"s"``, or ``None``
        for anything that is not a ``pd.Timestamp``.
    """
    if not isinstance(date, pd.Timestamp):
        return None
    # Drop the last digit of the year to get the first year of the decade.
    decade_start = date.year - date.year % 10
    return f"{decade_start}s"

# Derive a decade label (e.g. "2010s") from each parsed release date; None where there is no timestamp.
df["decade"] = df["track_album_release_date"].apply(calculate_decade)

Unnamed: 0,track_id,track_name,track_artist,track_popularity,track_album_id,track_album_name,track_album_release_date,playlist_name,playlist_id,playlist_genre,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,nominal_popularity,decade
0,6f807x0ima9a1j3VPbc7VN,I Don't Care (with Justin Bieber) - Loud Luxur...,Ed Sheeran,66,2oCs0DGTsRO98Gh5ZSl2Cx,I Don't Care (with Justin Bieber) [Loud Luxury...,2019-06-14,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,0.0583,0.102000,0.000000,0.0653,0.5180,122.036,194754,medium,2010s
1,0r7CVbZTWZgbTCYdfa2P31,Memories - Dillon Francis Remix,Maroon 5,67,63rPSO264uRjW1X5E6cWv6,Memories (Dillon Francis Remix),2019-12-13,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,0.0373,0.072400,0.004210,0.3570,0.6930,99.972,162600,medium,2010s
2,1z1Hg7Vb0AhHDiEmnDE79l,All the Time - Don Diablo Remix,Zara Larsson,70,1HoSmj2eLcsrR0vE9gThr4,All the Time (Don Diablo Remix),2019-07-05,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,0,0.0742,0.079400,0.000023,0.1100,0.6130,124.008,176616,high,2010s
3,75FpbthrwQmzHlBJLuGdC7,Call You Mine - Keanu Silva Remix,The Chainsmokers,60,1nqYsOef1yKKuGOVchbsk6,Call You Mine - The Remixes,2019-07-19,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,0.1020,0.028700,0.000009,0.2040,0.2770,121.956,169093,medium,2010s
4,1e8PAfcKUYoKkxPhrHqw4x,Someone You Loved - Future Humans Remix,Lewis Capaldi,69,7m7vv9wlQ4i0LFuJiE2zsQ,Someone You Loved (Future Humans Remix),2019-03-05,Pop Remix,37i9dQZF1DXcZDD7cfEKhW,pop,...,1,0.0359,0.080300,0.000000,0.0833,0.7250,123.976,189052,medium,2010s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32828,7bxnKAamR3snQ1VGLuVfC1,City Of Lights - Official Radio Edit,Lush & Simon,42,2azRoBBWEEEYhqV6sb7JrT,City Of Lights (Vocal Mix),2014-04-28,♥ EDM LOVE 2020,6jI1gFr6ANFtT8MmTvA2Ux,edm,...,1,0.0936,0.076600,0.000000,0.0668,0.2100,128.170,204375,medium,2010s
32829,5Aevni09Em4575077nkWHz,Closer - Sultan & Ned Shepard Remix,Tegan and Sara,20,6kD6KLxj7s8eCE3ABvAyf5,Closer Remixed,2013-03-08,♥ EDM LOVE 2020,6jI1gFr6ANFtT8MmTvA2Ux,edm,...,1,0.0420,0.001710,0.004270,0.3750,0.4000,128.041,353120,low,2010s
32830,7ImMqPP3Q1yfUHvsdn7wEo,Sweet Surrender - Radio Edit,Starkillers,14,0ltWNSY9JgxoIZO4VzuCa6,Sweet Surrender (Radio Edit),2014-04-21,♥ EDM LOVE 2020,6jI1gFr6ANFtT8MmTvA2Ux,edm,...,0,0.0481,0.108000,0.000001,0.1500,0.4360,127.989,210112,low,2010s
32831,2m69mhnfQ1Oq6lGtXuYhgX,Only For You - Maor Levi Remix,Mat Zo,15,1fGrOkHnHJcStl14zNx8Jy,Only For You (Remixes),2014-01-01,♥ EDM LOVE 2020,6jI1gFr6ANFtT8MmTvA2Ux,edm,...,1,0.1090,0.007920,0.127000,0.3430,0.3080,128.008,367432,low,2010s


In [124]:
def decade_trend_line():
    """Build the line chart of mean track popularity per decade.

    Reads the module-level ``df``. The y-axis is not forced to include zero.

    Returns:
        str: The chart as produced by ``Chart.to_html()``.
    """
    decade_axis = alt.X('decade', type='ordinal', title=None)
    popularity_axis = alt.Y(
        'mean(track_popularity)',
        scale=alt.Scale(zero=False),
        title='average popularity',
    )
    trend = alt.Chart(df).mark_line(color='red', opacity=0.4)
    trend = trend.encode(x=decade_axis, y=popularity_axis)
    trend = trend.properties(title='Average Popularity Trend', width=400, height=300)
    return trend.to_html()

# chart=alt.Chart(df).mark_line(color='red',opacity=0.4).encode(
#         x=alt.X('decade',type='ordinal',title=None),
#         y=alt.Y('mean(track_popularity)',scale=alt.Scale(zero=False),title='average popularity'),
#     ).properties(height=300,width=400,title='Average Popularity Trend')
# chart

In [123]:
def popularity_level_distribution():
    """Build the bar chart counting records per ``nominal_popularity`` level.

    Reads the module-level ``df``.

    Returns:
        str: The chart as produced by ``Chart.to_html()``.
    """
    level_axis = alt.X('nominal_popularity', type='ordinal', title=None)
    count_axis = alt.Y('count()', title='Count of Records')
    bars = alt.Chart(df).mark_bar(color='orange', opacity=0.7)
    bars = bars.encode(x=level_axis, y=count_axis)
    bars = bars.properties(title='Popularity Distribution', width=400, height=300)
    return bars.to_html()

# chart=alt.Chart(df).mark_bar(color='orange',opacity=0.7).encode(
#         x=alt.X('nominal_popularity',type='ordinal',title=None),
#         y=alt.Y('count()',title='Count of Records'),
#     ).properties(height=300,width=400,title='Popularity Distribution')
# chart

In [125]:
def genre_distribution():
    """Build the arc (pie-style) chart of record counts per ``playlist_genre``.

    Reads the module-level ``df``; each genre gets its own colour and an
    angle proportional to its record count.

    Returns:
        str: The chart as produced by ``Chart.to_html()``.
    """
    arcs = alt.Chart(df).mark_arc()
    arcs = arcs.encode(theta='count()', color=alt.Color('playlist_genre'))
    arcs = arcs.properties(title='Genre Distribution', width=400, height=300)
    return arcs.to_html()

# chart=alt.Chart(df).mark_arc().encode(
#             color=alt.Color('playlist_genre'),
#             theta='count()',
#         ).properties(height=300,width=400,title='Genre Distribution')
# chart

In [120]:
def top_10_popularity_songs():
    """Build a bar chart of the ten track names with the highest mean popularity.

    Popularity is averaged per ``track_name`` over the module-level ``df``,
    so rows that share a track name are pooled together. The x-axis runs
    from 5 points below the lowest of the ten means up to 100, and bars are
    clipped to that domain.

    Returns:
        str: The chart as produced by ``Chart.to_html()``.
    """
    # Select the column before aggregating: the first positional parameter of
    # GroupBy.mean is ``numeric_only``, not a column name.
    popularity_by_songs = (
        df.groupby('track_name')['track_popularity']
        .mean()
        .reset_index()
    )
    top10_songs = popularity_by_songs.nlargest(10, "track_popularity")
    # Start the axis just below the smallest bar so differences stay visible.
    popularity_min = top10_songs['track_popularity'].min() - 5
    chart = alt.Chart(top10_songs).mark_bar(clip=True).encode(
        x=alt.X("track_popularity", scale=alt.Scale(domain=[popularity_min, 100])),
        y=alt.Y("track_name", sort='-x')  # order the bars by descending x value
    ).properties(height=300, width=400, title='Song Popularity Ranking')
    return chart.to_html()

In [122]:
def top_10_average_popularity_artists():
    """Build a bar chart of the ten artists with the highest mean track popularity.

    Popularity is averaged per ``track_artist`` over the module-level ``df``.
    The x-axis runs from 5 points below the lowest of the ten means up to
    100, and bars are clipped to that domain.

    Returns:
        str: The chart as produced by ``Chart.to_html()``.
    """
    # Select the column before aggregating: the first positional parameter of
    # GroupBy.mean is ``numeric_only``, not a column name.
    popularity_by_artists = (
        df.groupby('track_artist')['track_popularity']
        .mean()
        .reset_index()
    )
    top10_artists = popularity_by_artists.nlargest(10, "track_popularity")
    # Start the axis just below the smallest bar so differences stay visible.
    popularity_min = top10_artists['track_popularity'].min() - 5
    chart = alt.Chart(top10_artists).mark_bar(clip=True).encode(
        x=alt.X("track_popularity", scale=alt.Scale(domain=[popularity_min, 100])),
        y=alt.Y("track_artist", sort='-x')  # order the bars by descending x value
    ).properties(height=300, width=400, title='Artist Popularity Ranking')
    return chart.to_html()