In [3]:
import json
import pandas as pd
import numpy as np
import datetime
from datetime import datetime as dt
import altair as alt
import dash
from dash import dcc, html, Input, Output, ClientsideFunction
# Switch Altair to the "vegafusion" data transformer before any chart is built.
# NOTE(review): presumably needed because the frame (~32k rows) is larger than
# Altair's default row limit - confirm.
alt.data_transformers.enable("vegafusion")
# Dash application object; the responsive viewport meta tag and the external
# stylesheet are supplied at construction time.
app = dash.Dash(
    __name__,
    meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}],
    external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css']
)
app.title = "Spotify Song Popularity"

# Module-level handle on the app's underlying server object.
# NOTE(review): presumably exposed for a WSGI runner - confirm against deployment.
server = app.server
# NOTE(review): presumably set because some callbacks target components that are
# not in the initial layout - confirm.
app.config.suppress_callback_exceptions = True


# Read data
df = pd.read_csv('../data/processed/spotify_songs_processed.csv', index_col=0)

# Re-type every object column to pandas' dedicated "string" dtype.
object_columns = df.select_dtypes(include=['object']).columns
for column in object_columns:
    df[column] = df[column].astype('string')

# Names of the per-track audio attribute columns.
features=['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms']

# Distinct values of the categorical columns, in order of first appearance.
# Missing entries are dropped first: Series.unique() keeps a missing value as
# an element of its result, and track_artist is not fully populated, so the
# artist list would otherwise contain an <NA> entry.
genre_list = df['playlist_genre'].dropna().unique().tolist()
subgenre_list = df['playlist_subgenre'].dropna().unique().tolist()
artist_list = df['track_artist'].dropna().unique().tolist()

# Format release date
def parse_date(x):
    """Convert a release-date value into a ``datetime``.

    The layout is chosen from the length of the string:
    ``YYYY-MM-DD`` (10 characters), ``YYYY-MM`` (7) or ``YYYY`` (4).
    Components absent from the string take ``strptime``'s defaults
    (month 1, day 1).

    Parameters
    ----------
    x : str, datetime, or a missing value
        The raw value to convert.

    Returns
    -------
    datetime or None
        The parsed datetime; ``x`` itself when it already is a datetime;
        ``None`` when ``x`` is not a string, has an unsupported length, or
        does not match the layout selected for its length.
    """
    # Already converted (pandas Timestamps are datetime subclasses): hand it
    # back untouched so applying this function twice is harmless.
    if isinstance(x, dt):
        return x

    # None / NaN / pd.NA and any other non-string cannot be measured with
    # len() or parsed; treat them as "no date" instead of raising TypeError.
    if not isinstance(x, str):
        return None

    layout = {10: "%Y-%m-%d", 7: "%Y-%m", 4: "%Y"}.get(len(x))
    if layout is None:
        return None

    try:
        return dt.strptime(x, layout)
    except ValueError:
        # Right length, wrong content (e.g. "2020-13-45").
        return None

# Convert the release-date column element by element with parse_date
# (string -> datetime); the column is overwritten in place on df.
df["track_album_release_date"] = df["track_album_release_date"].apply(parse_date)

In [20]:
# Inspect the prepared frame: column dtypes and non-null counts first, then
# a preview of the first rows (only the last expression is rendered as output).
df.info()
df.head()


<class 'pandas.core.frame.DataFrame'>
Index: 32833 entries, 0 to 32832
Data columns (total 25 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   track_id                  32833 non-null  string        
 1   track_name                32828 non-null  string        
 2   track_artist              32828 non-null  string        
 3   track_popularity          32833 non-null  int64         
 4   track_album_id            32833 non-null  string        
 5   track_album_name          32828 non-null  string        
 6   track_album_release_date  32833 non-null  datetime64[ns]
 7   playlist_name             32833 non-null  string        
 8   playlist_id               32833 non-null  string        
 9   playlist_genre            32833 non-null  string        
 10  playlist_subgenre         32833 non-null  string        
 11  danceability              32833 non-null  float64       
 12  energy                 

Unnamed: 0,track_popularity,track_album_release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,duration_min
count,32833.0,32833,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0,32833.0
mean,42.477081,2011-08-07 17:30:40.185179648,0.65485,0.698619,5.374471,-6.719499,0.565711,0.107068,0.175334,0.084747,0.190176,0.510561,120.881132,225799.811622,3.76333
min,0.0,1957-01-01 00:00:00,0.0,0.000175,0.0,-46.448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4000.0,0.066667
25%,24.0,2008-08-25 00:00:00,0.563,0.581,2.0,-8.171,0.0,0.041,0.0151,0.0,0.0927,0.331,99.96,187819.0,3.130317
50%,45.0,2016-08-26 00:00:00,0.672,0.721,6.0,-6.166,1.0,0.0625,0.0804,1.6e-05,0.127,0.512,121.984,216000.0,3.6
75%,62.0,2019-04-19 00:00:00,0.761,0.84,9.0,-4.645,1.0,0.132,0.255,0.00483,0.248,0.693,133.918,253585.0,4.226417
max,100.0,2020-01-29 00:00:00,0.983,1.0,11.0,1.275,1.0,0.918,0.994,0.994,0.996,0.991,239.44,517810.0,8.630167
std,24.984074,,0.145085,0.18091,3.611657,2.988436,0.495671,0.101314,0.219633,0.22423,0.154317,0.233146,26.903624,59834.006182,0.997233


In [11]:
import altair as alt

# Function for Danceability Chart
def create_danceability_chart(df):
    """Build the danceability histogram and return it as an HTML string.

    Tracks are counted per automatically chosen ``danceability`` bin and the
    bars are coloured by ``nominal_popularity``; stacking is disabled
    (``stack=None``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``danceability`` and ``nominal_popularity`` columns.

    Returns
    -------
    str
        The result of ``Chart.to_html()``. Note that the other chart helpers
        in this notebook return the Chart object itself, not HTML.
    """
    histogram = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            alt.X("danceability:Q", bin=True),
            alt.Y('count()', stack=None),
            alt.Color('nominal_popularity:N'),
        )
        .properties(title='Distribution of Danceability')
    )
    # NOTE(review): the notebook enables the "vegafusion" data transformer;
    # confirm that to_html() renders correctly under that setting.
    return histogram.to_html()

In [17]:
# Function for Duration Chart
# Track length converted from milliseconds to minutes, stored on the shared frame.
df['duration_min'] = df['duration_ms'] / 60000
def create_duration_chart(df):
    """Build a histogram of track duration in minutes.

    Tracks are counted per automatically chosen ``duration_min`` bin and the
    bars are coloured by ``nominal_popularity``; stacking is disabled
    (``stack=None``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``nominal_popularity`` and either ``duration_min`` or
        ``duration_ms``. When ``duration_min`` is absent it is derived from
        ``duration_ms`` on a copy, so the caller's frame is not modified.

    Returns
    -------
    alt.Chart
        The configured bar chart.
    """
    # Previously the function relied on a column that only exists if the
    # module-level conversion ran on this exact frame; derive it on demand.
    if isinstance(df, pd.DataFrame) and 'duration_min' not in df.columns:
        df = df.assign(duration_min=df['duration_ms'] / 60000)

    return (
        alt.Chart(df)
        .mark_bar()
        .encode(
            alt.X("duration_min:Q", bin=True),
            alt.Y('count()', stack=None),
            alt.Color('nominal_popularity:N'),
        )
        .properties(title='Distribution of Duration (min)')
    )

# Render through the helper so the displayed chart cannot drift from the
# definition used elsewhere.
chart = create_duration_chart(df)
chart

In [22]:
# Function for Key Chart
def create_key_chart(df):
    """Build a bar chart of track counts per musical key.

    ``key`` is encoded as an ordinal axis, bars are coloured by
    ``nominal_popularity``, and stacking is disabled (``stack=None``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``key`` and ``nominal_popularity`` columns.

    Returns
    -------
    alt.Chart
        The configured bar chart.
    """
    return (
        alt.Chart(df)
        .mark_bar()
        .encode(
            alt.X("key:O"),
            alt.Y('count()', stack=None),
            alt.Color('nominal_popularity:N'),
        )
        .properties(title='Distribution of Key')
    )

# Render through the helper instead of repeating the chart specification.
chart = create_key_chart(df)
chart

In [23]:
# Function for Mode Chart
def create_mode_chart(df):
    """Build a bar chart of track counts per mode.

    ``mode`` is encoded as a nominal axis, bars are coloured by
    ``nominal_popularity``, and stacking is disabled (``stack=None``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``mode`` and ``nominal_popularity`` columns.

    Returns
    -------
    alt.Chart
        The configured bar chart.
    """
    return (
        alt.Chart(df)
        .mark_bar()
        .encode(
            alt.X("mode:N"),
            alt.Y('count()', stack=None),
            alt.Color('nominal_popularity:N'),
        )
        .properties(title='Distribution of Mode')
    )

# Render through the helper instead of repeating the chart specification.
chart = create_mode_chart(df)
chart

In [26]:
# Track counts per nominal_popularity level, with each bar coloured by that
# same level.
# NOTE(review): the title reads "Population Level" while the plotted field is
# nominal_popularity - confirm whether "Popularity Level" was intended.
popularity_bars = alt.Chart(df).mark_bar()
chart = popularity_bars.encode(
    alt.X('nominal_popularity'),
    alt.Y('count()'),
    alt.Color('nominal_popularity'),
).properties(title='Population Level Distribution for All Songs')
chart