In [1]:
import os
import dotenv
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials #To access authorised Spotify data
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from helpers import get_artist_tracklist
import warnings
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook
pd.set_option('mode.chained_assignment', None)

In [2]:
# Load variables from a local .env file, then read the Spotify API credentials
# from the process environment.
load_dotenv()
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
# NOTE(review): spotipy's own default variable is SPOTIPY_CLIENT_SECRET; this
# notebook reads SPOTIPY_SECRET -- confirm the .env file uses this name.
client_secret = os.environ.get("SPOTIPY_SECRET")

# Client-credentials auth manager, and the Spotify client that wraps it
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [3]:
# Name of the artist whose tracks are analysed in this notebook
artist = 'The Weeknd'

In [4]:
# Fetch the track table for `artist` through the Spotify client `sp`.
# NOTE(review): get_artist_tracklist comes from the local `helpers` module; the
# cells below use the result as a pandas DataFrame (df[...], df.groupby) --
# confirm the returned columns against that helper.
df = get_artist_tracklist(artist, sp)

In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) of df's numeric columns
df.describe()

Unnamed: 0,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mode,key,valence,duration
count,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0
mean,55.361314,0.257387,0.535062,0.586287,0.074193,0.237558,-8.470507,0.072098,119.893354,3.894161,0.332117,5.262774,0.327715,246281.857664
std,11.59279,0.263241,0.176083,0.177208,0.230375,0.199311,3.917383,0.058454,28.333157,0.382448,0.471834,3.595418,0.213974,76398.650533
min,32.0,4e-06,0.0651,0.00973,0.0,0.0465,-32.65,0.0233,59.983,1.0,0.0,0.0,0.0293,95814.0
25%,48.0,0.03395,0.43575,0.49625,0.0,0.09505,-9.28875,0.037,100.023,4.0,0.0,1.0,0.17025,199895.25
50%,53.0,0.1505,0.5705,0.592,3.2e-05,0.1555,-7.566,0.0501,120.0305,4.0,0.0,6.0,0.266,232881.0
75%,61.0,0.4305,0.67025,0.719,0.000857,0.33,-6.25425,0.078225,134.951,4.0,1.0,8.0,0.46375,280291.25
max,92.0,0.974,0.863,0.959,0.994,0.976,-4.008,0.375,193.989,5.0,1.0,11.0,0.907,486426.0


In [6]:
# Box plots: one subplot per metric on a 3x3 grid.
# Each entry is (column, use_log). The two log-flagged metrics are plotted on a
# log scale so the data agrees with their 'log(...)' subplot titles and with
# the histogram cell, which log-transforms the same two columns.
box_metrics = [
    ('popularity', False), ('danceability', False), ('energy', False),
    ('loudness', False), ('speechiness', True), ('acousticness', False),
    ('liveness', True), ('valence', False), ('tempo', False),
]

fig = make_subplots(
    rows=3,
    cols=3,
    subplot_titles=[
        f'<i>log({metric})' if use_log else f'<i>{metric}'
        for metric, use_log in box_metrics
    ],
)

for position, (metric, use_log) in enumerate(box_metrics):
    values = df[metric].values
    trace_name = metric
    if use_log:
        # NOTE: np.log returns -inf for a value of exactly 0
        values = np.log(values)
        trace_name = f'log({metric})'
    # divmod turns the flat position into a 0-based (row, col) pair;
    # plotly's subplot grid is 1-based, hence the +1.
    grid_row, grid_col = divmod(position, 3)
    fig.add_trace(go.Box(y=values, name=trace_name), grid_row + 1, grid_col + 1)

fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(height=900,width=900,title_text='<b>Spotify Metrics BoxPlots')

In [7]:
# Histograms of the nine metrics on a 3x3 grid (row-major order).
# Each entry is (trace name, data); speechiness and liveness are
# log-transformed, as their names and subplot titles state.
hist_traces = [
    ('popularity', df['popularity']),
    ('danceability', df['danceability']),
    ('energy', df['energy']),
    ('loudness', df['loudness']),
    ('log(speechiness)', np.log(df['speechiness'])),
    ('acousticness', df['acousticness']),
    ('log(liveness)', np.log(df['liveness'])),
    ('valence', df['valence']),
    ('tempo', df['tempo']),
]

fig = make_subplots(
    rows=3,
    cols=3,
    subplot_titles=(
        '<i>popularity', '<i>danceability', '<i>energy',
        '<i>loudness', '<i>log(speechiness)', '<i>acousticness',
        '<i>log(liveness)', '<i>valence', '<i>tempo',
    ),
)

for slot, (trace_name, series) in enumerate(hist_traces):
    # slot -> 1-based (row, col) position in the 3x3 grid
    fig.add_trace(
        go.Histogram(x=series, name=trace_name),
        row=slot // 3 + 1,
        col=slot % 3 + 1,
    )

# Same frame/tick/grid styling on both axes of every subplot
for style_axes in (fig.update_xaxes, fig.update_yaxes):
    style_axes(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
    )

# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(height=900, width=900, title_text='<b>Spotify Metrics')

In [8]:
# Correlation heatmap of the audio features.
audio_features = df[['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']]
# Pairwise correlation matrix, rounded to two decimals for the cell labels
audio_correlation = audio_features.corr().round(2)

# Blank out the upper triangle (diagonal included) so each pair of features
# appears only once in the heatmap.
mask = np.zeros_like(audio_correlation, dtype=bool)
mask[np.triu_indices_from(mask)] = True
df_corr_viz = audio_correlation.mask(mask)

# `aspect` takes 'equal', 'auto' or None; 'auto' lets the cells stretch to
# fill the figure instead of being forced square.
fig = px.imshow(df_corr_viz, text_auto=True, aspect='auto')

fig.update_xaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

fig.show()

In [9]:
# Mean track popularity per release year.
# Side effect on df: `release_year` is converted to datetime in place and a
# `year` column holding its calendar year is added.
df["release_year"] = pd.to_datetime(df["release_year"])
df["year"] = df["release_year"].dt.year
yearly_avg_popularity = (
    df.groupby('year')['popularity']
    .mean()
    .reset_index()
)

fig = px.line(
    yearly_avg_popularity,
    x='year',
    y='popularity',
    hover_data=['popularity'],
    color_discrete_sequence=['green'],
    markers=True,
    title='<b> Popularity of Songs By Year',
)

# Same frame/tick/grid styling on both axes
for style_axes in (fig.update_xaxes, fig.update_yaxes):
    style_axes(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
    )

fig.show()

In [10]:
# Ten albums with the highest mean track popularity
top_albums = (
    df.groupby('album_name')['popularity']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
# Flip to ascending order and turn the index back into an `album_name` column;
# presumably so the most popular album ends up at the top of the horizontal
# bar chart -- confirm against the rendered figure.
top_albums = top_albums[::-1].reset_index()

fig = go.Figure(
    go.Bar(
        x=top_albums['popularity'],
        y=top_albums['album_name'],
        orientation='h',
    )
)

# Axis titles, base font and the (larger) figure title in a single layout update
fig.update_layout(
    xaxis_title="Popularity",
    yaxis_title="Album Names",
    font=dict(family="Courier New, monospace", size=10, color="Black"),
    title=dict(text="Most Popular Albums", font=dict(size=32)),
)

fig.show()


In [11]:
# Add a `duration_min` column to df: track duration converted from
# milliseconds to minutes (1000 ms per second * 60 s per minute).
df['duration_min'] = df['duration'] / (1000 * 60)

# Mean track duration (minutes) per album, longest first
avg_duration_by_album = (
    df.groupby('album_name')['duration_min']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

fig2 = go.Figure(
    go.Bar(
        x=avg_duration_by_album['duration_min'],
        y=avg_duration_by_album['album_name'],
        orientation='h',
    )
)

# Axis titles, base font and the (larger) figure title in a single layout update
fig2.update_layout(
    xaxis_title="Duration in Minutes",
    yaxis_title="Album Name",
    font=dict(family="Courier New, monospace", size=10, color="Black"),
    title=dict(text="Average Track Duration by Album", font=dict(size=32)),
)

fig2.show()

In [12]:
# Number of tracks released per year.
# Side effect on df: `release_year` is converted to datetime in place and a
# `year` column holding its calendar year is (re)computed, so this cell does
# not depend on an earlier cell having created it.
df["release_year"] = pd.to_datetime(df["release_year"])
df["year"] = df["release_year"].dt.year

# Count only the plotted column and order chronologically for the x axis
tracks_per_year = (
    df.groupby('year', as_index=False)['track']
    .count()
    .sort_values(by='year')
)

# `labels` is keyed by column name: the y column is 'track', so that is the
# key that makes the axis and hover text read 'Total songs'.
fig = px.area(
    tracks_per_year,
    x='year',
    y='track',
    markers=True,
    labels={'track': 'Total songs'},
    color_discrete_sequence=['green'],
    title='<b>Number of Tracks Released by Year',
)

fig.update_xaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(hovermode='x',title_x=0.5)

In [13]:
# Two line charts over the 15 tracks at either end of the popularity ranking.
# Each pair is (ascending flag for the popularity sort, chart title):
# descending order yields the most popular tracks, ascending the least popular.
for sort_ascending, chart_title in (
    (False, '<b> Most Popular Songs'),
    (True, '<b> Least Popular Songs'),
):
    fig = px.line(
        df.sort_values(by='popularity', ascending=sort_ascending).head(15),
        x='track',
        y='popularity',
        hover_data=['album_name'],
        color_discrete_sequence=['green'],
        markers=True,
        title=chart_title,
    )

    # Same frame/tick/grid styling on both axes
    for style_axes in (fig.update_xaxes, fig.update_yaxes):
        style_axes(
            mirror=True,
            ticks='outside',
            showline=True,
            linecolor='black',
            gridcolor='lightgrey',
        )

    fig.show()