In [1]:
# Import Dependencies
import os
import dotenv
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from helpers import get_artist_tracklist
import warnings
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Read the Spotify API credentials from the environment (load_dotenv() also picks
# them up from a local .env file when one exists).
load_dotenv()
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
# "SPOTIPY_SECRET" is the name this notebook has always used; "SPOTIPY_CLIENT_SECRET"
# is the name spotipy itself documents, so accept either one.
client_secret = os.environ.get("SPOTIPY_SECRET") or os.environ.get("SPOTIPY_CLIENT_SECRET")

# Fail fast with an actionable message rather than an opaque authentication error
# raised further down the notebook.
if not client_id or not client_secret:
    raise RuntimeError(
        "Spotify credentials not found: set SPOTIPY_CLIENT_ID and SPOTIPY_SECRET "
        "(or SPOTIPY_CLIENT_SECRET) in the environment or in a .env file."
    )

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)  # spotify object to access API

In [3]:
# Select an Artist
# `df` is the DataFrame every later cell analyses.
# NOTE(review): get_artist_tracklist lives in the local `helpers` module; it presumably
# returns one row per track of the named artist — confirm against helpers.py.
artist = 'Kendrick Lamar'
df = get_artist_tracklist(artist, sp)

In [4]:
# Descriptive Statistics
print('Data Frame Shape:')
print(f'Rows: {df.shape[0]}, Cols: {df.shape[1]}')
print()
print('Data Frame Info:')
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() — that would append a stray "None" line to the output.
df.info()
print()
print('Data Frame Column Descriptive Statistics:')
# Last expression of the cell: rendered as a rich table by the notebook.
df.describe()

Data Frame Shape:
Rows: 129, Cols: 19

Data Frame Info:
<class 'pandas.core.frame.DataFrame'>
Index: 129 entries, 0 to 141
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   track             129 non-null    object 
 1   album_name        129 non-null    object 
 2   release_year      129 non-null    object 
 3   album_uri         129 non-null    object 
 4   song_uri          129 non-null    object 
 5   popularity        129 non-null    int64  
 6   acousticness      129 non-null    float64
 7   danceability      129 non-null    float64
 8   energy            129 non-null    float64
 9   instrumentalness  129 non-null    float64
 10  liveness          129 non-null    float64
 11  loudness          129 non-null    float64
 12  speechiness       129 non-null    float64
 13  tempo             129 non-null    float64
 14  time_signature    129 non-null    int64  
 15  mode              129 non-null    int64 

Unnamed: 0,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mode,key,valence,duration
count,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0
mean,60.015504,0.288491,0.628667,0.643822,0.002461,0.251525,-7.73631,0.273668,120.562279,3.96124,0.527132,4.813953,0.466422,256560.155039
std,10.568958,0.249571,0.130164,0.152829,0.013107,0.17544,2.776507,0.14489,31.976861,0.403173,0.50121,3.620185,0.18199,94116.719346
min,39.0,0.000282,0.301,0.128,0.0,0.0746,-21.49,0.0358,67.9,1.0,0.0,0.0,0.093,75535.0
25%,52.0,0.077,0.541,0.535,0.0,0.121,-8.766,0.148,94.241,4.0,0.0,1.0,0.324,211440.0
50%,60.0,0.225,0.624,0.661,0.0,0.185,-7.176,0.281,117.125,4.0,1.0,5.0,0.483,247747.0
75%,66.0,0.442,0.716,0.766,1.7e-05,0.303,-5.879,0.369,140.07,4.0,1.0,8.0,0.59,283642.0
max,87.0,0.924,0.959,0.907,0.0859,0.72,-3.343,0.75,192.157,5.0,1.0,11.0,0.847,727107.0


In [5]:
# Box Plots
# A single list of columns drives both the subplot titles and the traces, so a title
# can never describe something other than the data drawn beneath it. Every box shows
# the raw column values (no log transform), and the titles now say so.
box_columns = ['popularity', 'danceability', 'energy',
               'loudness', 'speechiness', 'acousticness',
               'liveness', 'valence', 'tempo']
n_grid_cols = 3

fig = make_subplots(rows=3, cols=n_grid_cols,
                    subplot_titles=[f'<i>{column}' for column in box_columns])

# Fill the 3x3 grid row by row; plotly rows/cols are 1-based.
for position, column in enumerate(box_columns):
    grid_row, grid_col = divmod(position, n_grid_cols)
    fig.add_trace(go.Box(y=df[column].values, name=column), grid_row + 1, grid_col + 1)

fig.update_yaxes(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey'
)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(height=900,width=900,title_text='<b>Spotify Metrics Box Plots')

In [6]:
# Distribution plots
# Each entry pairs a trace name with the values to bin. Speechiness and liveness are
# binned on a natural-log scale, which is what their labels state.
histogram_specs = [
    ('popularity', df['popularity']),
    ('danceability', df['danceability']),
    ('energy', df['energy']),
    ('loudness', df['loudness']),
    ('log(speechiness)', np.log(df['speechiness'])),
    ('acousticness', df['acousticness']),
    ('log(liveness)', np.log(df['liveness'])),
    ('valence', df['valence']),
    ('tempo', df['tempo']),
]

fig = make_subplots(
    rows=3,
    cols=3,
    subplot_titles=tuple('<i>' + label for label, _ in histogram_specs),
)

# Walk the 3x3 grid row by row; plotly rows/cols are 1-based.
for slot, (label, values) in enumerate(histogram_specs):
    fig.add_trace(go.Histogram(x=values, name=label), row=slot // 3 + 1, col=slot % 3 + 1)

# Same frame/tick/grid styling on both axes of every subplot.
axis_style = dict(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey',
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

# Last expression of the cell: the returned figure is what the notebook renders.
fig.update_layout(height=900, width=900, title_text='<b>Spotify Metrics Distribution Plots')

In [7]:
# Audio Features Correlation Matrix
audio_features = df[['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence', 'popularity']]
audio_correlation = audio_features.corr().round(2)

# Blank out the diagonal and the upper triangle: a correlation matrix is symmetric,
# so those cells only repeat what the lower triangle already shows.
mask = np.triu(np.ones(audio_correlation.shape, dtype=bool))
df_corr_viz = audio_correlation.mask(mask)

# `aspect` accepts 'equal', 'auto' or None. 'auto' lets the heatmap fill the figure.
# NOTE(review): switch to 'equal' if square cells are preferred.
fig = px.imshow(df_corr_viz, text_auto=True, aspect='auto')

# Same frame/tick/grid styling on both axes.
axis_style = dict(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey',
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

fig.show()

In [10]:
# Number of tracks released per year vs. average track popularity per year,
# drawn as two lines sharing the x-axis with separate y-axes.

# This cell needs a `year` column. The frame loaded above exposes `release_year`, so
# derive `year` locally when it is missing instead of depending on kernel state left
# behind by some other cell (which breaks Restart & Run All).
# NOTE(review): assumes `release_year` starts with a four-digit year (e.g. '2017' or
# '2017-04-14') — confirm against helpers.get_artist_tracklist.
if 'year' in df.columns:
    tracks_by_year = df
else:
    tracks_by_year = df.assign(year=df['release_year'].astype(str).str[:4].astype(int))

albums = tracks_by_year[['year', 'album_name']].drop_duplicates().reset_index(drop=True)

# Track count per year; groupby already returns the years in ascending order.
num_tracks = tracks_by_year.groupby('year')['track'].count().reset_index()

# Exactly one row per year: album names are collapsed into a single label so that a
# year with several albums does not produce duplicate points on the popularity line.
album_labels = albums.groupby('year')['album_name'].agg(', '.join).reset_index()
yearly_avg_popularity = (
    tracks_by_year.groupby('year')['popularity'].mean().reset_index()
    .merge(album_labels, on='year')
)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=num_tracks['year'], y=num_tracks['track'], name="No. Tracks"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(
        x=yearly_avg_popularity['year'],
        y=yearly_avg_popularity['popularity'],
        name="Popularity",
        hovertext=yearly_avg_popularity['album_name'],  # album(s) released that year
    ),
    secondary_y=True,
)

# Figure title and hover behaviour
fig.update_layout(
    title_text="Average Popularity by Year vs Number of Releases by Year",
    hovermode='x unified',
)

# Same frame/tick/grid styling on the x-axis and on both y-axes.
axis_style = dict(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
    gridcolor='lightgrey',
)

# Set x-axis title
fig.update_xaxes(title_text="Year", **axis_style)

# Set y-axes titles
fig.update_yaxes(**axis_style)
fig.update_yaxes(title_text="<b>Number of Tracks Released</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Average Popularity</b>", secondary_y=True)

print(albums)
fig.show()

   year                                         album_name
0  2022                      Mr. Morale & The Big Steppers
1  2018  Black Panther The Album Music From And Inspire...
2  2017                          DAMN. COLLECTORS EDITION.
3  2017                                              DAMN.
4  2016                               untitled unmastered.
5  2015                                To Pimp A Butterfly
6  2012                    good kid, m.A.A.d city (Deluxe)
7  2011                                         Section.80
8  2010                                   Overly Dedicated


In [11]:
# Top Albums
# Mean track popularity per album, keeping the ten highest averages.
mean_popularity = df.groupby('album_name')['popularity'].mean()
ten_highest = mean_popularity.sort_values(ascending=False).head(10)
# Reverse the ranking so the highest-rated album becomes the last row.
top_albums = ten_highest.iloc[::-1].reset_index()

popularity_bars = go.Bar(
    x=top_albums['popularity'],
    y=top_albums['album_name'],
    orientation='h',
)
fig = go.Figure(data=[popularity_bars])

# Axis labels, base font and the (larger) title font in a single layout update.
fig.update_layout(
    title=dict(text="Most Popular Albums", font=dict(size=32)),
    xaxis_title="Popularity",
    yaxis_title="Album Names",
    font=dict(family="Courier New, monospace", size=10, color="Black"),
)

fig.show()


In [12]:
# Add a `duration_min` column: `duration` (treated as milliseconds) divided by 60,000.
df['duration_min'] = df['duration'].div(1000 * 60)

# Mean track length per album, longest average first.
mean_minutes = df.groupby('album_name')['duration_min'].mean()
avg_duration_by_album = mean_minutes.sort_values(ascending=False).reset_index()

duration_bars = go.Bar(
    x=avg_duration_by_album['duration_min'],
    y=avg_duration_by_album['album_name'],
    orientation='h',
)
fig2 = go.Figure(data=[duration_bars])

# Axis labels, base font and the (larger) title font in a single layout update.
fig2.update_layout(
    title=dict(text="Average Track Duration by Album", font=dict(size=32)),
    xaxis_title="Duration in Minutes",
    yaxis_title="Album Name",
    font=dict(family="Courier New, monospace", size=10, color="Black"),
)

fig2.show()

In [13]:
# Most and least popular songs
def _plot_popularity_ranking(tracks, ascending, title, n_tracks=15):
    """Build a line chart of the tracks at one end of the popularity ranking.

    Args:
        tracks: DataFrame with at least `track`, `popularity` and `album_name` columns.
        ascending: False ranks the most popular tracks first, True the least popular.
        title: Figure title (plotly pseudo-HTML is allowed).
        n_tracks: How many tracks from the top of the sorted frame to draw.

    Returns:
        The styled plotly figure; the caller decides when to show it.
    """
    ranked = tracks.sort_values(by='popularity', ascending=ascending).head(n_tracks)
    ranking_fig = px.line(ranked, x='track', y='popularity',
                          hover_data=['album_name'], color_discrete_sequence=['green'],
                          markers=True, title=title)

    # Same frame/tick/grid styling on both axes.
    axis_style = dict(
        mirror=True,
        ticks='outside',
        showline=True,
        linecolor='black',
        gridcolor='lightgrey',
    )
    ranking_fig.update_xaxes(**axis_style)
    ranking_fig.update_yaxes(**axis_style)
    return ranking_fig


fig = _plot_popularity_ranking(df, ascending=False, title='<b> Most Popular Songs')
fig.show()

fig = _plot_popularity_ranking(df, ascending=True, title='<b> Least Popular Songs')
fig.show()