In [15]:
# Finding trends in the most-listened songs globally on Spotify over the past 5 years
# Every Thursday, Spotify releases a Weekly Chart of the Top 200 most-listened songs
# For this project, we will look at the global chart
# Historical charts are available at https://charts.spotify.com/charts/view/regional-global-weekly/latest and downloadable as CSV files
# For this project, we have downloaded one weekly chart every 4 weeks over the past 5 years, starting from 2018-05-24 (roughly 5 years ago)
# These CSV files will be uploaded to GitHub under the data folder
# Then the audio features of the top 100 songs in each chart are obtained from the Spotify API
# Then we can see if there has been a trend in audio features of the most popular songs over time (eg. have songs become more acoustic?)

In [16]:
# first I need to import the necessary packages and authenticate with the Spotify API

import os

import pandas as pd
import requests

AUTH_URL = 'https://accounts.spotify.com/api/token'

# Request an access token using the client-credentials grant.
# The client id and secret are read from the SPOTIFY_CLIENT_ID and
# SPOTIFY_CLIENT_SECRET environment variables, so they never have to be typed
# into (or remembered to be deleted from) the notebook before it goes on GitHub.
# A missing variable raises KeyError naming the variable that needs to be set.
auth_response = requests.post(
    AUTH_URL,
    data={
        'grant_type': 'client_credentials',
        'client_id': os.environ['SPOTIFY_CLIENT_ID'],
        'client_secret': os.environ['SPOTIFY_CLIENT_SECRET'],
    },
    timeout=30,  # do not let the cell hang forever if the server does not answer
)

# fail here with the HTTP error (e.g. bad credentials) instead of with a
# confusing KeyError on 'access_token' further down
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token; later cells send it as a Bearer token
access_token = auth_response_data['access_token']

In [17]:
# first we need to get the dates of the charts we want to analyse
from datetime import date, timedelta

# we want to look at the charts every 4 weeks ie. 28 days
delta = timedelta(days=28)

# we want to look at the charts for the past 5 years (starting 2018-05-24 until this week)
start_spotify_dt = date(2018, 5, 24)
end_spotify_dt = date(2023, 5, 25)

# getting a list of all the chart dates as ISO strings (YYYY-MM-DD), which is
# the date format used in the chart CSV file names
# a separate cursor is advanced so that start_spotify_dt still holds the real
# start date after this cell has run
chart_dates = []
current_chart_dt = start_spotify_dt
while current_chart_dt <= end_spotify_dt:
    chart_dates.append(current_chart_dt.isoformat())
    current_chart_dt += delta


In [18]:
# writing the code step-by-step first

# step 1: load the chart for the first date in chart_dates into a pandas dataframe
# NOTE(review): this is an absolute path on one machine - point it at the
# repository's data folder once the CSV files have been uploaded there
first_chart_path = f"/Users/jacenhutagaol/Desktop/LSE/DS105L/spotify_csv_files/regional-global-weekly-{chart_dates[0]}.csv"
df = pd.read_csv(first_chart_path)
df

Unnamed: 0,rank,uri,artist_names,track_name,source,peak_rank,previous_rank,weeks_on_chart,streams
0,1,spotify:track:7dt6x5M1jzdTEt8oCbisTK,Post Malone,Better Now,Republic Records,1,2,4,30755342
1,2,spotify:track:7ef4DlsgrMEH11cDZd32M6,"Calvin Harris, Dua Lipa",One Kiss (with Dua Lipa),Sony Music UK,2,3,7,27907952
2,3,spotify:track:0b9oOr2ZgvyQu88wzixux9,Childish Gambino,This Is America,Wolf+Rothstein/RCA Records,1,1,3,27688906
3,4,spotify:track:1cTZMwcBJT0Ka3UJPXOeeN,Drake,Nice For What,Cash Money/Drake LP6,1,4,7,26932041
4,5,spotify:track:2XW4DbS6NddZxRPm5rMCeY,Drake,God's Plan,Cash Money/Drake LP6,1,6,18,23808225
...,...,...,...,...,...,...,...,...,...
195,196,spotify:track:7EI6Iki24tBHAMxtb4xQN2,Rita Ora,Anywhere,Rita Sahatciu Ora,39,189,31,3976696
196,197,spotify:track:7AQim7LbvFVZJE3O8TYgf2,"XXXTENTACION, Trippie Redd",Fuck Love (feat. Trippie Redd),Bad Vibes Forever / EMPIRE,41,184,39,3970794
197,198,spotify:track:3ZhTT6yjZwpPph5MIJ53XY,"Lil Skies, Landon Cube",Red Roses (feat. Landon Cube),All We Got Ent.,133,-1,13,3970058
198,199,spotify:track:0KKkJNfGyhkQ5aFogxQAPU,Bruno Mars,That's What I Like,Atlantic Records,6,198,73,3946959


In [19]:
# step 2: turn the chart's Spotify URIs into bare track ids and keep the first 100
# (the rows are in chart order, so these are the top 100 tracks)
# the Spotify API call in the next step identifies tracks by these ids

uris = df['uri'].tolist()
# 'spotify:track:<id>' -> '<id>'
track_ids_all = [track_uri.replace('spotify:track:', '') for track_uri in uris]
track_ids_list = track_ids_all[:100]
track_ids_list

['7dt6x5M1jzdTEt8oCbisTK',
 '7ef4DlsgrMEH11cDZd32M6',
 '0b9oOr2ZgvyQu88wzixux9',
 '1cTZMwcBJT0Ka3UJPXOeeN',
 '2XW4DbS6NddZxRPm5rMCeY',
 '5SxkdsY1ufZzoq9iXceLw9',
 '3swc6WTsr7rl9DqQKQA55C',
 '39N9RPD9MRb5WmoLzNzPeA',
 '08bNPGLD8AhKpnnERrAc6G',
 '3V8UKqhEK5zBkBb6d6ub8i',
 '58q2HKrzhC3ozto2nDdN4z',
 '6vN77lE9LK6HP2DewaN6HZ',
 '514rhnksEwHUh6LxXsQ4Y9',
 '3aW0ds4A4tSQDIp75FqWTo',
 '2ARqIya5NAuvFVHSN3bL0m',
 '0e7ipj03S05BNilyu5bRzt',
 '45Egmo7icyopuzJN0oMEdk',
 '10Igtw8bSDyyFs7KIsKngZ',
 '63SevszngYpZOwf63o61K4',
 '4qKcDkK6siZ7Jp1Jb4m0aL',
 '1gm616Plq4ScqNi7TVkZ5N',
 '76cy1WJvNGJTj78UqeA5zr',
 '33IOhptvC2Qoy2UhjiHXLV',
 '5CLGzJsGqhCEECcpnFQA8x',
 '3fpVWegR6YOS1Yk5HSMYIq',
 '55S2PQgSMYAhgoTCcGCDfw',
 '6Hgh47WXVKtXN5zGOu0hjI',
 '4F1yvJfQ7gJkrcgFJQDjOr',
 '79jX8RM5CgPqDKdGEKNW9K',
 '6jA8JUuPCGYjFcgw0AoM5T',
 '6xTU6B6nFwKKTSZ9ySXS80',
 '3ee8Jmje8o58CHK66QrVC2',
 '7iDa6hUg2VgEL1o1HjmfBn',
 '6kPJZM97LwdG9QIsT7khp6',
 '6n4U3TlzUGhdSFbUUhTvLP',
 '3Wf2YGdYT8xVdNsQSoRKk9',
 '6IaieqiCVvsNvEt6Y7yOFa',
 

In [21]:
# the access token is sent as a Bearer token in the Authorization header
Authorization = 'Bearer ' + access_token

# lastly we simply use the Spotify API to get the audio features of these songs as a dataframe
# the track ids are passed as one comma-separated `ids` query parameter
track_ids = ','.join(track_ids_list)
audio_features_url = 'https://api.spotify.com/v1/audio-features?ids=' + track_ids
# the ids belong in the query string only, so the only header sent is Authorization
response = requests.get(audio_features_url, headers={'Authorization': Authorization}, timeout=30)
# stop here on an HTTP error (expired token, rate limit, ...) instead of
# failing on a missing 'audio_features' key below
response.raise_for_status()
# NOTE: from here on `df` holds the audio features, no longer the chart loaded earlier
df = pd.DataFrame(data=response.json()['audio_features'])
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.680,0.578,10,-5.804,1,0.0400,0.3310,0.000000,0.1350,0.341,145.038,audio_features,7dt6x5M1jzdTEt8oCbisTK,spotify:track:7dt6x5M1jzdTEt8oCbisTK,https://api.spotify.com/v1/tracks/7dt6x5M1jzdT...,https://api.spotify.com/v1/audio-analysis/7dt6...,231267,4
1,0.791,0.862,9,-3.240,0,0.1100,0.0370,0.000022,0.0814,0.592,123.994,audio_features,7ef4DlsgrMEH11cDZd32M6,spotify:track:7ef4DlsgrMEH11cDZd32M6,https://api.spotify.com/v1/tracks/7ef4DlsgrMEH...,https://api.spotify.com/v1/audio-analysis/7ef4...,214847,4
2,0.854,0.463,5,-6.159,1,0.1370,0.1170,0.000000,0.3540,0.549,120.024,audio_features,0b9oOr2ZgvyQu88wzixux9,spotify:track:0b9oOr2ZgvyQu88wzixux9,https://api.spotify.com/v1/tracks/0b9oOr2ZgvyQ...,https://api.spotify.com/v1/audio-analysis/0b9o...,225773,4
3,0.567,0.913,8,-6.471,1,0.0736,0.0934,0.000124,0.1140,0.792,93.350,audio_features,1cTZMwcBJT0Ka3UJPXOeeN,spotify:track:1cTZMwcBJT0Ka3UJPXOeeN,https://api.spotify.com/v1/tracks/1cTZMwcBJT0K...,https://api.spotify.com/v1/audio-analysis/1cTZ...,210926,4
4,0.758,0.448,7,-9.441,1,0.1030,0.0309,0.000087,0.5580,0.373,77.176,audio_features,2XW4DbS6NddZxRPm5rMCeY,spotify:track:2XW4DbS6NddZxRPm5rMCeY,https://api.spotify.com/v1/tracks/2XW4DbS6NddZ...,https://api.spotify.com/v1/audio-analysis/2XW4...,198960,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.661,0.725,6,-4.859,1,0.0563,0.0253,0.000000,0.1230,0.605,119.927,audio_features,18W92Zm1KjLCbUIszOhpkD,spotify:track:18W92Zm1KjLCbUIszOhpkD,https://api.spotify.com/v1/tracks/18W92Zm1KjLC...,https://api.spotify.com/v1/audio-analysis/18W9...,197939,4
96,0.522,0.385,0,-7.355,1,0.0288,0.5360,0.000000,0.1350,0.236,89.792,audio_features,2RttW7RAu5nOAfq6YFvApB,spotify:track:2RttW7RAu5nOAfq6YFvApB,https://api.spotify.com/v1/tracks/2RttW7RAu5nO...,https://api.spotify.com/v1/audio-analysis/2Rtt...,207520,4
97,0.722,0.738,9,-6.073,0,0.2470,0.3280,0.000015,0.1980,0.748,198.075,audio_features,5cepAtqnEQ6yVG6088zMMu,spotify:track:5cepAtqnEQ6yVG6088zMMu,https://api.spotify.com/v1/tracks/5cepAtqnEQ6y...,https://api.spotify.com/v1/audio-analysis/5cep...,184720,4
98,0.643,0.766,6,-4.788,0,0.3840,0.3070,0.000014,0.1070,0.463,97.092,audio_features,2gTYVoQCUh0QNUaFix01ld,spotify:track:2gTYVoQCUh0QNUaFix01ld,https://api.spotify.com/v1/tracks/2gTYVoQCUh0Q...,https://api.spotify.com/v1/audio-analysis/2gTY...,191606,4


In [27]:
# this is just all of the above steps combined so we can iterate over all the charts
# the result will be a list of all the dataframes showing the audio features of the top 100 songs every 4 weeks for the past 5 years

# the access token is sent as a Bearer token in the Authorization header
Authorization = 'Bearer ' + access_token

# NOTE(review): this is an absolute path on one machine - point it at the
# repository's data folder once the CSV files have been uploaded there
chart_csv_prefix = "/Users/jacenhutagaol/Desktop/LSE/DS105L/spotify_csv_files/regional-global-weekly-"

# list_of_dfs[k] holds the audio features for the chart dated chart_dates[k]
list_of_dfs = []
for chart_date in chart_dates:
    # load this date's chart and reduce it to the ids of its first 100 tracks
    chart = pd.read_csv(chart_csv_prefix + chart_date + ".csv")
    uris = chart['uri'].values.tolist()
    track_ids_all = [uri.replace('spotify:track:','') for uri in uris]
    track_ids_list = track_ids_all[:100]

    # one request per chart: all ids go in a single comma-separated query parameter
    track_ids = ','.join(track_ids_list)
    audio_features_url = 'https://api.spotify.com/v1/audio-features?ids=' + track_ids
    # the ids belong in the query string only, so the only header sent is Authorization
    response = requests.get(audio_features_url, headers={'Authorization': Authorization}, timeout=30)
    # stop at the first failing request (expired token, rate limit, ...) rather
    # than failing on a missing 'audio_features' key
    response.raise_for_status()
    list_of_dfs.append(pd.DataFrame(data = response.json()['audio_features']))