## Gathering Data from Spotify Charts and Using the Spotify API

Using data collected from Spotify Charts and the Spotify API, we can explore the attributes of popular songs streamed on Spotify. For example, what proportion of top 10 songs are classified as explicit? How has this changed over the course of a year?

In [1]:
# Import libraries
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
import pandas as pd
from dotenv import load_dotenv
import os


In [2]:
# Set up the Spotify API client. load_dotenv() is called first so the
# credentials can then be looked up in the process environment.
load_dotenv()

# Look up the credential values by their environment variable names
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI')

# Only the client id and secret are handed to the credentials manager
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET,
    )
)

In [3]:
# Directory containing the weekly chart CSV files
csv_directory = 'data'

# Collect every .csv file in the directory. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the row order of the
# combined dataframe reproducible from run to run.
csv_files = sorted(
    file for file in os.listdir(csv_directory) if file.endswith('.csv')
)

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" error further down.
if not csv_files:
    raise ValueError(f'No CSV files found in directory {csv_directory!r}')

# One dataframe per weekly chart file
dataframes = []

for file in csv_files:
    file_path = os.path.join(csv_directory, file)

    # The week is everything after the third hyphen of the file name, minus
    # the extension. The bare file name is parsed (not the joined path) so a
    # hyphen in the directory name cannot shift the split.
    week_date = '-'.join(file.split('-')[3:]).split('.')[0]

    # Add the week column in pandas rather than rewriting the CSV as text.
    # The old approach wrote a temporary "new <file>.csv" into this same
    # directory; if the cell was interrupted, that leftover file was picked up
    # as input on the next run. It also re-encoded the data with the locale's
    # default encoding and tagged blank lines as if they were data rows.
    df = pd.read_csv(file_path).assign(week=week_date)
    dataframes.append(df)

# Concatenate all weekly dataframes into one and drop the 'source' column
charts = pd.concat(dataframes, ignore_index=True).drop(columns=['source'])

# Display the combined dataframe
charts

Unnamed: 0,rank,uri,artist_names,track_name,peak_rank,previous_rank,weeks_on_chart,streams,week
0,1,spotify:track:7K3BhSpAxZBznislvUMVtn,Morgan Wallen,Last Night,1,1,21,10241241,2023-06-22
1,2,spotify:track:3qQbCzHBycnDpGskqOWY0E,"Eslabon Armado, Peso Pluma",Ella Baila Sola,1,2,14,9427182,2023-06-22
2,3,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr,Taylor Swift,Cruel Summer,3,13,32,6268357,2023-06-22
3,4,spotify:track:1Lo0QY9cvc8sUB2vnIOxDT,Luke Combs,Fast Car,4,5,13,6152329,2023-06-22
4,5,spotify:track:7KA4W4McWYRpgf0fWsJZWB,"Tyler, The Creator, Kali Uchis",See You Again (feat. Kali Uchis),5,7,103,5898838,2023-06-22
...,...,...,...,...,...,...,...,...,...
10395,196,spotify:track:58ge6dfP91o9oXMzq3XkIS,Arctic Monkeys,505,18,192,178,2575446,2024-06-13
10396,197,spotify:track:2ZWlPOoWh0626oTaHrnl2a,Frank Ocean,Ivy,169,183,9,2574631,2024-06-13
10397,198,spotify:track:53IRnAWx13PYmoVYtemUBS,Chappell Roan,Femininomenon,198,-1,1,2572362,2024-06-13
10398,199,spotify:track:4obHzpwGrjoTuZh2DItEMZ,Morgan Wallen,7 Summers,3,-1,68,2571426,2024-06-13


In [4]:
# Number of track URIs sent per API request (50 tracks at a time)
step_size = 50
tracks = []

# Get track info from the Spotify API one batch at a time.
# range() also yields the final, shorter batch when the number of chart rows
# is not a multiple of step_size. The previous `while end <= n_rows` loop
# skipped that remainder, which left fewer track records than chart rows.
for start in range(0, charts.shape[0], step_size):
    end = start + step_size
    # .iloc makes the positional slicing explicit; slicing past the last row
    # simply returns the rows that exist.
    uri_list = charts['uri'].iloc[start:end].tolist()
    tracks.append(sp.tracks(uri_list))

In [5]:
# Flatten the batched responses: every value of every response dictionary is
# iterated, and each item found is collected into one flat list.
track_list = [
    track
    for response in tracks
    for batch in response.values()
    for track in batch
]

In [6]:
# Extract the explicit value from each track and store in a list.
# NOTE(review): the API may return a null (None) entry for an ID it cannot
# resolve -- confirm against the Get Several Tracks reference. Such an entry
# is recorded as a missing value instead of raising a TypeError, so the list
# stays aligned one-to-one with the chart rows.
exp_list = [
    track['explicit'] if track is not None else None
    for track in track_list
]

In [7]:
# Work on a copy of the charts table and attach the explicit values as a
# new column, leaving `charts` itself unchanged
charts_info = charts.copy()
charts_info['explicit'] = exp_list
charts_info

Unnamed: 0,rank,uri,artist_names,track_name,peak_rank,previous_rank,weeks_on_chart,streams,week,explicit
0,1,spotify:track:7K3BhSpAxZBznislvUMVtn,Morgan Wallen,Last Night,1,1,21,10241241,2023-06-22,True
1,2,spotify:track:3qQbCzHBycnDpGskqOWY0E,"Eslabon Armado, Peso Pluma",Ella Baila Sola,1,2,14,9427182,2023-06-22,False
2,3,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr,Taylor Swift,Cruel Summer,3,13,32,6268357,2023-06-22,False
3,4,spotify:track:1Lo0QY9cvc8sUB2vnIOxDT,Luke Combs,Fast Car,4,5,13,6152329,2023-06-22,False
4,5,spotify:track:7KA4W4McWYRpgf0fWsJZWB,"Tyler, The Creator, Kali Uchis",See You Again (feat. Kali Uchis),5,7,103,5898838,2023-06-22,True
...,...,...,...,...,...,...,...,...,...,...
10395,196,spotify:track:58ge6dfP91o9oXMzq3XkIS,Arctic Monkeys,505,18,192,178,2575446,2024-06-13,False
10396,197,spotify:track:2ZWlPOoWh0626oTaHrnl2a,Frank Ocean,Ivy,169,183,9,2574631,2024-06-13,True
10397,198,spotify:track:53IRnAWx13PYmoVYtemUBS,Chappell Roan,Femininomenon,198,-1,1,2572362,2024-06-13,True
10398,199,spotify:track:4obHzpwGrjoTuZh2DItEMZ,Morgan Wallen,7 Summers,3,-1,68,2571426,2024-06-13,False


In [8]:
# NOTE: Everything in this cell is commented out and does not run.
# It sketches an alternative way to build `charts_info`: one sp.track()
# request per chart row (via charts['uri'].apply), instead of the batched
# sp.tracks() requests used in the cells above.

# # Define a function to get the explicit value for each song
# def get_explicit(track_uri):
#     track_info = sp.track(track_uri)
#     explicit = track_info['explicit']
    
#     # Alternatively, the following code creates a dataframe to explore other variables in track_info
#     # track_info = pd.DataFrame(track_info.items()).set_index(0)
#     # artists_lst = track_info.loc['artists'].to_list()[0]
#     # artists = ' , '.join([artists_lst[idx]['name'] for idx in \
#     #     range(len(artists_lst))])
#     # track_info.loc['artists'] = artists
#     # track_info = track_info.T.reset_index(drop=True)
#     # explicit = track_info['explicit'].iloc[0]
    
#     return explicit

# # Apply the get_explicit function to create a new column for the explicit variable
# charts_info = charts.assign(explicit = charts['uri'].apply(get_explicit))
# charts_info

In [9]:
# Count the chart rows for each explicit value (non-null 'rank' entries per
# group), keeping only a single 'Count' column
charts_count = charts_info.groupby('explicit').agg(Count=('rank', 'count'))
charts_count

Unnamed: 0_level_0,Count
explicit,Unnamed: 1_level_1
False,5509
True,4891


In [10]:
# Count the chart rows for each (week, explicit) pair (non-null 'rank'
# entries per group), keeping only a single 'Count' column
weekly_charts = (
    charts_info
    .groupby(['week', 'explicit'])
    .agg(Count=('rank', 'count'))
)
weekly_charts

Unnamed: 0_level_0,Unnamed: 1_level_0,Count
week,explicit,Unnamed: 2_level_1
2023-06-22,False,106
2023-06-22,True,94
2023-06-29,False,98
2023-06-29,True,102
2023-07-06,False,106
...,...,...
2024-05-30,True,84
2024-06-06,False,117
2024-06-06,True,83
2024-06-13,False,117
