In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import pandas as pd
from dotenv import load_dotenv
import os

In [4]:
# Load environment variables from the .env file so that the os.getenv()
# lookups for the Spotify credentials further down can find them
load_dotenv()

True

In [2]:
# Pull the Spotify app credentials out of the process environment
# (os.getenv() yields None for a variable that is not set)
client_id, client_secret = (
    os.getenv(var_name) for var_name in ("CLIENT_ID", "CLIENT_SECRET")
)

In [3]:
# Authentication on behalf of a user: SpotifyOAuth is handed user-level scopes
# (private playlists, library, profile) plus the redirect URI for the login callback
scope = 'playlist-read-private playlist-modify-public playlist-modify-private user-library-modify user-read-private'
redirect_uri = 'http://localhost:8888/callback'
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        scope=scope,
    )
)


In [4]:
username = 'liamrobinson50'
my_2023_playlist_link = "https://open.spotify.com/playlist/4iJYxRpJUzgALoMBYPHLs4"

# Last path segment of the share link with any "?..." query string removed.
# NOTE: despite its name, this variable holds the bare playlist ID...
playlist_URI = my_2023_playlist_link.rsplit("/", 1)[-1].partition("?")[0]

# ...while `playlist_id` holds the full "spotify:playlist:<id>" URI form
playlist_id = 'spotify:playlist:' + playlist_URI

In [16]:
# Function to get the playlist data
def get_playlist_tracks(username, playlist_id):
    """Return every item of a playlist, following pagination until the last page.

    Args:
        username: Kept only so existing callers keep working; it is not used
            to look up the playlist.
        playlist_id: Identifier of the playlist (this notebook passes a
            'spotify:playlist:<id>' URI).

    Returns:
        list[dict]: One playlist-item dict per playlist entry, in the order the
        API returned them. The track payload of each entry is under 'track'.
    """
    # user_playlist_tracks() is deprecated in spotipy; playlist_items() is the
    # supported call. additional_types=("track",) requests the same item types
    # the deprecated wrapper asked for.
    results = sp.playlist_items(playlist_id, additional_types=("track",))
    # Copy the first page so extend() below does not mutate the response dict
    tracks = list(results['items'])
    # 'next' is falsy on the last page; sp.next() fetches the following page
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [17]:
# Fetch every item in the playlist. This returns a list of dictionaries, one per
# playlist entry; the track data for each entry is stored under its 'track' key
playlist_tracks = get_playlist_tracks(username, playlist_id)

### Retrieving General Track Features

In [18]:
# Retrieve the general track data (not including audio features) as one flat
# record per playlist entry
track_data = []
for track in playlist_tracks:
    track_info = track.get('track')
    # A playlist entry can come back without a track payload (e.g. a track that
    # is no longer available); there is nothing to read from it, so skip it
    if track_info is None:
        continue
    track_data.append({
        'Track Name': track_info['name'],
        # Only the first credited artist is kept; None if the list is empty
        'Artist': track_info['artists'][0]['name'] if track_info['artists'] else None,
        'Album': track_info['album']['name'],
        'Release Date': track_info['album']['release_date'],
        'Duration': track_info['duration_ms'],  # milliseconds
        'Popularity': track_info['popularity'],
        'Explicit': track_info['explicit'],
        'Track Number': track_info['track_number'],
        # Used later as the join key against the audio features
        'Track URI': track_info['uri']
    })

In [19]:
# Convert the list of per-track records into a DataFrame (one row per record,
# one column per dictionary key)
df_track_data = pd.DataFrame(track_data)

### Retrieving Audio Features

In [20]:
# First, we extract the list of track URIs from 'playlist_tracks'. This will be
# used to request the data from the Spotify API using sp.audio_features.
# Entries without a track payload (None) are skipped, as they have no URI.
track_uris = [
    track["track"]["uri"]
    for track in playlist_tracks
    if track.get("track") is not None
]

##### Batch Processing:

In [33]:
# Retrieving the audio features from each track URI needs to be done in batches,
# as there are too many track URIs to do in one request

# Chunk size for batch processing
chunk_size = 50

# List to store audio features for all tracks
audio_features_all = []

# Iterate over track URIs in chunks of `chunk_size`
for i in range(0, len(track_uris), chunk_size):
    chunk_uris = track_uris[i:i+chunk_size]
    # Guard against an empty/None response for the whole chunk
    audio_features_chunk = sp.audio_features(chunk_uris) or []
    # The response list can hold None placeholders for tracks that have no
    # audio features; pd.DataFrame() cannot build rows from them, so drop them
    audio_features_all.extend(
        features for features in audio_features_chunk if features is not None
    )

# Create DataFrame from audio features (one row per track with features)
df_audio_features = pd.DataFrame(audio_features_all)

In [34]:
# Keep only the audio-feature columns of interest, plus 'uri', which is the key
# used to join back onto the general track data
df_audio_features = df_audio_features.loc[:, [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'uri',
]]

In [35]:
# Join the general track data with the audio features on the track URI.
# The audio features are de-duplicated by 'uri' first: a track that appears more
# than once in the playlist is present multiple times on BOTH sides, and joining
# them as-is would multiply its rows (2 x 2 = 4). validate='many_to_one' makes
# pandas raise if the right-hand side ever contains a repeated key again.
df_combined = pd.merge(
    df_track_data,
    df_audio_features.drop_duplicates(subset='uri'),
    left_on='Track URI',
    right_on='uri',
    how='inner',
    validate='many_to_one',
)

In [36]:
# Clean the combined frame so column names are consistent (lower-case,
# underscores instead of spaces) and there is no duplicate URI column
# ('track_uri' repeats 'uri').
# Written as a chain that builds a new frame, so re-running this cell is safe:
# the rename is a no-op on already-clean names and errors='ignore' tolerates
# 'track_uri' having been dropped by a previous run.
df_combined = (
    df_combined
    .rename(columns=lambda name: name.lower().replace(' ', '_'))
    .drop(columns=['track_uri'], errors='ignore')
)

In [38]:
# Preview the combined frame (the rendered output shows only the first and last
# rows, with the middle elided)
df_combined

Unnamed: 0,track_name,artist,album,release_date,duration,popularity,explicit,track_number,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,uri
0,船窓,Emerson Kitamura,船窓 / おろかな指,2022-08-03,210800,22,False,1,0.688,0.297,3,-15.369,1,0.0636,0.5050,0.921000,0.1130,0.0391,124.007,spotify:track:75q97me0dAUd7GDbilPCiu
1,I Found What I've Been Searching For,La'Verne Washington,The Promise / I Found What I've Been Searching...,2020-08-21,259540,29,False,2,0.530,0.694,1,-4.774,1,0.0374,0.2820,0.000000,0.2790,0.4740,149.533,spotify:track:1OvyyA8AHjfAdDmnM6KOlY
2,"Good Thoughts, Bad Thoughts",Funkadelic,Standing On The Verge Of Getting It On,1974-07-10,737130,41,False,7,0.398,0.306,4,-17.293,0,0.0363,0.9340,0.012100,0.2150,0.2930,108.145,spotify:track:2jIMUFk9VGIOUYTWyx2pC5
3,Such a Feeling,Aurra,Live and Let Live,1983-01-01,321533,0,False,1,0.767,0.784,0,-6.800,1,0.0592,0.1530,0.000017,0.0557,0.7080,108.005,spotify:track:6avNBU8cdaICX5wDd2cINn
4,High John,MAVI,"Laughing so Hard, it Hurts",2022-10-14,130156,57,True,1,0.545,0.540,11,-10.010,1,0.4800,0.7180,0.000000,0.2070,0.6570,170.387,spotify:track:55liByWjb2T7j0vohSel10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506,Future Perfect,The Durutti Column,Fidelity,1996,315000,45,False,3,0.729,0.438,1,-18.854,1,0.0535,0.0996,0.393000,0.0919,0.5190,99.976,spotify:track:0RlpgTdnxatoTRBvG34zaB
507,Dance Yourself Free,Rebecca Vasmant,Broken Biscuits,2022-02-17,372524,1,False,2,0.496,0.948,7,-4.493,1,0.2490,0.0531,0.006520,0.0683,0.7370,115.982,spotify:track:0LEcwSGEPRTKFwABHpOHA1
508,I Am Stretched on Your Grave,Sinéad O'Connor,I Do Not Want What I Haven't Got,1990-07-01,333386,39,False,2,0.712,0.654,8,-8.039,0,0.0667,0.0936,0.067200,0.1310,0.5480,93.957,spotify:track:2VQUtkqHTXKY5rm81PZEtk
509,Rotations,Ebende,Åter,2023-11-03,452411,4,False,2,0.807,0.839,11,-11.025,0,0.0640,0.0211,0.861000,0.0884,0.8370,128.008,spotify:track:1y0fZhqZeB5EYEg3lhZnLT


In [None]:
# Save DataFrame to a CSV file. The output folder is created first so the write
# does not fail when 'data/' does not exist yet.
os.makedirs('data', exist_ok=True)
df_combined.to_csv('data/2023_playlist.csv', index=False)