## Extract playlist songs from Spotify using the Spotipy API

In [1]:
# load the Spotify API key and secret from the environment (make sure they are never made publicly visible)

from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Credentials used for the Spotify client; a value is None when its variable is unset.
api_key, api_secret = (os.environ.get(name) for name in ("API_KEY", "API_SECRET"))


In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

# Credentials come from the environment variables loaded earlier;
# never paste real values into this cell.
cid, secret = api_key, api_secret

# Client-credentials flow: application-level access, no user login involved.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)

# Shared Spotify client used by the rest of the notebook.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


In [3]:
# define a function that pulls song attributes from a selected playlist

def call_playlist(creator, playlist_id, output_path='spotify_ali_favs_01.csv', client=None):
    """Pull metadata and audio features for the tracks of a Spotify playlist.

    Args:
        creator: Spotify user name of the playlist owner; passed straight
            through to ``user_playlist_tracks``.
        playlist_id: ID of the playlist to read.
        output_path: CSV file the result is written to, once, after all
            tracks have been collected. Defaults to the file name the rest
            of this notebook reads back; pass ``None`` to skip writing.
        client: Object exposing ``user_playlist_tracks``, ``next`` and
            ``audio_features`` (e.g. a ``spotipy.Spotify`` instance).
            Defaults to the notebook-level ``sp`` client.

    Returns:
        pandas.DataFrame with one row per track and the columns artist
        (first artist of the track's album), album, name, id, popularity,
        duration (``duration_ms`` from the API) followed by the audio
        feature columns. Audio feature cells are empty for tracks for which
        the API returned no features. Playlist items without a track object
        or without a track ID are skipped.
    """
    client = sp if client is None else client

    # step1 - declare the columns; metadata and audio features are kept as
    # separate lists so nothing depends on a positional slice.
    metadata_columns = ["artist", "album", "name", "id", "popularity", "duration"]
    audio_feature_columns = ["danceability", "energy", "key",
                             "loudness", "mode", "speechiness", "acousticness",
                             "instrumentalness", "liveness", "valence", "tempo"]
    playlist_features_list = metadata_columns + audio_feature_columns

    # step2 - collect every playlist item; the API returns results in pages,
    # so keep following the "next" link until it is exhausted.
    items = []
    page = client.user_playlist_tracks(creator, playlist_id)
    while page:
        items.extend(page["items"])
        page = client.next(page) if page.get("next") else None

    # step3 - extract the metadata of each usable track
    rows = []
    for item in items:
        track = item.get("track")
        if not track or not track.get("id"):
            # Nothing to look audio features up with; skip the item.
            continue
        rows.append({
            "artist": track["album"]["artists"][0]["name"],
            "album": track["album"]["name"],
            "name": track["name"],
            "id": track["id"],
            "popularity": track["popularity"],
            "duration": track["duration_ms"],
        })

    # step4 - fetch audio features in batches instead of one request per
    # track (the endpoint accepts up to 100 IDs per request).
    batch_size = 100
    for start in range(0, len(rows), batch_size):
        batch = rows[start:start + batch_size]
        features = client.audio_features([row["id"] for row in batch]) or []
        for offset, row in enumerate(batch):
            track_features = features[offset] if offset < len(features) else None
            for feature in audio_feature_columns:
                # The API reports None for tracks it has no analysis for.
                row[feature] = track_features[feature] if track_features else None

    # step5 - build the frame once (no row-by-row concat) and save it once
    playlist_df = pd.DataFrame(rows, columns=playlist_features_list)
    if output_path is not None:
        playlist_df.to_csv(output_path, index=False)

    return playlist_df

In [4]:
# Pull the tracks of the playlist at
# https://open.spotify.com/playlist/2L7YfYGHOiiPnEjZ8vkloC?si=c4effa08bfc849cd
# (another playlist link, not used by this call:
#  https://open.spotify.com/playlist/37i9dQZEVXcLIlAlyCSA6s?si=8f470f9ba50e4ec9)
call_playlist(creator='alisonci', playlist_id='2L7YfYGHOiiPnEjZ8vkloC')

Unnamed: 0,artist,album,name,id,popularity,duration,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Death Cab for Cutie,Thank You for Today,I Dreamt We Spoke Again,6TRQPx5Z0YZ0pCQX7JbtlS,45,184826,0.708,0.7060,9,-6.450,0,0.0385,0.276000,0.006310,0.115,0.450,119.027
1,Active Child,You Are All I See,Hanging On,6o4lFpp8OAFhBqGQbWyAob,19,321453,0.572,0.5350,8,-8.919,1,0.0284,0.284000,0.058700,0.128,0.252,134.962
2,Chelsea Wolfe,Birth of Violence,Deranged for Rock & Roll,0Fs7jajFWWfBcC4Mox3m7p,40,211513,0.196,0.6260,1,-5.805,0,0.0357,0.001400,0.031800,0.135,0.135,142.176
3,Washed Out,High Times,Chimes,2WSxFpSMtTKPNeAyZ24hJA,23,149145,0.379,0.9300,11,-6.006,0,0.5270,0.023400,0.886000,0.554,0.356,180.038
4,Beach House,Depression Cherry,Space Song,7H0ya83CMmgFcOhw0UB6ow,83,320466,0.508,0.7920,0,-7.311,0,0.0297,0.229000,0.124000,0.145,0.601,147.067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Bon Iver,"For Emma, Forever Ago",Skinny Love,2cbic3TiUENlJX91y67ARR,1,238520,0.593,0.2630,4,-14.029,0,0.0567,0.844000,0.000005,0.127,0.109,76.379
96,The Killers,Hot Fuss,Smile Like You Mean It,4kAgnAL4WKx7YqnlL2fvMh,0,234106,0.362,0.9790,3,-2.581,0,0.1610,0.000378,0.015100,0.231,0.383,125.087
97,Drake,Scorpion,Finesse,2WP8G2pdddDmnh1xbfKBOI,78,182080,0.807,0.3190,0,-8.152,0,0.0581,0.133000,0.000338,0.101,0.299,94.992
98,Cat Power,The Covers Record,(I Can't Get No) Satisfaction,6dv99Xm7fEjm7K1ATHj1UG,49,185160,0.591,0.0963,9,-16.738,0,0.0310,0.932000,0.000566,0.107,0.268,100.421


In [5]:
# Read the saved playlist export back into a pandas DataFrame.
# Earlier per-year exports, currently disabled:
# kaki_favs_2020 = pd.read_csv("./spotify_kaki_favs_2020.csv")
# kaki_favs_2021 = pd.read_csv("./spotify_kaki_favs_2021.csv")
# kaki_favs_2022 = pd.read_csv("./spotify_kaki_favs_2022.csv")

saved_tracks_path = "./spotify_ali_favs_01.csv"
ali_fav_songs = pd.read_csv(saved_tracks_path)

In [6]:
#kaki_merged_toptracks = pd.concat([kaki_favs_2020,kaki_favs_2021, kaki_favs_2022], axis= 0)
#kaki_merged_toptracks.info(5)

In [7]:
# Collect the Spotify track IDs as a plain list; they are the lookup keys
# for pulling additional song attributes from spotipy.
id_column = ali_fav_songs["id"]
print(len(id_column))
track_ids = list(id_column)

100


In [8]:
# Define a function that pulls an additional attribute for a single track: the release date of its album
def getTrackFeatures(id):
  """Look up one track with the shared ``sp`` client and return its release date.

  Args:
    id: Spotify track ID, passed to ``sp.track``.

  Returns:
    A one-element list holding the ``release_date`` of the track's album,
    so the result can be used directly as a DataFrame row.
  """
  album_info = sp.track(id)['album']
  return [album_info['release_date']]

In [9]:
import time

In [10]:
# Fetch the release date of every track, pausing half a second before each
# lookup (presumably to go easy on the API), then store the results.

tracks = []
for track_id in track_ids:
  time.sleep(.5)
  tracks.append(getTrackFeatures(track_id))

# One row per track, in the same order as track_ids.
toptracks_attributes = pd.DataFrame(data=tracks, columns=['release_date'])
toptracks_attributes.to_csv("ali_favs_attributes_Jan2024.csv", sep=',')

In [11]:
# Copy the row index into a regular 'index_col' column; it is the key used
# when merging the track info with these attributes.
toptracks_attributes['index_col'] = toptracks_attributes.index

In [12]:
# Inspect the new key column (bracket access avoids clashing with DataFrame attributes).
toptracks_attributes['index_col']

0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Name: index_col, Length: 100, dtype: int64

In [13]:
# Print the per-column summary (non-null counts and dtypes) of the attributes frame.
toptracks_attributes.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   release_date  100 non-null    object
 1   index_col     100 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ KB


In [14]:
# Display the playlist tracks that were loaded from the saved CSV.
ali_fav_songs

Unnamed: 0,artist,album,name,id,popularity,duration,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Death Cab for Cutie,Thank You for Today,I Dreamt We Spoke Again,6TRQPx5Z0YZ0pCQX7JbtlS,45,184826,0.708,0.7060,9,-6.450,0,0.0385,0.276000,0.006310,0.115,0.450,119.027
1,Active Child,You Are All I See,Hanging On,6o4lFpp8OAFhBqGQbWyAob,19,321453,0.572,0.5350,8,-8.919,1,0.0284,0.284000,0.058700,0.128,0.252,134.962
2,Chelsea Wolfe,Birth of Violence,Deranged for Rock & Roll,0Fs7jajFWWfBcC4Mox3m7p,40,211513,0.196,0.6260,1,-5.805,0,0.0357,0.001400,0.031800,0.135,0.135,142.176
3,Washed Out,High Times,Chimes,2WSxFpSMtTKPNeAyZ24hJA,23,149145,0.379,0.9300,11,-6.006,0,0.5270,0.023400,0.886000,0.554,0.356,180.038
4,Beach House,Depression Cherry,Space Song,7H0ya83CMmgFcOhw0UB6ow,83,320466,0.508,0.7920,0,-7.311,0,0.0297,0.229000,0.124000,0.145,0.601,147.067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Bon Iver,"For Emma, Forever Ago",Skinny Love,2cbic3TiUENlJX91y67ARR,1,238520,0.593,0.2630,4,-14.029,0,0.0567,0.844000,0.000005,0.127,0.109,76.379
96,The Killers,Hot Fuss,Smile Like You Mean It,4kAgnAL4WKx7YqnlL2fvMh,0,234106,0.362,0.9790,3,-2.581,0,0.1610,0.000378,0.015100,0.231,0.383,125.087
97,Drake,Scorpion,Finesse,2WP8G2pdddDmnh1xbfKBOI,78,182080,0.807,0.3190,0,-8.152,0,0.0581,0.133000,0.000338,0.101,0.299,94.992
98,Cat Power,The Covers Record,(I Can't Get No) Satisfaction,6dv99Xm7fEjm7K1ATHj1UG,49,185160,0.591,0.0963,9,-16.738,0,0.0310,0.932000,0.000566,0.107,0.268,100.421


In [15]:
# Give the playlist frame a fresh 0..n-1 index and mirror it into an
# 'index_col' column so it can serve as a merge key.
row_count = len(ali_fav_songs)
ali_fav_songs.index = range(row_count)
ali_fav_songs['index_col'] = ali_fav_songs.index
ali_fav_songs['index_col']

0      0
1      1
2      2
3      3
4      4
      ..
95    95
96    96
97    97
98    98
99    99
Name: index_col, Length: 100, dtype: int64

In [22]:
# Combine the playlist tracks with their release dates for analysis,
# matching rows on the shared 'index_col' key.
ali_favs_df = ali_fav_songs.merge(toptracks_attributes, on='index_col')


In [23]:
# Preview the last rows of the merged frame, including the attached release_date.
ali_favs_df.tail()

Unnamed: 0,artist,album,name,id,popularity,duration,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,index_col,release_date
95,Bon Iver,"For Emma, Forever Ago",Skinny Love,2cbic3TiUENlJX91y67ARR,1,238520,0.593,0.263,4,-14.029,0,0.0567,0.844,5e-06,0.127,0.109,76.379,95,2008-02-19
96,The Killers,Hot Fuss,Smile Like You Mean It,4kAgnAL4WKx7YqnlL2fvMh,0,234106,0.362,0.979,3,-2.581,0,0.161,0.000378,0.0151,0.231,0.383,125.087,96,2004-06-15
97,Drake,Scorpion,Finesse,2WP8G2pdddDmnh1xbfKBOI,78,182080,0.807,0.319,0,-8.152,0,0.0581,0.133,0.000338,0.101,0.299,94.992,97,2018-06-29
98,Cat Power,The Covers Record,(I Can't Get No) Satisfaction,6dv99Xm7fEjm7K1ATHj1UG,49,185160,0.591,0.0963,9,-16.738,0,0.031,0.932,0.000566,0.107,0.268,100.421,98,2000-03-21
99,Grizzly Bear,Shields,Yet Again,202QyrB6Q3Kimsr7KqJut3,0,318423,0.472,0.875,4,-5.024,0,0.0446,0.00345,0.00576,0.128,0.461,136.064,99,2012-09-17


In [24]:
# List the column names of the merged frame.
ali_favs_df.columns

Index(['artist', 'album', 'name', 'id', 'popularity', 'duration',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'index_col', 'release_date'],
      dtype='object')

In [26]:
# Derive a numeric 'release_year' column from the first four characters of 'release_date'.
release_year_text = ali_favs_df['release_date'].str.slice(0, 4)
ali_favs_df['release_year'] = pd.to_numeric(release_year_text)

In [28]:
# Summary statistics of the release years.
ali_favs_df['release_year'].describe()

count     100.000000
mean     2012.160000
std         9.981518
min      1968.000000
25%      2009.000000
50%      2015.000000
75%      2018.000000
max      2021.000000
Name: release_year, dtype: float64

In [29]:
# Number of tracks per release year, most frequent first.
ali_favs_df['release_year'].value_counts()

2021    13
2015    10
2012     9
2009     8
2018     8
2019     6
2008     6
2017     5
2016     5
2020     5
2011     4
2010     3
2000     2
2005     2
2007     2
2013     2
2014     2
2004     1
2003     1
1998     1
1996     1
1979     1
1972     1
1970     1
1968     1
Name: release_year, dtype: int64

In [30]:
# Save the merged frame to CSV; index=False leaves the DataFrame index out of the file.
ali_favs_df.to_csv('spotify_ali_favs_merged.csv', index=False)