# Spotify API


In [1]:
# import libraries
from dotenv import load_dotenv
import os
import pandas as pd
import requests
import time
from tqdm.auto import tqdm

# Pull the variables defined in the local .env file into the process environment
load_dotenv()

# Read the API credentials from the environment (each is None when its variable is unset)
client_id, client_secret = (os.getenv(name) for name in ('CLIENT_ID', 'CLIENT_SECRET'))

In [37]:
# request access token (client-credentials flow against the Spotify accounts service)
# Fail early with a clear message instead of sending a request without credentials.
if not client_id or not client_secret:
    raise RuntimeError("CLIENT_ID / CLIENT_SECRET are not set; check your .env file")

url = "https://accounts.spotify.com/api/token"
payload = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret
}
headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}
# A timeout makes a stalled connection raise instead of hanging the notebook forever.
response = requests.post(url, data=payload, headers=headers, timeout=10)
response.raise_for_status()  # Raise an exception for HTTP errors
access_token = response.json()["access_token"]


In [38]:
# read the playlist table from disk and display it
playlists = pd.read_csv(filepath_or_buffer='./data/playlists.csv')
playlists

Unnamed: 0,name,id,link
0,Electro chill,37i9dQZF1DX9ND1QF5hZNF,https://open.spotify.com/playlist/37i9dQZF1DX9...
1,hard Rock,37i9dQZF1DX1X7WV84927n,https://open.spotify.com/playlist/37i9dQZF1DX1...
2,Trap Mojito,37i9dQZF1DX1OIMC8iDi74,https://open.spotify.com/playlist/37i9dQZF1DX1...
3,Hot Country,37i9dQZF1DX1lVhptIYRda,https://open.spotify.com/playlist/37i9dQZF1DX1...
4,Summer Jazz,37i9dQZF1DWTKxc7ZObqeH,https://open.spotify.com/playlist/37i9dQZF1DWT...


## Track List


In [33]:
# function to hit the Spotify API and get playlist tracks
def get_playlist_tracks(playlist_id, access_token, session=None, timeout=10):
    """
    Fetch all tracks from a Spotify playlist.

    Pages through the playlist's tracks endpoint 50 items at a time until the
    response reports no ``next`` page. Fetching is best-effort: if a request
    fails or a response is missing an expected key, a message is printed and
    whatever was collected so far is returned.

    Parameters:
    - playlist_id: The Spotify playlist ID
    - access_token: Your Spotify API Bearer token
    - session: Optional object with a requests-compatible ``get`` method
      (e.g. ``requests.Session()``); defaults to the ``requests`` module.
    - timeout: Seconds to wait for each HTTP response (default 10), so a
      stalled connection cannot hang the notebook.

    Returns:
    - A pandas DataFrame with one row per non-null track object returned by
      the API, plus a ``playlist_id`` column holding ``playlist_id``.
    """
    url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"
    headers = {'Authorization': f'Bearer {access_token}'}
    http = session if session is not None else requests

    page_size = 50
    params = {'limit': page_size, 'offset': 0}
    track_list = []

    while True:
        try:
            response = http.get(url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()  # Raise an exception for HTTP errors
            data = response.json()

            # Keep the 'track' object of each item, skipping items whose track is null
            tracks_data = [item['track'] for item in data['items'] if item['track'] is not None]
            track_list.extend(tracks_data)

            # Stop once the API reports there is no further page
            if not data['next']:
                break

            params['offset'] += page_size
            time.sleep(1)  # To avoid hitting rate limits

        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            break
        except KeyError as e:
            print(f"Unexpected response format: {e}")
            break

    # Create a DataFrame
    df = pd.DataFrame(track_list)

    # adding the playlist id to the dataframe
    df['playlist_id'] = playlist_id

    return df


In [34]:
# ID of the playlist used as a worked example
playlist_id = '37i9dQZF1DX9ND1QF5hZNF'

# fetch that playlist's tracks as a DataFrame
tracks = get_playlist_tracks(playlist_id=playlist_id, access_token=access_token)

In [35]:
tracks.head(n=5)

Unnamed: 0,preview_url,available_markets,explicit,type,episode,track,album,artists,disc_number,track_number,duration_ms,external_ids,external_urls,href,id,name,popularity,uri,is_local,playlist_id
0,https://p.scdn.co/mp3-preview/dc3f5560d882d3f9...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,145599,{'isrc': 'USA2P2439321'},{'spotify': 'https://open.spotify.com/track/4M...,https://api.spotify.com/v1/tracks/4MSj19TwYBLg...,4MSj19TwYBLgDFj3ddEeco,Black Friday (pretty like the sun),86,spotify:track:4MSj19TwYBLgDFj3ddEeco,False,37i9dQZF1DX9ND1QF5hZNF
1,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",True,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,199680,{'isrc': 'SEUM72400143'},{'spotify': 'https://open.spotify.com/track/1V...,https://api.spotify.com/v1/tracks/1VKqsNsQvM0o...,1VKqsNsQvM0oCJKzUtWD2H,Something Beautiful - Montmartre Remix,44,spotify:track:1VKqsNsQvM0oCJKzUtWD2H,False,37i9dQZF1DX9ND1QF5hZNF
2,https://p.scdn.co/mp3-preview/e54ac6ee0aa018cf...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,160000,{'isrc': 'BE8LH2400053'},{'spotify': 'https://open.spotify.com/track/6G...,https://api.spotify.com/v1/tracks/6GTdNhWfd1kl...,6GTdNhWfd1klaioOmyleJ8,Open Road (feat. Nate VanDeusen),51,spotify:track:6GTdNhWfd1klaioOmyleJ8,False,37i9dQZF1DX9ND1QF5hZNF
3,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,214000,{'isrc': 'DECE72401347'},{'spotify': 'https://open.spotify.com/track/5n...,https://api.spotify.com/v1/tracks/5nPbKG04fhLk...,5nPbKG04fhLkIAjcPFaZq7,I Adore You (feat. Daecolm),82,spotify:track:5nPbKG04fhLkIAjcPFaZq7,False,37i9dQZF1DX9ND1QF5hZNF
4,https://p.scdn.co/mp3-preview/bacd8fab60c68555...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,152347,{'isrc': 'SE4RG2306229'},{'spotify': 'https://open.spotify.com/track/40...,https://api.spotify.com/v1/tracks/40rev08jSE8Z...,40rev08jSE8ZQfrESVu4uc,BWU,58,spotify:track:40rev08jSE8ZQfrESVu4uc,False,37i9dQZF1DX9ND1QF5hZNF


## Audio features


In [29]:
def get_audio_features(track_ids, access_token, session=None, timeout=10):
    """
    Fetch audio features for a list of Spotify track IDs and return them as a DataFrame.

    IDs are sent in batches of 100. Fetching is best-effort: a batch whose
    request fails, or whose response lacks the 'audio_features' key, is
    reported with a printed message and skipped.

    Parameters:
    - track_ids: A single track ID string, or a list/array of Spotify track IDs.
      Entries that are not non-empty strings (None, NaN, '') are ignored.
    - access_token: Your Spotify API Bearer token.
    - session: Optional object with a requests-compatible ``get`` method
      (e.g. ``requests.Session()``); defaults to the ``requests`` module.
    - timeout: Seconds to wait for each HTTP response (default 10).

    Returns:
    - A pandas DataFrame with one row per track for which features were
      returned. The track ID is in the ``id`` column; the frame has a fresh
      0..n-1 index, and the previous positional index is kept as an ``index``
      column. The 'type', 'uri', 'track_href', 'analysis_url' and
      'duration_ms' fields are dropped. Empty input yields an empty frame.
    """
    endpoint = "https://api.spotify.com/v1/audio-features"
    headers = {'Authorization': f'Bearer {access_token}'}
    http = session if session is not None else requests

    # Spotify API allows max 100 IDs per request, so we process in batches
    batch_size = 100
    if isinstance(track_ids, str):
        track_ids = [track_ids]

    # Normalise any iterable to a list and skip missing IDs, which would
    # otherwise make ','.join() raise TypeError.
    track_ids = [track_id for track_id in track_ids if isinstance(track_id, str) and track_id]

    # Accumulates the feature dicts from every successful batch
    all_audio_features = []

    # looping through the track_ids in batches
    for start in range(0, len(track_ids), batch_size):
        batch_ids = track_ids[start:start + batch_size]
        params = {'ids': ','.join(batch_ids)}

        try:
            # Make a GET request to the audio features endpoint
            response = http.get(endpoint, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()  # Raise an exception for HTTP errors
            audio_features = response.json()['audio_features']
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            continue
        except KeyError as e:
            print(f"Unexpected response format: {e}")
            continue
        else:
            # Entries can be None (the API documents null for IDs it cannot
            # resolve); pd.DataFrame cannot build rows from None, so drop them.
            all_audio_features.extend(
                features for features in audio_features if features is not None
            )

    # Convert the list of audio features to a DataFrame
    audio_features_df = pd.DataFrame(all_audio_features)

    # dropping unnecessary columns; errors='ignore' keeps an empty result
    # (no columns at all) from raising KeyError
    unused_columns = ['type', 'uri', 'track_href', 'analysis_url', 'duration_ms']
    audio_features_df = audio_features_df.drop(columns=unused_columns, errors='ignore')

    # reset_index() (without drop=True) also emits the old index as an 'index'
    # column; kept as-is so the returned columns stay the same for callers.
    return audio_features_df.reset_index()

In [30]:
# audio features for two hand-picked track IDs, as a quick check of the function
af = get_audio_features(
    track_ids=['4MSj19TwYBLgDFj3ddEeco', '1VKqsNsQvM0oCJKzUtWD2H'],
    access_token=access_token,
)
af

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,time_signature
0,0,0.853,0.618,6,-10.833,1,0.0923,0.633,0.89,0.0972,0.32,126.031,4MSj19TwYBLgDFj3ddEeco,4
1,1,0.642,0.734,11,-8.091,0,0.0485,0.135,0.00216,0.311,0.686,120.029,1VKqsNsQvM0oCJKzUtWD2H,4


In [31]:
# getting audio features for the first four tracks of the sample playlist
af_df = get_audio_features(track_ids=tracks['id'].values[:4], access_token=access_token)
af_df

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,time_signature
0,0,0.853,0.618,6,-10.833,1,0.0923,0.633,0.89,0.0972,0.32,126.031,4MSj19TwYBLgDFj3ddEeco,4
1,1,0.642,0.734,11,-8.091,0,0.0485,0.135,0.00216,0.311,0.686,120.029,1VKqsNsQvM0oCJKzUtWD2H,4
2,2,0.624,0.745,1,-5.8,0,0.0543,0.169,0.00244,0.209,0.293,116.949,6GTdNhWfd1klaioOmyleJ8,4
3,3,0.704,0.787,11,-6.751,0,0.0462,0.00926,0.00538,0.159,0.31,119.996,5nPbKG04fhLkIAjcPFaZq7,4


## Putting it together


In [39]:
# getting track list for all the playlists
# The playlists table displayed above names its Spotify ID column `id`; fall back
# to it when there is no `playlist_id` column so this cell does not raise KeyError.
id_column = 'playlist_id' if 'playlist_id' in playlists.columns else 'id'

# looping through all the playlists, collecting one DataFrame per playlist
playlist_frames = []
for playlist_id in tqdm(playlists[id_column]):

    # getting the tracks
    track_list = get_playlist_tracks(playlist_id, access_token)

    # saving tracks
    playlist_frames.append(track_list)

# Concatenate once at the end: growing `df` with pd.concat inside the loop
# re-copies every accumulated row on each iteration.
df = pd.concat(playlist_frames) if playlist_frames else pd.DataFrame()

  0%|          | 0/5 [00:00<?, ?it/s]

In [40]:
# (rows, columns) of the combined track table, followed by its first rows
print(df.shape)
df.head(n=5)

(562, 20)


Unnamed: 0,preview_url,available_markets,explicit,type,episode,track,album,artists,disc_number,track_number,duration_ms,external_ids,external_urls,href,id,name,popularity,uri,is_local,playlist_id
0,https://p.scdn.co/mp3-preview/dc3f5560d882d3f9...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,145599,{'isrc': 'USA2P2439321'},{'spotify': 'https://open.spotify.com/track/4M...,https://api.spotify.com/v1/tracks/4MSj19TwYBLg...,4MSj19TwYBLgDFj3ddEeco,Black Friday (pretty like the sun),86,spotify:track:4MSj19TwYBLgDFj3ddEeco,False,37i9dQZF1DX9ND1QF5hZNF
1,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",True,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,199680,{'isrc': 'SEUM72400143'},{'spotify': 'https://open.spotify.com/track/1V...,https://api.spotify.com/v1/tracks/1VKqsNsQvM0o...,1VKqsNsQvM0oCJKzUtWD2H,Something Beautiful - Montmartre Remix,44,spotify:track:1VKqsNsQvM0oCJKzUtWD2H,False,37i9dQZF1DX9ND1QF5hZNF
2,https://p.scdn.co/mp3-preview/e54ac6ee0aa018cf...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,160000,{'isrc': 'BE8LH2400053'},{'spotify': 'https://open.spotify.com/track/6G...,https://api.spotify.com/v1/tracks/6GTdNhWfd1kl...,6GTdNhWfd1klaioOmyleJ8,Open Road (feat. Nate VanDeusen),51,spotify:track:6GTdNhWfd1klaioOmyleJ8,False,37i9dQZF1DX9ND1QF5hZNF
3,,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,214000,{'isrc': 'DECE72401347'},{'spotify': 'https://open.spotify.com/track/5n...,https://api.spotify.com/v1/tracks/5nPbKG04fhLk...,5nPbKG04fhLkIAjcPFaZq7,I Adore You (feat. Daecolm),82,spotify:track:5nPbKG04fhLkIAjcPFaZq7,False,37i9dQZF1DX9ND1QF5hZNF
4,https://p.scdn.co/mp3-preview/bacd8fab60c68555...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,152347,{'isrc': 'SE4RG2306229'},{'spotify': 'https://open.spotify.com/track/40...,https://api.spotify.com/v1/tracks/40rev08jSE8Z...,40rev08jSE8ZQfrESVu4uc,BWU,58,spotify:track:40rev08jSE8ZQfrESVu4uc,False,37i9dQZF1DX9ND1QF5hZNF


In [41]:
# audio features for every track collected across all the playlists
af_df = get_audio_features(track_ids=df['id'].values, access_token=access_token)
af_df

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,time_signature
0,0,0.853,0.6180,6,-10.833,1,0.0923,0.63300,0.89000,0.0972,0.320,126.031,4MSj19TwYBLgDFj3ddEeco,4
1,1,0.642,0.7340,11,-8.091,0,0.0485,0.13500,0.00216,0.3110,0.686,120.029,1VKqsNsQvM0oCJKzUtWD2H,4
2,2,0.624,0.7450,1,-5.800,0,0.0543,0.16900,0.00244,0.2090,0.293,116.949,6GTdNhWfd1klaioOmyleJ8,4
3,3,0.704,0.7870,11,-6.751,0,0.0462,0.00926,0.00538,0.1590,0.310,119.996,5nPbKG04fhLkIAjcPFaZq7,4
4,4,0.741,0.1590,7,-17.531,1,0.0884,0.08500,0.87000,0.1110,0.494,114.989,40rev08jSE8ZQfrESVu4uc,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,557,0.584,0.1390,5,-11.483,1,0.0304,0.96100,0.03690,0.1060,0.240,106.759,2ghcpXt1t7oQfHcF5hUBP2,4
558,558,0.449,0.1420,10,-18.201,1,0.0421,0.94500,0.91400,0.1110,0.131,112.211,73LXSkZdpjWC4XUDTju8LK,4
559,559,0.555,0.0929,5,-17.157,0,0.0459,0.99500,0.75900,0.1050,0.642,69.861,02ZDihrLEri1ZOF5pla55f,3
560,560,0.424,0.1030,3,-18.281,1,0.0417,0.93700,0.86100,0.1140,0.135,104.450,7wAFHu3ueBLEebC5NEOk9z,4


In [42]:
# merging the two dataframes
# A track that appears in more than one playlist has one row per playlist in
# `df`, and `af_df` (built from df['id']) repeats its features once per
# occurrence. Merging those as-is is many-to-many and multiplies the rows, so
# keep a single feature row per track id before joining.
af_unique = af_df.drop_duplicates(subset='id')
final_df = pd.merge(df, af_unique, on='id')
final_df.tail()

Unnamed: 0,preview_url,available_markets,explicit,type,episode,track,album,artists,disc_number,track_number,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
557,https://p.scdn.co/mp3-preview/8a1385f197225d52...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,...,5,-11.483,1,0.0304,0.961,0.0369,0.106,0.24,106.759,4
558,https://p.scdn.co/mp3-preview/285fc1045ab80497...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,...,10,-18.201,1,0.0421,0.945,0.914,0.111,0.131,112.211,4
559,,"[AD, AE, AR, AT, AU, BD, BE, BG, BH, BO, BR, C...",False,track,False,True,"{'available_markets': ['AD', 'AE', 'AR', 'AT',...",[{'external_urls': {'spotify': 'https://open.s...,1,8,...,5,-17.157,0,0.0459,0.995,0.759,0.105,0.642,69.861,3
560,https://p.scdn.co/mp3-preview/e71ab50f0f910228...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,...,3,-18.281,1,0.0417,0.937,0.861,0.114,0.135,104.45,4
561,https://p.scdn.co/mp3-preview/f789ea5498d1cb8e...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",False,track,False,True,"{'available_markets': ['AR', 'AU', 'AT', 'BE',...",[{'external_urls': {'spotify': 'https://open.s...,1,1,...,2,-13.494,1,0.0397,0.927,0.624,0.119,0.269,120.603,4


In [43]:
# write the merged tracks + audio-features table to disk, without the index column
final_df.to_csv(path_or_buf='./data/playlist_tracks.csv', index=False)