# Spotify API

In [None]:
import pandas as pd

#pip install spotipy --upgrade
import spotipy
#from spotipy.oauth2 import SpotifyOAuth 
from spotipy.oauth2 import SpotifyClientCredentials


In [None]:
# set environment variables
import os

# NOTE(review): the client id/secret are hardcoded here and end up in version control;
# they should be supplied from outside the notebook (shell environment, .env file) and
# the exposed pair rotated.
os.environ['SPOTIPY_CLIENT_ID'] = '9fc4a2bf145b43fca0f305f7c52dda41'
os.environ['SPOTIPY_CLIENT_SECRET'] = '9fc435a8811e4ff3a82975cd4fc33e3a'
#os.environ['SPOTIPY_REDIRECT_URI'] = 'http://localhost:8888/callback'

# read the values back; neither variable is used again in this cell
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
#redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI')

# set client credentials
# no arguments are passed, so the credentials presumably come from the SPOTIPY_*
# environment variables set above — confirm against the spotipy documentation
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

# set scope and authorization
#scope = 'user-top-read'
#sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

## get spotify id for billboard lists

In [None]:
# create dataframe of track and artist info
df = pd.read_csv('../data/billboard_charts.csv')
# separate artists into list: every separator variant is first normalised to ' & '.
# The pattern is a regex alternation, so regex=True must be explicit — pandas >= 2.0
# treats the pattern as a literal string by default and would leave the column unchanged.
df['artist_list'] = (
    df['artist']
    .str.replace(', | Featuring | With | X | x ', ' & ', regex=True)
    .str.split(' & ')
)

In [None]:
# function to search track/artist and return spotify id

def get_track_spid(track, artist):
    """Return the Spotify id of the top search hit for a track/artist pair.

    The track and artist are joined with a space into one free-text query and
    only the first result is requested.

    Returns:
        The id of the first hit, or None when the search comes back empty or
        any exception is raised along the way (a message is printed in both
        cases).
    """
    try:
        hits = sp.search(f'{track} {artist}', type='track', limit=1)['tracks']['items']
        if not hits:
            print("No tracks found.")
            return None
        return hits[0]['id']
    except Exception as e:
        # best-effort lookup: report the problem and let the caller carry on
        print(f"Error: {e}")
        return None


In [None]:
# look up a spotify id for every chart row (one search request per row)
# NOTE(review): the visible cells only reference the columns 'artist' and 'artist_list';
# unless the CSV itself provides an 'artists' column, row['artists'] raises KeyError —
# confirm which column was intended.
df['spid'] = df.apply(lambda row: get_track_spid(row['track'], row['artists']), axis=1)
df.head()

## get metadata from spotify id (spid)

In [None]:
# function to get track metadata given a list of spotify ids

def get_metadata(track_spids):
    """Fetch track metadata from Spotify for the given track ids.

    Args:
        track_spids: a single Spotify track id, or an iterable of ids (list,
            pandas Series, ...). Entries that are None/NaN are skipped.

    Returns:
        pandas.DataFrame with one row per id that was looked up and the
        columns id, isrc, track, artists, album, release_date, release_year,
        length_ms, explicit and popularity. The frame is empty (no columns)
        when there was nothing to look up.
    """
    # a bare string would otherwise be iterated character by character
    if isinstance(track_spids, str):
        track_spids = [track_spids]

    metadata_list = []
    for track_spid in track_spids:
        # get_track_spid returns None for failed searches; such ids cannot be queried
        if pd.isna(track_spid):
            continue

        # get metadata
        info = sp.track(track_spid)
        # parse once, reuse for both the date and the year column
        release_date = pd.to_datetime(info['album']['release_date'])
        metadata_list.append({
            'id': track_spid,
            # not every track carries an ISRC; record None instead of failing
            'isrc': info.get('external_ids', {}).get('isrc'),
            'track': info['name'],
            'artists': [artist['name'] for artist in info['artists']],
            'album': info['album']['name'],
            'release_date': release_date,
            'release_year': release_date.year,
            'length_ms': info['duration_ms'],
            'explicit': info['explicit'],
            'popularity': info['popularity'],
        })

    return pd.DataFrame(metadata_list)

In [None]:
# fetch metadata for every spotify id found by the search step;
# df['spid'] holds None wherever get_track_spid found nothing or hit an error
metadata_df = get_metadata(df['spid'])
metadata_df

In [None]:
# persist the metadata to the current working directory
# NOTE(review): the cell that reads it back loads '../data/spotify/metadata.csv' —
# presumably the file is moved by hand; confirm the intended location.
metadata_df.to_csv('metadata.csv')

In [None]:
# reload the saved metadata (first CSV column becomes the index) and keep
# rows 100-149 as a small sample for trying out the audio-feature helpers
test_df = pd.read_csv('../data/spotify/metadata.csv', index_col=0)
test_df = test_df[100:150].copy()
test_df.head()

In [None]:
# function to get track audio features given a list of spotify ids

def get_audio_features(track_spids):
    """Return the selected audio-feature columns for each Spotify id.

    One request is made per id; each response becomes a small frame limited
    to the selected columns, and the frames are stacked with a fresh index.
    An empty input raises ValueError from pd.concat.
    """
    keep = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode','speechiness',
            'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
    frames = [pd.DataFrame(sp.audio_features(spid), columns=keep) for spid in track_spids]
    return pd.concat(frames, ignore_index=True)


import pandas as pd

def get_audio_features(track_spids):
    """Return all audio features for each Spotify id, one request per id.

    The first element of every non-empty response is kept, with its 'id'
    field overwritten by the id that was requested. Empty responses are
    skipped.
    """
    rows = []
    for spid in track_spids:
        response = sp.audio_features(spid)
        if not response:
            continue
        first = response[0]
        first['id'] = spid  # Add track ID to features
        rows.append(first)

    return pd.DataFrame(rows)


In [None]:
# function to get track audio features given a list of spotify ids

def get_audio_features(track_spids):
    """Return audio features for a collection of Spotify ids, fetched in batches.

    Args:
        track_spids: a single id or any iterable of ids (list, pandas Series,
            generator, ...).

    Returns:
        pandas.DataFrame with every field the API returned, one row per track
        for which features were available. Tracks the API answers with None
        are left out. The frame is empty when no ids were given.
    """
    # a bare string would otherwise be sliced into character chunks
    if isinstance(track_spids, str):
        track_spids = [track_spids]
    # materialise so that len() and positional slicing work for any iterable
    ids = list(track_spids)

    features_list = []
    # ids are sent in chunks of 100 per request
    for i in range(0, len(ids), 100):
        batch = ids[i:i + 100]
        features = sp.audio_features(batch) or []
        # None placeholders cannot be turned into DataFrame rows
        features_list.extend(f for f in features if f is not None)

    return pd.DataFrame(features_list)



In [None]:
# try the audio-feature helper on the 50-row sample
feat_df = get_audio_features(test_df['id'])
feat_df


In [None]:
# number of calls allowed per 60-second period for the wrapper below
requests_per_minute = 180
# NOTE(review): `sleep_and_retry` and `limits` are not imported anywhere in the visible
# source; presumably they come from the third-party `ratelimit` package — confirm and
# add the import, otherwise this cell raises NameError.
@sleep_and_retry
@limits(calls=requests_per_minute, period=60)
def rate_limited_grab_features(track_id): 
    """Return the first element of sp.audio_features(track_id)."""
    features = sp.audio_features(track_id)[0]
    return features 



# NOTE(review): this cell reads `track_ids`, which the visible source only assigns in a
# later cell (from hip_hop_data['id']) — confirm the execution order before running.
# Initialize an empty list to hold audio features
audio_features_list = []
# The Spotify API allows you to get audio features for multiple tracks in one request (max 100 at a time)

for i in range(0, len(track_ids), 100):
    batch = track_ids[i:i + 100]
    audio_features_results = sp.audio_features(batch)
    audio_features_list.extend(audio_features_results)

# Convert the list of audio features to a DataFrame
audio_features_df = pd.DataFrame(audio_features_list)

# Merge the original DataFrame with the audio features DataFrame
# NOTE(review): no visible cell creates a 'Track ID' column on df — presumably this
# snippet was written for a different dataframe; verify the join key.
result_df = pd.merge(df, audio_features_df, left_on='Track ID', right_on='id', how='left')

# Write the resulting DataFrame to a new CSV file with semicolon as the delimiter
result_df.to_csv('tracks_with_audio_features.csv', sep=';', index=False)

## functions for playlist id

In [None]:
# function to return a dataframe of track metadata for a given playlist

def get_tracklist(playlist_id):
    """Return a dataframe of track metadata for a given playlist.

    Args:
        playlist_id: Spotify playlist id.

    Returns:
        pandas.DataFrame with one row per playlist entry and the columns
        playlist, isrc, track, track_id, artist, artist_id, album, album_id,
        release_date, release_year, length_ms, explicit and popularity.
        Entries without track data are skipped.
    """
    # one request supplies both the playlist name and the first page of tracks
    playlist = sp.playlist(playlist_id)
    playlist_name = playlist['name']

    # follow the paging links so playlists longer than one page are read completely
    page = playlist['tracks']
    playlist_info = list(page['items'])
    while page.get('next'):
        page = sp.next(page)
        playlist_info.extend(page['items'])

    # get info for each track in playlist
    tracklist = []
    for item in playlist_info:
        track = item.get('track')
        # an entry can come back without track data; nothing to record for it
        if not track:
            continue

        artist_info = track['artists']
        # parse once, reuse for both the date and the year column
        release_date = pd.to_datetime(track['album']['release_date'])

        tracklist.append({
            # not every track carries an ISRC; record None instead of failing
            'isrc': track.get('external_ids', {}).get('isrc'),
            'track': track['name'],
            'track_id': track['id'],
            'artist': [artist['name'] for artist in artist_info],
            'artist_id': [artist['id'] for artist in artist_info],
            'album': track['album']['name'],
            'album_id': track['album']['id'],
            'release_date': release_date,
            'release_year': release_date.year,
            'length_ms': track['duration_ms'],
            'explicit': track['explicit'],
            'popularity': track['popularity'],
        })

    tracklist_df = pd.DataFrame(tracklist)

    # insert column for input playlist name
    tracklist_df.insert(0, 'playlist', playlist_name)

    return tracklist_df

In [None]:
# function to return a dataframe of audio features for a given track

def get_features(track_id):
    """Return a dataframe of the selected audio features for a given track.

    The response of sp.audio_features(track_id) is loaded into a frame that
    keeps only the id plus the eleven feature columns listed below.
    """
    # raw audio features as returned by the client
    response = sp.audio_features(track_id)

    # columns carried into the result, in output order
    wanted = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode','speechiness',
              'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    return pd.DataFrame(response, columns=wanted)

In [None]:
# function to return combined dataframe

def get_track_data(playlist_urls):
    """Return combined track metadata and audio features for one or more playlists.

    Args:
        playlist_urls: a single playlist URL or any iterable of URLs. The
            playlist id is taken to be the last path segment, with any
            '?...' query suffix and trailing slash removed.

    Returns:
        pandas.DataFrame with the columns of get_tracklist followed by the
        feature columns of get_features (the duplicate 'id' column is
        dropped). Tracks without a matching feature row are left out by the
        inner merge. An empty frame is returned when there are no playlists
        or none of them has tracks.
    """
    # accept a single URL as well as any iterable of URLs
    if isinstance(playlist_urls, str):
        playlist_urls = [playlist_urls]

    # extract playlist IDs from URLs
    playlist_ids = [
        playlist_url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
        for playlist_url in playlist_urls
    ]

    # one merged dataframe per playlist
    playlists = []
    for playlist_id in playlist_ids:
        # create dataframe of track metadata
        tracklist_df = get_tracklist(playlist_id)
        # pd.concat below rejects an empty list, so skip playlists without tracks
        if tracklist_df.empty:
            continue

        # create dataframe of track audio features
        features_df = pd.concat(
            [get_features(track_id) for track_id in tracklist_df['track_id']],
            ignore_index=True,
        )
        # a track listed twice would otherwise be multiplied by the merge
        features_df = features_df.drop_duplicates(subset='id')

        # merge dataframes
        playlist_df = pd.merge(tracklist_df, features_df, left_on='track_id', right_on='id')
        playlists.append(playlist_df.drop(columns='id'))

    if not playlists:
        return pd.DataFrame()

    # concatenate all dataframes
    return pd.concat(playlists, ignore_index=True)

## appendix

In [None]:
# troubleshooting - compare top results for track+artist search

# placeholders: set the track title and artist to investigate before running
# (left as None, the query sent is the literal text 'None None')
track = None
artist = None

# request up to 10 candidate matches for the combined query
results = sp.search(f'{track} {artist}', type='track', limit=10)['tracks']['items']

result_list = []

# iterate over what actually came back — a search can return fewer than 10 hits,
# and indexing a fixed range would then raise IndexError
for result in results:
    name = result['name']
    album = result['album']['name']
    spid = result['id']
    external_ids = result['external_ids']

    result_info = {'track': name, 'album': album, 'spid': spid, 'ids': external_ids}
    result_list.append(result_info)

result_df = pd.DataFrame(result_list)
result_df

In [None]:
# list of links to my annual top songs playlist 2016-2023
# each link carries a '?si=...' suffix, which the id-extraction helpers in this
# notebook cut off with split('?')[0]

annual_playlists = ['https://open.spotify.com/playlist/37i9dQZF1CyWExfjiBGoVh?si=e227bbd1de8b42f0',
                    'https://open.spotify.com/playlist/37i9dQZF1E9WKHP4NOmDGL?si=e3ff3539c8ae47a1',
                    'https://open.spotify.com/playlist/37i9dQZF1EjgKOpkPK3V4h?si=71409bb3b9cc40a2',
                    'https://open.spotify.com/playlist/37i9dQZF1Et8YfkURNRFQQ?si=e1b3f6b940a3403f',
                    'https://open.spotify.com/playlist/37i9dQZF1EMgToN6NNFzB2?si=01b2a28d5d54452d',
                    'https://open.spotify.com/playlist/37i9dQZF1EUMDoJuT8yJsl?si=86f7eb098f8a4a51',
                    'https://open.spotify.com/playlist/37i9dQZF1F0sijgNaJdgit?si=2ee6884b1718473c',
                    'https://open.spotify.com/playlist/37i9dQZF1Fa1IIVtEpGUcU?si=baa7cf66e4f54189']

# final

In [None]:
import pandas as pd
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# NOTE(review): two credential pairs are hardcoded in this cell and end up in version
# control; they should be loaded from outside the notebook and the exposed pairs rotated.
# This first pair is not used by the client created below.
client_id = '3eb451f5ffa447f7a2b2398df184c7fc' # remove !!
client_secret = '8f327632f6cb4d6b99cb69e60e0cffad' # remove !!

# second pair: this is the one passed to SpotifyClientCredentials below
s_id = 'cc2aa3729a554faf89adc470dd2a04b0'
s_secret = 'db0ef2575cc64f99972ba635108cae4d'


# set client credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=s_id, client_secret=s_secret))

In [None]:

# spot-check the client: fetch the full track object for one known id
sp.track('6pWgRkpqVfxnj3WuIcJ7WP')
#sp.audio_features('6pWgRkpqVfxnj3WuIcJ7WP')

In [None]:
# spot-check the audio-features endpoint with the same track id
sp.audio_features('6pWgRkpqVfxnj3WuIcJ7WP')

In [None]:
def get_playlist_ids(playlist_urls):
    """Extract Spotify playlist ids from playlist URLs.

    Args:
        playlist_urls: a single URL string or any iterable of URL strings
            (list, tuple, ...).

    Returns:
        list of str: for each URL, its last path segment with any '?...'
        query suffix and trailing slash removed. A string without slashes is
        returned unchanged, so bare ids pass through.
    """
    # a lone string is one URL, not an iterable of characters
    if isinstance(playlist_urls, str):
        playlist_urls = [playlist_urls]

    # drop the query part first so a '/' inside it cannot be mistaken for a path separator
    return [
        playlist_url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
        for playlist_url in playlist_urls
    ]

In [None]:
# function to return dataframe of metadata and audio features for each track in given playlist

def get_track_data(playlist_id):
    """Return metadata and audio features for each track in a playlist.

    Args:
        playlist_id: Spotify playlist id.

    Returns:
        pandas.DataFrame with the columns playlist, id, track, artist, album,
        release_date, release_year, length_ms, explicit and popularity,
        followed by the selected audio-feature columns. Tracks for which no
        audio features could be retrieved are left out by the inner merge.
        For a playlist without tracks, a frame holding only the 'playlist'
        column is returned.
    """
    page_size = 100  # spotify API limits to 100 tracks per request

    # get metadata for each track, page by page until the API reports no further page
    metadata = []
    offset = 0
    while True:
        page = sp.playlist_tracks(playlist_id, limit=page_size, offset=offset)
        items = page['items']
        for item in items:
            track = item.get('track')
            # an entry can come back without track data; nothing to record for it
            if not track:
                continue
            # parse once, reuse for both the date and the year column
            release_date = pd.to_datetime(track['album']['release_date'])
            metadata.append({
                'id': track['id'],
                'track': track['name'],
                'artist': [artist['name'] for artist in track['artists']],
                'album': track['album']['name'],
                'release_date': release_date,
                'release_year': release_date.year,
                'length_ms': track['duration_ms'],
                'explicit': track['explicit'],
                'popularity': track['popularity'],
            })
        if not items or not page.get('next'):
            break
        offset += page_size
    metadata_df = pd.DataFrame(metadata)

    # insert column for input playlist name
    playlist_name = sp.playlist(playlist_id)['name']
    metadata_df.insert(0, 'playlist', playlist_name)
    if metadata_df.empty:
        return metadata_df

    # get audio features in batches; each distinct id is requested once so that a
    # track listed twice in the playlist is not multiplied by the merge below
    track_ids = metadata_df['id'].dropna().unique().tolist()
    features = []
    for start in range(0, len(track_ids), page_size):
        batch = track_ids[start:start + page_size]
        try:
            batch_features = sp.audio_features(batch) or []
        except Exception as e:
            # best effort: report the failed batch and keep going with the rest
            print(f'Error getting audio features for {len(batch)} tracks starting at {batch[0]}: {e}')
            continue
        # only real feature dicts may go into the frame (no None, no error strings)
        features.extend(feature for feature in batch_features if feature)

    select_cols = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
    features_df = pd.DataFrame(features, columns=select_cols)

    # merge and return full dataframe
    return pd.merge(metadata_df, features_df, on='id')

In [None]:
# function to return audio features for a list of track ids

def get_audio_features(track_id_list):
    """Return the selected audio features for a list of track ids.

    Args:
        track_id_list: a single track id or any iterable of ids (list,
            pandas Series, ...).

    Returns:
        pandas.DataFrame with exactly one row per input id, in input order,
        and the columns id, danceability, energy, key, loudness, mode,
        speechiness, acousticness, instrumentalness, liveness, valence and
        tempo. Ids whose features could not be retrieved (request error,
        None returned, missing id) keep their row with the feature columns
        left empty.
    """
    select_cols = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
    batch_size = 100  # ids sent per request

    # a bare string would otherwise be treated as a sequence of characters
    if isinstance(track_id_list, str):
        track_id_list = [track_id_list]
    track_ids = list(track_id_list)

    rows = []
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        # None/NaN ids cannot be sent to the API; they only get a placeholder row
        lookup = [track_id for track_id in batch if not pd.isna(track_id)]

        fetched = []
        if lookup:
            try:
                fetched = list(sp.audio_features(lookup) or [])
            except Exception as e:
                # best effort: report the failed batch and keep going with the rest
                print(f'Error getting audio features for {len(lookup)} tracks starting at {lookup[0]}: {e}')
        # pad so that every requested id has a (possibly empty) result slot;
        # results are paired with the requested ids by position
        fetched += [None] * (len(lookup) - len(fetched))

        results = iter(fetched)
        for track_id in batch:
            feature = None if pd.isna(track_id) else next(results)
            # a placeholder dict keeps the row (and its id) when no features are available
            rows.append(feature or {'id': track_id})

    return pd.DataFrame(rows, columns=select_cols)

In [None]:
# playlists from every noise at once / sound of spotify
# one playlist per genre; the trailing comment on each line gives the genre and track count

sound_of_playlists = ['https://open.spotify.com/playlist/6gS3HhOiI17QNojjPuPzqc', # pop - 428 songs
                      'https://open.spotify.com/playlist/2ZIRxkFuqNPMnlY7vL54uK', # dance pop - 341 songs
                      'https://open.spotify.com/playlist/6MXkE0uYF4XwU4VTtyrpfP', # hip hop - 344 songs
                      'https://open.spotify.com/playlist/6s5MoZzR70Qef7x4bVxDO1', # rap - 344 songs
                      'https://open.spotify.com/playlist/7dowgSWOmvdpwNkGFMUs6e', # rock - 509 songs
                      'https://open.spotify.com/playlist/5HufsVvMDoIPr9tGzoJpW0', # modern rock - 321 songs
                      'https://open.spotify.com/playlist/4mijVkpSXJziPiOrK7YX4M', # country - 413 songs
                      'https://open.spotify.com/playlist/0VZfpqcbBUWC6kpP1vVrvA'] # contemporary country - 380 songs

In [None]:
# display the playlist ids extracted from the URLs above
get_playlist_ids(sound_of_playlists)

In [None]:
# NOTE(review): get_metadata as defined in this notebook iterates over its argument and
# calls sp.track on every element, so a URL string is walked character by character —
# presumably a playlist-level helper was meant here; confirm.
test_df = get_metadata('https://open.spotify.com/playlist/4mijVkpSXJziPiOrK7YX4M')
test_df

In [None]:
# spot-check the audio-features endpoint for a single track id
sp.audio_features('6pWgRkpqVfxnj3WuIcJ7WP')

In [None]:
# pop playlist: collect metadata + audio features and save them without the index column
sound_of_pop = get_track_data('6gS3HhOiI17QNojjPuPzqc')
sound_of_pop.to_csv('sound_of_pop.csv', index=False)

In [None]:
# hip hop playlist: collect metadata + audio features and save them without the index column
sound_of_hip_hop = get_track_data('6MXkE0uYF4XwU4VTtyrpfP')
sound_of_hip_hop.to_csv('sound_of_hip_hop.csv', index=False)

In [None]:
# NOTE(review): the argument is the rock playlist id, but get_metadata as defined in this
# notebook expects track ids and would iterate this string character by character —
# presumably get_track_data was intended; confirm.
sound_of_rock = get_metadata('7dowgSWOmvdpwNkGFMUs6e')
sound_of_rock
#sound_of_rock.to_csv('sound_of_rock.csv', index=False)

In [None]:
# spot-check the audio-features endpoint for another track id
sp.audio_features('4VP8QiCeaZq8BeTIUrMQOG')

In [None]:
# country playlist: collect metadata + audio features and save them without the index column
# (uses the id of the 'country' entry in sound_of_playlists; the previous id was the
# hip hop playlist's, so the country file would have been a copy of the hip hop data)
sound_of_country = get_track_data('4mijVkpSXJziPiOrK7YX4M')
sound_of_country.to_csv('sound_of_country.csv', index=False)

In [None]:
# load the saved hip hop playlist data
hip_hop_data = pd.read_csv('../data/sound_of/sound_of_hip_hop.csv')
hip_hop_data.head()

In [None]:
# plain python list of the track ids, used for the batched feature requests
track_ids = hip_hop_data['id'].tolist()
track_ids


In [None]:
# request audio features for track_ids in batches of 100
features = []
for i in range(0, len(track_ids), 100):
    batch_ids = track_ids[i:i+100]
    batch_features = sp.audio_features(batch_ids)
    # None entries cannot be turned into DataFrame rows, so leave them out
    features.extend(f for f in (batch_features or []) if f)
select_cols = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode','speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']
features_df = pd.DataFrame(features, columns=select_cols)

# merge and show the full dataframe (this is a top-level cell, so the stray indentation
# and `return` copied from a function body have been removed)
# NOTE(review): metadata_df is whatever the notebook session currently holds under that
# name; track_ids came from hip_hop_data, so presumably that frame was meant — confirm.
playlist_df = pd.merge(metadata_df, features_df, on='id')
playlist_df

