# Spotify API

In [None]:
#pip install spotipy --upgrade

import spotipy
from spotipy.oauth2 import SpotifyOAuth 
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd

In [None]:
# Load the Spotify API credentials from the environment.
#
# SECURITY: the client ID and client secret must not be hard-coded in the
# notebook or committed to version control. Export them in the shell that
# launches Jupyter (or load them from an untracked file) before running this
# cell:
#
#     export SPOTIPY_CLIENT_ID=...
#     export SPOTIPY_CLIENT_SECRET=...
#
# NOTE(review): any credentials that were previously committed in this cell
# should be treated as compromised and rotated.
import os
import warnings

# The redirect URI is not a secret: keep the local default, but let a value
# that is already present in the environment win.
os.environ.setdefault('SPOTIPY_REDIRECT_URI', 'http://localhost:8888/callback')

client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI')

# Report missing credentials here, where the cause is obvious, rather than
# leaving it to a later authentication failure.
_missing_credentials = [
    name
    for name, value in (
        ('SPOTIPY_CLIENT_ID', client_id),
        ('SPOTIPY_CLIENT_SECRET', client_secret),
    )
    if not value
]
if _missing_credentials:
    warnings.warn(
        'Missing Spotify credentials in the environment: '
        + ', '.join(_missing_credentials)
    )

In [None]:
# alternative kept for reference: client built with SpotifyClientCredentials
#sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

# scope requested for the OAuth-authorised client
scope = 'user-top-read'

# build the OAuth manager first, then hand it to the API client
oauth_manager = SpotifyOAuth(scope=scope)
sp = spotipy.Spotify(auth_manager=oauth_manager)

### define functions

In [None]:
# function to return a dataframe of track metadata for a given playlist

def get_tracks(playlist_id):
    """Return a DataFrame of track metadata for every entry in a playlist.

    Args:
        playlist_id: Playlist identifier, passed unchanged to ``sp.playlist``.

    Returns:
        pandas.DataFrame with one row per playlist entry and the columns
        ``playlist``, ``track``, ``track_id``, ``artist``, ``artist_id``,
        ``album``, ``album_id``, ``release_date``, ``release_year``,
        ``length_ms``, ``explicit`` and ``popularity``. ``artist`` and
        ``artist_id`` hold lists, one element per credited artist. An empty
        playlist yields an empty frame with the same columns.
    """
    columns = [
        'track', 'track_id', 'artist', 'artist_id', 'album', 'album_id',
        'release_date', 'release_year', 'length_ms', 'explicit', 'popularity',
    ]

    # one request supplies both the playlist name and the first page of tracks
    playlist = sp.playlist(playlist_id)
    playlist_name = playlist['name']

    # walk every page of results, not just the first one
    track_info = []
    page = playlist['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            # entries without track data cannot be described; skip them
            if track is None:
                continue

            album = track['album']
            artists = track['artists']
            # parse the release date once and reuse it for the year
            release_date = pd.to_datetime(album['release_date'])

            track_info.append({
                'track': track['name'],
                'track_id': track['id'],
                'artist': [artist['name'] for artist in artists],
                'artist_id': [artist['id'] for artist in artists],
                'album': album['name'],
                'album_id': album['id'],
                'release_date': release_date,
                'release_year': release_date.year,
                'length_ms': track['duration_ms'],
                'explicit': track['explicit'],
                'popularity': track['popularity'],
            })

        # sp.next follows the page's 'next' link and returns None at the end
        page = sp.next(page)

    # explicit columns keep the schema stable even when no rows were collected
    tracks_df = pd.DataFrame(track_info, columns=columns)

    # insert column for playlist name
    tracks_df.insert(0, 'playlist', playlist_name)

    return tracks_df

In [None]:
# function to return a dataframe of audio features for a given track

def get_features(track_id):
    """Return a DataFrame of selected audio features.

    Args:
        track_id: Track identifier, passed unchanged to ``sp.audio_features``.

    Returns:
        pandas.DataFrame with the columns ``id``, ``danceability``,
        ``energy``, ``key``, ``loudness``, ``mode``, ``speechiness``,
        ``acousticness``, ``instrumentalness``, ``liveness``, ``valence`` and
        ``tempo``. Tracks for which no features are returned contribute no
        row, so the frame may be empty.
    """
    # get audio features
    features = sp.audio_features(track_id)

    # the response can hold None in place of a feature dict; such entries
    # cannot be turned into rows, so drop them before building the frame
    features = [entry for entry in (features or []) if entry is not None]

    # select features to keep
    select_cols = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

    # create dataframe with select columns
    return pd.DataFrame(features, columns=select_cols)

In [None]:
# function to return combined dataframe

# Number of track IDs sent per audio-features lookup.
# NOTE(review): 100 is assumed to be the per-request maximum of the
# audio-features endpoint - confirm against the API documentation.
_FEATURES_BATCH_SIZE = 100


def _extract_playlist_id(playlist_url):
    """Return the playlist ID from a share URL, a Spotify URI, or a bare ID."""
    # drop the query string first, then any trailing slash
    location = playlist_url.strip().split('?', 1)[0].rstrip('/')
    # treat ':' like '/' so 'spotify:playlist:<id>' is handled as well
    return location.replace(':', '/').split('/')[-1]


def master_playlist(playlist_urls):
    """Combine track metadata and audio features for one or more playlists.

    Args:
        playlist_urls: A single playlist URL (str) or an iterable of URLs.

    Returns:
        pandas.DataFrame with one row per playlist entry that has audio
        features: the columns produced by ``get_tracks`` followed by the
        feature columns produced by ``get_features`` (without ``id``).
        An empty DataFrame is returned when there is nothing to combine.
    """
    # accept a single URL as well as any iterable of URLs
    if isinstance(playlist_urls, str):
        playlist_urls = [playlist_urls]

    # create empty list to store playlist dataframes
    master = []

    for playlist_url in playlist_urls:
        playlist_id = _extract_playlist_id(playlist_url)

        # create dataframe of track metadata
        tracks_df = get_tracks(playlist_id)
        if tracks_df.empty:
            continue

        # look each track up once, even if the playlist repeats it
        track_ids = tracks_df['track_id'].dropna().drop_duplicates().tolist()
        if not track_ids:
            continue

        # create dataframe of track audio features, one request per batch
        # instead of one request per track
        feature_frames = [
            get_features(track_ids[start:start + _FEATURES_BATCH_SIZE])
            for start in range(0, len(track_ids), _FEATURES_BATCH_SIZE)
        ]
        features_df = pd.concat(feature_frames, ignore_index=True)
        # a repeated feature row would multiply playlist rows in the merge
        features_df = features_df.drop_duplicates(subset='id')

        # merge dataframes
        tracks_df = pd.merge(tracks_df, features_df, left_on='track_id', right_on='id')
        tracks_df = tracks_df.drop('id', axis=1)

        # add dataframe to list
        master.append(tracks_df)

    # pd.concat raises on an empty list, so handle "nothing collected" here
    if not master:
        return pd.DataFrame()

    # concatenate all dataframes
    return pd.concat(master, ignore_index=True)

### test functions

In [None]:
# share links for my annual top-songs playlists (2016-2023), one per line

annual_playlists = [
    'https://open.spotify.com/playlist/37i9dQZF1CyWExfjiBGoVh?si=e227bbd1de8b42f0',
    'https://open.spotify.com/playlist/37i9dQZF1E9WKHP4NOmDGL?si=e3ff3539c8ae47a1',
    'https://open.spotify.com/playlist/37i9dQZF1EjgKOpkPK3V4h?si=71409bb3b9cc40a2',
    'https://open.spotify.com/playlist/37i9dQZF1Et8YfkURNRFQQ?si=e1b3f6b940a3403f',
    'https://open.spotify.com/playlist/37i9dQZF1EMgToN6NNFzB2?si=01b2a28d5d54452d',
    'https://open.spotify.com/playlist/37i9dQZF1EUMDoJuT8yJsl?si=86f7eb098f8a4a51',
    'https://open.spotify.com/playlist/37i9dQZF1F0sijgNaJdgit?si=2ee6884b1718473c',
    'https://open.spotify.com/playlist/37i9dQZF1Fa1IIVtEpGUcU?si=baa7cf66e4f54189',
]

In [None]:
# sample inputs for trying out the functions above
test_playlists = [
    'https://open.spotify.com/playlist/37i9dQZF1Fa1IIVtEpGUcU?si=baa7cf66e4f54189',
    'https://open.spotify.com/playlist/37i9dQZF1E9WKHP4NOmDGL?si=e3ff3539c8ae47a1',
]
test_track = '0NapkeC45rszeuSgbvcjx4'
test_album = '5rHd8n9uWIpq9w0PP5ZcI3'

In [None]:
# build the combined track metadata + audio features table for the test playlists
master_df = master_playlist(test_playlists)
# last expression of the cell, so the notebook displays the first rows
master_df.head()

In [None]:
# request the recommendation genre seeds and keep only the entry stored under 'genres'

sp_genres = sp.recommendation_genre_seeds()['genres']

In [None]:
# keep duplicated rows - indicator of user listening to same song years apart
# (master_df identifies tracks by 'track_id'; master_playlist drops the 'id' column after the merge)
duplicated_rows = master_df[master_df.duplicated(subset='track_id', keep=False)]
duplicated_rows