## Config

In [136]:
# Import dependencies
import tekore as tk
import os
import time
import pandas as pd
import numpy as np
from datetime import datetime, date
from dateutil.relativedelta import relativedelta

# Environment variables
CLIENTID = os.environ.get('CLIENTID')
CLIENTSECRET = os.environ.get('CLIENTSECRET')
REDIRECT_URI = 'https://example.com/callback'
# Set global variables
MINDATE = datetime((date.today() - relativedelta(years=1)).year, 1, 1)


In [36]:
### Authenticate Tekore and Authorize App
# Get client token
app_token = tk.request_client_token(CLIENTID, CLIENTSECRET)
# Get a user token; Note the need to ask for private read scope in order to read private playlists
# This will open a browser window; the user will need to copy the URL from the browser and paste it into the VSCode Command Palette
user_token = tk.prompt_for_user_token(CLIENTID, CLIENTSECRET, REDIRECT_URI, scope=tk.scope.playlist_read_private)
# Create spotify instance
spotify = tk.Spotify(app_token)
# Get the user's spotifyID; The spotify instance will have to reference the user_token to be authorized for this data
with spotify.token_as(user_token):
    userID = spotify.current_user().id

Opening browser for Spotify login...


## Get user playlists

In [156]:
# Create a playlist for the user
with spotify.token_as(user_token):
    playlists = spotify.playlists(userID, limit=40)
    allPlaylists = [{'id':x.id, 'name':x.name} for x in playlists.items]
allPlaylists

[{'id': '37i9dQZF1DWZP6bJtKFz1V', 'name': 'Et Alt'},
 {'id': '37i9dQZF1DWTggY0yqBxES', 'name': 'Alternative Hip-Hop'},
 {'id': '37i9dQZF1DXdwmD5Q7Gxah', 'name': 'Lorem'},
 {'id': '37i9dQZF1DX5Vy6DFOcx00', 'name': 'big on the internet'},
 {'id': '37i9dQZF1DWWqNV5cS50j6', 'name': 'anti pop'},
 {'id': '1KeuPzbtFE0BbMVfr8dkAm', 'name': 'Summer Playlist Artist Candidates'},
 {'id': '5hZ6SYEDNHly6Oq9MSuUP3', 'name': 'Skankin’ Fit'},
 {'id': '5WVXaXgXsDFeqboz3QJd8k', 'name': '2023'},
 {'id': '1KPjybavao8KU78UZYhx30', 'name': 'Endless Summer 2022'},
 {'id': '4aTeCEuSx2YK1VtJhpRr7V', 'name': 'Summer 2022'},
 {'id': '35o4N6gQmymDoDSDMxc7vZ', 'name': 'Experimental Classical'},
 {'id': '6C7ZJPvjJi1UxJUUsSfla9', 'name': 'They Shall Not Grow Old'},
 {'id': '7nxWVOAM4gqkxEhGl0Hi1a', 'name': 'Give Up'},
 {'id': '71wevVq2dUhBWYnPzGoQNN', 'name': 'The Buzz'},
 {'id': '1uz3b6IIajYqvKAL5Id4tZ', 'name': 'Friends'},
 {'id': '2SV8SjEgxk0Zo1yTHIj5aU', 'name': 'I See a Ship in the Harbor'},
 {'id': '6geWzdZCbU

In [157]:
# I know which playlists I want to pull from
priorSummerNames = [
    '2023',
    'Summer 2022',
    'Summer 2021',
    'Summer 2020',
    'Summer 2019',
    'Summer 2018',
    'anti pop',
    'big on the internet',
    'Lorem',
    'Alternative Hip-Hop',
    'Et-Alt'
]
# Get the IDs of those playlists
priorSummerIDs = [x.id for x in playlists.items if x.name in priorSummerNames]

## Get artists in those playlists

In [158]:
# Get all the artistIDs in those playlists
# Start with an empty list
allArtists = []
# Loop through the playlists
for priorID in priorSummerIDs:
    # Get the artistIDs
    nestedList = [[{"artistID":y.id, "artistName":y.name} for y in x.track.artists] for x in spotify.playlist_items(priorID).items]
    # Unnest the list
    flatList = [item for sublist in nestedList for item in sublist]
    # Append to the empty list
    allArtists += flatList
# Get unique artists
allArtists = pd.DataFrame(allArtists).drop_duplicates(ignore_index=True)
print(len(allArtists))

## Get albums for each artist

In [161]:
# Define function that will get the top tracks for each artist in the list
def fetchRecentAlbums(artistID):
    try:
        # Query the spotify API
        query = spotify.artist_albums(artistID, limit=50, include_groups=['album', 'single']).items
        result = [{'artistID':artistID, 'albumID':x.id, 'albumName':x.name, 'albumReleaseDate':x.release_date, 'albumURI':x.uri} for x in query if datetime.strptime(x.release_date[:4], '%Y')>MINDATE]
    except: 
        # If exception, fill in blank data
        pass
    finally:
        # Wait 2 seconds so we do not exceed our API call allowance
        time.sleep(2)
    return result

In [151]:
# Create empty list
albums = []
# Loop through list of artists
for artistID in allArtists['artistID']:
    # Get all recent albums by that artist
    additions = fetchRecentAlbums(artistID)
    # Append the recent albums to the list
    albums += additions
# Convert to new df
albums = pd.DataFrame(albums)

# Merge the albums into the artists
artists = allArtists.merge(albums, how='left', left_on='artistID', right_on='artistID')
# Drop any duplicates
artists = artists.drop_duplicates(subset=['artistID', 'albumName'])
artists = artists.dropna(subset=['albumID'])
artists

Unnamed: 0,artistID,artistName,albumID,albumName,albumReleaseDate,albumURI
0,12Zk1DFhCbHY6v3xep2ZjI,070 Shake,21ZlnuYMvzSLNOAYWH318k,True love (feat. 070 Shake),2023-04-17,spotify:album:21ZlnuYMvzSLNOAYWH318k
1,12Zk1DFhCbHY6v3xep2ZjI,070 Shake,11X8fP4LGagH6xsQI2aqaY,Cocoon (Martin Garrix & Space Ducks Remix),2023-02-10,spotify:album:11X8fP4LGagH6xsQI2aqaY
3,5jQsMNuCW0iZeFTz6YUK8K,Pluralone,,,,
4,2f0NSj1t2L6JowHINXCFb6,Ruby Red,2FIbisWv6Q7QZVvevvCE3G,Martina Soleil (Living At The Same Time),2023-02-23,spotify:album:2FIbisWv6Q7QZVvevvCE3G


## Write to csv

In [152]:
artists.to_csv('songs2.csv')