# Init & Login

In [1]:
import time
now = time.time
from math import ceil
from random import randrange, choice
from time import sleep
from pprint import pprint
from datetime import datetime

import spotipy
import spotipy.util as util
from IPython.display import clear_output

## Client Info ##
CLIENT_ID     = ""
CLIENT_SECRET = ""
CLIENT_SCOPE  = "user-follow-modify playlist-modify-private playlist-modify-public"
USER_NAME     = "31ytgsr7wdmiaroy77msqpiupdsi"
REDIR_URI     = "https://github.com/jwatson-CO-edu/yt_shuffle_so_good"
AUTH_URL      = 'https://accounts.spotify.com/api/token'
BASE_URL      = 'https://api.spotify.com/v1/'
## API Info ##
_RESPONSE_LIMIT =  100
_MAX_OFFSET     = 1000

with open( "../keys/spot_ID.txt" , 'r' ) as f:
    CLIENT_ID = f.readlines()[0].strip()

with open( "../keys/spot_SECRET.txt" , 'r' ) as f:
    CLIENT_SECRET = f.readlines()[0].strip()

token = None
token = util.prompt_for_user_token(
    username      = USER_NAME,
    scope         = CLIENT_SCOPE,
    client_id     = CLIENT_ID,
    client_secret = CLIENT_SECRET,
    redirect_uri  = REDIR_URI
)

print( token )

spot = spotipy.Spotify( auth = token )
clear_output( wait = True )
sleep( 2 )
print( "TOKEN OBTAINED" )

TOKEN OBTAINED


# Playlists

In [2]:

playlist = {
    'study01' : "0a2qoe6S7lYeZ6nlhZdA0v",
    'study02' : "6gbtR2cBq5PvkghidCvvGk",
    'study03' : "3o3lN2qntdEV7UKTuuC77K",
    'study04' : "41sFSisljvBDMBXtpp5NIw",
    'study05' : "02iS5AFGp8YVuUUqcQf8ys",
    'study06' : "6KI7A4MWrSM7EyKRUjxIi1",
    'study07' : "3V055Md2JdrUT8tX0af7di",
    'study08' : "0tspdJlwSgiyf2O9PO6QaP",
    'study09' : "5mHRBFoQtYy2izeZ66pG95",
    'study10' : "3832xeKGEOAXFJqE4K8kIq",
    'study11' : "65MXR4dubPL9t0P4dgTWvn",
    'study12' : "0ecSAfnD4CulIVnLt26ukI",
    'study13' : "7K9ucByFRgDuZk8KMHeJkL",
}

backfill = "0v26bHydUxcGC5EbMlkjzG"
_N_BKFL  = 400


# Data

In [3]:
_FILTER_TYPES = [ 'album', 'artist', 'track', 'year', 'upc', 'tag:hipster', 'tag:new', 'isrc', 'genre', ]
_SEARCH_TYPES = [ "album", "artist", "playlist", "track", "show", "episode", "audiobook", ]
_N_MAX_SEARCH = 50
_N_DEF_SEARCH = 10
_YEAR_PADDING =  5
_STALE_TIME_S = 60.0 * 60 * 24 * 31
_DATA_PREFIX  = "Study-Music-Data_"
_DATA_POSTFIX = ".pkl"

In [4]:
data = {
    'time'     : now()  , # Data Structure Creation Time
    'playlists': dict() , # Study Playlist Info
    'collectID': set([]), # Currently accepted track IDs
    'artists'  : dict() , # Study Artist Info
    'queries'  : dict() , # Study Genre Info, 2024-08-11: Track info does NOT contain genre
    # 2024-08-11: Track info does NOT contain play count
}
timestamp = datetime.now().strftime( '%Y-%m-%dT%H:%M:%S' )
outFilNam = _DATA_PREFIX + timestamp + _DATA_POSTFIX

# Query Functions

In [7]:

def fetch_entire_playlist( playlist_ID ):
    """ Get infodump on all plalist tracks """
    plTracks = []
    trCount  = 0
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    Ntracks = response['total']
    while 1:
        trCount += len(response['items'])
        plTracks.extend( response['items'] )
        
        if trCount >= Ntracks:
            break
    
        response = spot.user_playlist_tracks(
            CLIENT_ID, 
            playlist_ID, 
            fields = 'items,uri,name,id,total', 
            limit  = _RESPONSE_LIMIT,
            offset = trCount
        )
    return plTracks


def populate_playlist_data( dataDct, plDict, pause_s = 1.0 ):
    """ Gather data across specified playlists """
    for plName_i, plID_i in plDict.items():
        dataDct['playlists'][ plName_i ] = {
            'ID'    : plID_i,
            'tracks': fetch_entire_playlist( plID_i ),
        }
        # pprint( dataDct['playlists'][ plName_i ]['tracks'][0] )
        # return None
        plSet_i = set([item['track']['id'] for item in dataDct['playlists'][ plName_i ]['tracks']])
        dataDct['collectID'] = dataDct['collectID'].union( plSet_i )

        for track_j in dataDct['playlists'][ plName_i ]['tracks']:
            artistID_j = item['track']['artists']['id']
            if artistID_j not in dataDct['artists']:
                dataDct['artists'][ artistID_j ] = { 
                    'count'   : 1, 
                    'releases': [item['track']['album']['release_date'],], 
                }
            else:
                dataDct['artists'][ artistID_j ]['count'   ] += 1
                dataDct['artists'][ artistID_j ]['releases'].append( item['track']['album']['release_date'] )

        sleep( pause_s )
    

def search_artist_within_era( artistName, releaseDate, 
                              db = None, N = _N_MAX_SEARCH, yearPadding = _YEAR_PADDING, pause_s = 0.5 ):
    """ Return `N` tracks within `yearPadding` of `trackDict` and by the same artist """
    rtnLs = list()
    query = "artist%3A" + str( artistName ).replace( " ", "%20")
    try:
        rYear = int( str( releaseDate )[:4] )
    except Exception:
        rYear = 2024
    bYear   = rYear - yearPadding
    eYear   = rYear + yearPadding
    years   = list( range( bYear, eYear+1 ) )
    miniLim = max( int(N/(eYear - bYear)), 1 )
    Nloop   = int(N / miniLim * 2)
    for i in range( Nloop ):
        iYear = choice( years )
        qry_i = query + "%20year%3A" + str( iYear )

        if (db is not None):
            if (qry_i in db['queries']):
                ofst = db['queries'][ qry_i ]
                db['queries'][ qry_i ] += miniLim
            else:
                ofst = 0
                db['queries'][ qry_i ] = miniLim
        else:
            ofst = 0
        
        res = spot.search( qry_i, 
                           limit  = miniLim, 
                           offset = min( ofst, _MAX_OFFSET ), 
                           type   = 'track' )
        tracks_i = [item['id'] for item in res['tracks']['items']]
        
        if (db is not None):
            tracks_ii = list()
            for trk_j in tracks_i:
                if trk_j not in db['collectID']:
                    tracks_ii.append( trk_j )
            tracks_i = tracks_ii[:]

        rem = N - len( rtnLs )
        if len( tracks_i ) > rem:
            rtnLs.extend( tracks_i[:rem] )
            return rtnLs
        else:
            rtnLs.extend( tracks_i )
            sleep( pause_s )
            
    

In [8]:
pprint( search_artist_within_era( "They Might Be Giants", 2000 ) )

['3ZpapZEZgcT8tdMuQnEMPk',
 '5S1vJejm1YukfhXa5PxPan',
 '62n4kewUuVlidVSAf6Advn',
 '4lIWjen36IJon89y6cqJBV',
 '4tcdZxEE13yADV63V99GIr',
 '1WFCIkoc4PaN7L7SmiI85i',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '4tcdZxEE13yADV63V99GIr',
 '10F47eaBAie50PZe5b8H4h',
 '2kVSOg4uN0TNQ18L0NxUJ1',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '5S1vJejm1YukfhXa5PxPan',
 '62n4kewUuVlidVSAf6Advn',
 '4lIWjen36IJon89y6cqJBV',
 '4tcdZxEE13yADV63V99GIr',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '3ZYRyNz6bORFkKPSXxCdOy',
 '31n9Jx53RXqp6DAfmoMLDn',
 '45d0eHb3XhyqmCYUwYjdGJ',
 '0AZfSuxk2GWjoihWzzDfXr',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '5S1vJejm1YukfhXa5PxPan',
 '62n4kewUuVlidVSAf6Advn',
 '6y2cLmWkuM7u5By6ElBrla',
 '4tcdZxEE13yADV63V99GIr',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '3ZYRyNz6bORFkKPSXxCdOy',
 '31n9Jx53RXqp6DAfmoMLDn',
 '45d0eHb3XhyqmCYUwYjdGJ',
 '0AZfSuxk2GWjoihWzzDfXr',
 '1WFCIkoc4PaN7L7SmiI85i',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '5S1vJejm1YukfhXa5PxPan',
 '4tcdZxEE13yADV63V99GIr',
 '3lGq0byj9il6auYmCXiKiq',
 '3ZpapZEZgcT8tdMuQnEMPk',
 '5S1vJejm1YukfhXa5PxPan',
 