# Init & Login

In [7]:
import time, pickle, os
now = time.time
from math import ceil
from random import randrange, choice
from time import sleep
from pprint import pprint
from datetime import datetime

import spotipy
import spotipy.util as util
from IPython.display import clear_output

## Client Info ##
CLIENT_ID     = ""
CLIENT_SECRET = ""
CLIENT_SCOPE  = "user-follow-modify playlist-modify-private playlist-modify-public"
USER_NAME     = "31ytgsr7wdmiaroy77msqpiupdsi"
REDIR_URI     = "https://github.com/jwatson-CO-edu/yt_shuffle_so_good"
AUTH_URL      = 'https://accounts.spotify.com/api/token'
BASE_URL      = 'https://api.spotify.com/v1/'
## API Info ##
_RESPONSE_LIMIT =  100
_MAX_OFFSET     = 1000

with open( "../keys/spot_ID.txt" , 'r' ) as f:
    CLIENT_ID = f.readlines()[0].strip()

with open( "../keys/spot_SECRET.txt" , 'r' ) as f:
    CLIENT_SECRET = f.readlines()[0].strip()

token = None
token = util.prompt_for_user_token(
    username      = USER_NAME,
    scope         = CLIENT_SCOPE,
    client_id     = CLIENT_ID,
    client_secret = CLIENT_SECRET,
    redirect_uri  = REDIR_URI
)

print( token )

spot = spotipy.Spotify( auth = token )
clear_output( wait = True )
sleep( 2 )
print( "TOKEN OBTAINED" )

TOKEN OBTAINED


# Playlists

In [8]:

playlist = {
    'study01' : "0a2qoe6S7lYeZ6nlhZdA0v",
    'study02' : "6gbtR2cBq5PvkghidCvvGk",
    'study03' : "3o3lN2qntdEV7UKTuuC77K",
    'study04' : "41sFSisljvBDMBXtpp5NIw",
    'study05' : "02iS5AFGp8YVuUUqcQf8ys",
    'study06' : "6KI7A4MWrSM7EyKRUjxIi1",
    'study07' : "3V055Md2JdrUT8tX0af7di",
    'study08' : "0tspdJlwSgiyf2O9PO6QaP",
    'study09' : "5mHRBFoQtYy2izeZ66pG95",
    'study10' : "3832xeKGEOAXFJqE4K8kIq",
    'study11' : "65MXR4dubPL9t0P4dgTWvn",
    'study12' : "0ecSAfnD4CulIVnLt26ukI",
    'study13' : "7K9ucByFRgDuZk8KMHeJkL",
}

backfill = "0v26bHydUxcGC5EbMlkjzG"
_N_BKFL  = 400


# Data

In [9]:
_FILTER_TYPES = [ 'album', 'artist', 'track', 'year', 'upc', 'tag:hipster', 'tag:new', 'isrc', 'genre', ]
_SEARCH_TYPES = [ "album", "artist", "playlist", "track", "show", "episode", "audiobook", ]
_N_MAX_SEARCH = 50
_N_DEF_SEARCH = 10
_YEAR_PADDING =  5
_STALE_TIME_S = 60.0 * 60 * 24 * 31
_DATA_PREFIX  = "Study-Music-Data_"
_DATA_POSTFIX = ".pkl"

In [10]:
data = {
    'time'     : now()  , # Data Structure Creation Time
    'playlists': dict() , # Study Playlist Info
    'collectID': set([]), # Currently accepted track IDs
    'artists'  : dict() , # Study Artist Info
    'queries'  : dict() , # Study Genre Info, 2024-08-11: Track info does NOT contain genre
    # 2024-08-11: Track info does NOT contain play count
}
timestamp = datetime.now().strftime( '%Y-%m-%dT%H:%M:%S' )
outFilNam = _DATA_PREFIX + timestamp + _DATA_POSTFIX
outPath   = os.path.join( 'data/', outFilNam )

# Query Functions

In [15]:

def fetch_entire_playlist( playlist_ID ):
    """ Get infodump on all plalist tracks """
    plTracks = []
    trCount  = 0
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    Ntracks = response['total']
    while 1:
        trCount += len(response['items'])
        plTracks.extend( response['items'] )
        
        if trCount >= Ntracks:
            break
    
        response = spot.user_playlist_tracks(
            CLIENT_ID, 
            playlist_ID, 
            fields = 'items,uri,name,id,total', 
            limit  = _RESPONSE_LIMIT,
            offset = trCount
        )
    return plTracks


def populate_playlist_data( dataDct, plDict, pause_s = 1.0 ):
    """ Gather data across specified playlists """
    print( "\n### READ MUSIC COLLECTION ###\n" )
    for plName_i, plID_i in plDict.items():
        print( plName_i, '-', plID_i, '...' )
        dataDct['playlists'][ plName_i ] = {
            'ID'    : plID_i,
            'tracks': fetch_entire_playlist( plID_i ),
        }
        # pprint( dataDct['playlists'][ plName_i ]['tracks'][0] )
        # return None
        plSet_i = set([item['track']['id'] for item in dataDct['playlists'][ plName_i ]['tracks']])
        dataDct['collectID'] = dataDct['collectID'].union( plSet_i )

        for track_j in dataDct['playlists'][ plName_i ]['tracks']:

            # pprint( track_j )
            
            artistID_j = track_j['track']['artists'][0]['id'] # 2024-08-12: Assume the first artist is the most relevant
            if artistID_j not in dataDct['artists']:
                dataDct['artists'][ artistID_j ] = { 
                    'name'    : track_j['track']['artists'][0]['name'], 
                    'count'   : 1, 
                    'releases': [track_j['track']['album']['release_date'],], 
                }
            else:
                dataDct['artists'][ artistID_j ]['count'   ] += 1
                dataDct['artists'][ artistID_j ]['releases'].append( track_j['track']['album']['release_date'] )

        sleep( pause_s )
    
    print( "\n### COMPLETE ###\n" )
    

def search_artist_within_era( artistName, releaseDate, 
                              db = None, N = _N_MAX_SEARCH, yearPadding = _YEAR_PADDING, pause_s = 0.5 ):
    """ Return `N` tracks within `yearPadding` of `trackDict` and by the same artist """
    rtnLs = list()
    query = "artist%3A" + str( artistName ).replace( " ", "%20")
    try:
        rYear = int( str( releaseDate )[:4] )
    except Exception:
        rYear = 2024
    bYear   = rYear - yearPadding
    eYear   = rYear + yearPadding
    years   = list( range( bYear, eYear+1 ) )
    miniLim = max( int(N/(eYear - bYear)), 1 )
    Nloop   = int(N / miniLim * 2)
    for i in range( Nloop ):
        iYear = choice( years )
        qry_i = query + "%20year%3A" + str( iYear )
        print( f"Search: {qry_i}" )

        if (db is not None):
            if (qry_i in db['queries']):
                ofst = db['queries'][ qry_i ]
                db['queries'][ qry_i ] += miniLim
            else:
                ofst = 0
                db['queries'][ qry_i ] = miniLim
        else:
            ofst = 0
        
        res = spot.search( qry_i, 
                           limit  = miniLim, 
                           offset = min( ofst, _MAX_OFFSET ), 
                           type   = 'track' )
        tracks_i = [item['id'] for item in res['tracks']['items']]
        
        if (db is not None):
            tracks_ii = list()
            for trk_j in tracks_i:
                if trk_j not in db['collectID']:
                    tracks_ii.append( trk_j )
            tracks_i = tracks_ii[:]

        rem = N - len( rtnLs )
        if len( tracks_i ) > rem:
            rtnLs.extend( tracks_i[:rem] )
            return rtnLs
        else:
            rtnLs.extend( tracks_i )
            sleep( pause_s )
    return rtnLs


def save_music_database( dataDct ):
    """ Pickle `dataDct` to store current music collection data as well as search activity """
    print( f"About to write {outPath} ..." )
    with open( outPath, 'wb' ) as f:
        pickle.dump( dataDct, f )
    print( "COMPLETE!" )


# FIXME: LOAD DB
# FIXME: MERGE PREV DB SEARCH INFO WITH CURRENT DB


def choose_N_artist_year_pairs_from_db( N, db ):
    """ Fetch `N` random (<Artist>, <Date>) pairs from the `db` for searching """
    rtnPairs = list()
    artList  = list( db['artists'].keys() )
    for i in range(N):
        artKey_i = choice( artList )
        artist_i = db['artists'][ artKey_i ]['name']
        rlYear_i = choice( db['artists'][ artKey_i ]['releases'] )
        rtnPairs.append( (artist_i, rlYear_i,) )
    return rtnPairs


def basic_new_music_search_01( db, Ntot, Mper = 5, pause_s = 0.125 ):
    """ Choose random `db` entries as search queries, Return a list of `Ntot` tracks consisting of `Mper` entries for each artist """  
    rtnLst   = list()
    searches = choose_N_artist_year_pairs_from_db( int( ceil( Ntot/Mper ) ), db )
    for (art_i, rel_i) in searches:
        print( f"\tSearch, Artist: {art_i}, Around Year: {rel_i}" )
        rem  = Ntot - len( rtnLst )
        Mper = min( Mper, rem )
        trks = search_artist_within_era( art_i, rel_i, db, N = Mper, yearPadding = 3, pause_s = 0.5 )
        rtnLst.extend( trks )
        print( trks )
        sleep( pause_s )
    return rtnLst


def get_playlist_length( playlist_ID ):
    """ Get the number of total tracks in the playlist """
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    return response['total']


def refill_playlist_with_new_tracks( plID, db, Ntot = 400, Mper = 5 ):
    """ Top off the playlist with new tracks """
    plLen = get_playlist_length( plID )
    if Ntot > plLen:
        nRem    = Ntot - plLen
        print( f"About to add {nRem} tracks ..." )
        addTrks = basic_new_music_search_01( db, nRem, Mper )
        result  = spot.user_playlist_add_tracks( CLIENT_ID, plID, addTrks )
        print( result )
    else:
        print( "No room for new tracks!" )
            
        

# Read Study Music Collection

In [12]:
populate_playlist_data( data, playlist, pause_s = 1.0 )


### READ MUSIC COLLECTION ###

study01 - 0a2qoe6S7lYeZ6nlhZdA0v ...
study02 - 6gbtR2cBq5PvkghidCvvGk ...
study03 - 3o3lN2qntdEV7UKTuuC77K ...
study04 - 41sFSisljvBDMBXtpp5NIw ...
study05 - 02iS5AFGp8YVuUUqcQf8ys ...
study06 - 6KI7A4MWrSM7EyKRUjxIi1 ...
study07 - 3V055Md2JdrUT8tX0af7di ...
study08 - 0tspdJlwSgiyf2O9PO6QaP ...
study09 - 5mHRBFoQtYy2izeZ66pG95 ...
study10 - 3832xeKGEOAXFJqE4K8kIq ...
study11 - 65MXR4dubPL9t0P4dgTWvn ...
study12 - 0ecSAfnD4CulIVnLt26ukI ...
study13 - 7K9ucByFRgDuZk8KMHeJkL ...
About to write Study-Music-Data_2024-08-13T12:21:53.pkl ...

### COMPLETE ###



# Add New Tracks

In [16]:
refill_playlist_with_new_tracks( backfill, data, Ntot = _N_BKFL, Mper = 5 )

About to add 32 tracks ...
	Search, Artist: Chick Corea Trio, Around Year: 2013-09-10
Search: artist%3AChick%20Corea%20Trio%20year%3A2012
Search: artist%3AChick%20Corea%20Trio%20year%3A2012
Search: artist%3AChick%20Corea%20Trio%20year%3A2012
Search: artist%3AChick%20Corea%20Trio%20year%3A2012
Search: artist%3AChick%20Corea%20Trio%20year%3A2012
Search: artist%3AChick%20Corea%20Trio%20year%3A2010
['12uL43MR8byTEjtg241Z7m', '5RrJCusmR1J54b5ivqdWIu', '21WlpZiXLelHjdoMUhHfrm', '0m8tSPLHhjhGToD6LW0RPp', '1XbLFuzB07x6G2a6NdUDrC']
	Search, Artist: Alpha Room, Around Year: 2021-10-11
Search: artist%3AAlpha%20Room%20year%3A2022
Search: artist%3AAlpha%20Room%20year%3A2018
Search: artist%3AAlpha%20Room%20year%3A2019
Search: artist%3AAlpha%20Room%20year%3A2023
Search: artist%3AAlpha%20Room%20year%3A2022
Search: artist%3AAlpha%20Room%20year%3A2024
Search: artist%3AAlpha%20Room%20year%3A2024
Search: artist%3AAlpha%20Room%20year%3A2023
Search: artist%3AAlpha%20Room%20year%3A2019
['528zV1Ydi8TCkaAKntyR

In [17]:
save_music_database( data )

About to write data/Study-Music-Data_2024-08-13T12:21:53.pkl ...
COMPLETE!
