# Init & Login

In [40]:
import time, pickle, os
now = time.time
from math import ceil
from random import randrange, choice
from time import sleep
from pprint import pprint
from datetime import datetime

import spotipy
import spotipy.util as util
from IPython.display import clear_output

## Client Info ##
CLIENT_ID     = ""
CLIENT_SECRET = ""
CLIENT_SCOPE  = "user-follow-modify playlist-modify-private playlist-modify-public"
USER_NAME     = "31ytgsr7wdmiaroy77msqpiupdsi"
REDIR_URI     = "https://github.com/jwatson-CO-edu/yt_shuffle_so_good"
AUTH_URL      = 'https://accounts.spotify.com/api/token'
BASE_URL      = 'https://api.spotify.com/v1/'
## API Info ##
_RESPONSE_LIMIT =  100
_MAX_OFFSET     = 1000

with open( "../keys/spot_ID.txt" , 'r' ) as f:
    CLIENT_ID = f.readlines()[0].strip()

with open( "../keys/spot_SECRET.txt" , 'r' ) as f:
    CLIENT_SECRET = f.readlines()[0].strip()

token = None
token = util.prompt_for_user_token(
    username      = USER_NAME,
    scope         = CLIENT_SCOPE,
    client_id     = CLIENT_ID,
    client_secret = CLIENT_SECRET,
    redirect_uri  = REDIR_URI
)

print( token )

spot = spotipy.Spotify( auth = token )
clear_output( wait = True )
sleep( 2 )
print( "TOKEN OBTAINED" )

TOKEN OBTAINED


# Playlists

In [41]:
playlist = {
    'study01' : "0a2qoe6S7lYeZ6nlhZdA0v",
    'study02' : "6gbtR2cBq5PvkghidCvvGk",
    'study03' : "3o3lN2qntdEV7UKTuuC77K",
    'study04' : "41sFSisljvBDMBXtpp5NIw",
    'study05' : "02iS5AFGp8YVuUUqcQf8ys",
    'study06' : "6KI7A4MWrSM7EyKRUjxIi1",
    'study07' : "3V055Md2JdrUT8tX0af7di",
    'study08' : "0tspdJlwSgiyf2O9PO6QaP",
    'study09' : "5mHRBFoQtYy2izeZ66pG95",
    'study10' : "3832xeKGEOAXFJqE4K8kIq",
    'study11' : "65MXR4dubPL9t0P4dgTWvn",
    'study12' : "0ecSAfnD4CulIVnLt26ukI",
    'study13' : "7K9ucByFRgDuZk8KMHeJkL",
}

backfill = "0v26bHydUxcGC5EbMlkjzG"
_N_BKFL  = 400


# Data

In [42]:
_FILTER_TYPES = [ 'album', 'artist', 'track', 'year', 'upc', 'tag:hipster', 'tag:new', 'isrc', 'genre', ]
_SEARCH_TYPES = [ "album", "artist", "playlist", "track", "show", "episode", "audiobook", ]
_N_MAX_SEARCH = 50
_N_DEF_SEARCH = 10
_YEAR_PADDING =  5
_STALE_TIME_S = 60.0 * 60 * 24 * 31
_MIN_LEN_S    = 60.0 + 45.0
_DATA_DIR     = "data/"
_DATA_PREFIX  = "Study-Music-Data_"
_DATA_POSTFIX = ".pkl"

In [43]:
data = {
    'time'     : now()  , # Data Structure Creation Time
    'playlists': dict() , # Study Playlist Info
    'collectID': set([]), # Currently accepted track IDs
    'artists'  : dict() , # Study Artist Info
    'queries'  : dict() , # Study Genre Info, 2024-08-11: Track info does NOT contain genre
    # 2024-08-11: Track info does NOT contain play count
}
timestamp = datetime.now().strftime( '%Y-%m-%dT%H:%M:%S' )
outFilNam = _DATA_PREFIX + timestamp + _DATA_POSTFIX
outPath   = os.path.join( 'data/', outFilNam )

# Query Functions

In [44]:

def fetch_entire_playlist( playlist_ID ):
    """ Get infodump on all plalist tracks """
    plTracks = []
    trCount  = 0
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    Ntracks = response['total']
    while 1:
        trCount += len(response['items'])
        plTracks.extend( response['items'] )
        
        if trCount >= Ntracks:
            break
    
        response = spot.user_playlist_tracks(
            CLIENT_ID, 
            playlist_ID, 
            fields = 'items,uri,name,id,total', 
            limit  = _RESPONSE_LIMIT,
            offset = trCount
        )
    return plTracks


def load_music_database( dataDir = _DATA_DIR ):
    """ Find the latest music database, test for freshness, and set current db if fresh """
    global data
    dbFiles = [os.path.join( dataDir, f ) for f in os.listdir( dataDir ) if (_DATA_PREFIX in str(f))]
    if len( dbFiles ):
        dbFiles.sort( reverse = True )
        with open( dbFiles[0], 'rb' ) as f:
            db = pickle.load( f )
        if ((data['time'] - db['time']) <= _STALE_TIME_S):
            data.update( db )
            return dbFiles[0]
    return None


def populate_playlist_data( dataDct, plDict, pause_s = 1.0 ):
    """ Gather data across specified playlists """
    print( "\n### READ MUSIC COLLECTION ###\n" )
    nuDB = load_music_database()
    if nuDB is not None:
        print( f"Found current collection data at {nuDB}!" )
    else:
        for plName_i, plID_i in plDict.items():
            print( plName_i, '-', plID_i, '...' )
            dataDct['playlists'][ plName_i ] = {
                'ID'    : plID_i,
                'tracks': fetch_entire_playlist( plID_i ),
            }
            # pprint( dataDct['playlists'][ plName_i ]['tracks'][0] )
            # return None
            plSet_i = set([item['track']['id'] for item in dataDct['playlists'][ plName_i ]['tracks']])
            dataDct['collectID'] = dataDct['collectID'].union( plSet_i )
    
            for track_j in dataDct['playlists'][ plName_i ]['tracks']:
    
                # pprint( track_j )
                for artist_k in track_j['track']['artists']:
                    artistID_j = artist_k['id']
                    if artistID_j not in dataDct['artists']:
                        dataDct['artists'][ artistID_j ] = { 
                            'name'    : track_j['track']['artists'][0]['name'], 
                            'count'   : 1, 
                            'releases': [track_j['track']['album']['release_date'],], 
                        }
                    else:
                        dataDct['artists'][ artistID_j ]['count'   ] += 1
                        dataDct['artists'][ artistID_j ]['releases'].append( track_j['track']['album']['release_date'] )
    
            sleep( pause_s )
    
    print( "\n### COMPLETE ###\n" )
    

def search_artist_within_era( artistName, releaseDate, 
                              db = None, N = _N_MAX_SEARCH, yearPadding = _YEAR_PADDING, pause_s = 0.5 ):
    """ Return `N` tracks within `yearPadding` of `trackDict` and by the same artist """
    rtnLs = list()
    query = "artist%3A" + str( artistName ).replace( " ", "%20")
    try:
        rYear = int( str( releaseDate )[:4] )
    except Exception:
        rYear = 2024
    bYear   = rYear - yearPadding
    eYear   = rYear + yearPadding
    years   = list( range( bYear, eYear+1 ) )
    miniLim = max( int(N/(eYear - bYear)), 1 )
    Nloop   = int(N / miniLim * 2)
    for i in range( Nloop ):
        iYear = choice( years )
        qry_i = query + "%20year%3A" + str( iYear )
        print( f"Search: {qry_i}" )

        if (db is not None):
            if (qry_i in db['queries']):
                ofst = db['queries'][ qry_i ]
                db['queries'][ qry_i ] += miniLim
            else:
                ofst = 0
                db['queries'][ qry_i ] = miniLim
        else:
            ofst = 0
        
        res = spot.search( qry_i, 
                           limit  = miniLim, 
                           offset = min( ofst, _MAX_OFFSET ), 
                           type   = 'track' )
        tracks_i = [item['id'] for item in res['tracks']['items']]
        
        if (db is not None):
            tracks_ii = list()
            for trk_j in tracks_i:
                if trk_j not in db['collectID']:
                    tracks_ii.append( trk_j )
            tracks_i = tracks_ii[:]

        rem = N - len( rtnLs )
        if len( tracks_i ) > rem:
            rtnLs.extend( tracks_i[:rem] )
            return rtnLs
        else:
            rtnLs.extend( tracks_i )
            sleep( pause_s )
    return rtnLs


def save_music_database( dataDct ):
    """ Pickle `dataDct` to store current music collection data as well as search activity """
    print( f"About to write {outPath} ..." )
    with open( outPath, 'wb' ) as f:
        pickle.dump( dataDct, f )
    print( "COMPLETE!" )


def choose_N_artist_year_pairs_from_db( N, db ):
    """ Fetch `N` random (<Artist>, <Date>) pairs from the `db` for searching """
    rtnPairs = list()
    artList  = list( db['artists'].keys() )
    for i in range(N):
        artKey_i = choice( artList )
        # pprint( db['artists'][ artKey_i ] )
        artist_i = db['artists'][ artKey_i ]['name']
        rlYear_i = choice( db['artists'][ artKey_i ]['releases'] )
        rtnPairs.append( (artist_i, rlYear_i,) )
    return rtnPairs


def basic_new_music_search_01( db, Ntot, Mper = 5, pause_s = 0.125 ):
    """ Choose random `db` entries as search queries, Return a list of `Ntot` tracks consisting of `Mper` entries for each artist """  
    rtnLst   = list()
    searches = choose_N_artist_year_pairs_from_db( int( ceil( Ntot/Mper ) )*2, db )
    addSet   = set([])
    for (art_i, rel_i) in searches:
        print( f"\tSearch, Artist: {art_i}, Around Year: {rel_i}" )
        rem    = Ntot - len( rtnLst )
        Mper   = min( Mper, rem )
        trks   = search_artist_within_era( art_i, rel_i, db, N = Mper, yearPadding = 3, pause_s = 0.5 )
        trks_i = list()
        for trk in trks:
            if trk not in addSet:
                trks_i.append( trk )
                addSet.add( trk )
        rem = Ntot - len( rtnLst )
        if rem > len( trks_i ):
            rtnLst.extend( trks_i )
        else:
            rtnLst.extend( trks_i[ :rem ] )
            break
        sleep( pause_s )
    return rtnLst


def get_playlist_length( playlist_ID ):
    """ Get the number of total tracks in the playlist """
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    return response['total']


def refill_playlist_with_new_tracks( plID, db, Ntot = 400, Mper = 5 ):
    """ Top off the playlist with new tracks """
    plLen = get_playlist_length( plID )
    if Ntot > plLen:
        nRem    = Ntot - plLen
        print( f"About to add {nRem} tracks ..." )
        addTrks = basic_new_music_search_01( db, nRem, Mper )
        result  = spot.user_playlist_add_tracks( CLIENT_ID, plID, addTrks )
        print( result )
    else:
        print( "No room for new tracks!" )


def scrub_short_and_explicit_tracks( plID ):
    """ Remove short (<1:45) and explicit (guaranteed vocal) songs """
    tracks = fetch_entire_playlist( plID )

    # 3. For every track j in playlist, do
    j = 0
    while j < len( tracks ):
        track_j   = tracks[j]
        # pprint( track_j )
        # break
        p_dump_j  = False
        trackID_j = track_j['track']['id']
        len_s_j   = track_j['track']['duration_ms']/1000.0
        explc_j   = track_j['track']['explicit']

        if ((len_s_j < _MIN_LEN_S) or explc_j):
            res = spot.playlist_remove_specific_occurrences_of_items( 
                plID, 
                [{'uri': trackID_j, 'positions':[j,]},]
            )
            print( "\tRemove:", trackID_j, j, res )
            tracks.pop(j)
        else:
            j += 1


def scrub_and_refill_playlist_with_new_tracks( plID, db, Ntot = 400, Mper = 5, pause_s = 1.0 ):
    """ Remove suspect tracks and refill backfill in a loop until full """
    scrub_short_and_explicit_tracks( plID )
    plLen = get_playlist_length( plID )
    if Ntot > plLen:
        print( f"\n########## About to top off backfill with {Ntot-plLen} tracks! ##########\n" )
        i = 0
        while plLen < Ntot:
            i += 1
            sleep( pause_s )
            print( f"\n##### Iteration {i} #####\n" )
            refill_playlist_with_new_tracks( plID, db, Ntot, Mper )
            scrub_short_and_explicit_tracks( plID )
            plLen = get_playlist_length( plID )
            
        print( f"\n########## COMPLETE after {i} iterations! ##########\n" )
    else:
        print( f"\n########## NO additional backfill required! ##########\n" )
            
        

# Read Study Music Collection

In [6]:
populate_playlist_data( data, playlist, pause_s = 1.0 )


### READ MUSIC COLLECTION ###

Found current collection data at data/Study-Music-Data_2024-08-14T18:48:38.pkl!

### COMPLETE ###



# Add New Tracks

In [7]:
# refill_playlist_with_new_tracks( backfill, data, Ntot = _N_BKFL, Mper = 5 )
scrub_and_refill_playlist_with_new_tracks( backfill, data, Ntot = 400, Mper = 5, pause_s = 1.0 )

	Remove: 71Cn5bV8aFtAXwIsoSyjHU 46 {'snapshot_id': 'AAAA2LETa+QUtkNX2gXfxgmc/4QHV1Nx'}
	Remove: 6Yv5YXS884z0vibMB5hALf 154 {'snapshot_id': 'AAAA2aihfX78Jdk8ZCq40b6a3bUti5vZ'}
	Remove: 0e4uBYIXWJty3uBDBD2uPQ 176 {'snapshot_id': 'AAAA2uFal6dPfyd70RE3E/FX6uNUe2yX'}
	Remove: 0puCzwgSixoHQiTetF5r6u 183 {'snapshot_id': 'AAAA2/1G5Q6lHXfaMY0UC8ZZjn2ADYlw'}
	Remove: 2yMWcyN0rHT5CZCrjEqW4e 272 {'snapshot_id': 'AAAA3C8Z6wWiZngpPAgviHT+BMda0aBw'}
	Remove: 1R8fYzyHl3TQdCBSPkFaI7 338 {'snapshot_id': 'AAAA3UyEHPR6La0mX8b3PeVyywm42/x0'}

########## About to top off backfill with 50 tracks! ##########


##### Iteration 1 #####

About to add 50 tracks ...
	Search, Artist: Ondolut, Around Year: 2020-07-10
Search: artist%3AOndolut%20year%3A2023
Search: artist%3AOndolut%20year%3A2020
Search: artist%3AOndolut%20year%3A2022
Search: artist%3AOndolut%20year%3A2020
Search: artist%3AOndolut%20year%3A2020
Search: artist%3AOndolut%20year%3A2017
	Search, Artist: Don Slepian, Around Year: 2001-02-06
Search: artist%3

In [8]:
save_music_database( data )

About to write data/Study-Music-Data_2024-08-15T12:25:00.pkl ...
COMPLETE!


In [9]:
# scrub_short_and_explicit_tracks( backfill )

# Orthogonal Convex Hull

In [None]:
def get_AABB_nd( ndPoints ):
    """ Get the Axis-Aligned Bounding Box """
    pnts   = np.array( ndPoints )
    Ndim   = len( pnts[0] )
    rtnLim = np.ones( (2, Ndim,) )
    rtnLim[0,:] *=  1e6
    rtnLim[1,:] *= -1e6
    for i, pnt in enumerate( pnts ):
        for j in range( Ndim ):
            if pnt[j] < rtnLim[0,j]:
                rtnLim[0,j] = pnt[j]
            if pnt[j] > rtnLim[1,j]:
                rtnLim[1,j] = pnt[j]
    return rtnLim
    

def orthogonal_convex_hull_nd( ndPoints ):
    """ Construct the orthogonal convex hull """
    pass

# Advanced Queries

In [49]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from scipy.spatial import ConvexHull

_DBS_EPSILON  = 5.0
_DBS_MIN_MMBR = 5

def fetch_entire_playlist_with_audio_features( playlist_ID ):
    """ Get maximum infodump on all plalist tracks """
    plTracks = []
    trCount  = 0
    
    response = spot.user_playlist_tracks(
        CLIENT_ID, 
        playlist_ID, 
        fields = 'items,uri,name,id,total', 
        limit  = _RESPONSE_LIMIT
    )
    resTracks = response['items']
    Ntracks   = response['total']

    resIDs = [item['track']['id'] for item in resTracks]

    resFeatrs = spot.audio_features( resIDs )
    for i, track_i in enumerate( resTracks ):
        track_i.update( resFeatrs[i] )
    
    
    while 1:
        trCount += len( resTracks )
        plTracks.extend( resTracks )
        
        if trCount >= Ntracks:
            break
    
        response = spot.user_playlist_tracks(
            CLIENT_ID, 
            playlist_ID, 
            fields = 'items,uri,name,id,total', 
            limit  = _RESPONSE_LIMIT
        )
        resTracks = response['items']
    
        resIDs = [item['track']['id'] for item in resTracks]
    
        resFeatrs = spot.audio_features( resIDs )
        for i, track_i in enumerate( resTracks ):
            track_i.update( resFeatrs[i] )
    return plTracks


def get_track_vector( track ):
    """ Express the track characteristics as a vector """
    return np.array([
        track['acousticness'],
        track['danceability'],
        track['duration_ms'] / 1000.0 / _MIN_LEN_S,
        track['energy'],
        track['instrumentalness'],
        track['liveness'],
        track['loudness'],
        track['speechiness'],
        track['tempo'],
        track['valence'],
    ])


def get_tracks_as_vectors( tracks ):
    """ Convert all tracks to vectors """
    Mrows  = len( tracks )
    Ncols  = len( get_track_vector( tracks[0] ) )
    rtnMtx = np.zeros( (Mrows, Ncols,) ) 
    for i, trk in enumerate( tracks ):
        rtnMtx[i,:] = get_track_vector( trk )
    return rtnMtx

# FIXME: GENRE DISTANCE FUNCTION --> THIS IS USED TO GENERATE TOKEN WEIGHTS WHILE NAMING THE GENRE

# FIXME: NAME GENRES BASED ON ITS MEMBERSHIP AND QUALITIES, NOTE: GENRES NAMED RELATIVE TO THE ENTIRE POPULATION AND OTHER GENRES
#        https://stackoverflow.com/a/3789057 --> EXTRACT CANDIDATE ENGLISH WORDS AT A LOWER WEIGHT


def vec_unit( vec ):
    """ Get the unit vector in the direction of 'vec' """
    mag = np.linalg.norm( vec )
    if mag > 0.0:
        return np.divide( vec, mag )
    else:
        return vec


def distance_to_plane_list( qPnt, planeList ):
    """ Return the closest distance `qPnt` can be found to any plane in `planeList` = [ ..., [point, normal], ... ] """
    distMin = 1e9
    pNegatv = False
    for (pnt_i, nrm_i) in planeList:
        dif_i = np.subtract( qPnt, pnt_i )
        dst_i = np.dot( dif_i, -vec_unit( nrm_i ) )
        if pNegatv:
            if (distMin < dst_i < 0.0):
                distMin = dst_i
        elif (dst_i < 0.0):
            pNegatv = True
            distMin = dst_i
        elif (0.0 <= dst_i < distMin):
            distMin = dst_i
    return distMin
    

def generate_genres_from_track_list( tracks ):
    """ Use DBSCAN to generate clusters based on track vectors """
    # NOTE: This function assumes that `tracks` was built using `fetch_entire_playlist_with_audio_features`

    print( f"\n########## Extract genre info from {len(tracks)} tracks! ##########\n" )
    
    trkVecs = get_tracks_as_vectors( tracks )
    clustrs = DBSCAN( eps = _DBS_EPSILON, min_samples = _DBS_MIN_MMBR ).fit( trkVecs )
    genres  = dict()
    for i, trk_i in enumerate( tracks ):
        lbl_i = clustrs.labels_[i]
        if (lbl_i not in genres):
            genres[ lbl_i ] = {
                'name'   : None,
                'tracks' : [trk_i,],
                'len'    : 1,
                'vectors': None,
            }
        else:
            genres[ lbl_i ]['tracks'].append( trk_i )
            genres[ lbl_i ]['len'   ] += 1

    # Erase outliers
    if -1 in genres:
        del genres[-1]
    print( f"Identified {len(genres)} genres in this collection of {len(tracks)} tracks!" )

    for gnre in genres.values():
        gnre['vectors'] = get_tracks_as_vectors( gnre['tracks'] )
        cntr = np.mean( gnre['vectors'], axis = 0 )
        dim  = len( cntr )
        for i in range( gnre['len'] ):
            pnt_i   = gnre['vectors'][i,:]
            dist_i  = np.linalg.norm( np.subtract( cntr, pnt_i ) )
            alpha_i = np.exp( -dist_i )
            cntr    = cntr * (1.0 - alpha_i) + pnt_i * alpha_i
        gnre['center'] = cntr # 2024-08-16: This is probably guaranteed to be inside the convex hull

        # hull   = ConvexHull( gnre['vectors'], qhull_options = 'QJ QbB QR0' )
        # hull   = ConvexHull( gnre['vectors'], qhull_options = 'QJ' ) # Found 1319766 facets!
        # hullPl = list()

        # https://pypi.org/project/pyhull/1.0/

        # # Store hull as a list of <POINT,NORMAL> pairs
        # # 2024-08-17: This will actually accept/reject points outside/inside (but near to) the hull, not the true normal
        # for simplex in hull.simplices:
        #     points = np.zeros( (len( simplex ), dim,) )
        #     for j, k in enumerate( simplex ):
        #         points[j,:] = gnre['vectors'][k,:]
        #     simCntr = np.mean( points )

        #     simRad = 0.0
        #     for j, pnt_j in enumerate( points ):
        #         diff_j = np.linalg.norm( np.subtract( pnt_j, simCntr ) )
        #         simRad = max( simRad, diff_j )

        #     if (simRad >= (_DBS_EPSILON*4.0)):
        #         simNorm = np.subtract( gnre['center'], simCntr )
        #         simNorm = simNorm / np.linalg.norm( simNorm )
        #         hullPl.append( [simCntr, simNorm,] )

        # print( f"Found {len(hullPl)} facets!" )
        
        # gnre['hull'] = hullPl

        # for q in gnre['vectors']:
        #     print( distance_to_plane_list( q, hullPl ) )
        # break
    
    # FIXME: TEST THAT THE HULLS ARE SEPARATE
        
    
        # pprint( dir( hull ) )
        # break
    
    
    # FIXME: GENRE NAMES && PRINT NAMES FOR INSPECTION
    # Q: WHAT IF SIMILAR NAMES ARE GENERATED?
    print( f"\n########## Genre extraction COMPLETE! ##########\n" )


# FIXME: GENRE MEMBERSHIP FUNCTION

# FIXME: STORE GENRE
# FIXME: SCORE SEARCH 01 RESULTS BY GENRE MEMBERSHIP
# FIXME: SCAN NEW RELEASES BY GENRE MEMBERSHIP
#        https://spotipy.readthedocs.io/en/2.24.0/#spotipy.client.Spotify.new_releases
# FIXME: EVALUATE ADDING AUDIO ANALYSIS FEATURES

In [50]:
tracks = fetch_entire_playlist_with_audio_features( backfill )
generate_genres_from_track_list( tracks )

# pprint( tracks[0] )

# print( get_track_vector( tracks[0] ) )


########## Extract genre info from 400 tracks! ##########

Identified 14 genres in this collection of 400 tracks!
Found 1319766 facets!
-110.90248279727577
-98.17366254506557
-102.77236290030432
-95.19594540146909
-113.59438175596989
-96.80667552581362
-95.91716913788649
-99.60781489484995
-113.81892300379027
-95.48261547419906
-116.87428584936359
-97.22562538514443
-109.22719456361872
-102.46697434459233
-97.15144491511991
-107.0484909975024
-101.71246958082821
-110.78821176172255
-99.4256751763439
-96.09021630467286


KeyboardInterrupt: 

In [12]:
# trkData = get_tracks_as_vectors( tracks )

In [18]:
# pca = PCA( n_components = 10 )

# pca.fit( trkData )
# print( pca.explained_variance_ )
# for comp in pca.components_:
#     comp_i = np.abs( comp )
#     print( np.argmax( comp_i ) )
# print( pca.get_params()          )

[1.08504111e+03 5.29918901e+01 4.74948162e-01 8.01205787e-02
 3.05144538e-02 1.90045728e-02 1.26272445e-02 9.86837229e-03
 8.42337741e-03 3.91141848e-04]
8
6
2
0
1
4
3
9
5
7


In [33]:

# print( len(  ) )
# print( np.max( clustering.labels_ ) )

400
15
