# Generate recommendations for your Spotify Playlists

### Connecting to your Spotify account
* Follow instruction [here](https://towardsdatascience.com/extracting-song-data-from-the-spotify-api-using-python-b1e79388d50) for getting your own Spotify Developer API credentials
* To connect to your Spotify account, see [Client Credentials Flow](https://spotipy.readthedocs.io/en/2.19.0/#client-credentials-flow) for authentication

**Note:** when calling the `spotipy` API, if you receive the error message below, re-running the cell should fix

`ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))`

### Spotipy ref
* [spotipy docs](https://spotipy.readthedocs.io/en/2.19.0/#welcome-to-spotipy), 
* [github examples](https://github.com/plamere/spotipy/tree/master/examples),  
* [source code](https://github.com/plamere/spotipy/blob/master/spotipy/client.py#L20)

### REQUIRED:

* In your repo, create `spotipy_secret_creds.py`,  
* assign file to `.gitignore`
* define the variables below,

```
SPOTIPY_CLIENT_ID='YOUR_CLIENT_ID'
SPOTIPY_CLIENT_SECRET='YOUR_CLIENT_SECRET'
SPOTIFY_USERNAME='YOUR_USERNAME'
```

## TODOs
--

In [None]:
# !pip install spotipy --user

In [1]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


### pip & package

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import re
from tqdm import tqdm

import pandas as pd
import json
from io import BytesIO
from pprint import pprint
import os

from google.cloud import storage

pd.set_option('display.max_columns', 100)

### Setup Clients

In [3]:
import spotipy_secret_creds as creds

os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
os.environ['SPOTIPY_CLIENT_ID'] = creds.SPOTIPY_CLIENT_ID
os.environ['SPOTIPY_CLIENT_SECRET'] = creds.SPOTIPY_CLIENT_SECRET
os.environ['SPOTIFY_USERNAME'] = creds.SPOTIFY_USERNAME

SPOTIPY_CLIENT_ID=creds.SPOTIPY_CLIENT_ID
SPOTIPY_CLIENT_SECRET=creds.SPOTIPY_CLIENT_SECRET
SPOTIFY_USERNAME=creds.SPOTIFY_USERNAME

MAX_SEED_LENGTH = 5

In [4]:
# Authenticate
client_credentials_manager = SpotifyClientCredentials(
    client_id=creds.SPOTIPY_CLIENT_ID, 
    client_secret=creds.SPOTIPY_CLIENT_SECRET
)

sp = spotipy.Spotify(
    client_credentials_manager = client_credentials_manager,

)

### helper functions

TODO - put in utils file?

In [5]:
def get_track_features(
    track_uri, 
    count, 
    playlist_uri, 
    n_songs_pl, 
    num_artists_pl, 
    num_albums_pl
):
    
    # Authenticate
    client_credentials_manager = SpotifyClientCredentials(
        client_id=creds.SPOTIPY_CLIENT_ID, 
        client_secret=creds.SPOTIPY_CLIENT_SECRET
    )

    sp = spotipy.Spotify(
        client_credentials_manager = client_credentials_manager,

    )
    
    feature_dict = {}
    
    feature_dict['n_songs_pl'] = n_songs_pl
    feature_dict['num_artists_pl'] = num_artists_pl
    feature_dict['num_albums_pl'] = num_albums_pl
    
    playlist_features = sp.playlist(playlist_uri)
    feature_dict['pl_name'] = playlist_features['name']
    feature_dict['description_pl'] = playlist_features['description']
    feature_dict['collaborative'] = str(playlist_features['collaborative'])
    
    track_meta = sp.track(track_uri)
    # capture track metadata
    feature_dict['track_pos'] = count
    feature_dict['track_uri'] = track_uri
    feature_dict['track_name'] = track_meta['name']
    feature_dict['duration_ms'] = float(track_meta['duration_ms'])
    feature_dict['track_pop'] = float(track_meta['popularity'])
    feature_dict['album_name'] = track_meta['album']['name']
    feature_dict['album_uri'] = track_meta['album']['uri']
    feature_dict['album_release_date'] = track_meta['album']['release_date']
    feature_dict['artist_name'] = track_meta['album']['artists'][0]['name']
    feature_dict['artist_uri'] = track_meta['album']['artists'][0]['uri']

    artist_meta = sp.artist(feature_dict['artist_uri'])
    # capture artist metadata
    feature_dict['artist_followers'] = float(artist_meta['followers']['total'])
    feature_dict['artist_popularity'] = float(artist_meta['popularity'])
    # artist_genres = artist_meta['genres']
    
    if artist_meta['genres']:
        feature_dict['artist_genres'] = " ".join([re.sub(' ','_',i) for i in artist_meta['genres']])
    else:
        feature_dict['artist_genres'] = "unknown"
    
    track_features = sp.audio_features(track_uri)[0]
    # capture track audio features
    feature_dict['duration'] = float(track_features['duration_ms'])
    feature_dict['acousticness'] = track_features['acousticness']
    feature_dict['danceability'] = track_features['danceability']
    feature_dict['energy'] = track_features['energy']
    feature_dict['instrumentalness'] = track_features['instrumentalness']
    feature_dict['key'] = track_features['key']
    feature_dict['liveness'] = track_features['liveness']
    feature_dict['loudness']= track_features['loudness']
    feature_dict['mode'] = track_features['mode']
    feature_dict['speechiness'] = track_features['speechiness']
    feature_dict['tempo'] = track_features['tempo']
    feature_dict['time_signature'] = track_features['time_signature']
    feature_dict['valence'] = track_features['valence']
    
    # TODO: print artist names for comparison later
    
    return feature_dict

def get_playlist_queries(playlist_uri):
    
    track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_uri)["items"]]
    album_uris = [x["track"]['album']['uri'] for x in sp.playlist_tracks(playlist_uri)["items"]]
    artist_uris = [x["track"]['artists'][0]['uri'] for x in sp.playlist_tracks(playlist_uri)["items"]]
    
    n_songs_pl = len(track_uris)
    num_albums_pl = len(set(album_uris))
    num_artists_pl = len(set(artist_uris))
    
    # n_songs_pl = len(track_uris)
    playlist_featutre_list = []
    
    for count, track_uri in enumerate(track_uris):
        # results = get_track_features(track_uri, count, playlist_uri, n_songs_pl)
        results = get_track_features(track_uri, count, playlist_uri, n_songs_pl, num_artists_pl, num_albums_pl)
        playlist_featutre_list.append(results)
    
    return playlist_featutre_list

# Getting your Spotify playlists

Keep in mind:
* it's possible your playlists have tracks that are not present in the Million Playlists Dataset
* That's OK - we want the model to generalize to unseen data!
* Let's see what the model associates them with...

Note: to retrieve your playlists, make sure they are *added to your profile*

### get user playlists

Option 1: get playlists via `spotipy` API...

In [6]:
play_lists = []
playlists = sp.user_playlists(user=f'{SPOTIFY_USERNAME}', limit=10)

for pl in playlists['items']:
    uri = pl['uri']
    play_lists.append(uri)
    print(f"uri: {uri},  playlist name: {pl['name']}")

uri: spotify:playlist:5fV3fQ2sXEE8O1dbhmeIdo,  playlist name: sleeping with the phish
uri: spotify:playlist:3HeHZi8VGEm6ZNHZ2FVRr6,  playlist name: biebs weeknd
uri: spotify:playlist:3GX5FLE0IxHNZtLye0ETgb,  playlist name: Muscle Shoals
uri: spotify:playlist:0XPJ39OCBhOw5OZa7udYYP,  playlist name: Disco
uri: spotify:playlist:6imD2IJOyw3MEKdZ4XZqZ4,  playlist name: space is the place
uri: spotify:playlist:1E1EwxJyzjt6SYyfnp9mE8,  playlist name: all panic
uri: spotify:playlist:1pGfqRD9CzyO9lOn9Fp09V,  playlist name: live panic - small


Option 2: using the link provided when "sharing" a playlist...

In [7]:
# link from "share" feature
playlist_link = 'https://open.spotify.com/playlist/3GX5FLE0IxHNZtLye0ETgb?si=f99fa67315f14bbe'

# get the uri part
playlist_URI = playlist_link.split("/")[-1].split("?")[0]
print(f"playlist_URI: {playlist_URI}")

# get the tracks from that playlist
track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_URI)["items"]]
print(f"Track in playlist: {track_uris[0]}")

playlist_URI: 3GX5FLE0IxHNZtLye0ETgb
Track in playlist: spotify:track:7hqesNgWCx8NZTHl4MXkPF


### Loop over multiple playlists

In [8]:
# from sp_utils import get_playlist_queries

featureLIST = [] 

for uri in play_lists:
    results = get_playlist_queries(uri)
    featureLIST.append(results)

len(featureLIST)

7

In [11]:
# first playlist
featureLIST[0]

In [12]:
# first track of first playlist
featureLIST[0][0]

{'n_songs_pl': 17,
 'num_artists_pl': 1,
 'num_albums_pl': 6,
 'pl_name': 'sleeping with the phish',
 'description_pl': 'all phish',
 'collaborative': 'False',
 'track_pos': 0,
 'track_uri': 'spotify:track:6jcO51f5vHa9cJfCnfX1Rf',
 'track_name': 'Soul Shakedown Party',
 'duration_ms': 289973.0,
 'track_pop': 19.0,
 'album_name': 'Amsterdam',
 'album_uri': 'spotify:album:709gu2Yj2tfqmNMIEDfOPg',
 'album_release_date': '1997',
 'artist_name': 'Phish',
 'artist_uri': 'spotify:artist:5wbIWUzTPuTxTyG6ouQKqz',
 'artist_followers': 474106.0,
 'artist_popularity': 57.0,
 'artist_genres': 'blues_rock jam_band',
 'duration': 289973.0,
 'acousticness': 0.261,
 'danceability': 0.65,
 'energy': 0.756,
 'instrumentalness': 2e-05,
 'key': 5,
 'liveness': 0.466,
 'loudness': -7.531,
 'mode': 1,
 'speechiness': 0.0829,
 'tempo': 135.869,
 'time_signature': 4,
 'valence': 0.79}

### create dataframe of all playlists, tracks, and metadata defined in `get_playlist_queries()`

Inspect last `N` songs of playlist...

In [13]:
from itertools import chain

test_df = pd.DataFrame(list(chain.from_iterable(featureLIST)))

print(test_df.shape)
test_df.head()

(138, 32)


Unnamed: 0,n_songs_pl,num_artists_pl,num_albums_pl,pl_name,description_pl,collaborative,track_pos,track_uri,track_name,duration_ms,track_pop,album_name,album_uri,album_release_date,artist_name,artist_uri,artist_followers,artist_popularity,artist_genres,duration,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,17,1,6,sleeping with the phish,all phish,False,0,spotify:track:6jcO51f5vHa9cJfCnfX1Rf,Soul Shakedown Party,289973.0,19.0,Amsterdam,spotify:album:709gu2Yj2tfqmNMIEDfOPg,1997,Phish,spotify:artist:5wbIWUzTPuTxTyG6ouQKqz,474106.0,57.0,blues_rock jam_band,289973.0,0.261,0.65,0.756,2e-05,5,0.466,-7.531,1,0.0829,135.869,4,0.79
1,17,1,6,sleeping with the phish,all phish,False,1,spotify:track:2jT3iJ9w0WD51E2VRDPzOD,Divided Sky,812453.0,17.0,Amsterdam,spotify:album:709gu2Yj2tfqmNMIEDfOPg,1997,Phish,spotify:artist:5wbIWUzTPuTxTyG6ouQKqz,474106.0,57.0,blues_rock jam_band,812453.0,0.259,0.24,0.844,0.0582,2,0.956,-6.961,1,0.0505,94.237,4,0.343
2,17,1,6,sleeping with the phish,all phish,False,2,spotify:track:1nZBf3KEHNO1NwVMnCPRWd,Bathtub Gin,783173.0,14.0,Amsterdam,spotify:album:709gu2Yj2tfqmNMIEDfOPg,1997,Phish,spotify:artist:5wbIWUzTPuTxTyG6ouQKqz,474106.0,57.0,blues_rock jam_band,783173.0,0.21,0.434,0.834,0.0524,0,0.369,-5.999,1,0.0423,118.021,4,0.822
3,17,1,6,sleeping with the phish,all phish,False,3,spotify:track:3GgcI3Efpom6Se3xRho66s,Down With Disease,1098160.0,16.0,Amsterdam,spotify:album:709gu2Yj2tfqmNMIEDfOPg,1997,Phish,spotify:artist:5wbIWUzTPuTxTyG6ouQKqz,474106.0,57.0,blues_rock jam_band,1098160.0,0.32,0.496,0.868,0.109,2,0.355,-6.165,1,0.0515,142.017,4,0.66
4,17,1,6,sleeping with the phish,all phish,False,4,spotify:track:5s5DIVU9rB2npB9cV5JThl,Limb By Limb,763373.0,11.0,Amsterdam,spotify:album:709gu2Yj2tfqmNMIEDfOPg,1997,Phish,spotify:artist:5wbIWUzTPuTxTyG6ouQKqz,474106.0,57.0,blues_rock jam_band,763373.0,0.338,0.383,0.761,0.141,10,0.431,-7.391,1,0.039,105.575,3,0.412


### albums, artists, and tracks per playlist

In [14]:
unique_albums = test_df.groupby('pl_name')['album_uri'].nunique()
unique_artists = test_df.groupby('pl_name')['artist_uri'].nunique()
n_songs_pl = test_df.groupby('pl_name')['track_uri'].count()

print(f"unique_albums {unique_albums}\n")
print(f"unique_artists {unique_artists}\n")
print(f"n_songs_pl {n_songs_pl}")

unique_albums pl_name
Disco                      17
Muscle Shoals              11
all panic                   8
biebs weeknd               14
live panic - small          2
sleeping with the phish     6
space is the place         12
Name: album_uri, dtype: int64

unique_artists pl_name
Disco                      11
Muscle Shoals               9
all panic                   1
biebs weeknd               11
live panic - small          1
sleeping with the phish     1
space is the place          6
Name: artist_uri, dtype: int64

n_songs_pl pl_name
Disco                      17
Muscle Shoals              16
all panic                  40
biebs weeknd               16
live panic - small         16
sleeping with the phish    17
space is the place         16
Name: track_uri, dtype: int64


In [15]:
def get_test_instance(list_dict_test):
    '''
    create single test instances given a 
    list of dictionaries representing playlist tracks
    '''
    
    # model serving signature with candidate tower fields
    TEST_PL_QUERY = {
        # 'album_name_can': '',
        'album_name_pl': [ ], 
        # 'album_uri_can': '',
        # 'artist_followers_can': 0, 
        # 'artist_genres_can': "", 
        'artist_genres_pl': [ ], 
        # 'artist_name_can': '', 
        'artist_name_pl': [ ], 
        'artist_pop_can': 0, 
        # 'artist_pop_pl': [], 
        # 'artist_uri_can': '', 
        # 'artists_followers_pl': [ ], 
        'collaborative': '', 
        'description_pl': '', 
        'duration_ms_seed_pl': 0, 
        'duration_ms_songs_pl': [ ], 
        'n_songs_pl': 0, 
        'name': '', 
        'num_albums_pl': 0, 
        'num_artists_pl': 0, 
        # 'track_name_can': '', 
        'track_name_pl': [ ], 
        # 'track_pop_can': 0, 
        'track_pop_pl': [ ], 
        # 'track_uri_can': '', 
        'track_uri_pl': [ ],
        'pid': 1,
    }

    counter = 0
    for track in list_dict_test:
        if counter == 0:
            TEST_PL_QUERY['name'] = track['pl_name']
            TEST_PL_QUERY['n_songs_pl'] = track['n_songs_pl'] 
            TEST_PL_QUERY['num_albums_pl'] = track['num_albums_pl']
            TEST_PL_QUERY['num_artists_pl'] = track['num_artists_pl']
            TEST_PL_QUERY['description_pl'] = track['description_pl']
            TEST_PL_QUERY['collaborative'] = str(track['collaborative'])
        else:
            # do these
            TEST_PL_QUERY['track_pop_pl'].append(track['track_pop'])
            TEST_PL_QUERY['track_uri_pl'].append(track['track_uri'])
            TEST_PL_QUERY['track_name_pl'].append(track['track_name'])
            TEST_PL_QUERY['album_name_pl'].append(track['album_name'])
            TEST_PL_QUERY['artist_name_pl'].append(track['artist_name'])
            TEST_PL_QUERY['artist_genres_pl'].append(track['artist_genres'])
            # TEST_PL_QUERY['artist_pop_pl'].append(track['artist_popularity'])
            TEST_PL_QUERY['duration_ms_songs_pl'].append(track['duration_ms'])
            # TEST_PL_QUERY['artists_followers_pl'].append(track['artist_followers'])
            

        counter=+1
        
    return TEST_PL_QUERY

In [16]:
# list of tracks
sample_tracks = featureLIST[0][-MAX_SEED_LENGTH-1:]

# get metadata for each track
TEST_QUERY = get_test_instance(sample_tracks)
pprint(TEST_QUERY)

{'album_name_pl': ['Amsterdam',
                   "Live at Madison Square Garden New Year's Eve 1995",
                   "Live at Madison Square Garden New Year's Eve 1995",
                   'LivePhish, Vol. 17 7/15/98 (Portland Meadows, Portland, '
                   'OR)',
                   'LivePhish 04/05/98'],
 'artist_genres_pl': ['blues_rock jam_band',
                      'blues_rock jam_band',
                      'blues_rock jam_band',
                      'blues_rock jam_band',
                      'blues_rock jam_band'],
 'artist_name_pl': ['Phish', 'Phish', 'Phish', 'Phish', 'Phish'],
 'artist_pop_can': 0,
 'collaborative': 'False',
 'description_pl': 'all phish',
 'duration_ms_seed_pl': 0,
 'duration_ms_songs_pl': [765826.0, 1231840.0, 1062986.0, 633640.0, 777413.0],
 'n_songs_pl': 17,
 'name': 'sleeping with the phish',
 'num_albums_pl': 6,
 'num_artists_pl': 1,
 'pid': 1,
 'track_name_pl': ['Free',
                   "Mike's Song - Live at Madison Square Garden

# Query Matching Engine

### TODO:
* parametrize this section
* structure section and notebook for readers

In [17]:
from google.cloud import aiplatform as vertex_ai

# Vertex SDK 
vertex_ai.init(project=PROJECT_ID, location=LOCATION)

import time

### Index Endpoint

> grab `IndexEndpoint ID` from deployed MAtching Engine index in [04-train-deploy-pipeline.ipynb](https://github.com/tottenjordan/merlin-on-vertex/blob/main/04-train-deploy-pipeline.ipynb)

In [18]:
INDEX_ENDPOINT_URI = "projects/934903580331/locations/us-central1/indexEndpoints/7816955927440916480"

In [19]:
ME_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint(INDEX_ENDPOINT_URI)
ME_index_endpoint

<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7f5fb1561850> 
resource name: projects/934903580331/locations/us-central1/indexEndpoints/7816955927440916480

In [20]:
DEPLOYED_INDEX_ID = ME_index_endpoint.deployed_indexes[0].id
print(f"DEPLOYED_INDEX_ID: {DEPLOYED_INDEX_ID}")

ME_index_endpoint.deployed_indexes

DEPLOYED_INDEX_ID: deployed_ann_v10


[id: "deployed_ann_v10"
index: "projects/934903580331/locations/us-central1/indexes/8764963649002405888"
create_time {
  seconds: 1678312033
  nanos: 13037000
}
private_endpoints {
  match_grpc_address: "10.41.2.5"
}
index_sync_time {
  seconds: 1678364173
  nanos: 842197000
}
automatic_resources {
  min_replica_count: 2
  max_replica_count: 2
}
deployment_group: "default"
]

### Model Endpoint

> grab `Endpoint ID` from deployed query tower in [04-train-deploy-pipeline.ipynb](https://github.com/tottenjordan/merlin-on-vertex/blob/main/04-train-deploy-pipeline.ipynb)

In [21]:
ENDPOINT_URI = 'projects/934903580331/locations/us-central1/endpoints/7135685328731373568'

In [22]:
model_endpoint = vertex_ai.Endpoint(ENDPOINT_URI)
model_endpoint

<google.cloud.aiplatform.models.Endpoint object at 0x7f5fb1572850> 
resource name: projects/934903580331/locations/us-central1/endpoints/7135685328731373568

In [23]:
print(model_endpoint.gca_resource.deployed_models[0])

id: "7189007244631998464"
model: "projects/934903580331/locations/us-central1/models/1392933897834070016"
display_name: "merlin-query-tower-v10"
create_time {
  seconds: 1678298600
  nanos: 984234000
}
dedicated_resources {
  machine_spec {
    machine_type: "n1-standard-4"
    accelerator_type: NVIDIA_TESLA_T4
    accelerator_count: 1
  }
  min_replica_count: 1
  max_replica_count: 1
}
service_account: "934903580331-compute@developer.gserviceaccount.com"
model_version_id: "1"



### Retrieve nearest neighbors in deployed index

**TODO** add Feature Store to this step

In [24]:
def candidate_retrieval(query_instance, deployed_index_id, num_neighbs=10):
    '''
    TODO: some args
    '''
    # here
    start = time.process_time()
    playlist_emb = model_endpoint.predict([query_instance])
    print(f"Generate embeddings in {round((time.process_time() - start),2)} seconds\n")
    
    
    start = time.process_time()
    candidate_tracks = ME_index_endpoint.match(
        deployed_index_id=f'{deployed_index_id}',
        queries=playlist_emb.predictions,
        num_neighbors=10
    )
    
    print(f"Retrieved nearest neighbors in {round((time.process_time() - start),2)} seconds\n")
    playlist_name = query_instance['name']
    playlist_description = query_instance['description_pl']
    
    return candidate_tracks, playlist_description, playlist_name

def interpret_results(candidate_tracks, playlist_description, playlist_name):
    '''
    TODO:
    '''
    # here
    results = []
    
    print(f"playlist: {playlist_name}")
    print(f"description: {playlist_description}\n")
    
    print(f"Retrieved Candidates:\n")
    for i, neighbors in enumerate(candidate_tracks[0]):
        
        track_dict = {}
        
        track_index_id = str(neighbors.id)
        track_meta = sp.track(track_index_id)
        
        track_name = track_meta['name']
        artist_name = track_meta['artists'][0]['name']
        art_uri = track_meta['artists'][0]['uri']
        art_genres = sp.artist(art_uri)['genres']
        
        track_dict['track_name'] = track_name
        track_dict['artist_name'] = artist_name
        track_dict['track_preview_url'] = track_meta['preview_url']
        track_dict['track_spotify_url'] = track_meta['external_urls']['spotify']
        track_dict['track_genres'] = art_genres
        
        track_dict['track_uri'] = track_index_id
        track_dict['neighbor_distance'] = neighbors.distance

        results.append(track_dict)
        
        print(f"{i+1}) {track_name} by {artist_name}; {art_genres}")

    return results


In [25]:
for playlist in featureLIST:
    
    seed_tracks = playlist[-MAX_SEED_LENGTH:]
    
    query = get_test_instance(seed_tracks)
    
    candidates, pl_description, name = candidate_retrieval(
        query_instance=query, 
        deployed_index_id=DEPLOYED_INDEX_ID
    )
    
    results = interpret_results(candidates, pl_description, name)
    results
    print("--------")

Generate embeddings in 0.03 seconds

Retrieved nearest neighbors in 0.0 seconds

playlist: sleeping with the phish
description: all phish

Retrieved Candidates:

1) The Hound of the Baskervilles - Sir Arthur Conan Doyle by DBS Audiobooks; []
2) Pride and Prejudice - Jane Austen Part 1 of 2 by DBS Audiobooks; []
3) Sense and Sensibility - Jane Austen Part 1 of 2 by DBS Audiobooks; []
4) Sense and Sensibility - Jane Austen Part 2 of 2 by DBS Audiobooks; []
5) The Phantom of the Opera - Gaston Leroux Part 1 of 2 by DBS Audiobooks; []
6) Persuasion - Jane Austen Part 1 of 2 by DBS Audiobooks; []
7) The Lost World - Sir Arthur Conan Doyle Part 1 of 2 by DBS Audiobooks; []
8) The Phantom of the Opera - Gaston Leroux Part 2 of 2 by DBS Audiobooks; []
9) The Land that Time Forgot - Edgar Rice Burroughs by DBS Audiobooks; []
10) Persuasion - Jane Austen Part 2 of 2 by DBS Audiobooks; []
--------
Generate embeddings in 0.01 seconds

Retrieved nearest neighbors in 0.0 seconds

playlist: biebs wee

In [29]:
len(featureLIST[0][0])

32

In [30]:
len(featureLIST)

7

In [41]:
query = get_test_instance(featureLIST[3][-5:])

candidates, pl_description, name = candidate_retrieval(
    query_instance=query, 
    deployed_index_id=DEPLOYED_INDEX_ID
)

interpret_results

Generate embeddings in 0.01 seconds

Retrieved nearest neighbors in 0.0 seconds



In [38]:
featureLIST[3][-5:]

[{'n_songs_pl': 17,
  'num_artists_pl': 10,
  'num_albums_pl': 17,
  'pl_name': 'Disco',
  'description_pl': 'dance music from the 1970s United States&#x27; urban nightlife scene. Its sound is typified by four-on-the-floor beats, syncopated basslines, string sections, horns, electric piano, synthesizers, and electric rhythm guitars',
  'collaborative': 'False',
  'track_pos': 12,
  'track_uri': 'spotify:track:5Vrczz39CvlD3OGCa6utoA',
  'track_name': 'Grease - 2007 Remaster',
  'duration_ms': 207306.0,
  'track_pop': 62.0,
  'album_name': 'Frankie Valli...Is The Word',
  'album_uri': 'spotify:album:0o2oPAxKGui4tvrrNgDtkc',
  'album_release_date': '1978',
  'artist_name': 'Frankie Valli',
  'artist_uri': 'spotify:artist:3CDKmzJu6uwEGnPLLZffpD',
  'artist_followers': 233765.0,
  'artist_popularity': 64.0,
  'artist_genres': 'adult_standards bubblegum_pop disco mellow_gold soft_rock',
  'duration': 207307.0,
  'acousticness': 0.156,
  'danceability': 0.82,
  'energy': 0.464,
  'instrumenta