# Generate recommendations for your Spotify Playlists

### Connecting to your Spotify account
* Follow instruction [here](https://towardsdatascience.com/extracting-song-data-from-the-spotify-api-using-python-b1e79388d50) for getting your own Spotify Developer API credentials
* To connect to your Spotify account, see [Client Credentials Flow](https://spotipy.readthedocs.io/en/2.19.0/#client-credentials-flow) for authentication

**Note:** when calling the `spotipy` API, if you receive the error message below, re-running the cell should fix

`ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))`

### Spotipy ref
* [spotipy docs](https://spotipy.readthedocs.io/en/2.19.0/#welcome-to-spotipy), 
* [github examples](https://github.com/plamere/spotipy/tree/master/examples),  
* [source code](https://github.com/plamere/spotipy/blob/master/spotipy/client.py#L20)

### REQUIRED:

* In your repo, create `spotipy_secret_creds.py`,  
* assign file to `.gitignore`
* define the variables below,

```
SPOTIPY_CLIENT_ID='YOUR_CLIENT_ID'
SPOTIPY_CLIENT_SECRET='YOUR_CLIENT_SECRET'
SPOTIFY_USERNAME='YOUR_USERNAME'
```

## TODOs
--

In [None]:
# !pip install spotipy --user

In [1]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


### pip & package

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import re
from tqdm import tqdm

import pandas as pd
import json
from io import BytesIO
from pprint import pprint
import os

from google.cloud import storage

pd.set_option('display.max_columns', 100)

### Setup Clients

In [3]:
import spotipy_secret_creds as creds

os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
os.environ['SPOTIPY_CLIENT_ID'] = creds.SPOTIPY_CLIENT_ID
os.environ['SPOTIPY_CLIENT_SECRET'] = creds.SPOTIPY_CLIENT_SECRET
os.environ['SPOTIFY_USERNAME'] = creds.SPOTIFY_USERNAME

SPOTIPY_CLIENT_ID=creds.SPOTIPY_CLIENT_ID
SPOTIPY_CLIENT_SECRET=creds.SPOTIPY_CLIENT_SECRET
SPOTIFY_USERNAME=creds.SPOTIFY_USERNAME

MAX_SEED_LENGTH = 5

In [4]:
# Authenticate
client_credentials_manager = SpotifyClientCredentials(
    client_id=creds.SPOTIPY_CLIENT_ID, 
    client_secret=creds.SPOTIPY_CLIENT_SECRET
)

sp = spotipy.Spotify(
    client_credentials_manager = client_credentials_manager,

)

### helper functions

TODO - put in utils file?

In [111]:
def get_track_features(
    track_uri, 
    count, 
    playlist_uri, 
    n_songs_pl, 
    num_artists_pl, 
    num_albums_pl
):
    
    # Authenticate
    client_credentials_manager = SpotifyClientCredentials(
        client_id=creds.SPOTIPY_CLIENT_ID, 
        client_secret=creds.SPOTIPY_CLIENT_SECRET
    )

    sp = spotipy.Spotify(
        client_credentials_manager = client_credentials_manager,

    )
    
    feature_dict = {}
    
    feature_dict['n_songs_pl'] = n_songs_pl
    feature_dict['num_artists_pl'] = num_artists_pl
    feature_dict['num_albums_pl'] = num_albums_pl
    
    playlist_features = sp.playlist(playlist_uri)
    feature_dict['pl_name'] = playlist_features['name']
    feature_dict['description_pl'] = playlist_features['description']
    feature_dict['collaborative'] = str(playlist_features['collaborative'])
    
    track_meta = sp.track(track_uri)
    # capture track metadata
    feature_dict['track_pos'] = count
    feature_dict['track_uri'] = track_uri
    feature_dict['track_name'] = track_meta['name']
    feature_dict['duration_ms'] = float(track_meta['duration_ms'])
    feature_dict['track_pop'] = float(track_meta['popularity'])
    feature_dict['album_name'] = track_meta['album']['name']
    feature_dict['album_uri'] = track_meta['album']['uri']
    feature_dict['album_release_date'] = track_meta['album']['release_date']
    feature_dict['artist_name'] = track_meta['album']['artists'][0]['name']
    feature_dict['artist_uri'] = track_meta['album']['artists'][0]['uri']

    artist_meta = sp.artist(feature_dict['artist_uri'])
    # capture artist metadata
    feature_dict['artist_followers'] = float(artist_meta['followers']['total'])
    feature_dict['artist_popularity'] = float(artist_meta['popularity'])
    # artist_genres = artist_meta['genres']
    
    if artist_meta['genres']:
        feature_dict['artist_genres'] = " ".join([re.sub(' ','_',i) for i in artist_meta['genres']])
    else:
        feature_dict['artist_genres'] = "unknown"
    
    track_features = sp.audio_features(track_uri)[0]
    # capture track audio features
    feature_dict['duration'] = float(track_features['duration_ms'])
    feature_dict['acousticness'] = track_features['acousticness']
    feature_dict['danceability'] = track_features['danceability']
    feature_dict['energy'] = track_features['energy']
    feature_dict['instrumentalness'] = track_features['instrumentalness']
    feature_dict['key'] = track_features['key']
    feature_dict['liveness'] = track_features['liveness']
    feature_dict['loudness']= track_features['loudness']
    feature_dict['mode'] = track_features['mode']
    feature_dict['speechiness'] = track_features['speechiness']
    feature_dict['tempo'] = track_features['tempo']
    feature_dict['time_signature'] = track_features['time_signature']
    feature_dict['valence'] = track_features['valence']
    
    # TODO: print artist names for comparison later
    
    return feature_dict

def get_playlist_queries(playlist_uri):
    
    track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_uri)["items"]]
    album_uris = [x["track"]['album']['uri'] for x in sp.playlist_tracks(playlist_uri)["items"]]
    artist_uris = [x["track"]['artists'][0]['uri'] for x in sp.playlist_tracks(playlist_uri)["items"]]
    
    n_songs_pl = len(track_uris)
    num_albums_pl = len(set(album_uris))
    num_artists_pl = len(set(artist_uris))
    
    # n_songs_pl = len(track_uris)
    playlist_featutre_list = []
    
    for count, track_uri in enumerate(track_uris):
        # results = get_track_features(track_uri, count, playlist_uri, n_songs_pl)
        results = get_track_features(track_uri, count, playlist_uri, n_songs_pl, num_artists_pl, num_albums_pl)
        playlist_featutre_list.append(results)
    
    return playlist_featutre_list

# Getting your Spotify playlists

Keep in mind:
* it's possible your playlists have tracks that are not present in the Million Playlists Dataset
* That's OK - we want the model to generalize to unseen data!
* Let's see what the model associates them with...

Note: to retrieve your playlists, make sure they are *added to your profile*

### get user playlists

Option 1: get playlists via `spotipy` API...

In [106]:
play_lists = []
playlists = sp.user_playlists(user=f'{SPOTIFY_USERNAME}', limit=10)

for pl in playlists['items']:
    uri = pl['uri']
    play_lists.append(uri)
    print(f"uri: {uri},  playlist name: {pl['name']}")

uri: spotify:playlist:3HeHZi8VGEm6ZNHZ2FVRr6,  playlist name: biebs weeknd
uri: spotify:playlist:3GX5FLE0IxHNZtLye0ETgb,  playlist name: Muscle Shoals
uri: spotify:playlist:0XPJ39OCBhOw5OZa7udYYP,  playlist name: Disco
uri: spotify:playlist:6imD2IJOyw3MEKdZ4XZqZ4,  playlist name: space is the place
uri: spotify:playlist:1E1EwxJyzjt6SYyfnp9mE8,  playlist name: all panic
uri: spotify:playlist:1pGfqRD9CzyO9lOn9Fp09V,  playlist name: live panic - small


Option 2: using the link provided when "sharing" a playlist...

In [107]:
# link from "share" feature
playlist_link = 'https://open.spotify.com/playlist/3GX5FLE0IxHNZtLye0ETgb?si=f99fa67315f14bbe'

# get the uri part
playlist_URI = playlist_link.split("/")[-1].split("?")[0]
print(f"playlist_URI: {playlist_URI}")

# get the tracks from that playlist
track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_URI)["items"]]
print(f"Track in playlist: {track_uris[0]}")

playlist_URI: 3GX5FLE0IxHNZtLye0ETgb
Track in playlist: spotify:track:7hqesNgWCx8NZTHl4MXkPF


### Loop over multiple playlists

In [113]:
# from sp_utils import get_playlist_queries

featureLIST = [] 

for uri in play_lists:
    results = get_playlist_queries(uri)
    featureLIST.append(results)

len(featureLIST)

6

In [61]:
# first playlist
featureLIST[0]

In [62]:
# first track of first playlist
featureLIST[0][0]

### create dataframe of all playlists, tracks, and metadata defined in `get_playlist_queries()`

Inspect last `N` songs of playlist...

In [114]:
from itertools import chain

test_df = pd.DataFrame(list(chain.from_iterable(featureLIST)))

print(test_df.shape)
test_df.head()

(105, 32)


Unnamed: 0,n_songs_pl,num_artists_pl,num_albums_pl,pl_name,description_pl,collaborative,track_pos,track_uri,track_name,duration_ms,track_pop,album_name,album_uri,album_release_date,artist_name,artist_uri,artist_followers,artist_popularity,artist_genres,duration,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,20,12,18,biebs weeknd,sonic versatility and dark lyricism; pop tropi...,False,0,spotify:track:66hayvUbTotekKU3H4ta1f,Where Are Ü Now (with Justin Bieber),250285.0,73.0,Skrillex and Diplo present Jack Ü,spotify:album:6bfkwBrGYKJFk6Z4QVyjxd,2015-02-24,Jack Ü,spotify:artist:1HxJeLhIuegM3KgvPn8sTa,1073142.0,58.0,dance_pop edm electro_house pop_dance,250286.0,0.041,0.432,0.781,4e-06,4,0.0789,-4.038,0,0.0567,139.432,4,0.197
1,20,12,18,biebs weeknd,sonic versatility and dark lyricism; pop tropi...,False,1,spotify:track:6I3mqTwhRpn34SLVafSH7G,Ghost,153190.0,86.0,Justice,spotify:album:5dGWwsZ9iB2Xc3UKR0gif2,2021-03-19,Justin Bieber,spotify:artist:1uNFoZAHBGtllmzznpCI3s,67173634.0,89.0,canadian_pop pop,153190.0,0.185,0.601,0.741,2.9e-05,2,0.415,-5.569,1,0.0478,153.96,4,0.441
2,20,12,18,biebs weeknd,sonic versatility and dark lyricism; pop tropi...,False,2,spotify:track:09CtPGIpYB4BrO8qb1RGsF,Sorry,200786.0,80.0,Purpose (Deluxe),spotify:album:6Fr2rQkZ383FcMqFyT7yPr,2015-11-13,Justin Bieber,spotify:artist:1uNFoZAHBGtllmzznpCI3s,67173634.0,89.0,canadian_pop pop,200787.0,0.0797,0.654,0.76,0.0,0,0.299,-3.669,0,0.045,99.945,4,0.41
3,20,12,18,biebs weeknd,sonic versatility and dark lyricism; pop tropi...,False,3,spotify:track:4B0JvthVoAAuygILe3n4Bs,What Do You Mean?,205680.0,77.0,Purpose (Deluxe),spotify:album:6Fr2rQkZ383FcMqFyT7yPr,2015-11-13,Justin Bieber,spotify:artist:1uNFoZAHBGtllmzznpCI3s,67173634.0,89.0,canadian_pop pop,205680.0,0.59,0.845,0.567,0.00142,5,0.0811,-8.118,0,0.0956,125.02,4,0.793
4,20,12,18,biebs weeknd,sonic versatility and dark lyricism; pop tropi...,False,4,spotify:track:7fBv7CLKzipRk6EC6TWHOB,The Hills,242253.0,86.0,Beauty Behind The Madness,spotify:album:0P3oVJBFOv3TDXlYRhGL7s,2015-08-28,The Weeknd,spotify:artist:1Xyo4u8uXC1ZmMpatF05PJ,52058980.0,93.0,canadian_contemporary_r&b canadian_pop pop,242253.0,0.0671,0.585,0.564,0.0,0,0.135,-7.063,0,0.0515,113.003,4,0.137


### albums, artists, and tracks per playlist

In [115]:
unique_albums = test_df.groupby('pl_name')['album_uri'].nunique()
unique_artists = test_df.groupby('pl_name')['artist_uri'].nunique()
n_songs_pl = test_df.groupby('pl_name')['track_uri'].count()

print(f"unique_albums {unique_albums}\n")
print(f"unique_artists {unique_artists}\n")
print(f"n_songs_pl {n_songs_pl}")

unique_albums pl_name
Disco                  5
Muscle Shoals          9
all panic              8
biebs weeknd          18
live panic - small     3
space is the place     9
Name: album_uri, dtype: int64

unique_artists pl_name
Disco                  5
Muscle Shoals          9
all panic              1
biebs weeknd          12
live panic - small     2
space is the place     5
Name: artist_uri, dtype: int64

n_songs_pl pl_name
Disco                  5
Muscle Shoals         13
all panic             40
biebs weeknd          20
live panic - small    14
space is the place    13
Name: track_uri, dtype: int64


In [120]:
def get_test_instance(list_dict_test):
    '''
    create single test instances given a 
    list of dictionaries representing playlist tracks
    '''
    
    # model serving signature with candidate tower fields
    TEST_PL_QUERY = {
        # 'album_name_can': '',
        'album_name_pl': [ ], 
        # 'album_uri_can': '',
        # 'artist_followers_can': 0, 
        # 'artist_genres_can': "", 
        'artist_genres_pl': [ ], 
        # 'artist_name_can': '', 
        'artist_name_pl': [ ], 
        'artist_pop_can': 0, 
        # 'artist_pop_pl': [], 
        # 'artist_uri_can': '', 
        # 'artists_followers_pl': [ ], 
        'collaborative': '', 
        'description_pl': '', 
        'duration_ms_seed_pl': 0, 
        'duration_ms_songs_pl': [ ], 
        'n_songs_pl': 0, 
        'name': '', 
        'num_albums_pl': 0, 
        'num_artists_pl': 0, 
        # 'track_name_can': '', 
        'track_name_pl': [ ], 
        # 'track_pop_can': 0, 
        'track_pop_pl': [ ], 
        # 'track_uri_can': '', 
        'track_uri_pl': [ ],
        'pid': 1,
    }

    counter = 0
    for track in list_dict_test:
        if counter == 0:
            TEST_PL_QUERY['name'] = track['pl_name']
            TEST_PL_QUERY['n_songs_pl'] = track['n_songs_pl'] 
            TEST_PL_QUERY['num_albums_pl'] = track['num_albums_pl']
            TEST_PL_QUERY['num_artists_pl'] = track['num_artists_pl']
            TEST_PL_QUERY['description_pl'] = track['description_pl']
            TEST_PL_QUERY['collaborative'] = str(track['collaborative'])
        else:
            # do these
            TEST_PL_QUERY['track_pop_pl'].append(track['track_pop'])
            TEST_PL_QUERY['track_uri_pl'].append(track['track_uri'])
            TEST_PL_QUERY['track_name_pl'].append(track['track_name'])
            TEST_PL_QUERY['album_name_pl'].append(track['album_name'])
            TEST_PL_QUERY['artist_name_pl'].append(track['artist_name'])
            TEST_PL_QUERY['artist_genres_pl'].append(track['artist_genres'])
            # TEST_PL_QUERY['artist_pop_pl'].append(track['artist_popularity'])
            TEST_PL_QUERY['duration_ms_songs_pl'].append(track['duration_ms'])
            # TEST_PL_QUERY['artists_followers_pl'].append(track['artist_followers'])
            

        counter=+1
        
    return TEST_PL_QUERY

In [121]:
# list of tracks
sample_tracks = featureLIST[0][-MAX_SEED_LENGTH-1:]

# get metadata for each track
TEST_QUERY = get_test_instance(sample_tracks)
pprint(TEST_QUERY)

{'album_name_pl': ['Might Not',
                   '4REAL 4REAL',
                   'SremmLife',
                   'SremmLife 2 (Deluxe)',
                   'Peace Is The Mission (Extended)'],
 'artist_genres_pl': ['canadian_hip_hop canadian_trap pop_rap rap trap',
                      'cali_rap gangster_rap hip_hop pop_rap rap '
                      'southern_hip_hop trap',
                      'hip_hop melodic_rap mississippi_hip_hop pop_rap rap '
                      'southern_hip_hop trap',
                      'hip_hop melodic_rap mississippi_hip_hop pop_rap rap '
                      'southern_hip_hop trap',
                      'dance_pop edm electro_house moombahton pop pop_dance '
                      'pop_rap tropical_house'],
 'artist_name_pl': ['Belly',
                    'YG',
                    'Rae Sremmurd',
                    'Rae Sremmurd',
                    'Major Lazer'],
 'artist_pop_can': 0,
 'collaborative': 'False',
 'description_pl': 'sonic vers

# Query Matching Engine

### TODO:
* parametrize this section
* structure section and notebook for readers

In [122]:
from google.cloud import aiplatform as vertex_ai

# Vertex SDK 
vertex_ai.init(project=PROJECT_ID, location=LOCATION)

import time

### Index Endpoint

In [47]:
INDEX_ENDPOINT_URI = "projects/934903580331/locations/us-central1/indexEndpoints/5901413157808635904"

In [48]:
ME_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint(INDEX_ENDPOINT_URI)
ME_index_endpoint

<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7f7004528790> 
resource name: projects/934903580331/locations/us-central1/indexEndpoints/5901413157808635904

In [125]:
DEPLOYED_INDEX_ID = ME_index_endpoint.deployed_indexes[0].id
print(f"DEPLOYED_INDEX_ID: {DEPLOYED_INDEX_ID}")

ME_index_endpoint.deployed_indexes

DEPLOYED_INDEX_ID: deployed_ann_index_v5


[id: "deployed_ann_index_v5"
index: "projects/934903580331/locations/us-central1/indexes/3111433188652613632"
create_time {
  seconds: 1668084206
  nanos: 671993000
}
private_endpoints {
  match_grpc_address: "10.41.2.5"
}
index_sync_time {
  seconds: 1668162021
  nanos: 728904000
}
automatic_resources {
  min_replica_count: 2
  max_replica_count: 2
}
deployment_group: "default"
]

### Model Endpoint

In [49]:
ENDPOINT_URI = 'projects/934903580331/locations/us-central1/endpoints/185144563977945088'

In [51]:
model_endpoint = vertex_ai.Endpoint(ENDPOINT_URI)
model_endpoint

<google.cloud.aiplatform.models.Endpoint object at 0x7f700c7be250> 
resource name: projects/934903580331/locations/us-central1/endpoints/185144563977945088

In [52]:
print(model_endpoint.gca_resource.deployed_models[0])

id: "2808966335944458240"
model: "projects/934903580331/locations/us-central1/models/3137456429858816000"
display_name: "mm-qtower-v8"
create_time {
  seconds: 1668159740
  nanos: 805725000
}
dedicated_resources {
  machine_spec {
    machine_type: "n1-standard-4"
    accelerator_type: NVIDIA_TESLA_T4
    accelerator_count: 1
  }
  min_replica_count: 1
  max_replica_count: 1
}
service_account: "934903580331-compute@developer.gserviceaccount.com"
model_version_id: "1"



### Retrieve nearest neighbors in deployed index

**TODO** add Feature Store to this step

In [157]:
def candidate_retrieval(query_instance, deployed_index_id, num_neighbs=10):
    '''
    TODO: some args
    '''
    # here
    start = time.process_time()
    playlist_emb = model_endpoint.predict([query_instance])
    print(f"Generate embeddings in {round((time.process_time() - start),2)} seconds\n")
    
    
    start = time.process_time()
    candidate_tracks = ME_index_endpoint.match(
        deployed_index_id=f'{deployed_index_id}',
        queries=playlist_emb.predictions,
        num_neighbors=10
    )
    
    print(f"Retrieved nearest neighbors in {round((time.process_time() - start),2)} seconds\n")
    playlist_name = query_instance['name']
    playlist_description = query_instance['description_pl']
    
    return candidate_tracks, playlist_description, playlist_name

def interpret_results(candidate_tracks, playlist_description, playlist_name):
    '''
    TODO:
    '''
    # here
    results = []
    
    print(f"playlist: {playlist_name}")
    print(f"description: {playlist_description}\n")
    
    print(f"Retrieved Candidates:\n")
    for i, neighbors in enumerate(candidate_tracks[0]):
        
        track_dict = {}
        
        track_index_id = str(neighbors.id)
        track_meta = sp.track(track_index_id)
        
        track_name = track_meta['name']
        artist_name = track_meta['artists'][0]['name']
        art_uri = track_meta['artists'][0]['uri']
        art_genres = sp.artist(art_uri)['genres']
        
        track_dict['track_name'] = track_name
        track_dict['artist_name'] = artist_name
        track_dict['track_preview_url'] = track_meta['preview_url']
        track_dict['track_spotify_url'] = track_meta['external_urls']['spotify']
        track_dict['track_genres'] = art_genres
        
        track_dict['track_uri'] = track_index_id
        track_dict['neighbor_distance'] = neighbors.distance

        results.append(track_dict)
        
        print(f"{i+1}) {track_name} by {artist_name}; {art_genres}")

    return results


In [160]:
for playlist in featureLIST:
    
    seed_tracks = playlist[-MAX_SEED_LENGTH:]
    
    query = get_test_instance(seed_tracks)
    
    candidates, pl_description, name = candidate_retrieval(
        query_instance=query, 
        deployed_index_id=DEPLOYED_INDEX_ID
    )
    
    results = interpret_results(candidates, pl_description, name)
    results
    print("--------")

Generate embeddings in 0.11 seconds

Retrieved nearest neighbors in 0.38 seconds

playlist: biebs weeknd
description: sonic versatility and dark lyricism; pop tropical house

Retrieved Candidates:

1) The Girl Next Door by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
2) Far Away by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
3) Smoking at Midnight by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
4) El Quesito by Omar Ruiz; ['corrido', 'corridos tumbados', 'musica mexicana', 'nueva musica mexicana', 'sierreno']
5) Lonely Boy with a Toy Ukulele by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
6) Will You Stay Here with Me by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
7) Don't Leave Me This Way by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
8) Again by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
9) Emotional Crank by Tomppabeats; ['japanese chillhop', 'lo-fi beats']
10) The Duck Song 2 by Bryant Oden; ["children's music"]
--------
Generate embeddings 