# Comparing results

> Follow instruction [here](https://towardsdatascience.com/extracting-song-data-from-the-spotify-api-using-python-b1e79388d50) for getting your own Spotify Developer API credentials

In [None]:
# !pip install spotipy --user

In [1]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import re
from tqdm import tqdm

import pandas as pd
import json
from io import BytesIO
from pprint import pprint
import os

from google.cloud import storage

pd.set_option('display.max_columns', 100)

### Setup clients

In [7]:
# CURRENT_PATH = !pwd

# os.chdir(f'{CURRENT_PATH}')
# os.getcwd()

* If you registered credentials with the Spotify Web Developer API, easily query your playlists to create your own test examples
* see [spotipy docs](https://spotipy.readthedocs.io/en/2.19.0/#welcome-to-spotipy), [github examples](https://github.com/plamere/spotipy/tree/master/examples), and [source code](https://github.com/plamere/spotipy/blob/master/spotipy/client.py#L20)

In [6]:
import spotipy_secret_creds as creds

os.environ['GOOGLE_CLOUD_PROJECT'] = PROJECT_ID
os.environ['SPOTIPY_CLIENT_ID'] = creds.SPOTIPY_CLIENT_ID
os.environ['SPOTIPY_CLIENT_SECRET'] = creds.SPOTIPY_CLIENT_SECRET
os.environ['SPOTIFY_USERNAME'] = creds.SPOTIFY_USERNAME

SPOTIPY_CLIENT_ID=creds.SPOTIPY_CLIENT_ID
SPOTIPY_CLIENT_SECRET=creds.SPOTIPY_CLIENT_SECRET
SPOTIFY_USERNAME=creds.SPOTIFY_USERNAME

MAX_SEED_LENGTH = 5

# print(f"SPOTIPY_CLIENT_ID: {SPOTIPY_CLIENT_ID}")
# print(f"SPOTIPY_CLIENT_SECRET: {SPOTIPY_CLIENT_SECRET}")
# print(f"SPOTIFY_USERNAME: {SPOTIFY_USERNAME}")

In [8]:
# Authenticate
client_credentials_manager = SpotifyClientCredentials(
    client_id=creds.SPOTIPY_CLIENT_ID, 
    client_secret=creds.SPOTIPY_CLIENT_SECRET
)

# scope = 'playlist-read-private' # scope=scope
sp = spotipy.Spotify(
    client_credentials_manager = client_credentials_manager,
    # auth_manager=SpotifyOAuth(),
)

# Create Test Instances

## (1) Use your own Spotify playlists

Keep in mind:
* it's possible your playlists have tracks that are not present in the Million Playlists Dataset
* That's OK - we want the model to generalize to unseen data!
* Let's see what the model associates them with...

### get user playlists

> To connect to you Spotify account, see [Client Credentials Flow](https://spotipy.readthedocs.io/en/2.19.0/#client-credentials-flow) for authentication

In [12]:
playlists = sp.user_playlists(f'{SPOTIFY_USERNAME}')
while playlists:
    for i, playlist in enumerate(playlists['items']):
        print("%4d %s %s" % (i + 1 + playlists['offset'], playlist['uri'],  playlist['name']))
    if playlists['next']:
        playlists = sp.next(playlists)
    else:
        playlists = None

### Test playlist link

In [13]:
# muscale shoals
playlist_link = 'https://open.spotify.com/playlist/3GX5FLE0IxHNZtLye0ETgb?si=f99fa67315f14bbe'

playlist_URI = playlist_link.split("/")[-1].split("?")[0]

print(f"playlist_URI: {playlist_URI}")

track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_URI)["items"]]

track_uris

playlist_URI: 3GX5FLE0IxHNZtLye0ETgb


['spotify:track:7hqesNgWCx8NZTHl4MXkPF',
 'spotify:track:0y2y5zcZr0Ciei8bf41cSS',
 'spotify:track:1Hi6XYNirfRidjPo18UUq0',
 'spotify:track:5U51nS2N1TDL1HEHEio7mC',
 'spotify:track:4euYSSvpQZSAQ4gThyL2QD',
 'spotify:track:5CahDIpRZSLStfqNdznMoE',
 'spotify:track:6l6ObuVjdZbc900Q2WgO7K',
 'spotify:track:3i5m63j2chfkDu3qmcb9m3',
 'spotify:track:1MMp1H2Kib2BCDtdL5nL63',
 'spotify:track:2l5kuq6JLqCA6Afx8gGh8V',
 'spotify:track:1Zxu9r1D4fKGaxBUxmIsWG',
 'spotify:track:5n9Ul19Pb8MROfm8eTI7UH',
 'spotify:track:1JFdocYRMthec2BNrkOF1b']

## Loop multiple playlists

### Helper functions

In [301]:
# def get_track_features(track_uri, count, playlist_uri, n_songs_pl):
def get_track_features(track_uri, count, playlist_uri):
    
    feature_dict = {}
    
    playlist_features = sp.playlist(playlist_uri)
    feature_dict['pl_name'] = playlist_features['name']
    feature_dict['description_pl'] = playlist_features['description']
    feature_dict['collaborative'] = playlist_features['collaborative']
    # feature_dict['n_songs_pl'] = n_songs_pl
    
    track_meta = sp.track(track_uri)
    # capture track metadata
    feature_dict['track_pos'] = count
    feature_dict['track_uri'] = track_uri
    feature_dict['track_name'] = track_meta['name']
    feature_dict['duration_ms'] = track_meta['duration_ms']
    feature_dict['track_pop'] = track_meta['popularity']
    feature_dict['album_name'] = track_meta['album']['name']
    feature_dict['album_uri'] = track_meta['album']['uri']
    feature_dict['album_release_date'] = track_meta['album']['release_date']
    feature_dict['artist_name'] = track_meta['album']['artists'][0]['name']
    feature_dict['artist_uri'] = track_meta['album']['artists'][0]['uri']

    artist_meta = sp.artist(feature_dict['artist_uri'])
    # capture artist metadata
    feature_dict['artist_followers'] = artist_meta['followers']['total']
    feature_dict['artist_popularity'] = artist_meta['popularity']
    # artist_genres = artist_meta['genres']
    
    if artist_meta['genres']:
        feature_dict['artist_genres'] = " ".join([re.sub(' ','_',i) for i in artist_meta['genres']])
    else:
        feature_dict['artist_genres'] = "unknown"
    
    track_features = sp.audio_features(track_uri)[0]
    # capture track audio features
    feature_dict['duration'] = track_features['duration_ms']
    feature_dict['acousticness'] = track_features['acousticness']
    feature_dict['danceability'] = track_features['danceability']
    feature_dict['energy'] = track_features['energy']
    feature_dict['instrumentalness'] = track_features['instrumentalness']
    feature_dict['key'] = track_features['key']
    feature_dict['liveness'] = track_features['liveness']
    feature_dict['loudness']= track_features['loudness']
    feature_dict['mode'] = track_features['mode']
    feature_dict['speechiness'] = track_features['speechiness']
    feature_dict['tempo'] = track_features['tempo']
    feature_dict['time_signature'] = track_features['time_signature']
    feature_dict['valence'] = track_features['valence']
    
    return feature_dict

def get_playlist_queries(playlist_uri):
    
    track_uris = [x["track"]["uri"] for x in sp.playlist_tracks(playlist_uri)["items"]]
    
    # n_songs_pl = len(track_uris)
    playlist_featutre_list = []
    
    for count, track_uri in enumerate(track_uris):
        # results = get_track_features(track_uri, count, playlist_uri, n_songs_pl)
        results = get_track_features(track_uri, count, playlist_uri)
        # results['n_songs_pl'] = len(results)
        playlist_featutre_list.append(results)
    
    return playlist_featutre_list

In [14]:
playlist_uris = []

playlist_links = [
    'https://open.spotify.com/playlist/3GX5FLE0IxHNZtLye0ETgb?si=80db2142bb684516', # muscle shoals
    'https://open.spotify.com/playlist/0XPJ39OCBhOw5OZa7udYYP?si=92551fefe7c64eb1', # disco
    'https://open.spotify.com/playlist/6imD2IJOyw3MEKdZ4XZqZ4?si=b29779098622481f', # space is the place
    # 'https://open.spotify.com/playlist/1E1EwxJyzjt6SYyfnp9mE8?si=0f1cda3332c14290', # all panic
    'https://open.spotify.com/playlist/1pGfqRD9CzyO9lOn9Fp09V?si=3f6c8067491a4105', # live panic - small
    'https://open.spotify.com/playlist/3HeHZi8VGEm6ZNHZ2FVRr6?si=ca0b107a42e84067', # biebs weeknd
]

for link in playlist_links:
    playlist_uri = link.split("/")[-1].split("?")[0]
    playlist_uris.append(playlist_uri)

print(f"playlist_uris: {playlist_uris}")

playlist_uris: ['3GX5FLE0IxHNZtLye0ETgb', '0XPJ39OCBhOw5OZa7udYYP', '6imD2IJOyw3MEKdZ4XZqZ4', '1pGfqRD9CzyO9lOn9Fp09V', '3HeHZi8VGEm6ZNHZ2FVRr6']


In [303]:
featureLIST = [] 

for uri in playlist_uris:
    results = get_playlist_queries(uri)
    featureLIST.append(results)

len(featureLIST)

5

In [320]:
featureLIST[0][0]['album_name']

'Amazing Grace (Live at New Temple Missionary Baptist Church, Los Angeles, CA, 01/13/72)'

#### Let's inspect the first song of the 4th playlist

In [322]:
featureLIST[3][0]

{'pl_name': 'live panic - small',
 'description_pl': 'recent live shows for widespread panic',
 'collaborative': False,
 'track_pos': 0,
 'track_uri': 'spotify:track:6GK8JaPXQ7FPiWww3R7nVH',
 'track_name': 'Who Do You Belong To?',
 'duration_ms': 297709,
 'track_pop': 23,
 'album_name': '12/31/17 Fox Theatre, Atlanta, GA',
 'album_uri': 'spotify:album:2YI0tPSSIhvx839b2eCeTJ',
 'album_release_date': '2018-01-03',
 'artist_name': 'Widespread Panic',
 'artist_uri': 'spotify:artist:54SHZF2YS3W87xuJKSvOVf',
 'artist_followers': 246109,
 'artist_popularity': 50,
 'artist_genres': 'athens_indie jam_band roots_rock southern_rock',
 'duration': 297709,
 'acousticness': 0.529,
 'danceability': 0.512,
 'energy': 0.889,
 'instrumentalness': 0.293,
 'key': 2,
 'liveness': 0.71,
 'loudness': -9.173,
 'mode': 1,
 'speechiness': 0.0511,
 'tempo': 108.305,
 'time_signature': 4,
 'valence': 0.749}

In [307]:
from itertools import chain

test_df = pd.DataFrame(list(chain.from_iterable(featureLIST)))

print(test_df.shape)
test_df.head()

(65, 29)


Unnamed: 0,pl_name,description_pl,collaborative,track_pos,track_uri,track_name,duration_ms,track_pop,album_name,album_uri,album_release_date,artist_name,artist_uri,artist_followers,artist_popularity,artist_genres,duration,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,0,spotify:track:7hqesNgWCx8NZTHl4MXkPF,"Mary, Don't You Weep - Live at New Temple Miss...",446506,36,Amazing Grace (Live at New Temple Missionary B...,spotify:album:5pIUimaQ6XePPXbMUi3te0,1972,Aretha Franklin,spotify:artist:7nwUJBm0HE4ZxD3f5cy5ok,2976902,69,classic_soul jazz_blues memphis_soul soul sout...,446507,0.744,0.572,0.27,0.000185,1,0.858,-16.201,1,0.0378,81.72,3,0.61
1,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,1,spotify:track:0y2y5zcZr0Ciei8bf41cSS,Confessions,324760,45,Carolina Confessions,spotify:album:6RVN0MPUbYBiYm2Otf6Li2,2018-10-05,Marcus King,spotify:artist:0FeWKiZSwBRdGzqeCdlH1a,130677,56,modern_blues_rock,324760,0.269,0.357,0.437,0.000403,5,0.151,-9.072,0,0.0312,135.73,3,0.447
2,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,2,spotify:track:1Hi6XYNirfRidjPo18UUq0,Welcome ’Round Here,261546,49,Carolina Confessions,spotify:album:6RVN0MPUbYBiYm2Otf6Li2,2018-10-05,Marcus King,spotify:artist:0FeWKiZSwBRdGzqeCdlH1a,130677,56,modern_blues_rock,261547,0.00904,0.297,0.734,0.00443,0,0.0959,-7.255,1,0.0443,93.259,4,0.252
3,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,3,spotify:track:5U51nS2N1TDL1HEHEio7mC,Down In the Flood,302306,45,Already Free,spotify:album:1I4Ydy7hVvINSdRHV7n4fS,2009-01-13,The Derek Trucks Band,spotify:artist:1YwfENKEZrowcmtR1nALZn,193616,44,blues blues_rock country_rock electric_blues j...,302307,0.181,0.551,0.695,0.000154,0,0.192,-7.111,1,0.0323,114.135,4,0.207
4,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,4,spotify:track:4euYSSvpQZSAQ4gThyL2QD,Ain't Wastin' Time No More,221890,59,Eat A Peach,spotify:album:6D9I7GZsqc3pwDg3DFrtjs,1972-02-12,Allman Brothers Band,spotify:artist:4wQ3PyMz3WwJGI5uEqHUVR,1367664,63,album_rock blues_rock classic_rock country_roc...,221891,0.337,0.453,0.607,0.0237,3,0.0998,-13.4,1,0.0345,91.468,4,0.724


# TODO
* `n_songs_pl`
* `num_artists_pl`
* `num_albums_pl`
* format ragged columns (<= 5)

In [323]:
# test_df_2 = test_df.copy()

# # dfd['new_B'] = dfd.groupby('A')['B'].transform('min')

# # test_df_2['n_songs_pl'] = test_df_2.groupby('track_uri')['pl_name'].count()
# test_df_2['n_songs_pl'] = test_df_2.groupby('pl_name')['ccc'].count()
# # test_df_2['n_songs_pl'] = df[df.A > 0].shape[0]

# test_df_2['pl_name'].value_counts()[145]

# test_df_2.head()
# #aggregate by min
# # s = dfd.groupby('A')['B'].min()

In [250]:
featureDF = pd.DataFrame(playlist_featutre_list)
print(featureDF.shape)
featureDF.head(2)

(13, 28)


Unnamed: 0,pl_name,description_pl,collaborative,track_uri,track_name,duration_ms,track_pop,album_name,album_uri,album_release_date,artist_name,artist_uri,artist_followers,artist_popularity,artist_genres,duration,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,spotify:track:7hqesNgWCx8NZTHl4MXkPF,"Mary, Don't You Weep - Live at New Temple Miss...",446506,36,Amazing Grace (Live at New Temple Missionary B...,spotify:album:5pIUimaQ6XePPXbMUi3te0,1972,Aretha Franklin,spotify:artist:7nwUJBm0HE4ZxD3f5cy5ok,2976902,69,classic_soul jazz_blues memphis_soul soul sout...,446507,0.744,0.572,0.27,0.000185,1,0.858,-16.201,1,0.0378,81.72,3,0.61
1,Muscle Shoals,"a blend of country, gospel and R &amp; B, alab...",False,spotify:track:0y2y5zcZr0Ciei8bf41cSS,Confessions,324760,45,Carolina Confessions,spotify:album:6RVN0MPUbYBiYm2Otf6Li2,2018-10-05,Marcus King,spotify:artist:0FeWKiZSwBRdGzqeCdlH1a,130677,56,modern_blues_rock,324760,0.269,0.357,0.437,0.000403,5,0.151,-9.072,0,0.0312,135.73,3,0.447


In [244]:
artist_uri = '7nwUJBm0HE4ZxD3f5cy5ok'
artist_info = sp.artist(artist_uri)
pprint(artist_info.keys())

print(artist_info['popularity']) #.keys()
print(artist_info['followers']['total'])
print(artist_info['genres'])
print(artist_info['name'])

dict_keys(['external_urls', 'followers', 'genres', 'href', 'id', 'images', 'name', 'popularity', 'type', 'uri'])
69
2976902
['classic soul', 'jazz blues', 'memphis soul', 'soul', 'southern soul']
Aretha Franklin
69


In [170]:
track_uri = 'spotify:track:7hqesNgWCx8NZTHl4MXkPF'
track = sp.track(track_uri)
pprint(track.keys())

track['artists'][0]
# track['album']['artists'][0]['uri']

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])


{'external_urls': {'spotify': 'https://open.spotify.com/artist/7nwUJBm0HE4ZxD3f5cy5ok'},
 'href': 'https://api.spotify.com/v1/artists/7nwUJBm0HE4ZxD3f5cy5ok',
 'id': '7nwUJBm0HE4ZxD3f5cy5ok',
 'name': 'Aretha Franklin',
 'type': 'artist',
 'uri': 'spotify:artist:7nwUJBm0HE4ZxD3f5cy5ok'}

In [None]:
# for count, item in enumerate(my_list, start=1):
#     print(count, item)

### testing

In [289]:
playlist_uris = []

playlist_links = [
    'https://open.spotify.com/playlist/3GX5FLE0IxHNZtLye0ETgb?si=80db2142bb684516', # muscle shoals
    'https://open.spotify.com/playlist/0XPJ39OCBhOw5OZa7udYYP?si=92551fefe7c64eb1', # disco
    'https://open.spotify.com/playlist/6imD2IJOyw3MEKdZ4XZqZ4?si=b29779098622481f', # space is the place
    # 'https://open.spotify.com/playlist/1E1EwxJyzjt6SYyfnp9mE8?si=0f1cda3332c14290', # all panic
    'https://open.spotify.com/playlist/1pGfqRD9CzyO9lOn9Fp09V?si=3f6c8067491a4105', # live panic - small
    # 'https://open.spotify.com/playlist/3HeHZi8VGEm6ZNHZ2FVRr6?si=ca0b107a42e84067', # biebs weeknd
]

for link in playlist_links:
    playlist_uri = link.split("/")[-1].split("?")[0]
    playlist_uris.append(playlist_uri)

print(f"playlist_uris: {playlist_uris}")

playlist_uris: ['3GX5FLE0IxHNZtLye0ETgb', '0XPJ39OCBhOw5OZa7udYYP', '6imD2IJOyw3MEKdZ4XZqZ4', '1pGfqRD9CzyO9lOn9Fp09V']


In [291]:
featureLIST = [] 

for uri in playlist_uris:
    results = get_playlist_queries(uri)
    featureLIST.append(results)

len(featureLIST)

4

In [300]:
print(len(featureLIST[0]))
print(len(featureLIST[1]))
print(len(featureLIST[2]))
print(len(featureLIST[3]))

13
5
13
14


## Your favorite artists, tracks, and genres