In [1]:
import os
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd
import numpy as np

from credentials import set_credentials

In [2]:
pd.options.display.max_columns = None

# Spotify Recommendation System Walkthrough

## Spotify Access
How to access Spotify through their developer API

In [3]:
set_credentials()

In [4]:
client_credentials_manager = SpotifyClientCredentials(client_id=os.getenv('CLIENT_ID'), client_secret=os.getenv('CLIENT_SECRET'))
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Extract Tracks from a Playlist

Spotify makes use of a URI (Universal resource locator) in order to identify playlsits, artists, tracks, ect. For example from the link: `https://open.spotify.com/playlist/2QJwkjEtDXDbtecQ5ypVm3?si=c437b89583c74615` to one of my favourite playlists, only the `2QJwkjEtDXDbtecQ5ypVm3` is the URI.

In [11]:
playlist_link = "https://open.spotify.com/playlist/2QJwkjEtDXDbtecQ5ypVm3?si=c437b89583c74615"  # Motivate
# playlist_link = "https://open.spotify.com/playlist/37i9dQZEVXbNG2KDcFcKOF?si=1333723a6eff4b7f"  # Tutorial

In [12]:
playlist_uri = playlist_link.split('/')[-1].split('?')[0]
print(playlist_uri)

2QJwkjEtDXDbtecQ5ypVm3


In [13]:
mot_playlist = sp.playlist_tracks(playlist_uri, limit=100, offset=0)

In [14]:
print(mot_playlist)

{'href': 'https://api.spotify.com/v1/playlists/2QJwkjEtDXDbtecQ5ypVm3/tracks?offset=0&limit=100&additional_types=track', 'items': [{'added_at': '2020-11-14T05:40:14Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/fn662s8jr8skpfl5kn81itq7c'}, 'href': 'https://api.spotify.com/v1/users/fn662s8jr8skpfl5kn81itq7c', 'id': 'fn662s8jr8skpfl5kn81itq7c', 'type': 'user', 'uri': 'spotify:user:fn662s8jr8skpfl5kn81itq7c'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3NPpFNZtSTHheNBaWC82rB'}, 'href': 'https://api.spotify.com/v1/artists/3NPpFNZtSTHheNBaWC82rB', 'id': '3NPpFNZtSTHheNBaWC82rB', 'name': 'X Ambassadors', 'type': 'artist', 'uri': 'spotify:artist:3NPpFNZtSTHheNBaWC82rB'}], 'available_markets': ['AD', 'AE', 'AG', 'AR', 'AT', 'AU', 'BB', 'BE', 'BG', 'BH', 'BO', 'BR', 'BS', 'BZ', 'CA', 'CH', 'CL', 'CO', 'CR', 'CW', 'CY', 'CZ', 'DE', 'DK', 'DM', 'DO', '

Display the total number of songs in the playlist

In [15]:
print(mot_playlist['total'])

301


Display author of playlist (spotify link)

In [16]:
items = mot_playlist['items']

In [17]:
print(items)

[{'added_at': '2020-11-14T05:40:14Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/fn662s8jr8skpfl5kn81itq7c'}, 'href': 'https://api.spotify.com/v1/users/fn662s8jr8skpfl5kn81itq7c', 'id': 'fn662s8jr8skpfl5kn81itq7c', 'type': 'user', 'uri': 'spotify:user:fn662s8jr8skpfl5kn81itq7c'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3NPpFNZtSTHheNBaWC82rB'}, 'href': 'https://api.spotify.com/v1/artists/3NPpFNZtSTHheNBaWC82rB', 'id': '3NPpFNZtSTHheNBaWC82rB', 'name': 'X Ambassadors', 'type': 'artist', 'uri': 'spotify:artist:3NPpFNZtSTHheNBaWC82rB'}], 'available_markets': ['AD', 'AE', 'AG', 'AR', 'AT', 'AU', 'BB', 'BE', 'BG', 'BH', 'BO', 'BR', 'BS', 'BZ', 'CA', 'CH', 'CL', 'CO', 'CR', 'CW', 'CY', 'CZ', 'DE', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FR', 'GB', 'GD', 'GR', 'GT', 'GY', 'HK', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IS',

In [18]:
len(items)

100

Record track information

In [19]:
track_info = {
    'uris': [],
    'names': [],
    'artist_names': [],
    'artists_uri': [], 
    'artist_pop': [],
    'artist_genres': [],
    'albums': [],
    'track_pop': [],
    'danceability': [],
    'energy': [],
    'keys': [],
    'loudness': [],
    'modes': [],
    'speechiness': [],
    'acousticness': [],
    'instrumentalness': [],
    'liveness': [],
    'valences': [],
    'tempos': [],
    'types': [],
    'ids': [],
    'track_hrefs': [],
    'analysis_urls': [],
    'durations_ms': [],
    'time_signatures': []
}

In [20]:
for item in items:
    track_uri = item['track']['uri']
    track_info['uris'].append(track_uri)  # Retrieve track uri
    track_info['names'].append(item['track']['name'])  # Retrieve track name

    track_info['artists_uri'].append(item['track']['artists'][0]['uri'].split(':')[-1])  # Find artist uri

    track_info['artist_names'].append(item['track']['artists'][0]['name'])  # Access artist name


    track_info['albums'].append(item['track']['album']['name'])  # Access album names
    track_info['track_pop'].append(item['track']['popularity'])  # Access track popularity

The below code is used to extract the Artist details as well as a acoustic analysis of the songs. 
It is written in such a way that it makes an API call for each song instead of using a batch process. Due to this the code is removed to reduce the amount of API calls, and modified later in the code to perform batch requests. 

```python
# Artist Features
track_info['artist_pop'].append(artist_info['popularity'])  # Access 
track_info['artist_genres'].append(artist_info['genres'])  # Access artist genres

# Audio Features
track_features = sp.audio_features(track_uri)[0]  # Access track features
track_info['danceability'].append(track_features['danceability'])
track_info['energy'].append(track_features['energy'])
track_info['keys'].append(track_features['key'])
track_info['loudness'].append(track_features['loudness'])
track_info['modes'].append(track_features['mode'])
track_info['speechiness'].append(track_features['speechiness'])
track_info['acousticness'].append(track_features['acousticness'])
track_info['instrumentalness'].append(track_features['instrumentalness'])
track_info['liveness'].append(track_features['liveness'])
track_info['valences'].append(track_features['valence'])
track_info['tempos'].append(track_features['tempo'])
track_info['types'].append(track_features['type'])
track_info['ids'].append(track_features['id'])
track_info['track_hrefs'].append(track_features['track_href'])
track_info['analysis_urls'].append(track_features['analysis_url'])
track_info['durations_ms'].append(track_features['duration_ms'])
track_info['time_signatures'].append(track_features['time_signature'])
```

In [23]:
artist_info = sp.artists(track_info['artists_uri'][0:50])

In [24]:
artist_info['artists']

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/3NPpFNZtSTHheNBaWC82rB'},
  'followers': {'href': None, 'total': 2949093},
  'genres': ['modern alternative rock', 'modern rock', 'stomp pop'],
  'href': 'https://api.spotify.com/v1/artists/3NPpFNZtSTHheNBaWC82rB',
  'id': '3NPpFNZtSTHheNBaWC82rB',
  'images': [{'height': 640,
    'url': 'https://i.scdn.co/image/ab6761610000e5eb85b6cbf01747c44e145c5a20',
    'width': 640},
   {'height': 320,
    'url': 'https://i.scdn.co/image/ab6761610000517485b6cbf01747c44e145c5a20',
    'width': 320},
   {'height': 160,
    'url': 'https://i.scdn.co/image/ab6761610000f17885b6cbf01747c44e145c5a20',
    'width': 160}],
  'name': 'X Ambassadors',
  'popularity': 70,
  'type': 'artist',
  'uri': 'spotify:artist:3NPpFNZtSTHheNBaWC82rB'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/3NPpFNZtSTHheNBaWC82rB'},
  'followers': {'href': None, 'total': 2949093},
  'genres': ['modern alternative rock', 'modern rock', 'stomp pop'],

In [25]:
len(artist_info['artists'])

50

In [26]:
audio_features = sp.audio_features(track_info['uris'])

In [27]:
audio_features

[{'danceability': 0.732,
  'energy': 0.594,
  'key': 11,
  'loudness': -5.911,
  'mode': 0,
  'speechiness': 0.0693,
  'acousticness': 0.00805,
  'instrumentalness': 0.0301,
  'liveness': 0.0802,
  'valence': 0.676,
  'tempo': 107.996,
  'type': 'audio_features',
  'id': '3V9cM3nCH2G66afoDi0snu',
  'uri': 'spotify:track:3V9cM3nCH2G66afoDi0snu',
  'track_href': 'https://api.spotify.com/v1/tracks/3V9cM3nCH2G66afoDi0snu',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3V9cM3nCH2G66afoDi0snu',
  'duration_ms': 164587,
  'time_signature': 4},
 {'danceability': 0.526,
  'energy': 0.862,
  'key': 2,
  'loudness': -6.003,
  'mode': 1,
  'speechiness': 0.0905,
  'acousticness': 0.0144,
  'instrumentalness': 0.0597,
  'liveness': 0.229,
  'valence': 0.528,
  'tempo': 90.052,
  'type': 'audio_features',
  'id': '0fYVliAYKHuPmECRs1pbRf',
  'uri': 'spotify:track:0fYVliAYKHuPmECRs1pbRf',
  'track_href': 'https://api.spotify.com/v1/tracks/0fYVliAYKHuPmECRs1pbRf',
  'analysis_url': 'http

In [28]:
print(track_info)

{'uris': ['spotify:track:3V9cM3nCH2G66afoDi0snu', 'spotify:track:0fYVliAYKHuPmECRs1pbRf', 'spotify:track:2NldmECxe8jE4TmLmu5Yao', 'spotify:track:3xgK660fsZH7ZDcOMfIdfB', 'spotify:track:7o2CTH4ctstm8TNelqjb51', 'spotify:track:2HrsT9eflRVvdu3ZrorjQ6', 'spotify:track:04aAxqtGp5pv12UXAg4pkq', 'spotify:track:6bYjclvTLjcmZZC5GhmSFQ', 'spotify:track:6jXJmMCmEXC8SmZTK0KId6', 'spotify:track:3qiyyUfYe7CRYLucrPmulD', 'spotify:track:6x4tKaOzfNJpEJHySoiJcs', 'spotify:track:7bEDDsy2LFC0KSqhZp5nPE', 'spotify:track:1ejq4bPmYNhITVsCGNnVv2', 'spotify:track:4Ps2HEc6cRSy3fKPYfGAQ3', 'spotify:track:2lEXaTrSES6eZyypyvo9yR', 'spotify:track:3qgPpwtuRu5oP8EtFSj8HE', 'spotify:track:1T02MleMfYPohqKcd8uY6J', 'spotify:track:1NWfdpuChB7HcctVvDvGIy', 'spotify:track:6EjfjKpSk2eb8aAcX2D08Y', 'spotify:track:6JsiDFQRx7GHcuf2UroSYB', 'spotify:track:2ouTPeXZDbhcm2R2zx5LBG', 'spotify:track:63AXfaJ12wKlnXe9QAvzbl', 'spotify:track:7zzoxJbgjme3366mOp5UnH', 'spotify:track:40AHsBVOfw3H9i1jIrJEbb', 'spotify:track:10Nmj3JCNoMeBQ8

### DataFrame the Information

In [43]:
df = pd.DataFrame.from_dict(track_info)

In [44]:
df.shape

(100, 24)

In [45]:
df.head()

Unnamed: 0,uris,names,artist_names,artist_pop,artist_genres,albums,track_pop,danceability,energy,keys,loudness,modes,speechiness,acousticness,instrumentalness,liveness,valences,tempos,types,ids,track_hrefs,analysis_urls,durations_ms,time_signatures
0,spotify:track:3V9cM3nCH2G66afoDi0snu,BOOM,X Ambassadors,70,"[modern alternative rock, modern rock, stomp pop]",ORION,61,0.732,0.594,11,-5.911,0,0.0693,0.00805,0.0301,0.0802,0.676,107.996,audio_features,3V9cM3nCH2G66afoDi0snu,https://api.spotify.com/v1/tracks/3V9cM3nCH2G6...,https://api.spotify.com/v1/audio-analysis/3V9c...,164587,4
1,spotify:track:0fYVliAYKHuPmECRs1pbRf,Renegades,X Ambassadors,70,"[modern alternative rock, modern rock, stomp pop]",VHS,78,0.526,0.862,2,-6.003,1,0.0905,0.0144,0.0597,0.229,0.528,90.052,audio_features,0fYVliAYKHuPmECRs1pbRf,https://api.spotify.com/v1/tracks/0fYVliAYKHuP...,https://api.spotify.com/v1/audio-analysis/0fYV...,195200,4
2,spotify:track:2NldmECxe8jE4TmLmu5Yao,Hey Child,Korbee,17,[],Hey Child,33,0.649,0.796,9,-3.89,1,0.0328,0.00267,0.000157,0.362,0.653,140.035,audio_features,2NldmECxe8jE4TmLmu5Yao,https://api.spotify.com/v1/tracks/2NldmECxe8jE...,https://api.spotify.com/v1/audio-analysis/2Nld...,241267,4
3,spotify:track:3xgK660fsZH7ZDcOMfIdfB,Jungle,X Ambassadors,70,"[modern alternative rock, modern rock, stomp pop]",VHS,60,0.389,0.748,2,-4.461,1,0.0805,0.000821,0.00058,0.358,0.216,78.056,audio_features,3xgK660fsZH7ZDcOMfIdfB,https://api.spotify.com/v1/tracks/3xgK660fsZH7...,https://api.spotify.com/v1/audio-analysis/3xgK...,189707,4
4,spotify:track:7o2CTH4ctstm8TNelqjb51,Sweet Child O' Mine,Guns N' Roses,78,"[glam metal, hard rock, rock]",Appetite For Destruction,19,0.454,0.91,6,-7.766,1,0.0448,0.0866,0.0996,0.116,0.629,125.116,audio_features,7o2CTH4ctstm8TNelqjb51,https://api.spotify.com/v1/tracks/7o2CTH4ctstm...,https://api.spotify.com/v1/audio-analysis/7o2C...,354520,4


In [46]:
df.describe()

Unnamed: 0,artist_pop,track_pop,danceability,energy,keys,loudness,modes,speechiness,acousticness,instrumentalness,liveness,valences,tempos,durations_ms,time_signatures
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,54.14,42.13,0.60024,0.78556,4.85,-5.68986,0.76,0.062851,0.107456,0.00927,0.199204,0.54624,119.67377,212675.01,4.0
std,16.355285,22.374975,0.114338,0.120654,3.517273,2.178224,0.429235,0.051054,0.142812,0.039135,0.170114,0.192516,27.432697,39241.375504,0.0
min,16.0,0.0,0.212,0.318,0.0,-18.064,0.0,0.0273,1.4e-05,0.0,0.0333,0.15,74.11,126521.0,4.0
25%,43.0,29.5,0.5325,0.71475,2.0,-6.43725,1.0,0.035575,0.007848,0.0,0.0879,0.4155,101.51125,189337.0,4.0
50%,58.0,44.5,0.613,0.813,5.0,-5.3575,1.0,0.0451,0.05035,2e-06,0.1245,0.562,117.151,207724.0,4.0
75%,67.0,57.5,0.683,0.86525,7.25,-4.43925,1.0,0.07185,0.16675,0.000164,0.275,0.6805,126.04875,228229.5,4.0
max,81.0,85.0,0.821,0.97,11.0,-2.333,1.0,0.408,0.622,0.283,0.906,0.98,207.508,354520.0,4.0


## Automate Playlist Track Data Extraction with Offsetting

In [5]:
def construct_storage():
    store = {
        'uris': [],
        'names': [],
        'artist_names': [],
        'artist_uris': [],
        'artist_pop': [],
        'artist_genres': [],
        'albums': [],
        'track_pop': [],
        'danceability': [],
        'energy': [],
        'keys': [],
        'loudness': [],
        'modes': [],
        'speechiness': [],
        'acousticness': [],
        'instrumentalness': [],
        'liveness': [],
        'valences': [],
        'tempos': [],
        'types': [],
        'ids': [],
        'track_hrefs': [],
        'analysis_urls': [],
        'durations_ms': [],
        'time_signatures': []
    }
    return store

def process_items(store, items):
    for item in items:
        track_uri = item['track']['uri'].split(':')[-1]
        store['uris'].append(track_uri)  # Retrieve track uri
        store['names'].append(item['track']['name'])  # Retrieve track name
    
        store['artist_uris'].append(item['track']['artists'][0]['uri'].split(':')[-1])  # Find artist uri
    
        store['artist_names'].append(item['track']['artists'][0]['name'])  # Access artist name
    
        store['albums'].append(item['track']['album']['name'])  # Access album names
        store['track_pop'].append(item['track']['popularity'])  # Access track popularity
    return store


def retrieve_batch_info(playlist, store):
    items = playlist['items']
    store = process_items(store, items)
    return store


def extract_artist_info(store, sp):
    limit = 50
    offset = 0
    while offset < len(store['artist_uris']):
        if offset + limit > len(store['artist_uris']):
            artists_info = sp.artists(store['artist_uris'][offset: len(store['artist_uris'])])
        else:
            artists_info = sp.artists(store['artist_uris'][offset: offset + limit])
            
        for artist in artists_info['artists']:
            store['artist_pop'].append(artist['popularity'])  # Access artist popularity
            store['artist_genres'].append(artist['genres'])  # Access artist genres
            
        offset = offset + limit


def extract_audio_features(store, sp):
    limit = 100
    offset = 0
    while offset < len(store['uris']):
        if offset + limit > len(store['uris']):
            track_info = sp.audio_features(store['uris'][offset: len(store['uris'])])
        else:
            track_info = sp.audio_features(store['uris'][offset: offset + limit])

        for track in track_info:
            store['danceability'].append(track['danceability'])
            store['energy'].append(track['energy'])
            store['keys'].append(track['key'])
            store['loudness'].append(track['loudness'])
            store['modes'].append(track['mode'])
            store['speechiness'].append(track['speechiness'])
            store['acousticness'].append(track['acousticness'])
            store['instrumentalness'].append(track['instrumentalness'])
            store['liveness'].append(track['liveness'])
            store['valences'].append(track['valence'])
            store['tempos'].append(track['tempo'])
            store['types'].append(track['type'])
            store['ids'].append(track['id'])
            store['track_hrefs'].append(track['track_href'])
            store['analysis_urls'].append(track['analysis_url'])
            store['durations_ms'].append(track['duration_ms'])
            store['time_signatures'].append(track['time_signature'])
            
        offset = offset + limit

In [6]:
def extract_tracks(sp, playlist_uri, store):
    offset = 0
    limit = 100
    playlist = sp.playlist_tracks(playlist_uri, limit=2, offset=offset)  # Retrieve the initial batch of songs
    total_songs = playlist['total']  # Extract the total number of songs
    print(f"Total songs: {total_songs}")

    while offset < total_songs:
        time.sleep(10)
        playlist = sp.playlist_tracks(playlist_uri, limit=100, offset=offset)  # Retrieve batch of songs in playlist
        store = retrieve_batch_info(playlist, store)  # Retrieve batch information
        print(f"Current offset: {offset}")
        offset = offset + limit  # Update offset

    extract_artist_info(store, sp)
    extract_audio_features(store, sp)
    return store

In [32]:
store = construct_storage()  # Construct info storage
store = extract_tracks(sp, playlist_uri, store)

Total songs: 301
Current offset: 0
Current offset: 100
Current offset: 200
Current offset: 300


In [120]:
print(store)

{'uris': ['3V9cM3nCH2G66afoDi0snu', '0fYVliAYKHuPmECRs1pbRf', '2NldmECxe8jE4TmLmu5Yao', '3xgK660fsZH7ZDcOMfIdfB', '7o2CTH4ctstm8TNelqjb51', '2HrsT9eflRVvdu3ZrorjQ6', '04aAxqtGp5pv12UXAg4pkq', '6bYjclvTLjcmZZC5GhmSFQ', '6jXJmMCmEXC8SmZTK0KId6', '3qiyyUfYe7CRYLucrPmulD', '6x4tKaOzfNJpEJHySoiJcs', '7bEDDsy2LFC0KSqhZp5nPE', '1ejq4bPmYNhITVsCGNnVv2', '4Ps2HEc6cRSy3fKPYfGAQ3', '2lEXaTrSES6eZyypyvo9yR', '3qgPpwtuRu5oP8EtFSj8HE', '1T02MleMfYPohqKcd8uY6J', '1NWfdpuChB7HcctVvDvGIy', '6EjfjKpSk2eb8aAcX2D08Y', '6JsiDFQRx7GHcuf2UroSYB', '2ouTPeXZDbhcm2R2zx5LBG', '63AXfaJ12wKlnXe9QAvzbl', '7zzoxJbgjme3366mOp5UnH', '40AHsBVOfw3H9i1jIrJEbb', '10Nmj3JCNoMeBQ87uw5j8k', '507t7AQoTaplubSS9jFT5v', '7Aa4LwrZCRV4qN0AsIRilj', '1UGC7ZZGgriKyoYipPR2Ng', '7jH47uEvcW2VYFD3wB03JE', '06zkiXCVsfXfPOqtK8FBiD', '7KB4O3WXC0SJoFuF6fnF9b', '6kcHg7XL6SKyPNd78daRBL', '67WTwafOMgegV6ABnBQxcE', '0iQ77tSa7ncj7zBPtVx6cU', '6fNhZRFEkBfgW39W3wKARJ', '70IM8GwLBrHRxBAcqGN2bw', '6zDs6zI94L761vd0cVScTT', '3QA66YGqHvdSthvEAhGmMH', '5

In [121]:
for key, value in store.items(): 
    print(key, len(value))

uris 301
names 301
artist_names 301
artist_uris 301
artist_pop 301
artist_genres 301
albums 301
track_pop 301
danceability 301
energy 301
keys 301
loudness 301
modes 301
speechiness 301
acousticness 301
instrumentalness 301
liveness 301
valences 301
tempos 301
types 301
ids 301
track_hrefs 301
analysis_urls 301
durations_ms 301
time_signatures 301


In [34]:
df = pd.DataFrame.from_dict(store)

In [35]:
df.head()

Unnamed: 0,uris,names,artist_names,artist_uris,artist_pop,artist_genres,albums,track_pop,danceability,energy,keys,loudness,modes,speechiness,acousticness,instrumentalness,liveness,valences,tempos,types,ids,track_hrefs,analysis_urls,durations_ms,time_signatures
0,3V9cM3nCH2G66afoDi0snu,BOOM,X Ambassadors,3NPpFNZtSTHheNBaWC82rB,70,"[modern alternative rock, modern rock, stomp pop]",ORION,61,0.732,0.594,11,-5.911,0,0.0693,0.00805,0.0301,0.0802,0.676,107.996,audio_features,3V9cM3nCH2G66afoDi0snu,https://api.spotify.com/v1/tracks/3V9cM3nCH2G6...,https://api.spotify.com/v1/audio-analysis/3V9c...,164587,4
1,0fYVliAYKHuPmECRs1pbRf,Renegades,X Ambassadors,3NPpFNZtSTHheNBaWC82rB,70,"[modern alternative rock, modern rock, stomp pop]",VHS,78,0.526,0.862,2,-6.003,1,0.0905,0.0144,0.0597,0.229,0.528,90.052,audio_features,0fYVliAYKHuPmECRs1pbRf,https://api.spotify.com/v1/tracks/0fYVliAYKHuP...,https://api.spotify.com/v1/audio-analysis/0fYV...,195200,4
2,2NldmECxe8jE4TmLmu5Yao,Hey Child,Korbee,5xaoFkc3w6UTdTZ1GJOIsG,17,[],Hey Child,33,0.649,0.796,9,-3.89,1,0.0328,0.00267,0.000157,0.362,0.653,140.035,audio_features,2NldmECxe8jE4TmLmu5Yao,https://api.spotify.com/v1/tracks/2NldmECxe8jE...,https://api.spotify.com/v1/audio-analysis/2Nld...,241267,4
3,3xgK660fsZH7ZDcOMfIdfB,Jungle,X Ambassadors,3NPpFNZtSTHheNBaWC82rB,70,"[modern alternative rock, modern rock, stomp pop]",VHS,61,0.389,0.748,2,-4.461,1,0.0805,0.000821,0.00058,0.358,0.216,78.056,audio_features,3xgK660fsZH7ZDcOMfIdfB,https://api.spotify.com/v1/tracks/3xgK660fsZH7...,https://api.spotify.com/v1/audio-analysis/3xgK...,189707,4
4,7o2CTH4ctstm8TNelqjb51,Sweet Child O' Mine,Guns N' Roses,3qm84nBOXUEQ2vnTfUTTFC,78,"[glam metal, hard rock, rock]",Appetite For Destruction,18,0.454,0.91,6,-7.766,1,0.0448,0.0866,0.0996,0.116,0.629,125.116,audio_features,7o2CTH4ctstm8TNelqjb51,https://api.spotify.com/v1/tracks/7o2CTH4ctstm...,https://api.spotify.com/v1/audio-analysis/7o2C...,354520,4


## Access Top-performing Playlists per Country

Access current/ top performing playlists in order to extract playlist uri's for automated track data extraction. 

In [104]:
playlists = sp.featured_playlists(country='GB', limit=20)

In [105]:
playlist_items = playlists['playlists']['items']

In [54]:
for item in playlist_items[0:10]:
    print(item['uri'])
    print(item['name'])

spotify:playlist:37i9dQZF1DX1OY2Lp0bIPp
Monday Motivation
spotify:playlist:37i9dQZF1DXcDoDDetPsEg
Who We Be
spotify:playlist:37i9dQZF1DXa71eg5j9dKZ
Altar
spotify:playlist:37i9dQZF1DWY4lFlS4Pnso
Hot Hits UK
spotify:playlist:37i9dQZF1DX9Z3vMB2b8im
The Most Beautiful Songs in the World
spotify:playlist:37i9dQZF1DWXU4C3nPzApC
Very Nearly Nashville
spotify:playlist:37i9dQZF1DWSQScAbo5nGF
Indie Roadtrip
spotify:playlist:37i9dQZF1DX7364T8tu1TH
The Dance List
spotify:playlist:37i9dQZF1DXbHcQpOiXk1D
Jazz UK
spotify:playlist:37i9dQZF1DXdeMORbC1XNa
ACS


In [7]:
def find_top_playlists(country):
    uris = []
    names = []
    playlists = sp.featured_playlists(country=country, limit=20)
    playlist_items = playlists['playlists']['items']
    for item in playlist_items:
        uris.append(item['uri'].split(':')[-1])
        names.append(item['name'])
    return uris, names

In [61]:
top_playlists, names = find_top_playlists('GB')
print(top_playlists)
print(names)

['37i9dQZF1DX1OY2Lp0bIPp', '37i9dQZF1DXcDoDDetPsEg', '37i9dQZF1DXa71eg5j9dKZ', '37i9dQZF1DWY4lFlS4Pnso', '37i9dQZF1DX9Z3vMB2b8im', '37i9dQZF1DWXU4C3nPzApC', '37i9dQZF1DWSQScAbo5nGF', '37i9dQZF1DX7364T8tu1TH', '37i9dQZF1DXbHcQpOiXk1D', '37i9dQZF1DXdeMORbC1XNa']
['Monday Motivation', 'Who We Be', 'Altar', 'Hot Hits UK', 'The Most Beautiful Songs in the World', 'Very Nearly Nashville', 'Indie Roadtrip', 'The Dance List', 'Jazz UK', 'ACS']


Confirm extracted uri's work with the created track extraction function

In [57]:
tracks = extract_tracks(sp, top_playlists[0])

Total songs: 100
Current offset: 0


In [58]:
print(tracks)

{'uris': ['spotify:track:6UelLqGlWMcVH1E5c4H7lY', 'spotify:track:0nrRP2bk19rLc0orkWPQk2', 'spotify:track:7JJmb5XwzOO8jgpou264Ml', 'spotify:track:003vvx7Niy0yvhvHt4a68B', 'spotify:track:6RUKPb4LETWmmr3iAEQktW', 'spotify:track:6K4t31amVTZDgR3sKmwUJJ', 'spotify:track:6DCZcSspjsKoFjzjrWoCdn', 'spotify:track:2374M0fQpWi3dLnB54qaLX', 'spotify:track:7KXjTSCq5nL1LoYtL7XAwS', 'spotify:track:4bHsxqR3GMrXTxEPLuK5ue', 'spotify:track:4kbj5MwxO1bq9wjT5g9HaA', 'spotify:track:2JzZzZUQj3Qff7wapcbKjc', 'spotify:track:3ZOEytgrvLwQaqXreDs2Jx', 'spotify:track:3GCdLUSnKSMJhs4Tj6CV3s', 'spotify:track:2V65y3PX4DkRhy1djlxd9p', 'spotify:track:2tUBqZG2AbRi7Q0BIrVrEj', 'spotify:track:1auxYwYrFRqZP7t3s7w4um', 'spotify:track:1KGi9sZVMeszgZOWivFpxs', 'spotify:track:6FE2iI43OZnszFLuLtvvmg', 'spotify:track:3bidbhpOYeV4knp8AIu8Xn', 'spotify:track:1FTSo4v6BOZH9QxKc3MbVM', 'spotify:track:5r5cp9IpziiIsR6b93vcnQ', 'spotify:track:0ikz6tENMONtK6qGkOrU3c', 'spotify:track:5Hroj5K7vLpIG4FNCRIjbP', 'spotify:track:7Ie9W94M7OjPoZV

## Data Collection

Specify the countries from which to extract the top 20 playlists from: 
- South Africa (**ZA**)
- Australia (**AU**)
- United Kingdom (**GB**)
- United States (**UK**)
- Canada (**CA**)
- Jamaica (**JM**)
- Malta (**MT**)
- Netherlands (**NL**)
- France (**FR**)
- Germany (**DE**)
- Ghana (**GH**)

In [11]:
def add_playlist_tracking(name, store):
    store['playlist_name'] = [name] * len(store['uris'])
    return store


def record_playlists(top_playlists, names, playlist_store, name_store):
    playlist_store.extend(top_playlists)
    name_store.extend(names)

In [12]:
countries = ['AU', 'GB', 'US', 'CA', 'JM', 'MT', 'NL', 'FR', 'DE', 'GH', 'ZA']

In [None]:
playlist_store = []
name_store = []  # Construct playlist info storage
store = construct_storage()  # Construct track info storage

for country in countries[0:1]: 
    print(f'Country: {country}') 
    top_playlists, names = find_top_playlists(country)

    for playlist, name in zip(top_playlists[0:2], names[0:2]):
        print(f'Playlist name: {name}')
        store = extract_tracks(sp, playlist, store)
        store = add_playlist_tracking(name, store)
        time.sleep(45)

    record_playlists(top_playlists, names, playlist_store, name_store)
    print('-----------------------------------------------------------------------------')

In [14]:
print(playlist_store)
print(name_store)
print("Total Playlists: ", len(name_store))

['37i9dQZF1DX1dxt8X2wXrw', '37i9dQZF1DX7J2y2eWxEnR', '37i9dQZF1DX8OCw6EqwHPA', '37i9dQZF1DWYIJ3HxqIxIJ', '37i9dQZF1DWYctfAtweUtE', '37i9dQZF1DX07vKD9l5Yfi', '37i9dQZF1DX0w6SRvtNXby', '37i9dQZF1DX0QMvE8mcOAb', '37i9dQZF1DX04mASjTsvf0', '37i9dQZF1DWYtbIjHMwI5j', '37i9dQZF1DX2L0iB23Enbq', '37i9dQZF1DX1Ah0nVaIfRO', '37i9dQZF1DWZaNRXtvIrDt']
['crush', 'R&B Connect', 'softly', 'RADAR AU & NZ', 'Rock Throwback', 'Unwind 00s', "Chillin' in the Name of", 'fuzzy', 'R&B Classics', 'You Do You!', 'Viral Hits', 'Top Tracks of 2022 AU', 'Portal']
Total Playlists:  13


In [15]:
print(store)

{'uris': ['6Dae6DNW72nLZ6KGyE7aeH', '0J9uaeOd7GZi9MTybtqAD4', '0K0ccueOxKjB5vgLVpnhIf', '0dn6S6lJqAIQg90kMmWtVj', '7DLF9Dz9rx3LEIqXIE5ebP', '5mjYQaktjmjcMKcUIcqz4s', '4L4m2mgZDF8KIkJPdWWIAf', '0LzidBf7cUsnZnG34OUPSF', '3OgdnA3LYpJ6vxrfk6X3p4', '6curW2n7DOSwwff4UjELPH', '4baqpGz3NRtHZ8ye1T6Dhl', '0ZDwhNFZnPx9EIPCcwGUHC', '0Ic42kynfrSAJjTlOr0dWR', '7qRCVfjifWMt3q2MVfV8mV', '4pchG0mOBAJoXXu1vmwyTY', '7dEXXV4xk5OpEdswCWC3GX', '72FDwBVy3bw7o9qf8S9Ghq', '3XiIO5kw2UxoY6Aph8Tcd5', '2tXILArq64JkwKB1G2wstD', '7D6jnDW82vRRMiCRLcy0pl', '40TZnaw4eDPChJNHw2Swf3', '5C9anXz5T3PY3A5YkH66cn', '1ubTdHUiLRXe9XaHjo6aAX', '2yxobiDeGo30ftKwde2d5F', '1VwmC2Sw9b9EztWkRvC07M', '3JKRuo3KhL39T6XWAXPohj', '6cU5urANuL1UYaBg5DNre4', '4CNZbr3dSioXNk5bJE46XQ', '4wXMa4oleu4CaD06V7zEyG', '3sLTYBYs6lJsFK84t2X7wt', '68M0lk2tYUaQf1TVsEYSXS', '52hSgk5YbiBmdDdIrGDPsd', '2wueg8zb5SSaSxCyUpcKRJ', '6NhNuqfnXWCvwVPBF6zyOX', '4tEeJe0AS4TbihaokDG6hG', '0Zb004lei42rERugapZaSt', '4qeIlzP0Z1JFXh0AckDcX8', '1MaXSKqoQ2UxothMqc77Xq', '5

## Save Information
Save the information to a DataFrame and to a csv option.

Verify the identical lengths of all store information ahead of constructing a DataFrame

In [16]:
for key, value in store.items(): 
    print(key, len(value))

uris 175
names 175
artist_names 175
artist_uris 175
artist_pop 250
artist_genres 250
albums 175
track_pop 175
danceability 250
energy 250
keys 250
loudness 250
modes 250
speechiness 250
acousticness 250
instrumentalness 250
liveness 250
valences 250
tempos 250
types 250
ids 250
track_hrefs 250
analysis_urls 250
durations_ms 250
time_signatures 250
playlist_name 175


In [137]:
df = pd.DataFrame.from_dict(store)

ValueError: All arrays must be of the same length

In [None]:
df.head()

In [None]:
df.to_csv('../data/tracks.csv', sep=',')