In [28]:
from dotenv import load_dotenv
import os
import base64
import requests
import pandas as pd
import numpy as np

In [29]:
# Get configuration file that has all the api tokens
def configure():
    load_dotenv()
    CLIENT_ID = os.getenv('CLIENT_ID')
    CLIENT_SECRET = os.getenv('CLIENT_SECRET')
    return CLIENT_ID, CLIENT_SECRET

In [30]:
#Grab the Client_id and Client_secret from the env for the documented api
CLIENT_ID, CLIENT_SECRET = configure()
print(CLIENT_ID)
print(CLIENT_SECRET)

40692bc74b7f42f9a0d19365e7af4cd8
559d13d75dd346c9b3b01cbbd2a1f55a


***

In [31]:
def get_token(CLIENT_ID,CLIENT_SECRET):
    '''Takes the Client_ID and Client_Secret to request access token.
        Access tokens expire every hour so have to request new one'''
    ## Setup the authorization str and convert to base64  
    auth_str = CLIENT_ID + ":" + CLIENT_SECRET
    #encode str with utf-8 first
    auth_bytes = auth_str.encode("utf-8")
    #encoding it to required base64
    auth_base64 = str(base64.b64encode(auth_bytes),"utf-8")

    ## Run the Actual Request with Post, setting up required header fields
    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {"grant_type": "client_credentials"}

    ## actually make the request to Spotify
    result = requests.post(url, headers = headers, data = data)
    json_result = result.json()
    token = json_result["access_token"]
    return token

In [32]:
def get_auth_header(token):
    '''Creates Authorization header with the access token for requests to official Spotify API'''
    header = {
        "Authorization": "Bearer " + token
        }
    return header

In [33]:
#get Token
token = get_token(CLIENT_ID,CLIENT_SECRET)

In [34]:
token

'BQDu2PGQ5Vc7_iJnYxjDe0IwIiECsLG1BFQM4r_DrZoDdupG85KYESdd-EIQ9rlt-oja-Dc9UhwizEPrjQbwwCAf_nThmDUWRatWA4u91Oe9lkrhLYOQ'

***

## Read in static data

In [35]:
#read in the csv with all artist_id
artist_info = pd.read_csv("/Users/jaspertsai/Documents/GitHub/Spotify-PlayCounts/data/artists.csv")
## get all artist_name
artist_names = artist_info['artist_name']

In [36]:
#read in the csv with all album_id
album_info = pd.read_csv("/Users/jaspertsai/Documents/GitHub/Spotify-PlayCounts/data/albums.csv")
## get all album_id
album_ids = album_info['album_id']

In [37]:
#read in the csv with all album_id
track_info = pd.read_csv("/Users/jaspertsai/Documents/GitHub/Spotify-PlayCounts/data/tracks.csv")
## get all album_id
track_ids = track_info['track_id']

## More efficient way to get albums' popularity score in bulk

In [38]:
def get_pop_by_album_limit_20(token, subset_20_ids):
    '''Read in an array of up to 20 IDs and get the album_ids and popularity score back as two arrays'''
    url = "https://api.spotify.com/v1/albums"

    #comma separated string to use in api
    album_ids = ','.join(subset_20_ids)

    headers = get_auth_header(token)
    querystring = {
                "ids": album_ids
                }

    result = requests.request("GET",url, headers= headers, params=querystring)
    
    json_result = result.json()
    #get the albums object 
    albums = json_result['albums']

    album_pop_score = np.arange(len(albums))
    for i in range(len(albums)):
        album_pop_score[i] = albums[i]['popularity']
        
        

    return subset_20_ids,album_pop_score



test what `get_pop_album_limit_20()` return

In [39]:
test_list = ['74vajFwEwXJ61OW1DKSPEa','3euz4vS7ezKGnNSwgyvKcd']
get_pop_by_album_limit_20(token,test_list)

(['74vajFwEwXJ61OW1DKSPEa', '3euz4vS7ezKGnNSwgyvKcd'], array([71, 83]))

### Testing out the function for all album_id

In [40]:
#gets the array of indices by 20 and if not multiplier go all way to last element.
## NEED this because API only allows search 20 albums at once
album_by_20_ind = np.arange(0,len(album_ids),20)
album_by_20_ind = np.concatenate((album_by_20_ind, [len(album_ids)]))
album_by_20_ind

array([  0,  20,  40,  60,  80, 100, 120, 140, 160, 180, 200, 220, 240,
       260, 280, 300, 320, 340, 360, 380, 400, 420, 440, 460, 480, 500,
       520, 540, 560, 580, 600, 620, 640, 660, 680, 700, 720, 740, 760,
       780, 800, 820, 840, 860, 880, 881])

In [41]:
album_ids_ls = []
album_pop_ls = []

for i in range(len(album_by_20_ind)-1):
    lower = album_by_20_ind[i]
    upper = album_by_20_ind[i+1]
    #subset to allow function to search 20 albums at once
    subset_id = album_ids[lower:upper]
    temp_id,temp_pop = get_pop_by_album_limit_20(token,subset_id)
    #adds onto the list instead of append which adds list within list
    album_ids_ls.extend(temp_id)
    album_pop_ls.extend(temp_pop)

data = pd.DataFrame({'album_id':album_ids_ls, 'album_popularity':album_pop_ls})
data

Unnamed: 0,album_id,album_popularity
0,74vajFwEwXJ61OW1DKSPEa,71
1,3euz4vS7ezKGnNSwgyvKcd,83
2,5UACk85y1hNRSUtY0ss8pb,62
3,2fYhqwDWXjbpjaIJPEfKFw,85
4,3tx8gQqWbGwqIGZHqDNrGe,81
...,...,...
876,11e4xCXllbvk8pWc1cCas1,45
877,7CoqvPGCLHm7LbgH2Pz9aY,27
878,4T5606j6qpkQrWlwbKPLOp,35
879,7cfhaRiLqBzuAzc6Q24nyW,44


## More efficient way to get tracks' popularity score in bulk

In [42]:
def get_pop_by_tracks_limit_50(token, subset_50_ids):
    '''Read in an array of up to 50 IDs and get the track_ids and popularity score back as two arrays'''
    url = "https://api.spotify.com/v1/tracks"

    #comma separated string to use in api
    track_ids = ','.join(subset_50_ids)

    headers = get_auth_header(token)
    querystring = {
                "ids": track_ids
                }

    result = requests.request("GET",url, headers= headers, params=querystring)
    
    json_result = result.json()
    #get the albums object 
    tracks = json_result['tracks']

    track_pop_score = np.arange(len(tracks))
    for i in range(len(tracks)):
        track_pop_score[i] = tracks[i]['popularity']

    return subset_50_ids,track_pop_score



test what `get_pop_by_tracks_limit_50()` return

In [43]:
test_list = ['12sliObzVzbLbAlDb8eChf','1hG4V53eR16jg7jVTNLOiX']
get_pop_by_tracks_limit_50(token,test_list)

(['12sliObzVzbLbAlDb8eChf', '1hG4V53eR16jg7jVTNLOiX'], array([50, 54]))

### Testing out the function for all track_id

In [44]:
#gets the array of indices by 50 and if not multiplier go all way to last element.
## NEED this because API only allows search 50 tracks at once
track_by_50_ind = np.arange(0,len(track_ids),50)
track_by_50_ind = np.concatenate((track_by_50_ind, [len(track_ids)]))
track_by_50_ind

array([   0,   50,  100,  150,  200,  250,  300,  350,  400,  450,  500,
        550,  600,  650,  700,  750,  800,  850,  900,  950, 1000, 1050,
       1100, 1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500, 1550, 1600,
       1650, 1700, 1750, 1800, 1850, 1900, 1950, 2000, 2050, 2100, 2150,
       2200, 2250, 2300, 2350, 2400, 2450, 2500, 2550, 2600, 2650, 2700,
       2750, 2800, 2850, 2900, 2950, 3000, 3050, 3100, 3150, 3200, 3250,
       3300, 3350, 3400, 3450, 3500, 3550, 3600, 3650, 3700, 3750, 3755])

In [45]:
track_ids_ls = []
track_pop_ls = []

for i in range(len(track_by_50_ind)-1):
    lower = track_by_50_ind[i]
    upper = track_by_50_ind[i+1]
    #subset to allow function to search 50 tracks at once
    subset_id = track_ids[lower:upper]
    temp_id,temp_pop = get_pop_by_tracks_limit_50(token,subset_id)
    #adds onto the list instead of append which adds list within list
    track_ids_ls.extend(temp_id)
    track_pop_ls.extend(temp_pop)

data = pd.DataFrame({'track_id':track_ids_ls, 'track_popularity':track_pop_ls})
data

Unnamed: 0,track_id,track_popularity
0,12sliObzVzbLbAlDb8eChf,50
1,1hG4V53eR16jg7jVTNLOiX,54
2,5VipERQ1ofCowecoFg2MVU,54
3,1N9hFgcgWYbGINUKjhvcK6,52
4,1qKdid2S9fZdSrzLaCcjcF,53
...,...,...
3750,1g09DZjQ7yBommCT6POY2n,41
3751,71IScwIe7bcIlpnlkbKVQw,43
3752,1cJkUN5LAotktryx2nPCr7,39
3753,31JVjy3XWnh6C2zf4kLCXN,40
