In [26]:
from dotenv import load_dotenv
import os
import base64
import requests
import pandas as pd
import numpy as np

In [27]:
# Get configuration file that has all the api tokens
def configure():
    load_dotenv()

In [28]:
#Grab the Client_id and Client_secret from the env for the documented api
configure()
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
print(CLIENT_ID)
print(CLIENT_SECRET)

40692bc74b7f42f9a0d19365e7af4cd8
790e90e05ada42339bc081c69ffef37e


***

In [29]:
def get_token():
    '''Takes the Client_ID and Client_Secret to request access token.
        Access tokens expire every hour so have to request new one'''
    ## Setup the authorization str and convert to base64  
    auth_str = CLIENT_ID + ":" + CLIENT_SECRET
    #encode str with utf-8 first
    auth_bytes = auth_str.encode("utf-8")
    #encoding it to required base64
    auth_base64 = str(base64.b64encode(auth_bytes),"utf-8")

    ## Run the Actual Request with Post, setting up required header fields
    url = "https://accounts.spotify.com/api/token"
    headers = {
        "Authorization": "Basic " + auth_base64,
        "Content-Type": "application/x-www-form-urlencoded"
    }

    data = {"grant_type": "client_credentials"}

    ## actually make the request to Spotify
    result = requests.post(url, headers = headers, data = data)
    json_result = result.json()
    token = json_result["access_token"]
    return token

In [30]:
def get_auth_header(token):
    '''Creates Authorization header with the access token for requests to official Spotify API'''
    header = {
        "Authorization": "Bearer " + token
        }
    return header

In [31]:
#get Token
token = get_token()

In [32]:
token

'BQBCPnXjWqcUD1eCyQoYgXfZ3biy2GkYdNzV91fvqifyytXGuVwyeaju0CeJe79WWCCpIVbULxuO6pSCfVgIYbsLXuWNVpGomgHXLGiM7EBUpd2y3L6a'

***

## Search for artist_id by artist_name

In [33]:
def search_artist(token, artist_name):
    '''Search the Artist name and get their info including ID and popularity'''
    url = "https://api.spotify.com/v1/search"
    headers = get_auth_header(token)

    querystring = {
                "q": artist_name,
                "type": "artist",
                "limit": 1
                }

    result = requests.request("GET",url, headers= headers, params=querystring)
    
    json_result = result.json()

    try:
        #returns a artist_id, artist name,
        answer = json_result['artists']['items'][0]
        artist_info = {'artist_name':answer['name'],'artist_id':answer['id'], 'artist_followers':answer['followers']['total'], 'artist_popularity':answer['popularity'], 'artist_genres':answer['genres']}
        return artist_info
    except:
        #give error
        print("No artist with this name on Spotify")
        return None
    

Test what `search_artist()` return

In [34]:
artist = search_artist(token, "Drake")

In [35]:
artist

{'artist_name': 'Drake',
 'artist_id': '3TVXtAsR1Inumwj472S9r4',
 'artist_followers': 72296052,
 'artist_popularity': 98,
 'artist_genres': ['canadian hip hop',
  'canadian pop',
  'hip hop',
  'rap',
  'toronto rap']}

In [36]:
artist['artist_id']

'3TVXtAsR1Inumwj472S9r4'

 ## Search for albums and singles by artist_id

#### `get_singles_albums_by_artist_limit_50()`

In [37]:
def get_singles_albums_by_artist_limit_50(token, artist_id,offset):
    '''Search "album" and "singles" given artist_id limit 50 per request at a time
        Return list of items '''
    url = f"https://api.spotify.com/v1/artists/{artist_id}/albums"
    headers = get_auth_header(token)

    querystring = {
                "include_groups": "album,single",
                "limit": 50,
                "offset": offset
                }

    result = requests.request("GET",url, headers= headers, params=querystring)
    
    json_result = result.json()
    items = json_result['items']
    return items

Test what `get_singles_albums_by_artist_limit_50()` return

In [38]:
#testing the first 50
album_single = get_singles_albums_by_artist_limit_50(token,artist['artist_id'],0)
album_single

[{'album_group': 'album',
  'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3TVXtAsR1Inumwj472S9r4'},
    'href': 'https://api.spotify.com/v1/artists/3TVXtAsR1Inumwj472S9r4',
    'id': '3TVXtAsR1Inumwj472S9r4',
    'name': 'Drake',
    'type': 'artist',
    'uri': 'spotify:artist:3TVXtAsR1Inumwj472S9r4'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/1URnnhqYAYcrqrcwql10ft'},
    'href': 'https://api.spotify.com/v1/artists/1URnnhqYAYcrqrcwql10ft',
    'id': '1URnnhqYAYcrqrcwql10ft',
    'name': '21 Savage',
    'type': 'artist',
    'uri': 'spotify:artist:1URnnhqYAYcrqrcwql10ft'}],
  'available_markets': ['AR',
   'AU',
   'AT',
   'BE',
   'BO',
   'BR',
   'BG',
   'CA',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DK',
   'DO',
   'DE',
   'EC',
   'EE',
   'SV',
   'FI',
   'FR',
   'GR',
   'GT',
   'HN',
   'HK',
   'HU',
   'IS',
   'IE',
   'IT',
   'LV',
   'LT',
   'LU',
   'MY',
   'MT',
   'MX',
   

#### `get_all_singles_albums_by_artist()`

In [39]:
def get_all_singles_albums_by_artist(token, artist_id):
    '''Grabs all the singles and albums for the artist and return a dataframe
        with album_group, name, id, release_date, total_tracks, uri'''
    offset_counter = 0
    data = []
    while True:
        results = get_singles_albums_by_artist_limit_50(token,artist_id,offset_counter)
        offset_counter += 50
        data.extend(results)

        if len(results) != 50:
            break

    dataframe = pd.DataFrame(data, columns = ["album_group","name","id","release_date","total_tracks",'uri'])
    #drop duplicates
    dataframe = dataframe.drop_duplicates(subset=['name'], keep='first')
    dataframe = dataframe.reset_index(drop=True)
    return dataframe


Test what `get_all_singles_albums_by_artist()` return

In [40]:
all_albums = get_all_singles_albums_by_artist(token,artist['artist_id'])
all_albums

Unnamed: 0,album_group,name,id,release_date,total_tracks,uri
0,album,Her Loss,5MS3MvWHJ3lOZPLiMxzOU6,2022-11-04,16,spotify:album:5MS3MvWHJ3lOZPLiMxzOU6
1,album,"Honestly, Nevermind",3cf4iSSKd8ffTncbtKljXw,2022-06-17,14,spotify:album:3cf4iSSKd8ffTncbtKljXw
2,album,Certified Lover Boy,3SpBlxme9WbeQdI9kx7KAV,2021-09-03,21,spotify:album:3SpBlxme9WbeQdI9kx7KAV
3,album,Dark Lane Demo Tapes,6OQ9gBfg5EXeNAEwGSs6jK,2020-05-01,14,spotify:album:6OQ9gBfg5EXeNAEwGSs6jK
4,album,Care Package,7dqpveMVcWgbzqYrOdkFTD,2019-08-02,17,spotify:album:7dqpveMVcWgbzqYrOdkFTD
...,...,...,...,...,...,...
66,single,Over,4smkAw49jun80LZHzwQghb,2010-01-01,1,spotify:album:4smkAw49jun80LZHzwQghb
67,single,Over (2010 JUNO Awards),1EQFue421vce8Fq0gIA3E9,2010-01-01,1,spotify:album:1EQFue421vce8Fq0gIA3E9
68,single,Fear,2wmvVAqB2sAiJQpp2pRWcD,2009-09-15,1,spotify:album:2wmvVAqB2sAiJQpp2pRWcD
69,single,I'm Goin In,645FRpimwu0b1isiY5M32J,2009-09-15,1,spotify:album:645FRpimwu0b1isiY5M32J


#### `get_album_single_popularity_by_id()`

In [41]:
def get_album_single_popularity_by_id(token, single_album_id):
    '''input one single/album id to search and return an popularity score'''

    url = f"https://api.spotify.com/v1/albums/{single_album_id}"
    headers = get_auth_header(token)

    result = requests.request("GET",url, headers= headers)
    
    json_result = result.json()
    popularity_score = json_result['popularity']
    return popularity_score
    

Test what `get_album_single_popularity_by_id()` return

In [49]:
#get back Popularity score of the album
get_album_single_popularity_by_id(token,"5MS3MvWHJ3lOZPLiMxzOU6")

93

#### `get_all_pop_for_album_single()`

In [43]:
def get_all_pop_for_album_single(token,albums_singles_df):
    '''input dataframe of all albums and singles of an artist and use the id column to get the popularity score return updated dataframe'''
    # get the number of rows in the dataframe
    n = len(albums_singles_df.index)
    #create a list of id
    id_list = albums_singles_df['id']
    #create an array of the same size as id to get the popularity score
    pop_score_list = np.empty(n)

    for i in range(len(id_list)):
        pop_score_list[i] = get_album_single_popularity_by_id(token,id_list[i])
    
    albums_singles_df['album_popularity'] = pop_score_list
    return albums_singles_df


Test what `get_all_pop_for_album_single()` return

In [44]:
#all albums and singles with popularity score
all_albums_pop = get_all_pop_for_album_single(token,all_albums)
all_albums_pop

Unnamed: 0,album_group,name,id,release_date,total_tracks,uri,album_popularity
0,album,Her Loss,5MS3MvWHJ3lOZPLiMxzOU6,2022-11-04,16,spotify:album:5MS3MvWHJ3lOZPLiMxzOU6,93.0
1,album,"Honestly, Nevermind",3cf4iSSKd8ffTncbtKljXw,2022-06-17,14,spotify:album:3cf4iSSKd8ffTncbtKljXw,84.0
2,album,Certified Lover Boy,3SpBlxme9WbeQdI9kx7KAV,2021-09-03,21,spotify:album:3SpBlxme9WbeQdI9kx7KAV,87.0
3,album,Dark Lane Demo Tapes,6OQ9gBfg5EXeNAEwGSs6jK,2020-05-01,14,spotify:album:6OQ9gBfg5EXeNAEwGSs6jK,80.0
4,album,Care Package,7dqpveMVcWgbzqYrOdkFTD,2019-08-02,17,spotify:album:7dqpveMVcWgbzqYrOdkFTD,76.0
...,...,...,...,...,...,...,...
66,single,Over,4smkAw49jun80LZHzwQghb,2010-01-01,1,spotify:album:4smkAw49jun80LZHzwQghb,10.0
67,single,Over (2010 JUNO Awards),1EQFue421vce8Fq0gIA3E9,2010-01-01,1,spotify:album:1EQFue421vce8Fq0gIA3E9,3.0
68,single,Fear,2wmvVAqB2sAiJQpp2pRWcD,2009-09-15,1,spotify:album:2wmvVAqB2sAiJQpp2pRWcD,40.0
69,single,I'm Goin In,645FRpimwu0b1isiY5M32J,2009-09-15,1,spotify:album:645FRpimwu0b1isiY5M32J,56.0


## Search for each track inside dataframe of all albums and singles

#### `get_tracks_by_album_limit_50()`

In [45]:
def get_tracks_by_album_limit_50(token, single_album_id,offset):
    '''input one single/album id to search and return first 50 tracks in the album'''

    url = f"https://api.spotify.com/v1/albums/{single_album_id}/tracks"
    headers = get_auth_header(token)

    querystring = {
                "limit": 50,
                "offset": offset
                }

    result = requests.request("GET",url, headers= headers, params=querystring)
    
    json_result = result.json()
    items = json_result['items']
    return items

Test what `get_tracks_by_album_limit_50` return

In [50]:
get_tracks_by_album_limit_50(token, "5MS3MvWHJ3lOZPLiMxzOU6",0)

[{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3TVXtAsR1Inumwj472S9r4'},
    'href': 'https://api.spotify.com/v1/artists/3TVXtAsR1Inumwj472S9r4',
    'id': '3TVXtAsR1Inumwj472S9r4',
    'name': 'Drake',
    'type': 'artist',
    'uri': 'spotify:artist:3TVXtAsR1Inumwj472S9r4'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/1URnnhqYAYcrqrcwql10ft'},
    'href': 'https://api.spotify.com/v1/artists/1URnnhqYAYcrqrcwql10ft',
    'id': '1URnnhqYAYcrqrcwql10ft',
    'name': '21 Savage',
    'type': 'artist',
    'uri': 'spotify:artist:1URnnhqYAYcrqrcwql10ft'}],
  'available_markets': ['AR',
   'AU',
   'AT',
   'BE',
   'BO',
   'BR',
   'BG',
   'CA',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DK',
   'DO',
   'DE',
   'EC',
   'EE',
   'SV',
   'FI',
   'FR',
   'GR',
   'GT',
   'HN',
   'HK',
   'HU',
   'IS',
   'IE',
   'IT',
   'LV',
   'LT',
   'LU',
   'MY',
   'MT',
   'MX',
   'NL',
   'NZ',
   'NI',
   'NO',
   'PA',
   'PY',


#### `get_all_tracks_for_one_album()`
This is mandatory to run even if the album has less than 50 track

In [47]:
def get_all_tracks_for_one_album(token, single_album_id):
    '''Grabs all the tracks of the album (function mainly useful if album has more than 50 tracks) and return dataframe with album_id attached'''
    offset_counter = 0
    data = []
    while True:
        results = get_tracks_by_album_limit_50(token,single_album_id,offset_counter)
        offset_counter += 50
        data.extend(results)

        if len(results) != 50:
            break
    
    ##dataframe = pd.DataFrame(data)
    #extract only relevant fields
    dataframe = pd.DataFrame(data, columns = ["name","id",'uri', "track_number"])
    dataframe = dataframe.reset_index(drop=True)
    
    #include album id for easier identification later on
    album_id = np.full(shape = len(dataframe),fill_value = single_album_id)
    dataframe['album_id'] = album_id
    return dataframe


In [51]:
get_all_tracks_for_one_album(token,"5MS3MvWHJ3lOZPLiMxzOU6")

Unnamed: 0,name,id,uri,track_number,album_id
0,Rich Flex,1bDbXMyjaUIooNwFE9wn0N,spotify:track:1bDbXMyjaUIooNwFE9wn0N,1,5MS3MvWHJ3lOZPLiMxzOU6
1,Major Distribution,46s57QULU02Voy0Kup6UEb,spotify:track:46s57QULU02Voy0Kup6UEb,2,5MS3MvWHJ3lOZPLiMxzOU6
2,On BS,34tz0eDhGuFErIuW3q4mPX,spotify:track:34tz0eDhGuFErIuW3q4mPX,3,5MS3MvWHJ3lOZPLiMxzOU6
3,BackOutsideBoyz,0wshkEEcJUQU33RSRBb5dv,spotify:track:0wshkEEcJUQU33RSRBb5dv,4,5MS3MvWHJ3lOZPLiMxzOU6
4,Privileged Rappers,7l2nxyx7IkBX5orhkALg0V,spotify:track:7l2nxyx7IkBX5orhkALg0V,5,5MS3MvWHJ3lOZPLiMxzOU6
5,Spin Bout U,2ZL7WZcjuYKi1KUDtp4kCC,spotify:track:2ZL7WZcjuYKi1KUDtp4kCC,6,5MS3MvWHJ3lOZPLiMxzOU6
6,Hours In Silence,0sSRLXxknVTQDStgU1NqpY,spotify:track:0sSRLXxknVTQDStgU1NqpY,7,5MS3MvWHJ3lOZPLiMxzOU6
7,Treacherous Twins,4Flfb4fGscN9kXPOduQLrv,spotify:track:4Flfb4fGscN9kXPOduQLrv,8,5MS3MvWHJ3lOZPLiMxzOU6
8,Circo Loco,7GeTsDIc5ykNB6lORO6Cee,spotify:track:7GeTsDIc5ykNB6lORO6Cee,9,5MS3MvWHJ3lOZPLiMxzOU6
9,Pussy & Millions (feat. Travis Scott),2KLwPaRDOB87XOYAT2fgxh,spotify:track:2KLwPaRDOB87XOYAT2fgxh,10,5MS3MvWHJ3lOZPLiMxzOU6


#### GET ALL TRACKS FROM ALL ALBUMS AND SINGLES