In [None]:
import requests
import pandas as pd
from tqdm import tqdm

# Authorization

In [None]:
# Create a request session; every HTTP call below (the token request and
# the Web API requests) is made through this one `requests.Session` object.
session = requests.Session()

In [None]:
# Spotify application Client ID, sent in the token request below.
# Replace the placeholder between the quotes with your own Client ID before
# running the notebook.
client_id = '<your-client-id>'  # Enter your Client ID

In [None]:
# Load the client secret from a local text file so that the value is never
# typed into the notebook or shown in an output; surrounding whitespace
# (such as a trailing newline) is stripped.
with open('spotify_secret.txt') as secret_file:
    client_secret = secret_file.read().strip()

In [None]:
# URL that the access-token request below is POSTed to.
token_endpoint = 'https://accounts.spotify.com/api/token'

In [None]:
# Request an access token using the client-credentials grant: the grant
# type, client ID and client secret are sent as form fields in the POST body.
token_request = {
    'grant_type': 'client_credentials',
    'client_id': client_id,
    'client_secret': client_secret,
}
authorization = session.post(token_endpoint, data=token_request)

In [None]:
# Check Authorization Response: display the HTTP status code returned by the
# token request (200 means the request succeeded).
authorization.status_code

In [None]:
# Stop with a clear HTTP error if the token request was rejected (for example
# because of a wrong client ID or secret) instead of failing below with a
# KeyError on the missing 'access_token' field.
authorization.raise_for_status()

# Extract the bearer token from the JSON body of the token response.
access_token = authorization.json()['access_token']

In [None]:
# Authorization header passed with every Web API request below.
bearer_value = f'Bearer {access_token}'
header = {'Authorization': bearer_value}

# Data Collection - Collating Data

## Collate Albums

In [None]:
# Spotify IDs of the artists whose albums are collected below.
artist_ids = [
    '3Nrfpe0tUJi4K4DXYWgMUX',
    '7n2Ycct7Beij7Dj7meI4X0',
    '3HqSLMAZ3g3d5poNaI7GOU',
    '6jJ0s89eD6GaHleKKya26X',
    '4dpARuHxo51G3z768sgnrY',
    '6vWDO969PvNqNYHIOW5v0m',
]

In [None]:
# Collect every album of every artist into a dict keyed by album ID.
# The endpoint is paginated, so pages of `limit` items are requested until
# `offset` reaches the `total` reported by the API.
album_data = {}
for artist_id in artist_ids:
    offset = 0
    limit = 50
    total = -1  # sentinel: the real total is unknown until the first page arrives

    while (offset < total) or (total == -1):  # offset_conditions
        response = session.get(
            f'https://api.spotify.com/v1/artists/{artist_id}/albums?'  # endpoint
            f'offset={offset}&limit={limit}',  # parameters - offset & limit
            headers=header
        )
        # Fail with a clear HTTP error rather than a KeyError on 'items'
        # when the API rejects the request.
        response.raise_for_status()
        albums = response.json()

        for album in albums['items']:
            # Only the first listed artist of each album is recorded.
            album_data[album['id']] = {'artist': album['artists'][0]['name'],
                                       'artist id': album['artists'][0]['id'],
                                       'album name': album['name'],
                                       'total tracks': album['total_tracks'],
                                       'release date': album['release_date']}

        offset = offset + limit  # increment
        total = int(albums['total'])

Let's view the result we got from the process we did above:

In [None]:
# Display the collected albums: a dict keyed by album ID.
album_data

## Load the data into a DataFrame

There's only so much we can do with dictionaries. Dealing with our data further will be a lot easier if we use dataframes using Pandas:

In [None]:
# Build a frame from the nested dict, then transpose it so that each album
# becomes a row (indexed by album ID) and each field becomes a column.
album_df = pd.DataFrame(album_data).transpose()

In [None]:
# Display the transposed frame: one row per album, indexed by album ID.
album_df

In [None]:
# Move the album IDs out of the index into a regular column (named 'index'
# by default) and number the rows from 0.
album_df = album_df.reset_index()

In [None]:
# Preview the first rows; the album IDs now sit in the 'index' column.
album_df.head()

Let's keep things organized by using the appropriate labels for our columns. It also avoids confusion moving forward.

In [None]:
# Give the former index column a meaningful label.
album_df = album_df.rename({'index': 'album id'}, axis='columns')

In [None]:
# Preview the frame with the renamed 'album id' column.
album_df.head()

We can notice that there are duplicate albums in our data. However, they have different `album id`s, so we can infer that Spotify considers these to be different albums; as we know, many albums are re-released with just a few additional tracks. To make things lighter for the later steps, let's try to remove the duplicates. Let's do this systematically by putting the albums with more tracks first:

In [None]:
# Order the rows by track count, then by album name, both descending, so
# that among albums sharing a name the one with the most tracks comes first.
album_df = album_df.sort_values(by=['total tracks', 'album name'],
                                ascending=False)

Let's then drop the duplicates. By default, this method keeps the first occurrence of each value in the selected column. In this case, let's use the `album name`, as that is the column with the duplicate values:

In [None]:
# Keep only the first row for each album name; after the sort above, that
# is the row with the highest track count.
album_df = album_df.drop_duplicates(subset=['album name'], keep='first')

In [None]:
# Preview the de-duplicated frame (row labels still reflect the old order).
album_df.head()

In [None]:
# Renumber the rows from 0 and discard the old row labels.
album_df = album_df.reset_index(drop=True)

In [None]:
# Preview the frame with its freshly numbered rows.
album_df.head()

We also notice that we have albums credited to `Various Artists`. These usually occur with compilation albums. Let's clean this up by retaining only the albums whose artist belongs to the list we initially intended. For this, let's use masking:

In [None]:
# Boolean mask: True for rows whose 'artist id' is one of the IDs in
# `artist_ids`, False otherwise.
album_df['artist id'].isin(artist_ids)

In [None]:
# Keep only the albums credited to one of the requested artists.
# drop=True discards the old row labels instead of inserting them as a stray
# 'index' column, matching the earlier reset_index(drop=True) step and
# keeping this cell safe to re-run.
is_requested_artist = album_df['artist id'].isin(artist_ids)
album_df = album_df[is_requested_artist].reset_index(drop=True)

In [None]:
# Preview the albums that remain after filtering by artist ID.
album_df.head()

## Get Album Tracks

In [None]:
# Plain Python list of the remaining album IDs, iterated over below.
album_id_list = album_df['album id'].to_list()

Let's use the same pagination code to iterate through the pages when getting the album tracks:

In [None]:
# Collect every track of every selected album into a dict keyed by track ID,
# paging through each album `limit` tracks at a time.
track_data = {}
for album_id in tqdm(album_id_list):
    offset = 0
    limit = 50
    total = -1  # sentinel: the real total is unknown until the first page arrives

    # Continue while unread tracks remain. Comparing `offset` alone against
    # `total` (rather than `offset + limit`) keeps the final, partially
    # filled page from being skipped on albums with more than `limit` tracks.
    while (offset < total) or (total == -1):
        response = session.get(
            f'https://api.spotify.com/v1/albums/{album_id}/tracks?'
            f'offset={offset}&limit={limit}',
            headers=header
        )
        # Fail with a clear HTTP error rather than a KeyError on 'items'
        # when the API rejects the request.
        response.raise_for_status()
        tracks = response.json()

        for item in tracks['items']:
            # Only the first listed artist of each track is recorded.
            track_data[item['id']] = {'track name': item['name'],
                                      'artist': item['artists'][0]['name'],
                                      'artist id': item['artists'][0]['id'],
                                      'duration': item['duration_ms'],
                                      'explicit': item['explicit'],
                                      'album id': album_id,
                                      'track number': item['track_number']
                                     }
        offset = offset + limit
        total = int(tracks['total'])

In [None]:
# One row per track: transpose the nested dict, move the track IDs out of
# the index into a column, and label that column 'track id'.
track_df = (
    pd.DataFrame(track_data)
    .transpose()
    .reset_index()
    .rename(columns={'index': 'track id'})
)

In [None]:
# Preview the first rows of the track table.
track_df.head()

Let's check how many tracks we got:

In [None]:
# Number of rows in the track table (one row per distinct track ID).
len(track_df)

We want to save this data so we can use it in the third (and last) part. One of the common ways to save a dataframe is in csv format. To do this, just apply the `to_csv('filename')` method to your dataframe:

In [None]:
# Write the track table to 'tracks.csv' in the current working directory.
# NOTE(review): with the default index=True the row index is also written as
# an unnamed first column; confirm the notebook that reads this file expects it.
track_df.to_csv('tracks.csv')