Hello! This notebook extracts data from artist pages on Spotify (SPO) at both album and track level. It uses spotipy, a Python library for building requests to the SPO API, to search for albums, singles, compilations and "appears on" releases, and retrieves all the information about the products available on an artist's page using only the Spotify Artist ID - or a list of two or more SPO Artist IDs.

Please note:
- The limit for results is 50 products for each request

#1- Installing/importing libraries and inserting credentials to access SPO API

In [None]:
# installing spotipy
!pip install spotipy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spotipy
  Downloading spotipy-2.20.0-py3-none-any.whl (27 kB)
Collecting requests>=2.25.0
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.6 MB/s 
Collecting urllib3>=1.26.0
  Downloading urllib3-1.26.12-py2.py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 37.9 MB/s 
[?25hCollecting redis>=3.5.3
  Downloading redis-4.3.4-py3-none-any.whl (246 kB)
[K     |████████████████████████████████| 246 kB 36.7 MB/s 
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: urllib3, deprecated, requests, redis, spotipy
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.24.3
    Uninstalling urllib3-1.24.3:
      Successfully uninstalled urllib3-1.24.3
  Attempting uninstall: requests
    Found existing installation: re

In [None]:
# import libraries
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import requests
from time import sleep
import pandas as pd

In [None]:
#inserting credentials from SPO API

# Client-credentials pair for the SPO API (replace the placeholders)
client_id = "YOUR_CLIENT_ID"
client_secret = "YOUR_CLIENT_SECRET"

AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST the credentials as form data to request a token
auth_response = requests.post(
    AUTH_URL,
    data={
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
    },
    timeout=30,  # do not wait forever if the service does not answer
)

# Stop here with the HTTP error if the request was rejected, instead of
# failing below with a KeyError on 'access_token'
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [None]:
# Build the spotipy client (`sp`) that the functions below use for every API call,
# authenticated through a client-credentials manager created from client_id/client_secret.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# 2 - Code to extract the products from one or more artist pages

##2.1 - Inserting the SPO artists ids

In [None]:
# List of SPO artist IDs to process; the loops below iterate over it one ID at a time.
artist_id = ['7MiDcPa6UiV3In7lIM71IN','3qvcCP2J0fWi0m0uQDUf6r'] # each SPO Artist ID is a quoted string; separate IDs with commas

##2.2 -Functions to extract data from SPO artist(s) page

In [None]:
## This function retrieves a dictionary with the albums of the artist whose ID is passed as artist_id

def getArtistAlbum(artist_id):
  """Return the albums found for one artist as a dict of parallel lists.

  Sends a single ``sp.artist_albums`` request with ``album_type=['album']``
  and ``limit=50``, so at most 50 albums are returned.

  Args:
    artist_id: SPO artist ID passed to ``sp.artist_albums``.

  Returns:
    A dict with the keys ``artist_name``, ``artist_id``, ``album_id``,
    ``album_title``, ``release_date``, ``total_tracks`` and ``type``. Each
    value is a list with one entry per album; ``artist_name`` and
    ``artist_id`` come from the first artist listed on the album.
    Returns ``None`` when the request yields no items.
  """
  result_album = sp.artist_albums(artist_id, album_type=['album'], limit=50)
  artist_album = result_album['items']

  # Nothing found: return None (the value getAllProducts tests for) instead
  # of failing on a result that was never built.
  if not artist_album:
    return None

  return {
      'artist_name': [album['artists'][0]['name'] for album in artist_album],
      'artist_id': [album['artists'][0]['id'] for album in artist_album],
      'album_id': [album['id'] for album in artist_album],
      'album_title': [album['name'] for album in artist_album],
      'release_date': [album['release_date'] for album in artist_album],
      'total_tracks': [album['total_tracks'] for album in artist_album],
      'type': [album['album_group'] for album in artist_album],
  }

In [None]:
## This function retrieves a dictionary with the singles of the artist whose ID is passed as artist_id
##OBS please note that EPs are retrieved as singles

def getArtistSingle(artist_id):
  """Return the singles found for one artist as a dict of parallel lists.

  Sends a single ``sp.artist_albums`` request with ``album_type=['single']``
  and ``limit=50``, so at most 50 products are returned.

  Args:
    artist_id: SPO artist ID passed to ``sp.artist_albums``.

  Returns:
    A dict with the keys ``artist_name``, ``artist_id``, ``album_id``,
    ``album_title``, ``release_date``, ``total_tracks`` and ``type``. Each
    value is a list with one entry per product; ``artist_name`` and
    ``artist_id`` come from the first artist listed on the product.
    Returns ``None`` when the request yields no items.
  """
  result_album = sp.artist_albums(artist_id, album_type=['single'], limit=50)
  artist_album = result_album['items']

  # Nothing found: return None (the value getAllProducts tests for) instead
  # of failing on a result that was never built.
  if not artist_album:
    return None

  return {
      'artist_name': [album['artists'][0]['name'] for album in artist_album],
      'artist_id': [album['artists'][0]['id'] for album in artist_album],
      'album_id': [album['id'] for album in artist_album],
      'album_title': [album['name'] for album in artist_album],
      'release_date': [album['release_date'] for album in artist_album],
      'total_tracks': [album['total_tracks'] for album in artist_album],
      'type': [album['album_group'] for album in artist_album],
  }

In [None]:
## This function retrieves a dictionary with the compilations of the artist whose ID is passed as artist_id

def getArtistCompilation(artist_id):
  """Return the compilations found for one artist as a dict of parallel lists.

  Sends a single ``sp.artist_albums`` request with
  ``album_type=['compilation']`` and ``limit=50``, so at most 50
  compilations are returned.

  Args:
    artist_id: SPO artist ID passed to ``sp.artist_albums``.

  Returns:
    A dict with the keys ``artist_name``, ``artist_id``, ``album_id``,
    ``album_title``, ``release_date``, ``total_tracks`` and ``type``. Each
    value is a list with one entry per compilation; ``artist_name`` and
    ``artist_id`` come from the first artist listed on the compilation.
    Returns ``None`` when the request yields no items, which is what the
    callers in this notebook check for.
  """
  result_album = sp.artist_albums(artist_id, album_type=['compilation'], limit=50)
  artist_album = result_album['items']

  if not artist_album:
    return None

  # Every item is collected before returning; returning from inside the
  # item loop would keep only the first compilation.
  return {
      'artist_name': [album['artists'][0]['name'] for album in artist_album],
      'artist_id': [album['artists'][0]['id'] for album in artist_album],
      'album_id': [album['id'] for album in artist_album],
      'album_title': [album['name'] for album in artist_album],
      'release_date': [album['release_date'] for album in artist_album],
      'total_tracks': [album['total_tracks'] for album in artist_album],
      'type': [album['album_group'] for album in artist_album],
  }

In [None]:
## This function retrieves a dictionary with the products in the "Appears On" section of the artist whose ID is passed as artist_id

def getArtistAppearsOn(artist_id):
  """Return the "appears on" products of one artist as a dict of parallel lists.

  Sends a single ``sp.artist_albums`` request with
  ``album_type=['appears_on']`` and ``limit=50``, so at most 50 products
  are returned.

  Args:
    artist_id: SPO artist ID passed to ``sp.artist_albums``.

  Returns:
    A dict with the keys ``artist_name``, ``artist_id``, ``album_id``,
    ``album_title``, ``release_date``, ``total_tracks`` and ``type``. Each
    value is a list with one entry per product. ``artist_name`` and
    ``artist_id`` come from the first artist listed on the product, not
    from the ``artist_id`` argument.
    Returns ``None`` when the request yields no items.
  """
  result_album = sp.artist_albums(artist_id, album_type=['appears_on'], limit=50)
  artist_album = result_album['items']

  # Nothing found: return None (the value getAllProducts tests for) instead
  # of failing on a result that was never built.
  if not artist_album:
    return None

  return {
      'artist_name': [album['artists'][0]['name'] for album in artist_album],
      'artist_id': [album['artists'][0]['id'] for album in artist_album],
      'album_id': [album['id'] for album in artist_album],
      'album_title': [album['name'] for album in artist_album],
      'release_date': [album['release_date'] for album in artist_album],
      'total_tracks': [album['total_tracks'] for album in artist_album],
      'type': [album['album_group'] for album in artist_album],
  }

##2.3 - These loops create a list with information about the artists' products for each type of product.
###Run the cell for the type of product you want to retrieve

In [None]:
artists_albums = [] #list of albums

#getting albums
for artist in artist_id: #for each artist listed in artist_id list
  sleep(.10) #0.1 s pause before each artist's request
  data_album = getArtistAlbum(artist)
  # skip an empty (None) result, as the compilations loop and getAllProducts do,
  # so the list never carries a None entry into the export step
  if data_album is not None:
    artists_albums.append(data_album)

In [None]:
artists_singles = [] #list of singles

#getting singles/eps
for artist in artist_id: #for each artist listed in artist_id list
  sleep(.10) #0.1 s pause before each artist's request
  data_single = getArtistSingle(artist)
  # skip an empty (None) result, as the compilations loop and getAllProducts do,
  # so the list never carries a None entry into the export step
  if data_single is not None:
    artists_singles.append(data_single)

In [None]:
# Compilations: one dictionary per artist for which something was returned
artists_compilations = []

for artist in artist_id:
  sleep(.10)  # 0.1 s pause before each artist's request
  data_compilation = getArtistCompilation(artist)
  if data_compilation is not None:  # None means nothing came back for this artist
    artists_compilations.append(data_compilation)

In [None]:
artists_appearson = [] #list of appears on

#getting appears on
for artist in artist_id: #for each artist listed in artist_id list
  sleep(.10) #0.1 s pause before each artist's request
  data_appearson = getArtistAppearsOn(artist)
  # skip an empty (None) result, as the compilations loop and getAllProducts do,
  # so the list never carries a None entry into the export step
  if data_appearson is not None:
    artists_appearson.append(data_appearson)

##2.4 - Set the list of infos and exporting with respective file names

### for example:

*   if you ran the search for albums, insert artists_albums in list_of_dicts and "artists_albums" in list_of_titles_dicts.
*   if you ran the search for albums and singles, insert **artists_albums, artists_singles** in list_of_dicts and **"artists_albums","artists_singles"** in list_of_titles_dicts




###2.4.1 - Setting the lists

In [None]:
# Setting the lists: the two lists are paired by position, so the n-th name in
# list_of_titles_dicts is the file name (without extension) used for the n-th
# entry of list_of_dicts.

list_of_dicts = [artists_albums,artists_singles]
list_of_titles_dicts = ['artists_albums','artists_singles']

###2.4.2 - Exporting one file for each type of product set above in the lists

In [None]:
# this function exports one xlsx file for each entry of list_of_dicts
# using the respective names given in list_of_titles_dicts

def exportProductsSeparately(list_of_dicts, list_of_titles_dicts):
  """Write one .xlsx file per entry of list_of_dicts.

  Args:
    list_of_dicts: sequence whose entries are each turned into a DataFrame
      with ``pd.DataFrame.from_dict``; list-valued cells are then expanded
      into one row per element.
    list_of_titles_dicts: file names without extension, paired with
      ``list_of_dicts`` by position; ``'.xlsx'`` is appended to each.
      Pairing stops at the shorter of the two sequences.

  Returns:
    None. The files are written to the current working directory.
  """
  for d, t in zip(list_of_dicts, list_of_titles_dicts):
    df = pd.DataFrame.from_dict(d).apply(pd.Series.explode)
    # No ``encoding`` argument: DataFrame.to_excel no longer accepts it
    # (removed in pandas 2.0), so passing it raises TypeError there.
    df.to_excel(t + '.xlsx', index=False)

In [None]:
## Run the separate export: writes one xlsx file per entry of list_of_dicts,
## named after the matching entry of list_of_titles_dicts
exportProductsSeparately(list_of_dicts,list_of_titles_dicts)

###2.4.3 - Generating the dataframe and exporting the selected type of products in one single file

####creating dataframe

In [None]:
#this function builds a dataframe with all the products selected above in the "setting the lists" section and exports it to Excel
#default file name: Artists_Products_SPO

def exportAllProducts(list_of_dicts):
  """Combine every entry of list_of_dicts into one sheet and export it.

  Each entry is turned into a DataFrame with ``pd.DataFrame.from_dict`` and
  its list-valued cells are expanded into one row per element; the resulting
  frames are concatenated and written to ``Artists_Products_SPO.xlsx``.

  Args:
    list_of_dicts: sequence of entries accepted by
      ``pd.DataFrame.from_dict``.

  Returns:
    None. Nothing is written when ``list_of_dicts`` is empty.
  """
  list_of_df = [
      pd.DataFrame.from_dict(d).apply(pd.Series.explode)
      for d in list_of_dicts
  ]

  # pd.concat raises on an empty list; with nothing to export, write no file
  if not list_of_df:
    return

  # Concatenate and write once, after all frames exist, instead of rewriting
  # the file on every iteration.
  df_artists_allproducts = pd.concat(list_of_df)
  # No ``encoding`` argument: DataFrame.to_excel no longer accepts it
  # (removed in pandas 2.0), so passing it raises TypeError there.
  df_artists_allproducts.to_excel(#file name:
                                  'Artists_Products_SPO.xlsx',
                                  index=False)

####exporting the excel file (named as Artists_Products_SPO)

In [None]:
## Run the combined export: writes Artists_Products_SPO.xlsx from list_of_dicts
exportAllProducts(list_of_dicts)

#3 - Searching all types of products at once and retrieving a single excel file with the information

#### The exported file name is Artists_AllProducts_SPO

In [None]:
# this function retrieves all types of products (albums, singles/EPs, compilations, appears on)
# at once for the artist(s) in the artist_id list
# and generates a single compiled Excel file (xlsx)
# default file name: Artists_AllProducts_SPO

def getAllProducts(artist_id):
  """Fetch every product type for each artist and export one Excel file.

  For each artist the albums, singles/EPs, compilations and "appears on"
  products are requested in that order; results that are ``None`` are
  skipped. The four collections are then converted to DataFrames,
  concatenated in the same order and written to
  ``Artists_AllProducts_SPO.xlsx``.

  Args:
    artist_id: iterable of SPO artist IDs.

  Returns:
    None. The result is the Excel file written to the working directory.
  """
  artists_albums = []
  artists_singles = []
  artists_compilations = []
  artists_appearson = []

  # (getter, destination list) pairs, in the order the requests are made
  collectors = (
      (getArtistAlbum, artists_albums),
      (getArtistSingle, artists_singles),
      (getArtistCompilation, artists_compilations),
      (getArtistAppearsOn, artists_appearson),
  )

  for artist in artist_id:
    sleep(.10)  # 0.1 s pause before each artist's requests

    for fetch, collected in collectors:
      data = fetch(artist)
      if data is not None:
        collected.append(data)

  list_of_dicts = [artists_albums, artists_singles, artists_compilations, artists_appearson]
  list_of_df = [
      pd.DataFrame.from_dict(d).apply(pd.Series.explode)
      for d in list_of_dicts
  ]

  # Concatenate and write once, after all frames exist, instead of rewriting
  # the file for every product type.
  df_all_products = pd.concat(list_of_df)

  # No ``encoding`` argument: DataFrame.to_excel no longer accepts it
  # (removed in pandas 2.0), so passing it raises TypeError there.
  df_all_products.to_excel(#filename:
                           'Artists_AllProducts_SPO.xlsx', index=False)

##3.1 Generating excel file

In [None]:
# Generate the compiled Excel file (Artists_AllProducts_SPO.xlsx) with all products
# for the IDs in the artist_id list
getAllProducts(artist_id)

#4 - Retrieving tracks data from products available on artists page
###Here you can import the SPO album IDs from a csv or xlsx file or manually insert the SPO album ID(s)



##4.1 - Reading from excel or csv
###if you want to insert them manually, go to 4.2

In [None]:
# Read the album IDs from an xlsx file
# (artists_albums.xlsx is used as the example; use pd.read_csv for csv files).
# index_col=0 turns the first column of the sheet into the DataFrame index,
# so 'album_id' must not be that first column.

df = pd.read_excel(#name of file imported:
                   'artists_albums.xlsx',index_col=0)

# plain Python list with the values of the 'album_id' column
album_ids = list(df['album_id'])

##4.2 - Inserting manually

In [None]:
# Insert the SPO album ID(s) manually; running this cell replaces any
# album_ids list read from a file in 4.1
album_ids = ['3UdCTJxopQVSG5wRdMPKZS']

##4.3 - Functions to retrieve album/tracks information


### This function retrieves the following album information: cover link, album id, album title and label

In [None]:
def getAlbumInfo(id):
  """Return cover link, ID, title and label text for one album.

  Args:
    id: SPO album ID passed to ``sp.album``. The name shadows the built-in
      ``id`` but is kept so existing keyword callers keep working.

  Returns:
    A dict with the keys ``cover_link``, ``album_id``, ``album_title`` and
    ``label``. Each value is a one-element list, the shape the export
    functions expand into rows. ``label`` holds the ``text`` of the
    album's first copyright entry; ``cover_link`` holds the URL of the
    first image. Either is ``'NotFound'`` when the entry is missing.
  """
  album = sp.album(id)

  # The album is a single mapping, so the values are read once. Looping over
  # it would walk its keys and repeat the same values once per key.
  try:
    album_label = album['copyrights'][0].get('text')
  except (KeyError, IndexError, TypeError, AttributeError):
    album_label = 'NotFound'

  try:
    cover = album['images'][0]['url']
  except (KeyError, IndexError, TypeError):
    cover = 'NotFound'

  album_info = {
      'cover_link': [cover],
      'album_id': [album['id']],
      'album_title': [album['name']],
      'label': [album_label],
  }

  return album_info

###This function retrieves the following information about the tracks of the album passed as album_id: SPO ID, ISRC and track title

In [None]:
def getAlbumTracks(album_id):
  """Return ID, title and ISRC for every track of one album.

  The album's tracks are listed with ``sp.album_tracks``; further result
  pages are followed through the ``next`` link so albums with more tracks
  than fit in one page are not cut off. Each track is then looked up with
  ``sp.track`` to read its name and ISRC.

  Args:
    album_id: SPO album ID passed to ``sp.album_tracks``.

  Returns:
    A dict with the keys ``track_id``, ``track_title`` and ``isrc``; each
    value is a list with one entry per track, in listing order. An ``isrc``
    entry is ``None`` when the track carries no ISRC.
  """
  track_id = []
  track_isrc = []
  track_title = []

  result = sp.album_tracks(album_id)

  while result:
    for item in result['items']:
      id_track = item['id']
      track_id.append(id_track)
      track_search = sp.track(id_track)
      track_title.append(track_search['name'])
      # 'external_ids' or its 'isrc' entry may be absent; record None then
      external_ids = track_search.get('external_ids') or {}
      track_isrc.append(external_ids.get('isrc'))

    # move on to the next page of tracks, if the API reports one
    result = sp.next(result) if result.get('next') else None

  track_data = {'track_id':track_id, 'track_title': track_title,'isrc':track_isrc}

  return track_data

##4.4 -  Running the functions and generating excel files

### Running the functions to generate both album and its tracks info


In [None]:
#running the functions to generate both album and its tracks info

# One entry per album ID: track details go to data_track, album details to data_album
data_track, data_album = [], []

for current_album in album_ids:
  sleep(.10)  # 0.1 s pause before each album's requests
  # both lookups run before anything is stored, tracks first
  tracks, info = getAlbumTracks(current_album), getAlbumInfo(current_album)
  data_track.append(tracks)
  data_album.append(info)

###Export in separated excel files


In [None]:
# Export to separate Excel files: the lists are paired by position, so
# data_track is written to data_track.xlsx and data_album to data_album.xlsx
list_of_album_data = [data_track, data_album]
list_of_album_title = ['data_track','data_album']

exportProductsSeparately(list_of_album_data,list_of_album_title)