In [44]:
import pandas as pd

In [45]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Build the shared Spotify client used by every cell below. Authentication goes
# through SpotifyOAuth, requesting only the "user-library-read" scope.
scope = "user-library-read"
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(scope=scope),
)

In [3]:
# Beyoncé's Spotify artist ID (6vWDO969PvNqNYHIOW5v0m) was looked up by hand.
# Fetch the first page of her albums as the raw API response dict.
bey_album_dict = sp.artist_albums(artist_id='6vWDO969PvNqNYHIOW5v0m')

In [4]:
# First page of albums for artist 1Y8cdNmUJH7yBTd9yOvr5i (listed as Destiny's Child
# in the artist table later in this notebook).
dc_album_dict = sp.artist_albums(artist_id='1Y8cdNmUJH7yBTd9yOvr5i')

In [5]:
# First page of albums for artist 7tTF8X2g7wWqOXOniknfv9.
# NOTE(review): the artist behind this ID is not named anywhere in the notebook — add it.
gt_album_dict = sp.artist_albums(artist_id='7tTF8X2g7wWqOXOniknfv9')

In [10]:
# Sanity check: confirm which artist the hand-found ID resolves to.
sp.artist(artist_id="6vWDO969PvNqNYHIOW5v0m")["name"]

'Beyoncé'

## Find all albums for Beyoncé

In [31]:
# Inspect a single album: print its ID, then the first artist credited on its
# first track. Both lines now refer to the same album (index 0); previously the
# ID printed was for items[0] while the track lookup used items[1].
album_id = bey_album_dict['items'][0]['id']
print(album_id)
print(sp.album(album_id)['tracks']['items'][0]['artists'][0])

6FJxoadUE4JNVwWHghBwnb
{'external_urls': {'spotify': 'https://open.spotify.com/artist/6vWDO969PvNqNYHIOW5v0m'}, 'href': 'https://api.spotify.com/v1/artists/6vWDO969PvNqNYHIOW5v0m', 'id': '6vWDO969PvNqNYHIOW5v0m', 'name': 'Beyoncé', 'type': 'artist', 'uri': 'spotify:artist:6vWDO969PvNqNYHIOW5v0m'}


In [32]:
# Print one line per album: ID, release date, name, and the set of artist IDs
# credited on its tracks.
# NOTE(review): `get_album_artists` is called here as a free function, but the only
# definition visible in this notebook is the `Artist.get_album_artists` method in a
# later cell — presumably it came from a cell that no longer exists. Confirm this
# still runs after Restart Kernel -> Run All.
for album in gt_album_dict['items']:
    exclusions = []  # empty: no artist IDs are filtered out of the result
    album_artists = get_album_artists(album['id'], exclude=exclusions)
    print(f"{album['id']}: {album['release_date']}, {album['name']}, {album_artists}")

3ZjZH79VeECJSVUXnlDvXW: 2022-07-22, That Girl Emotional, {'64YaFlpRdDAVGhzxH78N9p', '7tTF8X2g7wWqOXOniknfv9'}


In [132]:
from tqdm import tqdm
import time

class Artist:
    """A Spotify artist with a release-date-ordered table of albums and collaborators.

    All network access goes through the module-level ``sp`` client. Constructing
    an instance looks up the artist's name, fetches one page (up to 50) of the
    artist's albums and, for every album, collects the IDs and names of the
    other artists credited on its tracks.

    Attributes:
        id: The Spotify artist ID passed to the constructor.
        name: The artist name returned by ``sp.artist``.
        albums: Raw album metadata dicts returned by ``sp.artist_albums``.
        album_ids: An empty dict; nothing in this class populates it.
    """

    # Columns of the per-album frame. Declaring them up front keeps the frame
    # well-formed (and sortable) even when the artist has no albums.
    _COLUMNS = [
        'album_id',
        'album_name',
        'release_date',
        'collaborator_ids',
        'collaborator_names',
    ]

    def __init__(self, artist_id, disable_progress_bar=False):
        """Fetch the artist's albums and build the collaborator table.

        Args:
            artist_id: Spotify artist ID.
            disable_progress_bar: When true, the per-album tqdm bar is hidden.
        """
        self.id = artist_id
        self.name = sp.artist(self.id)['name']
        self.album_ids = {}

        # Fetch the list of album metadata. The default limit is 20; ask for 50.
        # NOTE(review): only this single page is read, so an artist with more
        # albums than one page holds would be truncated — confirm whether
        # pagination is wanted.
        self.albums = sp.artist_albums(self.id, limit=50)['items']

        records = []          # one dict per album; turned into a frame once, below
        collaborators = set()
        name_by_id = {}       # avoids re-requesting a collaborator seen on an earlier album
        for album in tqdm(self.albums, desc="Processing (Artist)", disable=disable_progress_bar):
            # Exclude the artist themselves so only collaborators remain.
            collaborator_ids = self.get_album_artists(album['id'], exclude=[self.id])

            for collaborator_id in collaborator_ids:
                if collaborator_id not in name_by_id:
                    name_by_id[collaborator_id] = sp.artist(collaborator_id)['name']

            records.append({
                'album_id': album['id'],
                'album_name': album['name'],
                'release_date': album['release_date'],
                'collaborator_ids': collaborator_ids,
                'collaborator_names': {name_by_id[cid] for cid in collaborator_ids},
            })
            collaborators.update(collaborator_ids)

        self._collaborators = collaborators

        frame = pd.DataFrame(records, columns=self._COLUMNS)
        frame['release_date'] = pd.to_datetime(frame['release_date'], format='mixed')
        # reset_index() keeps the pre-sort position as an 'index' column, i.e. the
        # order in which the API returned the albums.
        self._df = frame.sort_values(by='release_date').reset_index()

        # Album IDs ordered by release date; used to build the graph.
        self._album_list = self._df['album_id'].tolist()

    def get_data_frame(self) -> pd.DataFrame:
        """Return the per-album frame, sorted by release date."""
        return self._df

    def get_albums_list(self) -> list:
        """Return the album IDs ordered by release date."""
        return self._album_list

    def get_collaborators(self) -> set:
        """Return the IDs of every other artist credited on any fetched album."""
        return self._collaborators

    def get_album_artists(self, album_id, exclude=None) -> set:
        """Return the set of artist IDs credited on the tracks of an album.

        Args:
            album_id: Spotify album ID.
            exclude: Optional iterable of artist IDs to drop from the result.
                A single ID given as a plain string is treated as a
                one-element list rather than being iterated character by
                character.

        Returns:
            A set of artist ID strings.
        """
        if isinstance(exclude, str):
            exclude = [exclude]

        album_dict = sp.album(album_id)
        artists = {
            artist['id']
            for track in album_dict['tracks']['items']
            for artist in track['artists']
        }

        # Drop the excluded IDs (typically the album's own artist) if requested.
        if exclude:
            artists.difference_update(exclude)
        return artists

| Artist Name | Spotify ID | Notes |
|----------|----------|----------|
| Destiny's Child | 1Y8cdNmUJH7yBTd9yOvr5i | Lead Singer |
| Beyoncé | 6vWDO969PvNqNYHIOW5v0m | Solo |


In [126]:
# Build the Artist object for Beyoncé and report how long construction took.
start = time.time()
beyonce = Artist(artist_id="6vWDO969PvNqNYHIOW5v0m", disable_progress_bar=True)
end = time.time()
print(f"{end - start} seconds.")

Beyoncé has 50 albums.
28.618391036987305 seconds.


In [None]:
# Preview the first five albums (ordered by release date).
beyonce.get_data_frame().head(n=5)

In [128]:
# Artist IDs of everyone credited alongside Beyoncé on the fetched albums.
collabs = beyonce.get_collaborators()
print(len(collabs), "collaborators")

132 collaborators


In [129]:
# single album collaborator list...
album = 1
{Artist(id) for id in df.iloc[1,:]['collaborator_ids']}

Slim Thug has 50 albums.


Processing (Artist):  96%|█████████████████████████████████████████████████████████████████████████████████████▍   | 48/50 [00:43<00:01,  1.09it/s]


KeyboardInterrupt: 

In [133]:
# iterate through all collaborators and 
# create artist object and save off 
# ordered list of album ids (i.e. graph)
# For every collaborator, build an Artist object and save its release-date-ordered
# album IDs (i.e. the graph ordering) to "<artist id>.csv" in the working directory.
# The loop variable is not named `id`: at cell level that would rebind the builtin
# `id` for every cell executed afterwards.
for collaborator_id in tqdm(collabs, desc='Processing (collaborators)'):
    artist = Artist(collaborator_id, disable_progress_bar=True)

    # Series holding just the album_id column
    ds_albums = artist.get_data_frame()['album_id']
    ds_albums.to_csv(f"{artist.id}.csv", index=False)

Processing (collaborators):   0%|                                                                                          | 0/132 [13:40<?, ?it/s]


KeyboardInterrupt: 