In [51]:
# Imports

import os
import datetime
from pathlib import Path
import pandas as pd
import json
import requests
import urllib.parse
import numpy as np

# Setup

In [52]:
# Absolute path of the current working directory and of the folder holding the CSV datasets
absPath = str(Path(os.path.abspath(os.getcwd())).absolute())
datasetsPath = os.path.join(absPath, "datasets")

# First run: the datasets folder may still be missing, so create it
if not os.path.exists(datasetsPath):
    os.mkdir(datasetsPath)

# artists.csv is the input dataset, people.csv is the dataset produced by this notebook
artistsPath, peoplePath = (
    os.path.join(datasetsPath, csvFileName)
    for csvFileName in ("artists.csv", "people.csv")
)


# WikiData API Functions

https://www.wikidata.org/w/api.php

In [53]:
def wdQuery(query, timeout=30):
    """Run a full-text search on WikiData and return the raw search response.

    Ideally the first search result is the entity we are looking for.

    Parameters
    ----------
    query : str
        Text to search; it is URL-encoded before being sent.
    timeout : float, optional
        Seconds to wait for the server (default 30). Without a timeout
        `requests` waits indefinitely on a stalled connection.

    Returns
    -------
    dict or None
        The decoded JSON body when the HTTP status is 200, otherwise None.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    # Format the request (encoding the query text)
    queryApiRequest = requests.get(
        "https://www.wikidata.org/w/api.php?action=query&format=json&list=search&srsearch={q}&srlimit=20".format(
            q=urllib.parse.quote_plus(query)),
        timeout=timeout)

    # Anything different from 200 is treated as "no response"
    if queryApiRequest.status_code != 200:
        return None

    return json.loads(queryApiRequest.text)

def wdGetEntity(entityID, timeout=30):
    """Download a WikiData entity given its ID (as retrieved from the query results).

    Parameters
    ----------
    entityID : str
        WikiData entity identifier to download.
    timeout : float, optional
        Seconds to wait for the server (default 30). Without a timeout
        `requests` waits indefinitely on a stalled connection.

    Returns
    -------
    dict or None
        The entity object found under ``entities -> entityID`` in the response,
        or None when the HTTP status is not 200 or the response does not
        contain that entity (for example when the body carries an API error).

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    # Format the request
    queryApiRequest = requests.get(
        "https://www.wikidata.org/w/api.php?action=wbgetentities&ids={eID}&format=json".format(eID=entityID),
        timeout=timeout)

    if queryApiRequest.status_code != 200:
        return None

    # A 200 response does not guarantee that the entity is in the body,
    # so a missing key means "no entity" instead of a KeyError
    try:
        return json.loads(queryApiRequest.text)["entities"][entityID]
    except (KeyError, TypeError):
        return None

def wdGetProperty(entityID, propertyID, timeout=30):
    """Download the claims of a single property of a WikiData entity.

    Parameters
    ----------
    entityID : str
        WikiData entity identifier.
    propertyID : str
        WikiData property identifier.
    timeout : float, optional
        Seconds to wait for the server (default 30). Without a timeout
        `requests` waits indefinitely on a stalled connection.

    Returns
    -------
    list or None
        The value found under ``claims -> propertyID`` in the response, or None
        when the HTTP status is not 200, the body is not valid JSON or the
        property is not present.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    # Format the request
    queryApiRequest = requests.get(
        "https://www.wikidata.org/w/api.php?action=wbgetclaims&entity={eID}&property={pID}&format=json".format(
            eID=entityID, pID=propertyID),
        timeout=timeout)

    if queryApiRequest.status_code != 200:
        return None

    # Only the "property not available" failures are turned into None;
    # a bare except here would also hide KeyboardInterrupt
    try:
        return json.loads(queryApiRequest.text)["claims"][propertyID]
    except (KeyError, TypeError, ValueError):
        return None


# MusicBrainz API Functions

https://musicbrainz.org/doc/MusicBrainz_API

In [54]:
def mbQuery(query, timeout=30):
    """Search artists on MusicBrainz and return the raw search response.

    The query is lower-cased and a leading "the " is removed before the
    text is URL-encoded.

    Parameters
    ----------
    query : str
        Artist name to search.
    timeout : float, optional
        Seconds to wait for the server (default 30). Without a timeout
        `requests` waits indefinitely on a stalled connection.

    Returns
    -------
    dict or None
        The decoded JSON body when the HTTP status is 200, otherwise None.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    # NOTE(review): the MusicBrainz API documentation asks clients to send a meaningful
    # User-Agent and to respect its rate limit; neither is done here - confirm whether needed.
    searchText = query.lower().removeprefix("the ")

    # Format the request (encoding the query text)
    queryApiRequest = requests.get(
        "https://musicbrainz.org/ws/2/artist?query={q}&limit=20&fmt=json".format(
            q=urllib.parse.quote_plus(searchText)),
        timeout=timeout)

    # Anything different from 200 is treated as "no response"
    if queryApiRequest.status_code != 200:
        return None

    return json.loads(queryApiRequest.text)

def mbGetArtist(artistID, includes=None, timeout=30):
    """Download the information of a MusicBrainz artist.

    The request always asks for ``aliases`` and ``url-rels``; further includes
    (for example ``artist-rels``, used to get the members of a group) can be
    added through `includes`. Other include types are listed in the
    MusicBrainz API documentation.

    Parameters
    ----------
    artistID : str
        MusicBrainz artist identifier.
    includes : list of str, optional
        Additional ``inc`` values appended to the default ones.
    timeout : float, optional
        Seconds to wait for the server (default 30). Without a timeout
        `requests` waits indefinitely on a stalled connection.

    Returns
    -------
    dict or None
        The decoded JSON body when the HTTP status is 200, otherwise None.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    # Extra includes are chained with "+" after the default ones (nothing to add for None or empty)
    includesStr = "+" + "+".join(includes) if includes else ""

    # Format the request
    queryApiRequest = requests.get(
        "https://musicbrainz.org/ws/2/artist/{artistID}?fmt=json&inc=aliases+url-rels{incs}".format(
            artistID=artistID, incs=includesStr),
        timeout=timeout)

    # Anything different from 200 is treated as "no response"
    if queryApiRequest.status_code != 200:
        return None

    return json.loads(queryApiRequest.text)


# Data Retrieval

## Utilities

### Constants Utilities

In [55]:
# Human-friendly aliases of the WikiData property (P...) and entity (Q...) codes
WD_MAP = dict(
    instanceOf="P31",
    name="P735",
    birthName="P1477",
    surname="P734",
    birthDate="P569",
    deathDate="P570",
    country="P27",
    countryISOCode="P297",
    hasPart="P527",
    human="Q5",
    nativeName="P1705",
    musicalGroup="Q215380",
)

# How the value of each retrieved property must be extracted:
# - externalArrayString: array of strings that WikiData treats as external entities
# - externalString: a string that WikiData treats as an external entity
# - inValue: a string contained in the "targetProperty" field of the value
# - date: a date contained in the datavalue field
# - externalProperty: the "targetProperty" property of an external entity (used for the country)
WD_DATA_TYPE = dict(
    name=dict(type="externalArrayString"),
    birthName=dict(type="inValue", targetProperty="text"),
    surname=dict(type="externalArrayString"),
    birthDate=dict(type="date"),
    deathDate=dict(type="date"),
    country=dict(type="externalProperty", targetProperty=WD_MAP["countryISOCode"]),
)

# Properties retrieved for a person
HUMAN_KEYS = [
    "name",
    "surname",
    "birthName",
    "birthDate",
    "deathDate",
    "country",
]

# Properties retrieved for a group
GROUP_KEYS = [
    "hasPart",
]


### General Utilities

In [56]:
def getSubsetOfKeys(dictObject, keys):
    """Pick a subset of properties from a WikiData entity.

    `keys` are the human-friendly names defined in WD_MAP. The returned
    dictionary is keyed by those names (not by the WikiData codes) and holds
    None for every property that `dictObject` does not contain.
    """
    return {
        friendlyName: (
            dictObject[WD_MAP[friendlyName]]
            if WD_MAP[friendlyName] in dictObject.keys()
            else None
        )
        for friendlyName in keys
    }

def getDateFromInformation(dateTime, datePrecision):
    """Convert a WikiData timestamp into a date string.

    Parameters
    ----------
    dateTime : str
        WikiData time value, e.g. "+1990-02-16T00:00:00Z".
    datePrecision : int
        WikiData precision of the value. 10 produces "YYYY-MM", 9 or lower
        produces "YYYY", any other value produces "YYYY-MM-DD".

    Returns
    -------
    str or None
        The formatted date, or None when `dateTime` matches none of the
        supported layouts.
    """
    # WikiData puts 00 in the month or day field of imprecise dates, which is
    # a malformed datetime for strptime: the zeroed layouts are tried in turn
    dateFormats = ("+%Y-%m-%dT%H:%M:%SZ", "+%Y-%m-00T%H:%M:%SZ", "+%Y-00-00T00:00:00Z")

    if datePrecision == 10:
        destinationFormat = "%Y-%m"
    elif datePrecision <= 9:
        destinationFormat = "%Y"
    else:
        destinationFormat = "%Y-%m-%d"

    for dateFormat in dateFormats:
        try:
            parsedDate = datetime.datetime.strptime(dateTime, dateFormat)
        except (ValueError, TypeError):
            # Layout mismatch (or non-string input): try the next layout.
            # A bare except here would also swallow KeyboardInterrupt.
            continue

        return parsedDate.strftime(destinationFormat)

    return None


### WikiData Entity Utilities

In [57]:
def getDataValue(dictObject, keyProperty=None, expectArray=False):
    """Extract the plain value(s) from a list of WikiData claims.

    WikiData is structured as
    claims --> property --> {0,1,...} --> mainsnak --> datavalue --> value

    Parameters
    ----------
    dictObject : dict, list or None
        Either a mapping of properties to their claims (when `keyProperty`
        is given) or directly the list of claims of one property.
    keyProperty : str, optional
        Key of `dictObject` whose claims must be read.
    expectArray : bool, optional
        When True return all the values, otherwise only the first one.

    Returns
    -------
    object, list or None
        The first value, or the list of values when `expectArray` is True.
        None when the property is missing or none of its claims has a value.
    """
    if dictObject is None:
        return None

    # A missing property is "no value", not an error
    claims = dictObject if keyProperty is None else dictObject.get(keyProperty)

    if claims is None:
        return None

    # If there is a property with no value, datavalue is not present
    arrValues = [
        claim["mainsnak"]["datavalue"]["value"]
        for claim in claims
        if "datavalue" in claim["mainsnak"].keys()
    ]

    # If there are no values return a null object
    if len(arrValues) <= 0:
        return None

    return arrValues if expectArray else arrValues[0]

def getEntityTitle(entity):
    """Return the title (label) of a WikiData entity.

    The English label is preferred; when it is not available the first
    label that carries a value is used.

    Returns
    -------
    str or None
        The label text, or None when `entity` is not a dictionary or has
        no usable label.
    """
    labels = entity.get("labels") if isinstance(entity, dict) else None

    # No entity or no labels at all: there is no title to return
    if not isinstance(labels, dict) or len(labels) <= 0:
        return None

    # If there is the english version of the entity, get it
    try:
        return labels["en"]["value"]
    except (KeyError, TypeError):
        pass

    # Otherwise select the first available version
    for label in labels.values():
        if isinstance(label, dict) and "value" in label:
            return label["value"]

    return None


### MusicBrainz Utilities

In [58]:
def getMBWikiDataEntityID(artistInformation):
    """Extract the WikiData entity ID from the relations of a MusicBrainz artist.

    MusicBrainz returns a list of relations; often one of them is of type
    "wikidata" and links the artist to its WikiData page. The entity ID is
    the last path segment of that link.

    Returns
    -------
    str or None
        The entity ID of the first usable "wikidata" relation, or None when
        there are no relations or none of them is a usable WikiData link.
    """
    try:
        artistRelations = artistInformation["relations"]
    except (KeyError, TypeError):
        return None

    for artistRelation in artistRelations or []:
        try:
            if artistRelation["type"] != "wikidata":
                continue

            # Taking the last path segment (instead of stripping one exact prefix)
            # keeps working if the link uses a different scheme or has a trailing slash
            entityID = artistRelation["url"]["resource"].strip().rstrip("/").rsplit("/", 1)[-1]
        except (KeyError, TypeError, AttributeError):
            # Malformed relation: ignore it and keep looking at the next ones
            continue

        if len(entityID) > 0:
            return entityID

    # Otherwise return null
    return None

def getMBGroupMembers(artistInformation, downloadMembers=False):
    """Download the members of a MusicBrainz group.

    MusicBrainz returns a list of relations; for a group the relations of type
    "member of band" with direction "backward" point to its members.

    Parameters
    ----------
    artistInformation : dict or None
        MusicBrainz artist object of the group.
    downloadMembers : bool, optional
        When True the artist is downloaded again (through `mbGetArtist`)
        including the "artist-rels", replacing the object passed in.

    Returns
    -------
    list or None
        One `mbGetArtist` result per member relation (an item is None when
        that download returned None). None when there is no artist
        information or it has no "relations" field.
    """
    if artistInformation is not None and downloadMembers:
        artistInformation = mbGetArtist(artistInformation["id"], includes=["artist-rels"])

    # If there are no relations, then return null
    if artistInformation is None or "relations" not in artistInformation.keys():
        return None

    groupMembers = []

    # Check all the relations
    for artistRelation in artistInformation["relations"]:
        try:
            # If the relation is of type "member of band", try get the information of the artist
            if artistRelation["type"] == "member of band" and artistRelation["direction"] == "backward":
                groupMembers.append(mbGetArtist(artistRelation["artist"]["id"]))
        except Exception:
            # Best effort: a relation that cannot be read or downloaded is skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt stop the run.
            continue

    return groupMembers


### Artist Retrieved Data Utilities

In [59]:
def getArtistInformation(entityClaims):
    """Extract the person information from the claims of a "human" WikiData entity.

    Only the properties listed in HUMAN_KEYS are read. The way each raw value
    is converted depends on the type declared for it in WD_DATA_TYPE. The
    result has one item per HUMAN_KEYS key, valued None when nothing could
    be extracted.
    """
    # Keep only the properties we are interested in
    selectedClaims = getSubsetOfKeys(entityClaims, HUMAN_KEYS)
    extractedValues = {}

    for propertyName in selectedClaims:
        typeDescriptor = WD_DATA_TYPE[propertyName]
        typeName = typeDescriptor["type"]

        # Arrays of external entities are read entirely, everything else as a single value
        rawValue = getDataValue(selectedClaims, propertyName,
                                expectArray=(typeName == "externalArrayString"))

        # Nothing stored for this property
        if rawValue is None:
            extractedValues[propertyName] = None
            continue

        resolvedValue = None

        if typeName == "externalString":
            # The value is the title of the linked entity
            resolvedValue = getEntityTitle(wdGetEntity(rawValue["id"]))

        elif typeName == "externalArrayString":
            # The value is the list of the titles of the linked entities (null titles are dropped)
            linkedTitles = [
                getEntityTitle(wdGetEntity(linkedEntity["id"]))
                for linkedEntity in rawValue
            ]
            linkedTitles = [title for title in linkedTitles if title is not None]
            resolvedValue = linkedTitles if len(linkedTitles) > 0 else None

        elif typeName == "externalProperty":
            # The value is a property of the linked entity
            resolvedValue = getDataValue(wdGetProperty(
                entityID=rawValue["id"],
                propertyID=typeDescriptor["targetProperty"]
            ))

        elif typeName == "inValue":
            # The value is stored inside the "targetProperty" field
            resolvedValue = rawValue[typeDescriptor["targetProperty"]]

        elif typeName == "date":
            # The value is a WikiData time with its precision
            resolvedValue = getDateFromInformation(rawValue["time"], rawValue["precision"])

        extractedValues[propertyName] = resolvedValue

    return extractedValues


In [60]:
def getArtistInformationMB(artistInformation):
    """Build, from a MusicBrainz artist, the same structure produced for WikiData.

    Parameters
    ----------
    artistInformation : dict
        MusicBrainz artist object. The fields read are "aliases",
        "life-span" and "country"; each of them may be missing.

    Returns
    -------
    dict
        Keys "name", "surname", "birthName", "birthDate", "deathDate" and
        "country". "name" and "surname" are one-element lists (or None),
        every other missing value is None.
    """
    if not isinstance(artistInformation, dict):
        artistInformation = {}

    # MusicBrainz doesn't provide separated Name and Surnames
    name = None
    surname = None
    birthName = None

    # Search for the legal name in the aliases (the last legal name wins)
    for artistAlias in artistInformation.get("aliases") or []:
        if not isinstance(artistAlias, dict) or artistAlias.get("type") != "Legal name":
            continue

        if artistAlias.get("name") is None:
            continue

        birthName = artistAlias["name"]
        name = None
        surname = None

        # Often the sort name is in the form "surname, name". When it is not,
        # name and surname are left empty instead of storing a half-parsed value
        sortNameParts = (artistAlias.get("sort-name") or "").split(",")
        if len(sortNameParts) >= 2:
            surname = [sortNameParts[0].strip()]
            name = [sortNameParts[1].strip()]

    # life-span begin is the birth date for a person (foundation for a group),
    # life-span end is the death date for a person (disbandment for a group)
    lifeSpan = artistInformation.get("life-span")
    if not isinstance(lifeSpan, dict):
        lifeSpan = {}

    return {
        "name": name,
        "surname": surname,
        "birthName": birthName,
        "birthDate": lifeSpan.get("begin"),
        "deathDate": lifeSpan.get("end"),
        # The country field is copied as it is
        "country": artistInformation.get("country"),
    }


### People Object Generator Utilities

In [61]:
def generatePeopleObject(peopleInfo):
    """Generate the object (one row) to insert in the people DataFrame.

    Parameters
    ----------
    peopleInfo : dict
        Must contain "birthName", "entityName", "name", "surname",
        "birthDate", "deathDate", "country", "artistID", "wdID" and "mbID".
        "name" and "surname" are lists of strings (a person can have more
        than one name or surname) or None.

    Returns
    -------
    dict
        Keys "id", "name", "surname", "birthdate", "deathdate", "nationality",
        "artist", "complete_name" and "entity_name". "name" and "surname" are
        strings, or None when they are unknown or empty.
    """
    # The complete name is the birth name or, when missing, the entity name
    completeName = peopleInfo["birthName"] if peopleInfo["birthName"] is not None else peopleInfo["entityName"]

    # Empty values count as missing; a plain string is treated as a one-element list
    name = peopleInfo["name"] or None
    surname = peopleInfo["surname"] or None
    if isinstance(name, str):
        name = [name]
    if isinstance(surname, str):
        surname = [surname]

    if completeName is not None:
        # No name but surname: the name is what precedes the first surname in the complete name.
        # Only the first surname is used because the way surnames are concatenated
        # (by a -, by a space or by other chars) is unknown
        if name is None and surname is not None:
            surnamePosition = completeName.find(surname[0])
            if surnamePosition >= 0:
                # Drop the separator before the surname. The position is clamped to 0:
                # when the complete name starts with the surname, a -1 would slice
                # off its last character instead of giving an empty name
                name = completeName[:max(surnamePosition - 1, 0)].strip()

        # No surname but name: the surname is what follows the last name in the complete name
        elif name is not None and surname is None:
            lastNamePosition = completeName.find(name[-1])
            if lastNamePosition >= 0:
                surname = completeName[lastNamePosition + len(name[-1]):].strip()

        # Neither name nor surname: divide the complete name at the first space.
        # Without spaces the complete name is the name and the surname stays null
        elif name is None and surname is None:
            firstSpacePosition = completeName.find(" ")
            if firstSpacePosition >= 0:
                name = completeName[:firstSpacePosition].strip()
                surname = completeName[firstSpacePosition:].strip()
            else:
                name = completeName

    # Whatever is still a list is joined, so a list never reaches the output
    # (this includes the case of a missing complete name)
    if isinstance(name, (list, tuple)):
        name = " ".join(name)
    if isinstance(surname, (list, tuple)):
        surname = " ".join(surname)

    # If the name or the surname is an empty string, consider it as null
    name = name if name is not None and len(name) > 0 else None
    surname = surname if surname is not None and len(surname) > 0 else None

    return {
        # The WikiData ID is preferred, the MusicBrainz one is the fallback
        "id": peopleInfo["wdID"] if peopleInfo["wdID"] is not None else peopleInfo["mbID"],
        "name": name,
        "surname": surname,
        "birthdate": peopleInfo["birthDate"],
        "deathdate": peopleInfo["deathDate"],
        "nationality": peopleInfo["country"],
        "artist": peopleInfo["artistID"],
        "complete_name": peopleInfo["birthName"],
        "entity_name": peopleInfo["entityName"],
    }


### Query Utilities

In [62]:
def chooseBestQueryResult(queryResults):
    """Try to pick the best result of a WikiData search.

    Every result is ranked by looking at its snippet (the brief description
    of the entity). The result whose snippet contains the most important
    keyword wins; results containing a banned keyword are discarded. Returns
    the chosen search result, or None when there are no results or none of
    them looks like an artist. Each snippet is printed, prefixed by "--> "
    when it is a probable match.
    """
    # WikiData query results are in query --> search
    if queryResults is None or "query" not in queryResults.keys():
        return None

    searchHits = queryResults["query"]["search"]
    if len(searchHits) <= 0:
        return None

    print("\n🔍 [SEARCH RESULTS]")

    # Keywords in order of importance: between a result containing "artist"
    # and another one containing "rapper", the second one is chosen
    keyWords = ("dj", "singer", "songwriter", "rapper", "duo", "trio",
                "group", "band", "orchestra", "producer", "music", "artist")

    # A result containing one of these words is never considered
    bannedKeyWords = ("album", "discography", "single", "song ", "wikimedia", "film")

    noMatch = float("inf")

    # Rank of each result: position of its most important keyword, or infinity when it is not a match
    hitRanks = []
    for searchHit in searchHits:
        snippet = searchHit["snippet"].lower()

        foundKeyWords = [position for position, keyWord in enumerate(keyWords) if keyWord in snippet]
        hasBannedKeyWord = any(bannedKeyWord in snippet for bannedKeyWord in bannedKeyWords)

        if foundKeyWords and not hasBannedKeyWord:
            hitRanks.append(foundKeyWords[0])
            print("--> " + snippet)
        else:
            hitRanks.append(noMatch)
            print("- " + snippet)

    # Other algorithms probably can be used, for example considering also the number of matches
    bestRank = min(hitRanks)
    if bestRank == noMatch:
        return None

    # The first result with the lowest rank is the chosen one
    return searchHits[hitRanks.index(bestRank)]


In [63]:
def chooseBestQueryResultMB(queryResults, expectedArtist=None):
    """Try to pick the best result of a MusicBrainz artist search.

    The first result is the default choice. When `expectedArtist` is given,
    the first result whose name contains it (ignoring the case) is preferred.
    The chosen artist is then downloaded with `mbGetArtist` and enriched with
    "title" (its WikiData entity ID, possibly None) and "snippet" (its name),
    so that it looks like a WikiData search result. Returns None when there
    are no results or the download returns None.
    """
    # MusicBrainz (artists) query results are in the "artists" field
    if queryResults is None or "artists" not in queryResults.keys():
        return None

    candidates = queryResults["artists"]
    if len(candidates) <= 0:
        return None

    chosenCandidate = candidates[0]

    # With an expected name, the first candidate containing it replaces the default choice
    if expectedArtist is not None:
        expectedName = expectedArtist.lower()
        chosenCandidate = next(
            (candidate for candidate in candidates if expectedName in candidate["name"].lower()),
            chosenCandidate
        )

    # Get the artist information from MusicBrainz
    artistResult = mbGetArtist(chosenCandidate["id"])
    if artistResult is None:
        return None

    # "title" and "snippet" make the object equal to the WikiData one
    artistResult["title"] = getMBWikiDataEntityID(artistResult)
    artistResult["snippet"] = artistResult["name"]

    return artistResult


## Data Retrieval

In [64]:
# Read the artists dataset (comma separated), using the "id" column as index
artists = pd.read_csv(artistsPath, index_col="id")

In [65]:
# The people DataFrame is created later, once its columns are defined
peopleDF = None

# Columns of the people dataset
peopleCols = [
    "id",
    "name",
    "surname",
    "birthdate",
    "deathdate",
    "nationality",
    "artist",
    "complete_name",
    "entity_name",
]

In [66]:
# Start from an empty people DataFrame having only the columns
peopleDF = pd.DataFrame(data=[], columns=peopleCols)

In [67]:
# Iterate over the artists DataFrame and retrieve, for each artist, the people behind it.
# `index` counts the artists for which a match has been found (skipped artists are not counted)
index = 0

# Single-artist loops, handy for debugging (use one of them instead of the real loop):
#for artistID, row in zip(["7DMveApC7UnC2NPfPvlHSU"], [{"name": "Cheat Codes"}]):
#for artistID, row in zip(["0vR2qb8m9WHeZ5ByCbimq2"], [{"name": "Reik"}]):
#for artistID, row in zip(["2T1aUibqR2QC2sINIDQOAK"], [{"name": "Mambo Kingz"}]):

for artistID, row in artists.iterrows():
    artistName = str(row["name"])

    print("\n\n\n🧑‍🎤 [[" + artistName.upper() + "]]")

    # Search the artist from MusicBrainz
    mbQueryResults = mbQuery(artistName)
    mbArtistResult = chooseBestQueryResultMB(mbQueryResults, artistName)
    artistResult = mbArtistResult.copy() if mbArtistResult is not None else None

    # If on MusicBrainz there isn't the wanted artist or it doesn't have WikiData page
    # Try to search directly on WikiData
    if artistResult is None or artistResult["title"] is None:
        print("\n🛑 [NO MATCHES ON MUSICBRAINZ, TRYING TO WIKIDATA]")
        wdQueryResults = wdQuery(artistName)
        artistResult = chooseBestQueryResult(wdQueryResults)

    # If there is no match also on WikiData, then skip the artist
    # NOTE(review): the artist is skipped even when MusicBrainz returned a result
    # without WikiData link, so its MusicBrainz data is never used - confirm this is intended
    if artistResult is None:
        print("\n🛑 [NO MATCHES ON WIKIDATA]")
        continue

    # Log the final choice
    print("\n✔️ [FINAL CHOOSE] " + artistResult["title"] +
            " - " + artistResult["snippet"])

    # Get the entityID. WikiData returns it in the title field
    entityID = artistResult["title"]

    # Get the WikiData entity data
    entityObject = wdGetEntity(entityID)

    # Claims is the list of properties of the entity
    entityClaims = None
    # The entity name is temporarily the title
    entityName = artistResult["title"]

    # If there are properties, set the entity claims and use the title of the WikiData page as name.
    # wdGetEntity returns None when the request fails, so it is checked before being read
    if entityObject is not None and "claims" in entityObject.keys():
        entityClaims = entityObject["claims"]
        entityTitle = getEntityTitle(entityObject)
        # Keep the entity ID as name when no title is available
        if entityTitle is not None:
            entityName = entityTitle

    # List of the people of the artist (one person for a single artist, the members for a group)
    artistMembers = []

    # Use MusicBrainz information or not
    useMusicBrainz = False

    # If there is a WikiData page
    if entityClaims is not None:
        # The value of "instanceOf" can be missing even if the property is present
        instanceOfValue = None
        if WD_MAP["instanceOf"] in entityClaims.keys():
            instanceOfValue = getDataValue(entityClaims, WD_MAP["instanceOf"])
        isHuman = isinstance(instanceOfValue, dict) and instanceOfValue.get("id") == WD_MAP["human"]

        # If the WikiData entity is an instance of "human", then the artist is a single person artist
        if isHuman:
            print("\n👨 [SINGLE ARTIST]")
            # Get the information from WikiData
            artistInformation = getArtistInformation(entityClaims)
            # And set other additional information like the artist name, the entity title and the WikiData ID
            artistInformation["artistID"] = artistID
            artistInformation["artistName"] = artistName
            artistInformation["entityName"] = entityName
            artistInformation["mbID"] = mbArtistResult["id"] if mbArtistResult is not None else None
            artistInformation["wdID"] = entityID

            # And add it to the array of artists
            artistMembers.append(artistInformation)

        # Otherwise, if the WikiData entity has the property "hasPart", then I can get the list of group members
        elif WD_MAP["hasPart"] in entityClaims.keys():
            print("\n👪 [MEMBERS]")
            # Get only the "hasPart" property
            groupInformation = getSubsetOfKeys(entityClaims, GROUP_KEYS)
            # And get the ids of the WikiData pages as an array (empty when no member has a value)
            groupMembers = getDataValue(groupInformation, "hasPart", expectArray=True) or []

            # For each member, get the data
            for groupMember in groupMembers:
                # Get the WikiData entity
                memberEntity = wdGetEntity(groupMember["id"])

                # If there is a valid WikiData page
                if memberEntity is not None and "claims" in memberEntity.keys():
                    # Get the properties and the title of the page
                    memberEntityClaims = memberEntity["claims"]
                    memberEntityName = getEntityTitle(memberEntity)

                    # And set the information of the artist
                    artistInformation = getArtistInformation(memberEntityClaims)
                    artistInformation["artistID"] = artistID
                    artistInformation["artistName"] = artistName
                    artistInformation["entityName"] = memberEntityName
                    artistInformation["mbID"] = mbArtistResult["id"] if mbArtistResult is not None else None
                    artistInformation["wdID"] = groupMember["id"]

                    # And add it to the array of artists
                    artistMembers.append(artistInformation)

        # Otherwise probably the WikiData entity is neither a human nor a group, so try to use MusicBrainz data
        else:
            print("\n🛑 [NO INFORMATION TO RETRIEVE, TRYING WITH MUSICBRAINZ DATA]")
            useMusicBrainz = True

    # Otherwise try to use MusicBrainz data
    else:
        print("\n🛑 [NO WIKIDATA PAGE, TRYING WITH MUSICBRAINZ DATA]")
        useMusicBrainz = True

    # If no data on WikiData, try to get the information from MusicBrainz
    if useMusicBrainz:
        mbArtistType = mbArtistResult.get("type") if mbArtistResult is not None else None

        # If the artist is a person get the information
        if mbArtistType == "Person":
            mbArtistInformation = getArtistInformationMB(mbArtistResult)
            mbArtistInformation["entityName"] = mbArtistResult["name"]
            mbArtistInformation["artistName"] = artistName
            mbArtistInformation["artistID"] = artistID
            mbArtistInformation["mbID"] = mbArtistResult["id"]
            mbArtistInformation["wdID"] = None

            artistMembers.append(mbArtistInformation)

        # If the artist is a group get the information
        elif mbArtistType == "Group":
            # Get the members of the group. Since in the mbArtistResult artist relationships are not retrieved,
            # with downloadMembers=True the artist information is downloaded again from the web
            mbGroupMembers = getMBGroupMembers(mbArtistResult, downloadMembers=True)

            # Add the group members (members that could not be downloaded are skipped)
            for mbGroupMember in mbGroupMembers or []:
                if mbGroupMember is not None:
                    mbMemberInformation = getArtistInformationMB(mbGroupMember)
                    mbMemberInformation["entityName"] = mbGroupMember["name"]
                    mbMemberInformation["artistName"] = artistName
                    mbMemberInformation["artistID"] = artistID
                    mbMemberInformation["mbID"] = mbGroupMember["id"]
                    mbMemberInformation["wdID"] = None

                    artistMembers.append(mbMemberInformation)

        # No MusicBrainz result, or neither a person nor a group
        else:
            print("🛑 [NO MUSICBRAINZ DATA]")

    # Create a list of people rows to insert in the DataFrame
    peopleObjList = []
    print("\n🗃️ [PEOPLE RETRIEVED]")
    for peopleInfo in artistMembers:
        peopleObject = generatePeopleObject(peopleInfo)

        # str() because the entity name can be null
        print(str(peopleObject["entity_name"]) + ": " + json.dumps(peopleObject))

        # Build the row following the columns order, without relying on the dictionary order
        peopleObjList.append([peopleObject[peopleCol] for peopleCol in peopleCols])

    # Add the people info to the DataFrame (nothing to add when no people have been retrieved)
    if len(peopleObjList) > 0:
        peopleObjDF = pd.DataFrame(peopleObjList, columns=peopleCols)
        peopleDF = pd.concat([peopleDF, peopleObjDF], ignore_index=True)

    # Print stats every 5000 artists (disabled)
    # if index % 5000 == 0:
    #     print("\n\n🎧 [STATUS INFO #{row}]".format(row=index))
    #     print(peopleDF.info())

    # Save DataFrame to file every 100 artists
    if index % 100 == 0:
        print("\n\n💾 [STATUS INFO #{row}] Dataset saved\n".format(row=index))
        peopleDF.to_csv(peoplePath)

    index += 1

# Print info about the DataFrame (info() prints by itself and returns None)
peopleDF.info()





🧑‍🎤 [[THE WEEKND]]

✔️ [FINAL CHOOSE] Q2121062 - The Weeknd

👨 [SINGLE ARTIST]

🗃️ [PEOPLE RETRIEVED]
The Weeknd: {"id": "Q2121062", "name": "Tesfaye", "surname": "Tesfaye", "birthdate": "1990-02-16", "deathdate": null, "nationality": "CA", "artist": "1Xyo4u8uXC1ZmMpatF05PJ", "complete_name": "Abel Makkonen Tesfaye", "entity_name": "The Weeknd"}


💾 [STATUS INFO #0] Dataset saved




🧑‍🎤 [[DAFT PUNK]]

✔️ [FINAL CHOOSE] Q185828 - Daft Punk

👪 [MEMBERS]

🗃️ [PEOPLE RETRIEVED]
Thomas Bangalter: {"id": "Q471656", "name": "Bangalter", "surname": "Bangalter", "birthdate": "1975-01-03", "deathdate": null, "nationality": "FR", "artist": "4tZwfgrHOc3mvqYlEYSvVi", "complete_name": null, "entity_name": "Thomas Bangalter"}
Guy-Manuel de Homem-Christo: {"id": "Q471650", "name": "de Homem Christo", "surname": "de Homem Christo", "birthdate": "1974-02-08", "deathdate": null, "nationality": "FR", "artist": "4tZwfgrHOc3mvqYlEYSvVi", "complete_name": "Guillaume Emmanuel de Homem-Christo", "entity_na

KeyboardInterrupt: 

In [None]:
# Write the people retrieved so far to the people dataset file
peopleDF.to_csv(path_or_buf=peoplePath)