# Final Project - Testing file

## Import Libraries

In [2]:
import numpy as np
import pandas as pd

import requests
import re
import json
import csv
import os

import time
from datetime import datetime, timedelta
import locale

from bs4 import BeautifulSoup

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm.notebook import tqdm

import musicbrainzngs

## Scraping

In [None]:
week_url = 8444

FRcharts_url = "https://www.chartsinfrance.net/charts/8444/singles.php"

# Create an empty DataFrame to store the scraped data
FRcharts_df = pd.DataFrame(columns=["Position", "Position Evolution", "Artist", "Song Title", "Year", "Week", "URL Week ID"])

# Send a GET request to the URL and parse the HTML content
response = requests.get(FRcharts_url)
soup = BeautifulSoup(response.content, "html.parser")

# Scrape the year and week date
year_select = soup.find("option", selected=True)
year = year_select.text.strip()

week_select = soup.find("select", attrs={"name": "semaine"})
week_option = week_select.find("option", selected=True)
week_date = week_option.text.strip()

# Scrape the chart details
chart_data = soup.find_all("div", class_="b572")
for entry in chart_data:
    position = entry.find("div", class_="c1_td2").text.strip()
    position_evolution = entry.find("font", class_="entry").text.strip()
    artist = entry.find("font", class_="noir13b").text.strip()
    song_title = entry.find("font", class_="noir11").text.strip()

# Add the chart data to the DataFrame using pd.concat (append will be deprecated from future Pandas version)
    
    FRcharts_df = pd.concat([FRcharts_df, pd.DataFrame([[position, position_evolution, artist, song_title, year, week_date]], columns=FRcharts_df.columns)], ignore_index=True)

FRcharts_df

In [None]:
# Define Dataframe first outside the function

FRcharts_df = pd.DataFrame(columns=["Position", "Position Evolution", "Artist", "Song Title", "Year", "Week", "URL Week ID"])


# Define scraping function, passing the DF as an argument to the function

def scrape_chart_pages(start_week_ID, end_week_ID, FRcharts_df):
    base_url = "https://www.chartsinfrance.net/charts/"
    
    week_id = start_week_ID #week_id = 8444  # Starting week number
    page_statuses = {}  # Dictionary to store page statuses

    while week_id <= end_week_ID:
        FRcharts_url = f"{base_url}{week_id}/singles.php"
        
        #Create a Dataframe to store the given week info
        week_df = pd.DataFrame(columns=FRcharts_df.columns)
        
        # Send a GET request to the URL and check if it received a response
        delay = 0.5
        
        try:
            response = requests.get(FRcharts_url)
            page_statuses[week_id] = response.status_code  # Store the request status in page statuses
            response.raise_for_status()  # Raise an exception if an HTTP error occurred
        except requests.exceptions.HTTPError as e:
            print(f"URL not found: {FRcharts_url}")
            continue  # Skip to the next iteration of the loop

        # Parse the HTML content
        soup = BeautifulSoup(response.content, "html.parser")

        # Scrape the year and week date
        year_select = soup.find("option", selected=True)
        year = year_select.text.strip()

        week_select = soup.find("select", attrs={"name": "semaine"})
        week_option = week_select.find("option", selected=True)
        week_date = week_option.text.strip()

        # Scrape the chart details
        chart_data = soup.find_all("div", class_="b572")
        for entry in chart_data:
            try:
                position = entry.find("div", class_="c1_td2").text.strip()
                
                # Find the position evolution in all four possible classes
                position_trends = ["entry", "egal", "up", "down"]
                position_evolution = ""
                for trend in position_trends:
                    evolution_elem = entry.find("font", class_=trend)
                    if evolution_elem:
                        position_evolution = evolution_elem.text.strip()
                        break #exit the loop after finding the position evolution

                artist = entry.find("font", class_="noir13b").text.strip()
                song_title = entry.find("font", class_="noir11").text.strip()

            except AttributeError:
                print("Error scraping data for an entry. Week #", week_id, " Skipping. Position #", position)

            # Add the scraped data to the week's DataFrame using pd.concat (append will be deprecated from future Pandas version)
            entry_df = pd.DataFrame([[position, position_evolution, artist, song_title, year, week_date, week_id]], columns=FRcharts_df.columns)
            week_df = pd.concat([week_df, entry_df], ignore_index=True)

        # Export the week's Dataframe to csv, append the csv if it exists without column names
        # Append the global DF
        week_df.to_csv('top50_in_progress.csv', mode='a', header=not os.path.isfile('top50_in_progress.csv')) #index=False, mode: 'w' (overwrite) / 'a' (append)
        FRcharts_df = pd.concat([FRcharts_df, week_df])
#       

        # Feedback for Scraping status monitoring
        print("Scraped #", week_id,": ", week_date)
        
        # Check if a second page exists
#        pagination_div = soup.find("div", class_="pagination")
#        second_page_link = pagination_div.find("a", href=True)
#        if second_page_link:
#            page2_url = base_url + second_page_link["href"]
#            response2 = requests.get(page2_url)
#            if response2.status_code == 200:
#                soup2 = BeautifulSoup(response2.content, "html.parser")
#                chart_data2 = soup2.find_all("div", class_="b572")

        # Update week number and add delay before next request
        week_id += 1
        time.sleep(delay)

    FRcharts_df.reset_index(drop=True, inplace=True)
    FRcharts_df.index = FRcharts_df.index + 1
    FRcharts_df.to_csv('top50.csv', mode='a', header=not os.path.isfile('top50.csv')) #index=False, mode: 'w' (overwrite) / 'a' (append)
    
    return page_statuses, FRcharts_df

In [None]:
# Define Dataframe first outside the function
FRcharts_df = pd.DataFrame(columns=["Position", "Position Evolution", "Artist", "Song Title", "Year", "Week", "URL Week ID"])

#Run the function and capture its return outside the function in the global DF
start_week_ID = 8452 #8444
end_week_ID = 9951 #9451

page_statuses, FRcharts_df = scrape_chart_pages(start_week_ID, end_week_ID, FRcharts_df) # 8444 to 9951

## Music Brainz API

In [None]:
# Def function to fetch the artist gender
def get_artist_gender(performer):
    try:
        # Search for the artist from the Musicbrainz API
        result = musicbrainzngs.search_artists(artist=performer)
        
        # Extract the artist ID from the search results
        artist_id = result["artist-list"][0]["id"]
        
        # Get the artist details including gender
        artist_details = musicbrainzngs.get_artist_by_id(artist_id) #, includes=["gender"]
        
        # Extract the gender from the artist details
        gender = artist_details["artist"]["gender"]
        
        return gender
    
    # Handle any exceptions and return None if gender is not found
    except (IndexError, KeyError):       
        return None

In [None]:
# Works, but takes 57 seconds to retrieve info for 29 artists!

# # Create an empty list to store the artist information
# artist_info_list = []
# 
# # Iterate over your artist_test list and fetch the information
# for artist in artist_test:
#     artist_info = fetch_artist_info(artist)
#     if artist_info:
#         artist_info_list.append(artist_info)
# 
# # Create a DataFrame from the artist information list
# artist_df = pd.DataFrame(artist_info_list)
# 
# artist_df

In [None]:
#################### BATCHING TEST #########################
# NOT CONVINCING (2'30 FOR 30 ARTISTS + RETURNS DUPLICATES + MISMATCHES)

# Set the batch size
#batch_size = 10

# Split the artist_test list into batches
#batches = [artist_test[i:i+batch_size] for i in range(0, len(artist_test), batch_size)]

# Iterate over the batches and fetch the information
#for batch in batches:
#    result = musicbrainzngs.search_artists(artist=batch)
#    if 'artist-list' in result:
#        artists = result['artist-list']
#        for artist_info in artists:
#            # Extract the artist name from artist_info
#            artist_name = artist_info['name']
#
#            # Fetch the detailed information for the artist
#            artist_info_dict = fetch_artist_info(artist_name)
#            if artist_info_dict:
#                artist_info_list.append(artist_info_dict)

################# END OF BATCHING TEST ##################

In [None]:
for performer in top_20_performers.index:
    gender = get_artist_gender(performer)
    print(f"Artist: {performer}")
    print(f"Gender: {gender}")
    print("---")

In [38]:
artist_info_df = pd.read_csv('artist_info.csv', sep=',', index_col=0)

In [39]:
artist_info_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13495 entries, 0 to 663
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Artist          13495 non-null  object
 1   MusicBrainz ID  13495 non-null  object
 2   Type            12836 non-null  object
 3   Gender          7770 non-null   object
 4   Area            11805 non-null  object
 5   Begin Date      10603 non-null  object
 6   End Date        2819 non-null   object
 7   Genres          13495 non-null  object
 8   Other Tags      13495 non-null  object
dtypes: object(9)
memory usage: 1.0+ MB


In [40]:
artist_info_df['Gender'].value_counts()

Male              5379
Female            2346
Non-binary          31
Not applicable      11
Other                3
Name: Gender, dtype: int64

In [41]:
artist_info_df

Unnamed: 0,Artist,MusicBrainz ID,Type,Gender,Area,Begin Date,End Date,Genres,Other Tags
0,Boris,57652bf8-cfe8-42e7-b9a7-5572a7080d8d,Group,,Japan,1992,,"['ambient', 'dissonant', 'doom metal', 'drone'...",[]
1,Richard “Groove” Holmes,726cfe69-c905-4161-a10c-accb13d9ec26,Person,Male,United States,1931-05-02,1991-06-29,"['hard bop', 'jazz and blues', 'soul jazz']",[]
2,‘Little’ Jimmy Dickens,bd7589a3-f82f-4c3d-b7e2-e57e89552da1,Person,Male,United States,1920-12-19,2015-01-02,[],[]
3,Pookie Hudson,29dc9009-015f-47c4-bd17-ed2af6d2ae0c,Person,,,1934-06-11,2006-01-16,[],[]
4,“Weird Al” Yankovic,7746d775-9550-4360-b8d5-c37bd448ce01,Person,Male,United States,1959-10-23,,"['accordion', 'american', 'comedy', 'comedy ro...",[]
...,...,...,...,...,...,...,...,...,...
659,Élodie Frégé,8cda32b5-1447-4ded-8a0e-f1cf8e6b3480,Person,Female,France,1982-02-15,,"['french', 'pop', 'world']",[]
660,Émilie Simon,01252145-c9e8-4de5-a480-9b2bed05450a,Person,Female,France,1978-07-17,,"['arranger', 'arrangeur', 'composer', 'composi...",[]
661,Étienne Daho,1e0de31c-4957-4649-9aa3-7b0f1d9d2c84,Person,Male,France,1956-01-14,,"['pop', 'pop rock', 'synth-pop']",[]
662,Ólafur Arnalds,6655955b-1c1e-4bcb-84e4-81bcd9efab30,Person,Male,Iceland,1986-11-03,,"['ambient', 'composer', 'contemporary classica...",[]


In [28]:
artist_info_df = artist_info_df.rename(columns={'Genres': 'Tags'})
artist_info_df

Unnamed: 0,Artist,MusicBrainz ID,Type,Gender,Area,Begin Date,End Date,Tags,Other Tags
0,Boris,57652bf8-cfe8-42e7-b9a7-5572a7080d8d,Group,,Japan,1992,,"['ambient', 'dissonant', 'doom metal', 'drone'...",[]
1,Richard “Groove” Holmes,726cfe69-c905-4161-a10c-accb13d9ec26,Person,Male,United States,1931-05-02,1991-06-29,"['hard bop', 'jazz and blues', 'soul jazz']",[]
2,‘Little’ Jimmy Dickens,bd7589a3-f82f-4c3d-b7e2-e57e89552da1,Person,Male,United States,1920-12-19,2015-01-02,[],[]
3,Pookie Hudson,29dc9009-015f-47c4-bd17-ed2af6d2ae0c,Person,,,1934-06-11,2006-01-16,[],[]
4,“Weird Al” Yankovic,7746d775-9550-4360-b8d5-c37bd448ce01,Person,Male,United States,1959-10-23,,"['accordion', 'american', 'comedy', 'comedy ro...",[]
...,...,...,...,...,...,...,...,...,...
695,US,7ae4c284-330a-404c-ade4-846441d3524d,Group,,Netherlands,1998,,[],[]
696,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]
697,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]
698,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]


In [29]:
artist_info_df = artist_info_df.rename(columns={'Other Tags': 'Genres'})


In [30]:
artist_info_df

Unnamed: 0,Artist,MusicBrainz ID,Type,Gender,Area,Begin Date,End Date,Tags,Genres
0,Boris,57652bf8-cfe8-42e7-b9a7-5572a7080d8d,Group,,Japan,1992,,"['ambient', 'dissonant', 'doom metal', 'drone'...",[]
1,Richard “Groove” Holmes,726cfe69-c905-4161-a10c-accb13d9ec26,Person,Male,United States,1931-05-02,1991-06-29,"['hard bop', 'jazz and blues', 'soul jazz']",[]
2,‘Little’ Jimmy Dickens,bd7589a3-f82f-4c3d-b7e2-e57e89552da1,Person,Male,United States,1920-12-19,2015-01-02,[],[]
3,Pookie Hudson,29dc9009-015f-47c4-bd17-ed2af6d2ae0c,Person,,,1934-06-11,2006-01-16,[],[]
4,“Weird Al” Yankovic,7746d775-9550-4360-b8d5-c37bd448ce01,Person,Male,United States,1959-10-23,,"['accordion', 'american', 'comedy', 'comedy ro...",[]
...,...,...,...,...,...,...,...,...,...
695,US,7ae4c284-330a-404c-ade4-846441d3524d,Group,,Netherlands,1998,,[],[]
696,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]
697,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]
698,U2,a3cb23fc-acd3-4ce0-8f36-1e5aa6a18432,Group,,Ireland,1976,,"['alternative dance', 'alternative pop', 'alte...",[]


In [31]:
genres_df = pd.read_csv('musicbrainz_genres.csv', sep=',', index_col=None)
genres_df.head()

Unnamed: 0,Genres
0,2 tone
1,2-step
2,aak
3,abhang
4,aboio


In [32]:
# Iterate through the rows
for index, row in artist_info_df.iterrows():
    # Compare Tags with MusicBrainz genre list
    tags = row['Tags']
    genres = []

    # Add tag to genres list if found in MB's genres
    for tag in tags:
        if tag in genres_df['Genres'].values:
            genres.append(tag)

    # Move genres to the "Genre" column and remove them from "Tags"
    artist_info_df.at[index, 'Genres'] = genres
    artist_info_df.at[index, 'Tags'] = [tag for tag in tags if tag not in genres]


artist_info_df

ValueError: could not broadcast input array from shape (0,) into shape (9,)

In [15]:
# Rename "Tags" column to "Other Tags"
artist_info_df = artist_info_df.rename(columns={'Tags': 'Other Tags'})

In [16]:
artist_info_df 

Unnamed: 0,Artist,MusicBrainz ID,Type,Gender,Area,Begin Date,End Date,Other Tags,Genres
0,Boris,57652bf8-cfe8-42e7-b9a7-5572a7080d8d,Group,,Japan,1992,,"['ambient', 'dissonant', 'doom metal', 'drone'...",[]
1,Richard “Groove” Holmes,726cfe69-c905-4161-a10c-accb13d9ec26,Person,Male,United States,1931-05-02,1991-06-29,"['hard bop', 'jazz and blues', 'soul jazz']",[]
2,‘Little’ Jimmy Dickens,bd7589a3-f82f-4c3d-b7e2-e57e89552da1,Person,Male,United States,1920-12-19,2015-01-02,[],[]
3,Pookie Hudson,29dc9009-015f-47c4-bd17-ed2af6d2ae0c,Person,,,1934-06-11,2006-01-16,[],[]
4,“Weird Al” Yankovic,7746d775-9550-4360-b8d5-c37bd448ce01,Person,Male,United States,1959-10-23,,"['accordion', 'american', 'comedy', 'comedy ro...",[]
...,...,...,...,...,...,...,...,...,...
690,The Cantina Band,f87aef91-8b6b-4471-b36f-f26a99a3e791,Group,,,,,[],[]
691,The Capitols,4e111d8d-5152-485b-ad74-ddadc5267155,Group,,United States,1962,1969,[],[]
692,The Capris,c1ab601d-27ec-4960-be0d-c5bf90455beb,Group,,Queens,1957,,[],[]
693,The Caravelles,ea994f2b-ddaf-42f7-b21d-ac1b2124c34e,Group,,United Kingdom,1963,1980,[],[]


## LAST FM API

In [33]:
api_key = '53ff0f26f16a5679c3b5f46c3322d98a'

In [63]:
#artist_mbids = ['MBID1', 'MBID2', ...]
artist_mbids = artist_info_df['MusicBrainz ID']


In [64]:
artist_mbids

0      57652bf8-cfe8-42e7-b9a7-5572a7080d8d
1      726cfe69-c905-4161-a10c-accb13d9ec26
2      bd7589a3-f82f-4c3d-b7e2-e57e89552da1
3      29dc9009-015f-47c4-bd17-ed2af6d2ae0c
4      7746d775-9550-4360-b8d5-c37bd448ce01
                       ...                 
659    8cda32b5-1447-4ded-8a0e-f1cf8e6b3480
660    01252145-c9e8-4de5-a480-9b2bed05450a
661    1e0de31c-4957-4649-9aa3-7b0f1d9d2c84
662    6655955b-1c1e-4bcb-84e4-81bcd9efab30
663    6efbfc07-3346-4d30-af85-0abe646b97ba
Name: MusicBrainz ID, Length: 13495, dtype: object

In [86]:
list_artist_mbids=list(artist_mbids)

In [87]:
list_artist_mbids

['57652bf8-cfe8-42e7-b9a7-5572a7080d8d',
 '726cfe69-c905-4161-a10c-accb13d9ec26',
 'bd7589a3-f82f-4c3d-b7e2-e57e89552da1',
 '29dc9009-015f-47c4-bd17-ed2af6d2ae0c',
 '7746d775-9550-4360-b8d5-c37bd448ce01',
 'af3d3c30-fcd5-4012-91a2-7a2845827ccc',
 'af3d3c30-fcd5-4012-91a2-7a2845827ccc',
 '25b7b584-d952-4662-a8b9-dd8cdfbfeb64',
 'ea2603e5-7412-49d9-80bc-60cc3c02cd87',
 'becd8cc6-a453-4183-af88-dedaaec859a6',
 '8cf8869d-e066-4c94-b734-fe05749badf0',
 'c2a44e93-3a2b-44aa-bd8b-7a71bb76e3b5',
 '48e78462-2f0b-4a1a-9fa4-0585e2991e80',
 '5403bf6e-bc1d-4e62-b31f-926a2bf66a14',
 '40e7db88-da65-46c6-909d-82e48a40da4d',
 '0c5c6c59-441d-496e-8542-8dbd3c102cf3',
 '741dbc8e-1cd5-4a66-b2b8-43bbad87ae12',
 'c653c820-39ad-4700-9269-a38b61f5f6c6',
 'b18bc9c4-6f22-4f1b-a918-e9c86a39fe7a',
 'b9a06530-1241-4162-836f-7b8e79deaa58',
 'c58fb6f8-7483-466d-aa52-04b9da89f2ae',
 'e2bd25ef-41c4-440a-b4e4-e7bcec4a3e39',
 'f5eceac4-9832-400f-b488-7f6048a1df6f',
 'de4ad287-a6ee-4a76-863b-f3ac8a40aa37',
 'f37c537b-3557-

In [90]:
test_list = list_artist_mbids[0]

In [91]:
test_list

'57652bf8-cfe8-42e7-b9a7-5572a7080d8d'

In [94]:
mbids = test_list

In [80]:
# mbids = ','.join(test_list)


In [95]:
url = f'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&mbid={mbids}&api_key={api_key}&format=json'

In [96]:
url

'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&mbid=57652bf8-cfe8-42e7-b9a7-5572a7080d8d&api_key=53ff0f26f16a5679c3b5f46c3322d98a&format=json'

In [97]:
response = requests.get(url)


In [98]:
response.text

'{"artist":{"name":"Boris","mbid":"b46277a8-5982-4340-a5a3-4e883d6103a8","url":"https://www.last.fm/music/Boris","image":[{"#text":"https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"small"},{"#text":"https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"medium"},{"#text":"https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"large"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":"extralarge"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":"mega"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":""}],"streamable":"0","ontour":"1","stats":{"listeners":"438358","playcount":"16575307"},"similar":{"artist":[{"name":"Boris with Michio Kurihara","url":"https://www.last.fm/music/Boris+with+Michio+Kurihara","image":[{"#text":"https:

In [59]:
if response.status_code == 200:
    data = json.loads(response.text)
    artists = data['artist'] #['artists']
    for artist in artists:
        #artist_name = artist['name']
        bio = artist['bio']['content']
        print(artist_name, ':', bio)
else:
    print('Request failed with status code:', response.status_code)

KeyError: 'artist'

In [62]:
if response.status_code == 200:
    data = json.loads(response.text)
    name = data['artist']['name']
    bio = data['artist']['bio']['content']
    tags = data['artist']['bio']['content']
    #print(tags)
    #print('--------------')
    print(bio)
else:
    print('Request failed with status code:', response.status_code)

KeyError: 'artist'

In [50]:
response.text

'{"artist":{"name":"The Cardigans","mbid":"3e55d51d-687f-4a9d-af96-2fabccf802e5","url":"https://www.last.fm/music/The+Cardigans","image":[{"#text":"https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"small"},{"#text":"https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"medium"},{"#text":"https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png","size":"large"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":"extralarge"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":"mega"},{"#text":"https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png","size":""}],"streamable":"0","ontour":"1","stats":{"listeners":"2218912","playcount":"45464117"},"similar":{"artist":[{"name":"A Camp","url":"https://www.last.fm/music/A+Camp","image":[{"#text":"https://lastfm.freetls.fastly

In [102]:
artist_mbids = artist_info_df['MusicBrainz ID']

list_artist_mbids=list(artist_mbids)

artist_bios = []  # List to store the retrieved artist bios


for index, mbid in enumerate(list_artist_mbids[0:10]):
    try:
        artist = artist_info_df.loc[index, 'Artist']
        
        # Send a request to the Last.fm API to retrieve the artist's bio
        url = f'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&mbid={mbid}&api_key={api_key}&format=json'
        response = requests.get(url)
        data = json.loads(response.text)
        
        if 'artist' in data:
            artist_bio = data['artist']['bio']['content']
            artist_bios.append({'Artist': artist, 'MBID': mbid, 'Bio': artist_bio})
        else:
            artist_bios.append({'Artist': artist, 'MBID': mbid, 'Bio': None})  # Append None if artist not found or bio not available
    
        print(f"Scraped bio for artist {index+1}/{len(list_artist_mbids[0:10])}")
    
    except Exception as e:
        print(f"Error retrieving bio for artist {artist} (MBID: {mbid}): {str(e)}")
        artist_bios.append({'Artist': artist, 'MBID': mbid, 'Bio': None})  # Append None in case of any error

# Create a new DataFrame from the artist_bios list
artist_bios_df = pd.DataFrame(artist_bios)

# Export the DataFrame to a JSON file
artist_bios_df.to_json('artist_bios.json', orient='records')

Scraped bio for artist 1/10
Scraped bio for artist 2/10
Scraped bio for artist 3/10
Scraped bio for artist 4/10
Scraped bio for artist 5/10
Scraped bio for artist 6/10
Scraped bio for artist 7/10
Scraped bio for artist 8/10
Scraped bio for artist 9/10
Scraped bio for artist 10/10


In [1]:
artist_bios_df

NameError: name 'artist_bios_df' is not defined

In [108]:
artist_mbids = artist_info_df['MusicBrainz ID']

list_artist_mbids=list(artist_mbids)

artist_bios = []  # List to store the retrieved artist bios


for index, mbid in enumerate(list_artist_mbids):
    try:
        #artist = artist_info_df.loc[index, 'Artist']
        
        # Send a request to the Last.fm API to retrieve the artist's bio
        url = f'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&mbid={mbid}&api_key={api_key}&format=json'
        response = requests.get(url)
        data = json.loads(response.text)
        
        if 'artist' in data:
            artist_bio = data['artist']['bio']['content']
            artist_bios.append({'MBID': mbid, 'Bio': artist_bio})
        else:
            artist_bios.append({'MBID': mbid, 'Bio': None})  # Append None if artist not found or bio not available
    
        print(f"Scraped bio for artist {index+1}/{len(list_artist_mbids)}")
    
    except Exception as e:
        print(f"Error retrieving bio for artist MBID: {mbid}): {str(e)}")
        artist_bios.append({'MBID': mbid, 'Bio': None})  # Append None in case of any error

# Create a new DataFrame from the artist_bios list
artist_bios_df = pd.DataFrame(artist_bios)

# Export the DataFrame to a JSON file
artist_bios_df.to_json('artist_bios.json', orient='records')

Scraped bio for artist 1/13495
Scraped bio for artist 2/13495
Scraped bio for artist 3/13495
Scraped bio for artist 4/13495
Scraped bio for artist 5/13495
Scraped bio for artist 6/13495
Scraped bio for artist 7/13495
Scraped bio for artist 8/13495
Scraped bio for artist 9/13495
Scraped bio for artist 10/13495
Scraped bio for artist 11/13495
Scraped bio for artist 12/13495
Scraped bio for artist 13/13495
Scraped bio for artist 14/13495
Scraped bio for artist 15/13495
Scraped bio for artist 16/13495
Scraped bio for artist 17/13495
Scraped bio for artist 18/13495
Scraped bio for artist 19/13495
Scraped bio for artist 20/13495
Scraped bio for artist 21/13495
Scraped bio for artist 22/13495
Scraped bio for artist 23/13495
Scraped bio for artist 24/13495
Scraped bio for artist 25/13495
Scraped bio for artist 26/13495
Scraped bio for artist 27/13495
Scraped bio for artist 28/13495
Scraped bio for artist 29/13495
Scraped bio for artist 30/13495
Scraped bio for artist 31/13495
Scraped bio for a

In [111]:
artist_bios_df.to_csv('artist_bios.csv', index=False)