In [3]:
import musicbrainzngs
import pandas as pd

# Register the user agent (app name, version, contact) before making any MusicBrainz requests
musicbrainzngs.set_useragent(
    "anonymous",
    "1.0",
    "anonymous@university.com",
)

def _earliest_release_key(release):
    """Sort key that orders releases from earliest to latest.

    The primary key is the first-release date of the release's release group,
    the secondary key is the release's own date. Missing or empty dates map to
    '9999' so that undated entries sort last. Dates are compared as strings.
    """
    release_group = release.get('release-group') or {}
    return (
        release_group.get('first-release-date') or '9999',
        release.get('date') or '9999',
    )


def get_debut_artists_with_releases_2024(year=2024, limit=100):
    """Find artists whose begin date matches ``year`` and attach their earliest release.

    Args:
        year: Value used in the ``begin:<year>`` artist search query.
            Defaults to 2024.
        limit: Maximum number of artists requested from the search.
            Defaults to 100.

    Returns:
        A list with one dict per artist containing the keys ``name``, ``id``,
        ``country``, ``begin_date`` and ``debut_release``. ``debut_release`` is
        ``None`` when the artist has no releases or the release lookup failed;
        otherwise it is a dict with ``title``, ``date`` (taken from the release)
        and ``title2``, ``type2``, ``date2`` (taken from its release group).
    """
    debut_artists = []

    # Search for artists with a specific start year
    result = musicbrainzngs.search_artists(query=f"begin:{year}", limit=limit)

    for artist in result['artist-list']:
        artist_info = {
            'name': artist['name'],
            'id': artist['id'],
            'country': artist.get('country'),
            # 'life-span' or its 'begin' entry may be absent; record None
            # instead of raising KeyError and aborting the whole loop.
            'begin_date': (artist.get('life-span') or {}).get('begin'),
            'debut_release': None  # Placeholder for the debut release
        }

        # Fetch the artist's releases
        try:
            releases = musicbrainzngs.browse_releases(artist=artist['id'], includes=['release-groups'], limit=100)
            release_list = releases['release-list']

            # Select the earliest release. The date lives on the nested
            # release group (and on the release itself as 'date'), not on a
            # top-level 'first-release-date' key of the release.
            if release_list:
                debut = min(release_list, key=_earliest_release_key)
                debut_group = debut.get('release-group') or {}
                artist_info['debut_release'] = {
                    'title': debut['title'],
                    'date': debut.get('date'),
                    'title2': debut_group.get("title"),
                    'type2': debut_group.get("type"),
                    'date2': debut_group.get("first-release-date")
                }

        except Exception as e:
            # Best effort: keep the artist row (with debut_release=None) and report the failure.
            print(f"Error fetching releases for {artist['name']}: {e}")

        debut_artists.append(artist_info)

    return debut_artists

# Query MusicBrainz for the 2024 debut artists together with their debut releases
artists_with_releases_2024 = get_debut_artists_with_releases_2024()

# Load the list of per-artist dicts into a DataFrame
df = pd.DataFrame(data=artists_with_releases_2024)


In [5]:
# Rebuild the frame from the fetched records so this cell can be re-run on its own
df = pd.DataFrame(artists_with_releases_2024)

# Replace the nested debut_release column with flat release_* columns
df = df.drop(columns=["debut_release"]).join(
    pd.json_normalize(df["debut_release"]).add_prefix("release_")
)

# Write the result as JSON Lines (one record per line)
df.to_json(
    "../data/raw/debut_performers.jsonl",
    orient="records",
    lines=True,
)
