In [1]:
import json
import itertools
from time import sleep
from pprint import pprint

import pandas as pd
import requests
from dotenv import load_dotenv
from spotipy import Spotify, SpotifyClientCredentials

from util import mbz

# Load settings from a local .env file (if any) into the process environment
# before any API client is constructed.
load_dotenv()

# Shared Spotify client using the client-credentials flow. No credentials are
# passed explicitly here -- presumably SpotifyClientCredentials reads them from
# the environment populated above (TODO confirm the expected variable names).
# requests_timeout / retries bound how long and how often a call is attempted.
spotify = Spotify(client_credentials_manager=SpotifyClientCredentials(),
                  requests_timeout=10, retries=3)

In [2]:
def fetch_label(isrc, timeout=10):
    """Yield label relationships of the first MusicBrainz recording for an ISRC.

    Queries the MusicBrainz ``isrc`` endpoint with ``inc=label-rels`` and walks
    the returned recordings. Recordings without a ``relations`` key are
    skipped; iteration stops after the first recording that has one, so at
    most one recording contributes results.

    This is a best-effort lookup: a failed request, a non-OK response, or an
    unparseable / unexpected payload all end the generator without yielding.

    Args:
        isrc: ISRC code interpolated into the request URL.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the notebook indefinitely.

    Yields:
        dict: ``id``, ``name``, ``begin``, ``rel_type`` and ``label_type`` for
        every relation that carries a ``label`` entry. ``begin``, ``rel_type``
        and ``label_type`` are ``None`` when absent from the payload.
    """
    # NOTE(review): no custom User-Agent is sent; confirm this is acceptable
    # for the MusicBrainz web service.
    url = f"https://musicbrainz.org/ws/2/isrc/{isrc}?fmt=json&inc=label-rels"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Treat network failures like any other failed lookup.
        return
    finally:
        # Pause after every attempt -- presumably to respect the service's
        # rate limit.
        sleep(1)

    if not response.ok:
        return
    try:
        recordings = response.json()["recordings"] or []
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or did not have the expected shape.
        return

    for recording in recordings:
        if "relations" not in recording:
            continue
        for relation in recording["relations"]:
            label = relation.get("label")
            if not label:
                # Not a label relationship; nothing to report.
                continue
            yield {
                "id": label["id"],
                "name": label["name"],
                "begin": relation.get("begin"),
                "rel_type": relation.get("type"),
                "label_type": label.get("type"),
            }
        # Only the first recording that exposes relations is used.
        break

In [4]:
def fetch_parent_label(label_id):
    """Yield the labels linked to ``label_id`` through non-forward relations.

    Looks the label up through ``mbz.get_label_by_id`` with ``label-rels``
    included, waits one second, then walks its ``label-relation-list`` (if
    present). Relations whose ``direction`` is ``"forward"`` are dropped;
    every other relation is reported -- presumably these are the ones where
    the related label sits above this one (TODO confirm).

    Args:
        label_id: Identifier handed to ``mbz.get_label_by_id``.

    Yields:
        dict: ``id``, ``name``, ``rel_type`` and ``label_type`` of each
        related label.
    """
    response = mbz.get_label_by_id(label_id, includes=["label-rels"])
    label = response["label"]
    sleep(1)

    relations = label["label-relation-list"] if "label-relation-list" in label else []
    for relation in relations:
        if relation["direction"] != "forward":
            related = relation["label"]
            yield {
                "id": related["id"],
                "name": related["name"],
                "rel_type": relation.get("type"),
                "label_type": related.get("type"),
            }

In [8]:
def search_label(album_name, artist_name, max_releases=3):
    """Yield the labels attached to the top release search hits for an album.

    Runs one ``mbz.search_releases`` query built from the album and artist
    names, waits one second, then inspects the first ``max_releases`` entries
    of ``release-list``. A release without ``label-info-list`` is skipped
    rather than ending the scan, so a label-less first hit no longer hides
    the labels of the following hits. Label-info entries without a ``label``
    key are skipped as well.

    The same label may be yielded more than once when several hits share it.

    Args:
        album_name: Album title placed in the search query.
        artist_name: Artist name placed in the search query.
        max_releases: How many leading search hits to inspect (default 3);
            ``None`` inspects all returned hits.

    Yields:
        The ``label`` value of each label-info entry, as returned by ``mbz``.
    """
    # NOTE(review): names are interpolated into the query unescaped -- confirm
    # the search backend tolerates titles containing query-syntax characters.
    releases = mbz.search_releases(f"album: {album_name} artist: {artist_name}")
    sleep(1)
    if "release-list" not in releases:
        return
    for release in releases["release-list"][:max_releases]:
        for label_info in release.get("label-info-list", ()):
            if "label" in label_info:
                yield label_info["label"]

In [6]:
def _top_tag_name(tags):
    """Return the name of the tag with the highest numeric count, or None."""
    if not tags:
        return None
    try:
        # Counts are compared as integers: string counts would otherwise sort
        # lexicographically ("9" ahead of "10"). sorted() is stable, so ties
        # still resolve to the last tag listed.
        ranked = sorted(tags, key=lambda tag: int(tag["count"]))
        return ranked[-1]["name"]
    except (KeyError, TypeError, ValueError):
        # Malformed tag entries: report no genre instead of failing.
        return None


def get_artist_info(artist_name):
    """Look up basic attributes of the best artist match for a name.

    Uses the first entry of ``artist-list`` returned by
    ``mbz.search_artists``.

    Args:
        artist_name: Name passed to ``mbz.search_artists``.

    Returns:
        dict: ``gender``, ``country``, ``genre`` (name of the most-counted
        tag) and ``begin_area`` (name of the ``begin-area`` entry); each is
        ``None`` when the match does not provide it. An empty dict when the
        search returns no artists.
    """
    artists = mbz.search_artists(artist_name)
    candidates = artists.get("artist-list") or []
    if not candidates:
        return {}

    artist = candidates[0]
    # "begin-area" may be absent or explicitly None.
    begin_area = artist.get("begin-area") or {}
    return {
        "gender": artist.get("gender"),
        "country": artist.get("country"),
        "genre": _top_tag_name(artist.get("tag-list")),
        "begin_area": begin_area.get("name"),
    }

In [None]:
def _parents_of_type(parent_lists, label_type):
    """Flatten per-label parent lists and keep parents of one label type."""
    return [
        parent
        for parent in itertools.chain.from_iterable(parent_lists)
        if parent["label_type"] == label_type
    ]


tracks_df = pd.read_parquet("data/02-track_features_balanced_wide.pq")

# One release search per track, using the album title and the first entry of
# the JSON-encoded "artists" column. Each lookup sleeps, so this is slow.
tracks_df["labels"] = tracks_df.apply(
    lambda r: list(search_label(r["album"], json.loads(r["artists"])[0])),
    axis=1,
)

# For every label found, one list of parent labels (labels without an "id"
# are skipped).
tracks_df["parent_labels"] = tracks_df["labels"].apply(
    lambda labels: [list(fetch_parent_label(label["id"])) for label in labels if "id" in label]
)

# Split the parents by label type. "parent_distrib" previously repeated the
# "Holding" filter and was therefore a copy of "parent_holding".
tracks_df["parent_holding"] = tracks_df["parent_labels"].apply(
    lambda parent_lists: _parents_of_type(parent_lists, "Holding")
)
tracks_df["parent_distrib"] = tracks_df["parent_labels"].apply(
    lambda parent_lists: _parents_of_type(parent_lists, "Distributor")
)

tracks_df.to_parquet("data/02-track_features_balanced_wide_w_labels.pq")