In [1]:
import json
import itertools
from time import sleep
from pprint import pprint

import pandas as pd
import requests
from dotenv import load_dotenv
from spotipy import Spotify, SpotifyClientCredentials

from util import mbz

load_dotenv()

spotify = Spotify(client_credentials_manager=SpotifyClientCredentials(),
                  requests_timeout=10, retries=3)

In [2]:
def fetch_label(isrc):
    url = f"https://musicbrainz.org/ws/2/isrc/{isrc}?fmt=json&inc=label-rels"
    r = requests.get(url)
    #print(r)
    try:
        sleep(1)
        if not r.ok:
            #print("NOT OK")
            return
        recordings = r.json()["recordings"]
    except Exception as err:
        #print("ERROR")
        return
    if len(recordings) == 0:
        #print("NO RECORDS")
        return
    for recording in recordings:
        if "relations" not in recording:
            continue
        for relation in recording["relations"]:
            #print(relation)
            label = relation["label"]
            yield {
                "id": label["id"],
                "name": label["name"],
                "begin": relation["begin"],
                "rel_type": relation.get("type"),
                "label_type": label.get("type"),
            } # relation
        break

In [3]:
def fetch_parent_label(label_id):
    label = mbz.get_label_by_id(label_id, includes=["label-rels"])["label"]
    sleep(1)
    if "label-relation-list" not in label:
        return
    for ll in label["label-relation-list"]:
        if ll["direction"] == "forward": #\
                #or ll["label"].get("type") != "Holding":
            continue
        yield {
            "id": ll["label"]["id"],
            "name": ll["label"]["name"],
            "rel_type": ll.get("type"),
            "label_type": ll["label"].get("type"),
        }

In [4]:
def search_label(album_name, artist_name):
    releases = mbz.search_releases(f"album: {album_name} artist: {artist_name}")
    #print(album_name, artist_name)
    sleep(1)
    if "release-list" not in releases:
        return
    for i in range(0, 3):
        try:
            release = releases["release-list"][i]
        except IndexError as err:
            break
        if "label-info-list" not in release:
            break
        for label in release["label-info-list"]:
            if "label" in label:
                yield label["label"]
            # break

In [5]:
def get_artist_info(artist_name):
    artists = mbz.search_artists(artist_name)
    if artists["artist-count"] > 0:
        artist = artists["artist-list"][0]
        artist_gender = artist.get("gender")
        artist_country= artist.get("country")
        artist_tags = artist.get("tag-list")
        artist_begin_area = artist.get("begin-area", {}).get("name")
        try:
            top_genre = sorted(artist_tags, key=lambda tag: tag["count"])[-1]["name"]
        except:
            top_genre = None
        return {
            "gender": artist_gender,
            "country": artist_country,
            "genre": top_genre,
            "begin_area": artist_begin_area,
        }
    return {}

In [8]:
tracks_df = pd.read_parquet("../../data/02-track_features_balanced_wide.pq")
tracks_df["labels"] = tracks_df.apply(lambda r: list(search_label(r["album"], json.loads(r["artists"])[0])), axis=1)
tracks_df["parent_labels"] = tracks_df["labels"].apply(lambda labels: [list(fetch_parent_label(label["id"])) for label in labels if "id" in label])
tracks_df["parent_holding"] = tracks_df["parent_labels"].apply(lambda labels_list: [l for l in itertools.chain(*labels_list) if l["label_type"] == "Holding"])
tracks_df["parent_distrib"] = tracks_df["parent_labels"].apply(lambda labels_list: [l for l in itertools.chain(*labels_list) if l["label_type"] == "Holding"])
tracks_df.to_parquet("data/02-track_features_balanced_wide_w_labels.pq")

FileNotFoundError: [Errno 2] No such file or directory: 'data/02-track_features_balanced_wide_w_labels.pq'

In [9]:
labels_df = tracks_df[["id", "album_id", "artist_ids", "labels", "parent_labels", "parent_holding", "parent_distrib"]]
labels_df.to_parquet("../../data/raw/labels.pq")
labels_df.to_csv("../../data/raw/labels.csv")

In [10]:
labels_df

Unnamed: 0,id,album_id,artist_ids,labels,parent_labels,parent_holding,parent_distrib
0,3tv697PZNnaQN0Mn2zGhS5,17jzAujoXwIF60ojew1yHU,"[""7MiDcPa6UiV3In7lIM71IN""]",[{'id': '9e6b4d7f-4958-4db7-8504-d89e315836af'...,[[{'id': '76165151-48c0-49da-aa6e-ff50cda698e2...,[{'id': 'e097dfe4-25e6-40ee-b720-f81e513609d2'...,[{'id': 'e097dfe4-25e6-40ee-b720-f81e513609d2'...
1,5pL1kLX6Na0U0TMcjr4uCX,2kwDzS0PeUOLvCBFrfdgXZ,"[""6LYGxFCmlJvAG6loQiNeox""]",[],[],[],[]
2,08Xghd9UmgnmPnC5TXaHPb,040Q4a68MlNNNn6Wiqc3y9,"[""7wxdiKcDiQBZmlu0TA7QwB""]",[],[],[],[]
3,4wd4XuOegRaSCUh8h0v0nT,66s167lmALWxjRCd09LYLq,"[""2XsudBfsvfyV3kgElVU8iF""]",[{'id': '011d1192-6f65-45bd-85c4-0400dd45693e'...,[[{'id': 'a18a216f-fc97-4d3e-9b8b-aaa93446f0e5...,[{'id': '27795d9f-3994-4643-9cd2-c5f2b304472d'...,[{'id': '27795d9f-3994-4643-9cd2-c5f2b304472d'...
4,2UVbBKQOdFAekPTRsnkzcf,7fd7SEK25VS3gJAUgSwL6y,"[""6oMuImdp5ZcFhWP0ESe6mG""]",[{'id': 'abea2d3e-eabf-4480-ab24-9382dd642c73'...,[[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28...,[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28'...,[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28'...
...,...,...,...,...,...,...,...
4241,5FPnjikbwlDMULCCCa6ZCJ,6WivmTXugLZLmAWnZhlz7g,"[""0hEurMDQu99nJRq8pTxO14""]",[{'id': '88a12c3e-4dcf-440c-9a89-c2f4036cfba0'...,[[{'id': '9311b139-862e-476b-95e9-f46de04953de...,[],[]
4242,4675yUu8AUbE72T94BkLCD,0knL5fTAXqq9oq4Yeyibc8,"[""7hJcb9fa4alzcOq3EaNPoG"", ""2RdwBSPQiwcmiDo9ki...",[{'id': '977bebc6-0299-414a-a1e7-294771483efe'...,[[{'id': 'ce7a2977-c7dd-4695-97e0-5e72572bf0d3...,[{'id': '399a1a44-54b2-4cf2-99be-d95d096eebd3'...,[{'id': '399a1a44-54b2-4cf2-99be-d95d096eebd3'...
4243,0Sg5kWoILCs2zBoIHIsDAa,0xBQEVKKlFKaetqike1qXK,"[""0u2FHSq3ln94y5Q57xazwf""]",[{'id': 'abea2d3e-eabf-4480-ab24-9382dd642c73'...,[[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28...,[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28'...,[{'id': 'e63fbdc9-1547-4ffa-96d1-c4e887b6af28'...
4244,4G2npa34uNpY4IOnAhA02K,7f6DVhjTCZa9dZMOe21eUp,"[""0JbfYwi6VX4DlyGp0mDJnS""]",[],[],[],[]
