In [2]:
import logging
from time import sleep

import pandas as pd
import requests

from slugify import slugify
from util import mbz, MusicBrainzArtistCredits, resolve_release_date,\
    with_spotify_track_metadata, with_spotify_artist_metadata, with_spotify_album_metadata

logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
# "General Field" categories at the awards:
# * Record of the Year
# * Best New Artist
# * Song of the Year
# * Album of the Year

award_categories_albums = {
    "Album of the Year": "64249380-b076-4a9d-aa41-e617d81fa1c9",
    "Best Alternative Music Album": "16a89a2d-1d9e-4a29-a59b-a5e88304a75d",
    "Best Contemporary Instrumental Album": "6d57cea7-9e36-45e9-bc6f-e012702c3383",
    "Best Country Album": "b3db9d98-e65f-493e-89a3-e751d88d5802",
    "Best Dance/Electronic Album": "292ebdfe-efab-4e6d-8cbb-5ba1f3bb67ee",
    "Best Folk Album": "1a6331c0-770b-4930-9568-6cfa238e7c39",
    "Best Gospel Album": "fabcae7e-07e1-4a27-8f42-4cbbf492a2fa",
    "Best Jazz Vocal Album": "1de1ec47-b3f5-41ea-bd62-6a0eddef6dad",
    "Best Latin Pop Album": "763e57a0-624c-4696-8827-318bd04dad2a",
    "Best New Age Album": "51e73fae-c35c-4a3b-8728-71c0f25934f7",
    "Best Pop Vocal Album": "96eef69c-02a8-4e07-8b0d-cc56a184bd1f",
    "Best R&B Album": "ea68a069-6f97-4d53-896a-15fbf98fee8f",
    "Best Rap Album": "e0d04d24-d3c6-40ed-b193-3b54eb07ff39",
    "Best Reggae Album": "d209d0d9-34dc-4058-a996-eb69d092b3b7",
    "Best Rock Album": "32316c2c-9e30-4dea-b87a-b230d0648ee3",
    "Best Traditional Pop Vocal Album": "759d5a0d-6f19-4666-b1b5-4fb42fd1dc84",
    "Best Urban Contemporary Album": "22f63d2b-d89e-4ea7-8e8f-40aa99f9ac4a",
    "Best Progressive R&B Album": "8fca5005-9b30-429c-9019-e237fbbf0df8",
}

award_categories_recordings = {
    "Best Dance Recording": "5dc3275e-ac47-4391-9550-a4d469a97cec",
    "Best Metal Performance": "5de171bc-91bd-4887-8422-ba93f60394ae",
    "Best Pop Duo/Group Performance": "751db902-9a7f-466b-80a5-ed6690f7a5ad",
    "Best Pop Solo Performance": "6102cfa9-39b4-4f54-bc71-c1b4e4778dbf",
    "Best R&B Performance": "70ecf47c-c271-4e0e-9a8d-8d34a405f8b5",
    "Best R&B Song": "e0fdce30-0429-4c3e-bace-a58d0c73a02f",
    "Best Rap Performance": "4bc824fc-53f9-49f1-b6f6-0d2a82a6409d",
    "Best Rap Song": "152884ff-c77b-4389-af17-c384f49ff9f8",
    "Best Rock Performance": "04e66a3e-57db-411d-bed9-b07e192ce5ce",
    "Best Rock Song": "5b7adb3d-28c7-4921-b144-d063c4d5d239",
    "Best Traditional R&B Performance": "46b6b177-a4db-4f49-a05e-46d295536f55",
    "Record of the Year": "e0ca0518-ab36-4ecf-bd71-448e94057ff2",
    "Song of the Year": "cff3008a-3a98-4626-a321-c42cd4eaf10d",
}

nominated_albums = []
nominated_artists = []
nominated_recordings = []

In [3]:
# Grammy Awards for Albums

for award_category, series_mbid in award_categories_albums.items():
    logging.info(award_category)

    resp = requests.get(f"https://musicbrainz.org/ws/2/series/{series_mbid}?inc=release-group-rels+artist-credits&fmt=json")
    resp.raise_for_status()
    sleep(1)

    for relation in resp.json()["relations"]:
        release_group = relation["release_group"]
        artist_credits = MusicBrainzArtistCredits.from_dict(release_group)

        release_year, release_date = None, None
        if "first-release-date" in release_group:
            if len(release_group["first-release-date"]) == 10:
                release_date = release_group["first-release-date"]
                release_year = int(release_date[0:4])
            elif len(release_group["first-release-date"]) == 4:
                release_year = int(release_group["first-release-date"])

        for artist_mbid, artist_name in artist_credits:
            nominated_artists.append({
                "artist_mbid": artist_mbid,
                "artist_name": artist_name,
                "award_category": award_category,
                "award_year": int(relation["attribute-values"]["number"][0:4]),
                "award_nominee": 1,
                "award_winner": int("winner" in relation["attribute-values"]["number"]),
                "release_year": release_year,
                "release_date": release_date,
            })

        nominated_albums.append({
            "album_mbid": release_group["id"],
            "album_name": release_group["title"],
            "artist_mbid": artist_credits.mbids[0],
            "artist_name": artist_credits.names[0],
            "artist_credit": artist_credits.credit,
            "artists_credited_count ": artist_credits.count,
            "award_category": award_category,
            "award_year": int(relation["attribute-values"]["number"][0:4]),
            "award_nominee": 1,
            "award_winner": int("winner" in relation["attribute-values"]["number"]),
            "release_year": release_year,
            "release_date": release_date,
        })


In [4]:
# Grammy Awards for Recordings

for award_category, series_mbid in award_categories_recordings.items():
    print(award_category, series_mbid)

    resp = requests.get(f"https://musicbrainz.org/ws/2/series/{series_mbid}?inc=recording-rels+artist-credits+release-rels&fmt=json")
    resp.raise_for_status()
    sleep(1)

    for relation in resp.json()["relations"]:
        recording = relation["recording"]
        artist_credits = MusicBrainzArtistCredits.from_dict(recording)
        print(f"... {artist_credits.credit} - {recording['title']}")
        release_year, release_date = resolve_release_date(artist_name=artist_credits.credit,
                                                          track_name=recording["title"])

        for artist_mbid, artist_name in artist_credits:
            nominated_artists.append({
                "artist_mbid": artist_mbid,
                "artist_name": artist_name,
                "award_category": award_category,
                "award_year": int(relation["attribute-values"]["number"][0:4]),
                "award_nominee": 1,
                "award_winner": int("winner" in relation["attribute-values"]["number"]),
                "release_year": release_year,
                "release_date": release_date,
            })

        nominated_recordings.append({
            "recording_mbid": recording["id"],
            "recording_name": recording["title"],
            "artist_mbid": artist_credits.mbids[0],
            "artist_name": artist_credits.names[0],
            "artist_credit": artist_credits.credit,
            "artists_credited_count ": artist_credits.count,
            "award_category": award_category,
            "award_year": int(relation["attribute-values"]["number"][0:4]),
            "award_nominee": 1,
            "award_winner": int("winner" in relation["attribute-values"]["number"]),
            "release_year": release_year,
            "release_date": release_date,
        })

Best Dance Recording 5dc3275e-ac47-4391-9550-a4d469a97cec
... KAYTRANADA, Kali Uchis - 10%
... Janet Jackson - All for You
... Skrillex, Sirah - Bangarang
... Cher - Believe
... Jayda G - Both Of Us
... Giorgio Moroder, Donna Summer - Carry On
... Zedd, Foxes - Clarity
... Kylie Minogue - Come Into My World
... Dirty Vegas - Days Go By
... The Chainsmokers, Daya - Don’t Let Me Down
... Silk City, Dua Lipa - Electricity
... The Chemical Brothers, Q‐Tip - Galvanize
... The Chemical Brothers - Got to Keep On
... Justin Timberlake - Lovestoned / I Think She Knows
... Disclosure, Aminé, slowthai - My High
... Diplo, SIDEPIECE - On My Mind
... Rihanna - Only Girl (in the World)
... Lady Gaga - Poker Face
... Clean Bandit, Jess Glynne - Rather Be
... Madonna - Ray of Light
... Skrillex - Scary Monsters and Nice Sprites
... Justin Timberlake, Timbaland - SexyBack
... Flume, Toro y Moi - The Difference
... LCD Soundsystem - Tonite
... Britney Spears - Toxic
... Jack Ü, Justin Bieber - Where Are

In [6]:
print(f"nominated albums: {len(nominated_albums)}")
print(f"nominated artists: {len(nominated_artists)}")
print(f"nominated recordings: {len(nominated_recordings)}")

nominated albums: 892
nominated artists: 1068
nominated recordings: 108


In [23]:
# Grammy Nominees
# Because nominees aren't always enumerated in the lists above

def is_artist_nominated(list_of_dicts, year, artist_mbid, artist_name):
    artist_name = slugify(artist_name)
    for record in list_of_dicts:
        artist_match = (record["artist_mbid"] == artist_mbid
                        or slugify(record["artist_name"]) == artist_name)
        if artist_match and record["award_year"] == year:
            return True
    return False


def is_recording_nominated(list_of_dicts, year, artist_mbid, artist_name,
                          recording_mbid, recording_name):
    artist_name = slugify(artist_name)
    recording_name = slugify(recording_name)
    for record in list_of_dicts:
        artist_match = (record["artist_mbid"] == artist_mbid
                        or slugify(record["artist_name"]) == artist_name)
        recording_match = (record["recording_mbid"] == recording_mbid
                           or slugify(record["recording_name"]) == recording_name)
        if artist_match and recording_match and record["award_year"] == year:
            return True
    return False


for year in range(1995, 2022):
    release = mbz.search_releases(f"{year} GRAMMY Nominees")["release-list"][0]
    sleep(1)
    recordings = mbz.browse_recordings(release=release["id"], includes=["artist-credits"])["recording-list"]
    sleep(1)

    for recording in recordings:
        artist_credits = MusicBrainzArtistCredits.from_dict(recording)
        release_year, release_date = resolve_release_date(artist_name=artist_credits.credit,
                                                          track_name=recording["title"])
        recording_already_nominated = False

        for artist_mbid, artist_name in artist_credits:
            if is_recording_nominated(nominated_recordings, year,
                                      artist_mbid=artist_mbid, artist_name=artist_name,
                                      recording_mbid=recording["id"], recording_name=recording["title"])\
                    and not recording_already_nominated:
                recording_already_nominated = True
                print(f"track already nominated: {year} - {artist_name} - {recording['title']}")

            if is_artist_nominated(nominated_artists, year,
                                   artist_mbid=artist_mbid, artist_name=artist_name):
                print(f"artist already nominated: {year} - {artist_name}")
                continue

            print(f"adding artist: {year} - {artist_name}")
            nominated_artists.append({
                "artist_mbid": artist_mbid,
                "artist_name": artist_name,
                "award_category": "General",
                "award_year": year,
                "award_nominee": 1,
                "award_winner": 0,
                "release_year": release_year,
                "release_date": release_date,
            })

        if recording_already_nominated:
            print(f"track already nominated: {year} - {artist_credits.credit} - {recording['title']}")
            continue

        print(f"adding track: {year} - {artist_credits.credit} - {recording['title']}")
        nominated_recordings.append({
            "recording_mbid": recording["id"],
            "recording_name": recording["title"],
            "artist_mbid": artist_credits.mbids[0],
            "artist_name": artist_credits.names[0],
            "artist_credit": artist_credits.credit,
            "artists_credited_count ": artist_credits.count,
            "award_category": "General",
            "award_year": year,
            "award_nominee": 1,
            "award_winner": 0,
            "release_year": release_year,
            "release_date": release_date,
        })

adding artist: 1995 - Sheryl Crow
adding track: 1995 - Sheryl Crow - All I Wanna Do
adding artist: 1995 - Elton John
adding track: 1995 - Elton John - Can You Feel the Love Tonight (End Title)
artist already nominated: 1995 - Mary Chapin Carpenter
adding track: 1995 - Mary Chapin Carpenter - He Thinks He’ll Keep Her
adding artist: 1995 - Mariah Carey
adding track: 1995 - Mariah Carey - Hero
artist already nominated: 1995 - Boyz II Men
adding track: 1995 - Boyz II Men - I’ll Make Love to You
artist already nominated: 1995 - Bonnie Raitt
adding track: 1995 - Bonnie Raitt - Longing in Their Hearts
artist already nominated: 1995 - Bonnie Raitt
adding track: 1995 - Bonnie Raitt - Love Sneakin’ Up on You
artist already nominated: 1995 - Luther Vandross
adding track: 1995 - Luther Vandross - Love the One You’re With
adding artist: 1995 - Barbra Streisand
adding track: 1995 - Barbra Streisand - Ordinary Miracles
artist already nominated: 1995 - Seal
adding track: 1995 - Seal - Prayer for the D

In [26]:
nominated_albums_df = pd.DataFrame(nominated_albums)
nominated_artists_df = pd.DataFrame(nominated_artists)
nominated_recordings_df = pd.DataFrame(nominated_recordings)

print(f"nominated albums: {len(nominated_albums_df)}")
print(f"nominated artists: {len(nominated_artists_df)}")
print(f"nominated recordings: {len(nominated_recordings_df)}")

nominated albums: 892
nominated artists: 1413
nominated recordings: 580


In [85]:
# Add Spotify ids and metadata to recordings
nominated_recordings_df = with_spotify_track_metadata(nominated_recordings_df,
                                                      artist_key="artist_name", track_key="recording_name")

# Add Spotify ids and metadata to albums
nominated_albums_df = with_spotify_album_metadata(nominated_albums_df,
                                                  artist_key="artist_name", album_key="album_name")

# Merge with nominated artists. For the remaining artists, scrape spotify.
artist_link_df = nominated_recordings_df[~nominated_recordings_df["spotify_track_id"].isnull()][["artist_mbid", "spotify_artist_id"]].drop_duplicates()
nominated_artists_df = pd.merge(nominated_artists_df, artist_link_df, on="artist_mbid", how="outer")
nominated_artists_df = with_spotify_artist_metadata(nominated_artists_df, artist_key="artist_name")

nominated_albums_df.to_csv("data/album_nominations.csv.gz", compression="gzip")
nominated_albums_df.to_parquet("data/album_nominations.pq")
nominated_artists_df.to_csv("data/artist_nominations.csv.gz", compression="gzip")
nominated_artists_df.to_parquet("data/artist_nominations.pq")
nominated_recordings_df.to_csv("data/recording_nominations.csv.gz", compression="gzip")
nominated_recordings_df.to_parquet("data/recording_nominations.pq")

INFO:root:artist: KAYTRANADA, track: 10%
INFO:root:status: 96.72 (561 / 580
INFO:root:artist: Janet Jackson, track: All for You
INFO:root:artist: Skrillex, track: Bangarang
INFO:root:artist: Cher, track: Believe
INFO:root:artist: Jayda G, track: Both Of Us
INFO:root:artist: Giorgio Moroder, track: Carry On
INFO:root:artist: Zedd, track: Clarity
INFO:root:artist: Kylie Minogue, track: Come Into My World
INFO:root:artist: Dirty Vegas, track: Days Go By
INFO:root:artist: The Chainsmokers, track: Don’t Let Me Down
INFO:root:artist: Silk City, track: Electricity


KeyboardInterrupt: 

In [None]:
# diagnostics
#
print(nominated_recordings_df["spotify_track_id"].isnull().sum())
# nominated_recordings_df[nominated_recordings_df["spotify_track_id"].isnull()]
print(nominated_artists_df["spotify_artist_id"].isnull().sum())
print(nominated_albums_df["spotify_artist_id"].isnull().sum())

In [7]:
# Summarize artists nominations
import pandas as pd


df = pd.read_parquet("../../data/raw/artist_nominations.pq")
df = df[~df["spotify_artist_id"].isna()]

df[f"grammy_nominated_csum_1994"] = 0
df[f"grammy_won_csum_1994"] = 0
for year in range(1995, 2022):
    df[f"grammy_nominated_{year}"] = ((df["award_year"] == year) & (df["award_nominee"] == 1)).astype(int)
    df[f"grammy_won_{year}"] = ((df["award_year"] == year) & (df["award_winner"] == 1)).astype(int)
    df[f"grammy_nominated_csum_{year}"] = df[f"grammy_nominated_csum_{year - 1}"] + df[f"grammy_nominated_{year}"]
    df[f"grammy_won_csum_{year}"] = df[f"grammy_won_csum_{year - 1}"] + df[f"grammy_won_{year}"]


artist_popularity = (
    df[["artist_mbid", "spotify_track_popularity"]]
        .rename(columns={"spotify_track_popularity": "spotify_popularity"})
        .drop_duplicates()
)

artist_first_win = (
    df[df["award_winner"] == 1]
        .groupby("artist_mbid")
        .min("award_year")
        .reset_index()
        .rename(columns={"award_year": "first_win"})
    [["artist_mbid", "first_win"]]
)

artist_last_win = (
    df[df["award_winner"] == 1]
        .groupby("artist_mbid")
        .max("award_year")
        .reset_index()
        .rename(columns={"award_year": "last_win"})
    [["artist_mbid", "last_win"]]
)

artist_first_nomination = (
    df[df["award_winner"] == 0]
        .groupby("artist_mbid")
        .min("award_year")
        .reset_index()
        .rename(columns={"award_year": "first_nomination"})
    [["artist_mbid", "first_nomination"]]
)

artist_last_nomination = (
    df[df["award_winner"] == 0]
        .groupby("artist_mbid")
        .min("award_year")
        .reset_index()
        .rename(columns={"award_year": "last_nomination"})
    [["artist_mbid", "last_nomination"]]
)

df = (
    df.groupby(["artist_mbid", "spotify_artist_id", "artist_name"])
        .agg("sum")
        .reset_index()
        .drop(columns=["award_year", "release_year", "spotify_track_popularity"])
)

#df = pd.merge(df, artist_first_win, on="artist_mbid", how="outer")
#df = pd.merge(df, artist_last_win, on="artist_mbid", how="outer")
#df = pd.merge(df, artist_first_nomination, on="artist_mbid", how="outer")
#df = pd.merge(df, artist_last_nomination, on="artist_mbid", how="outer")
#df = pd.merge(df, artist_popularity, on="artist_mbid", how="outer")
#df["first_nomination"] = df.apply(lambda row: min(row["first_nomination"], row["first_win"]), axis=1)

#df.to_csv("data/artist_nominations_summary.csv.gz", compression="gzip")
#df.to_parquet("data/artist_nominations_summary.pq")
df

  df[f"grammy_won_csum_{year}"] = df[f"grammy_won_csum_{year - 1}"] + df[f"grammy_won_{year}"]
  df[f"grammy_nominated_{year}"] = ((df["award_year"] == year) & (df["award_nominee"] == 1)).astype(int)
  df[f"grammy_won_{year}"] = ((df["award_year"] == year) & (df["award_winner"] == 1)).astype(int)
  df[f"grammy_nominated_csum_{year}"] = df[f"grammy_nominated_csum_{year - 1}"] + df[f"grammy_nominated_{year}"]


Unnamed: 0,artist_mbid,spotify_artist_id,artist_name,award_nominee,award_winner,grammy_nominated_csum_1994,grammy_won_csum_1994,grammy_nominated_1995,grammy_won_1995,grammy_nominated_csum_1995,...,grammy_nominated_csum_2019,grammy_won_csum_2019,grammy_nominated_2020,grammy_won_2020,grammy_nominated_csum_2020,grammy_won_csum_2020,grammy_nominated_2021,grammy_won_2021,grammy_nominated_csum_2021,grammy_won_csum_2021
0,0031bc7a-1b6f-4620-99db-5202405b0749,2Irt3HB3JdzLWsDmvB9QP6,Lena Horne,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,1,0
1,0039c7ae-e1a7-4a7d-9b49-0cbc716821a6,0YrtvWJMgSdVrk3SfNjTbx,Death Cab for Cutie,4,0,0,0,0,0,0,...,4,0,0,0,4,0,0,0,4,0
2,00f82af1-6537-4d4b-9e03-103fffe19999,67FFKYikvTlvsPNk4NPOYJ,Baha Men,1,1,0,0,0,0,0,...,1,1,0,0,1,1,0,0,1,1
3,0103c1cc-4a09-4a5d-a344-56ad99a77193,0p4nmQO2msCgU4IF37Wi3j,Avril Lavigne,2,0,0,0,0,0,0,...,2,0,0,0,2,0,0,0,2,0
4,012151a8-0f9a-44c9-997f-ebd68b5389f9,53XhwfbYqKCa1cC15pYq2q,Imagine Dragons,2,0,0,0,0,0,0,...,2,0,0,0,2,0,0,0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
708,fe85367e-4036-43c1-874b-b91af81cb4f3,68d6ZfyMUYURol2y15Ta2Y,Snarky Puppy,3,3,0,0,0,0,0,...,2,2,0,0,2,2,1,1,3,3
709,ff6e677f-91dd-4986-a174-8db0474b1799,3GBPw9NK25X1Wt2OUvOwY3,Jack Johnson,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,1,0
710,ff95eb47-41c4-4f7f-a104-cdc30f02e872,1vgSaC0BPlL6LEm4Xsx59J,Brian Eno,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,1,0
711,ffb5f70c-cd10-48a5-92d0-cea440e6fd40,3fLBmhcgWkPI47LfVQ8paB,Tamar Braxton,1,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,1,0


In [None]:
#- un-nominated & un-charted
#- charted, not nominated
#- not charted, nominated
#- winner