In [1]:
GENIUS_TOKEN = "CYylC51NBQe_zpK-nXJ0P6fWOqDs7vjpq_uUvDr5BCRF7NBfFJMjz-sohJQ3A4wH"

In [4]:
import os
import time
import re
import requests
import pandas as pd
import numpy as np
import networkx as nx
import community as community_louvain
from itertools import combinations
from collections import Counter, defaultdict
from wordcloud import WordCloud
from bs4 import BeautifulSoup
import lyricsgenius
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
import json
import csv

In [8]:
songs = pd.read_csv('../../datadump/songs_no_dublicates.csv',
                    usecols=['recording_mbid','first_release_year','artist_mbid'])
writes = pd.read_csv('../../datadump/writerships.csv',
                     usecols=['recording_mbid','writer_id'])

# Sørg for kun den tidligste udgivelse per recording
songs = (songs
         .sort_values('first_release_year')
         .drop_duplicates('recording_mbid', keep='first'))

# ─── 1) Beregn peak‐decade per artist ───────────────────────────────
# A) tilføj decade‐kolonne for hver sang
songs['decade'] = (songs.first_release_year // 10) * 10

# B) tæl sange per artist × decade
adc = (
    songs
    .groupby(['artist_mbid','decade'])
    .size()
    .reset_index(name='count')
)
# C) find for hver artist den decade med max count
idx = adc.groupby('artist_mbid')['count'].idxmax()
artist_decade_map = (
    adc
    .loc[idx, ['artist_mbid','decade']]
    .set_index('artist_mbid')['decade']
    .to_dict()
)


# ─── 2) Funktion der bygger artist–artist‐graf for et givent årti ──
def build_artist_graph_peak(decade_start):
    decade_end = decade_start + 9

    # A) vælg kun sange i det tiår
    dec_songs = songs[
        songs['first_release_year'].between(decade_start, decade_end) &
        (songs['artist_mbid'].map(artist_decade_map) == decade_start)
    ]
    rec2art = dict(zip(dec_songs.recording_mbid, dec_songs.artist_mbid))

    # B) filtrer writerships
    writes_sub = writes[writes.recording_mbid.isin(rec2art)]

    # C) akkumuler artist‐par pr. writer og husk writers
    pair_counter = Counter()
    edge_writers = defaultdict(set)
    for writer_id, grp in writes_sub.groupby('writer_id'):
        artists = sorted({ rec2art[r] for r in grp.recording_mbid })
        if len(artists) < 2:
            continue
        for a1, a2 in combinations(artists, 2):
            pair_counter[(a1, a2)] += 1
            edge_writers[(a1, a2)].add(writer_id)

    # D) byg graf med vægt og writer-liste
    G = nx.Graph()
    for (a1, a2), w in pair_counter.items():
        G.add_edge(a1, a2,
                   weight=w,
                   writers=list(edge_writers[(a1, a2)]))
    return G


# ─── 3) Eksempel: byg grafer for 1960s–2020s og print stats ─────────
artist_graphs_peak = {}
for start in range(1960, 2030, 10):
    label = f"{start}s"
    G = build_artist_graph_peak(start)
    artist_graphs_peak[label] = G
    print(f"{label}: {G.number_of_nodes():>4} noder  | {G.number_of_edges():>6} kanter")

1960s:    9 noder  |     15 kanter
1970s:    5 noder  |      7 kanter
1980s:    2 noder  |      1 kanter
1990s:   78 noder  |   1143 kanter
2000s:  195 noder  |   5959 kanter
2010s:  137 noder  |   2682 kanter
2020s:   73 noder  |    568 kanter


In [10]:
import pandas as pd

# Indlæs artist‐lookup og byg dict: artist_mbid → navn
artist_lookup = pd.read_csv('../../datadump/artists_all.csv',
                           usecols=['artist_mbid','name'],
                           dtype=str)
artist_dict = artist_lookup.set_index('artist_mbid')['name'].to_dict()

In [20]:
input_dir = "../../public/flest_udgivelser"
output_dir = "../../public/flest_udgivelser_csv"
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    if filename.endswith(".json"):
        with open(os.path.join(input_dir, filename), "r", encoding="utf-8") as f:
            data = json.load(f)

        decade = filename.replace(".json", "")
        output_path = os.path.join(output_dir, f"{decade}.csv")

        with open(output_path, "w", newline="", encoding="utf-8") as out:
            writer = csv.writer(out)
            writer.writerow(["artist_mbid", "artist_name", "community"])
            for node in data["nodes"]:
                writer.writerow([node["id"], node["name"], node["community"]])


import re

def load_communities_from_csv(folder):
    decade2comm = {}
    for filename in os.listdir(folder):
        if filename.endswith(".csv"):
            match = re.search(r"(\d{4}s)", filename)  # matcher fx '1960s'
            if not match:
                continue
            decade = match.group(1)

            df = pd.read_csv(os.path.join(folder, filename))
            comm2nodes = defaultdict(list)
            for _, row in df.iterrows():
                comm2nodes[row["community"]].append(row["artist_mbid"])
            decade2comm[decade] = comm2nodes
    return decade2comm

all_comm2nodes_peak = load_communities_from_csv("../../public/flest_udgivelser_csv")

In [21]:
for decade, G in artist_graphs_peak.items():
    print(f"\nTjekker community data for {decade}...")
    comm2nodes = all_comm2nodes_peak.get(decade, {})
    print(f" → Fundet {len(comm2nodes)} communities")
    if not comm2nodes:
        continue


Tjekker community data for 1960s...
 → Fundet 2 communities

Tjekker community data for 1970s...
 → Fundet 2 communities

Tjekker community data for 1980s...
 → Fundet 1 communities

Tjekker community data for 1990s...
 → Fundet 4 communities

Tjekker community data for 2000s...
 → Fundet 4 communities

Tjekker community data for 2010s...
 → Fundet 3 communities

Tjekker community data for 2020s...
 → Fundet 3 communities


In [22]:
CANDIDATE_SONGS = 40
TARGET_LYRICS   = 20
DATADIR         = "../../datadump"
OUT_DIR         = "community_lyrics_ovh"
os.makedirs(OUT_DIR, exist_ok=True)
id_to_name = artists.set_index('artist_mbid')['name'].to_dict()

# ─── 3) HJÆLPEFUNKTIONER ──────────────────────────────────────────────────────
def fetch_lyrics_ovh(artist: str, title: str) -> str:
    url = f"https://api.lyrics.ovh/v1/{requests.utils.quote(artist)}/{requests.utils.quote(title)}"
    try:
        r = requests.get(url, timeout=5)
        if r.status_code == 200:
            return r.json().get("lyrics", "")
    except Exception:
        pass
    return ""

def clean_lyrics(text: str) -> str:
    text = re.sub(r"\[.*?\]", "", text)
    text = re.sub(r"^[A-Za-z0-9 ]+:\s*", "", text, flags=re.MULTILINE)
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    return "\n".join(lines)

# ─── 4) LOOP OVER COMMUNITIES & HENT LYRICS ─────────────────────────────────
for decade, G in artist_graphs_peak.items():
    comm2nodes = all_comm2nodes_peak.get(decade, {})
    if not comm2nodes:
        continue

    for comm_id, members in comm2nodes.items():
        label = f"{decade}_comm{comm_id}"
        print(f"\n▶ Behandler {label}: {len(members)} artister")

        # A) Byg per‐artist liste over “forbundne” writers
        eligible_writers = defaultdict(set)
        for u, v, data in G.edges(data=True):
            if u in members and v in members and "writers" in data:
                for w in data["writers"]:
                    eligible_writers[u].add(w)
                    eligible_writers[v].add(w)

        # B) Filtrér sange
        df = (
            writes
            .merge(songs[['recording_mbid','title','artist_mbid']],
                   on='recording_mbid', how='inner')
        )
        df = df[df['artist_mbid'].isin(members)]
        mask = df.apply(lambda row: row['writer_id'] in eligible_writers[row['artist_mbid']], axis=1)
        df = df[mask]

        df_songs = df[['recording_mbid','title','artist_mbid']].drop_duplicates('recording_mbid')
        if df_songs.empty:
            print("   ⚠ Ingen sange opfylder de nye, skærpede kriterier – springer over.")
            continue

        # C) Hent lyrics løbende indtil vi har 20
        rows = []
        seen_recordings = set()
        while len(rows) < TARGET_LYRICS and len(seen_recordings) < len(df_songs):
            remaining = df_songs[~df_songs['recording_mbid'].isin(seen_recordings)]
            if remaining.empty:
                break
            row = remaining.sample(n=1, random_state=int(time.time())).iloc[0]
            seen_recordings.add(row['recording_mbid'])

            artist_name = id_to_name.get(row['artist_mbid'], "")
            title_clean = re.sub(r"\s*\(.*?\)", "", row['title']).strip()
            raw = fetch_lyrics_ovh(artist_name, title_clean)
            lyric = clean_lyrics(raw)
            if lyric:
                rows.append({
                    "recording_mbid": row["recording_mbid"],
                    "title":          row["title"],
                    "artist_name":    artist_name,
                    "lyrics":         lyric
                })
            time.sleep(1)

        # D) Gem lyrics
        out = pd.DataFrame(rows)
        path = os.path.join(OUT_DIR, f"{label}_lyrics.csv")
        out.to_csv(path, index=False, encoding="utf-8")
        print(f"   ✅ Gemt {len(rows)} lyrics i {path}")


▶ Behandler 1960s_comm0: 5 artister
   ✅ Gemt 7 lyrics i community_lyrics_ovh/1960s_comm0_lyrics.csv

▶ Behandler 1960s_comm1: 4 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1960s_comm1_lyrics.csv

▶ Behandler 1970s_comm0: 2 artister
   ✅ Gemt 0 lyrics i community_lyrics_ovh/1970s_comm0_lyrics.csv

▶ Behandler 1970s_comm1: 3 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1970s_comm1_lyrics.csv

▶ Behandler 1980s_comm0: 2 artister
   ✅ Gemt 2 lyrics i community_lyrics_ovh/1980s_comm0_lyrics.csv

▶ Behandler 1990s_comm0: 21 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1990s_comm0_lyrics.csv

▶ Behandler 1990s_comm1: 41 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1990s_comm1_lyrics.csv

▶ Behandler 1990s_comm2: 8 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1990s_comm2_lyrics.csv

▶ Behandler 1990s_comm3: 8 artister
   ✅ Gemt 20 lyrics i community_lyrics_ovh/1990s_comm3_lyrics.csv

▶ Behandler 2000s_comm1: 14 artister
   ✅ Gemt 20 lyrics i community_lyri

In [27]:
import os
import pandas as pd
import re
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

LYRICS_DIR = "community_lyrics_ovh"
OUT_WC_FREQ = "wordclouds_frequency"
os.makedirs(OUT_WC_FREQ, exist_ok=True)

# indlæs alle community‐filer
for fname in os.listdir(LYRICS_DIR):
    if not fname.endswith("_lyrics.csv"):
        continue
    label = fname.replace("_lyrics.csv","")
    file_path = os.path.join(LYRICS_DIR, fname)
    if os.stat(file_path).st_size == 0:
        print(f"Skipping empty file: {fname}")
        continue
    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        print(f"Skipping file with no data: {fname}")
        continue
    # slå alle lyrics sammen til ét dokument
    text = "\n".join(df["lyrics"].fillna(""))
    # rens kun alfanumerisk
    tokens = re.findall(r"\b\w{3,}\b", text.lower())
    freq = Counter(tokens)
    common = {w:c for w,c in freq.most_common(100)}
    # lav cloud
    wc = WordCloud(width=800, height=400, background_color="white")\
         .generate_from_frequencies(common)
    plt.figure(figsize=(8,4))
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(f"Frequency WordCloud: {label}")
    plt.savefig(f"{OUT_WC_FREQ}/{label}_freq.png", dpi=150, bbox_inches="tight")
    plt.close()

Skipping file with no data: 1970s_comm0_lyrics.csv


In [23]:
import os
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords

LYRICS_DIR = "community_lyrics_ovh"
OUT_WC_TFIDF = "wordclouds_tfidf"
os.makedirs(OUT_WC_TFIDF, exist_ok=True)

nltk.download("stopwords", quiet=True)
STOP = stopwords.words("english")

for fname in os.listdir(LYRICS_DIR):
    if not fname.endswith("_lyrics.csv"): continue
    label = fname.replace("_lyrics.csv","")
    file_path = os.path.join(LYRICS_DIR, fname)
    if os.stat(file_path).st_size == 0:
        print(f"Skipping empty file: {fname}")
        continue
    try:
        df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        print(f"Skipping file with no data: {fname}")
        continue
    docs = df["lyrics"].fillna("").tolist()

    # simpel rens og stop‐liste
    def clean(txt):
        txt = re.sub(r"[^\w\s]"," ", txt.lower())
        return txt

    docs = [clean(d) for d in docs]
    vec = TfidfVectorizer(stop_words=STOP,
                          max_features=50,
                          token_pattern=r"(?u)\b\w\w+\b")
    X = vec.fit_transform(docs)
    terms = vec.get_feature_names_out()
    scores = X.sum(axis=0).A1
    freqs = dict(zip(terms, scores))

    wc = WordCloud(width=800, height=400, background_color="white")\
         .generate_from_frequencies(freqs)
    plt.figure(figsize=(8,4))
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(f"TF–IDF WordCloud: {label}")
    plt.savefig(f"{OUT_WC_TFIDF}/{label}_tfidf.png", dpi=150, bbox_inches="tight")
    plt.close()

Skipping file with no data: 1970s_comm0_lyrics.csv


In [24]:
import networkx as nx
import pandas as pd

rows = []
for decade, G in artist_graphs_peak.items():
    if G.number_of_nodes()==0:
        continue

    comps = list(nx.connected_components(G))
    largest = max(comps, key=len)
    G0 = G.subgraph(largest)

    rows.append({
        "decade": decade,
        "n_nodes":       G.number_of_nodes(),
        "n_edges":       G.number_of_edges(),
        "avg_degree":    sum(dict(G.degree()).values())/G.number_of_nodes(),
        "density":       nx.density(G),
        "avg_clust":     nx.average_clustering(G, weight="weight"),
        "n_comps":       nx.number_connected_components(G),
        "largest_comp":  len(largest),
        "diameter":      nx.diameter(G0) if G0.number_of_nodes()>1 else 0,
        "avg_shortest":  nx.average_shortest_path_length(G0) if G0.number_of_nodes()>1 else 0
    })

stats_df = pd.DataFrame(rows)
stats_df.to_csv("network_summary_per_decade.csv", index=False)
print(stats_df)

  decade  n_nodes  n_edges  avg_degree   density  avg_clust  n_comps  \
0  1960s        9       15    3.333333  0.416667   0.172758        1   
1  1970s        5        7    2.800000  0.700000   0.201463        1   
2  1980s        2        1    1.000000  1.000000   0.000000        1   
3  1990s       78     1143   29.307692  0.380619   0.058971        1   
4  2000s      195     5959   61.117949  0.315041   0.010425        1   
5  2010s      137     2682   39.153285  0.287892   0.012263        1   
6  2020s       73      568   15.561644  0.216134   0.085273        1   

   largest_comp  diameter  avg_shortest  
0             9         3      1.638889  
1             5         2      1.300000  
2             2         1      1.000000  
3            78         3      1.646021  
4           195         4      1.727465  
5           137         3      1.761271  
6            73         4      1.973744  


In [25]:
import pandas as pd

# Indlæs kun de kolonner, du behøver
artist_lookup = pd.read_csv(
    '../../datadump/artists_all.csv',
    usecols=['artist_mbid', 'name'],
    dtype=str
)

# Lav dict: mbid → navn
artist_dict = artist_lookup.set_index('artist_mbid')['name'].to_dict()

In [8]:
import pandas as pd
import networkx as nx

# ── antag at du allerede har defineret: ────────────────────────────────────
#   artist_graphs: {'1960s': G_1960s, '1970s': G_1970s, …}
#   artist_dict:   mapping fra artist_mbid → artistnavn

rows = []
for decade, G in artist_graphs_peak.items():
    if G.number_of_nodes() == 0:
        continue
    # Beregn centraliteter
    deg_cent = nx.degree_centrality(G)
    btw_cent = nx.betweenness_centrality(G, weight='weight')
    cls_cent = nx.closeness_centrality(G)
    eig_cent = nx.eigenvector_centrality(G, weight='weight', max_iter=500)

    measures = [
        ('degree',       deg_cent),
        ('betweenness',  btw_cent),
        ('closeness',    cls_cent),
        ('eigenvector',  eig_cent),
    ]

    # For hver måltype: tag top 5 og gem i en tabel
    for name, cent in measures:
        top5 = sorted(cent.items(), key=lambda x: x[1], reverse=True)[:5]
        for mbid, score in top5:
            rows.append({
                'decade':        decade,
                'measure':       name,
                'artist_mbid':   mbid,
                'artist_name':   artist_dict.get(mbid, mbid),
                'centrality':    score
            })

# Lav en DataFrame og gem til CSV
df = pd.DataFrame(rows)
df.to_csv('centrality_top5_per_decade.csv', index=False, encoding='utf-8')
print(df)

    decade      measure                           artist_mbid  \
0    1960s       degree  e7495426-6e14-4429-b647-dbe700ad57d4   
1    1960s       degree  a85c70af-90e4-4a7c-83b1-e1bd567d7d2f   
2    1960s       degree  812e18ca-29c2-472f-a185-b85befd03221   
3    1960s       degree  bc4ca610-333b-424b-8c3c-c724e6327b62   
4    1960s       degree  b411483b-e9cc-4b4f-9661-0452333c615a   
..     ...          ...                                   ...   
123  2020s  eigenvector  272989c8-5535-492d-a25c-9f58803e027f   
124  2020s  eigenvector  6f1a58bf-9b1b-49cf-a44a-6cefad7ae04f   
125  2020s  eigenvector  5df62a88-cac9-490a-b62c-c7c88f4020f4   
126  2020s  eigenvector  b1e26560-60e5-4236-bbdb-9aa5a8d5ee19   
127  2020s  eigenvector  b7539c32-53e7-4908-bda3-81449c367da6   

                   artist_name  centrality  
0               Joanie Sommers    0.875000  
1              Bobby Goldsboro    0.750000  
2                   Jimmy Dean    0.375000  
3                   Peter Nero    0.375

In [9]:
import pandas as pd

# antag at df er den DataFrame du allerede har med kolonner:
# ['decade','measure','artist_name','centrality']

# 1) Lav to pivottabeller: én med navne, én med scores
names = (
    df
    .groupby(['decade','measure'])['artist_name']
    .apply(lambda x: ", ".join(x))
    .unstack()
)
scores = (
    df
    .groupby(['decade','measure'])['centrality']
    .apply(lambda x: ", ".join(f"{v:.3f}" for v in x))
    .unstack()
)

# 2) Sæt dem sammen i én DataFrame med multi‐level kolonner
out = pd.concat([names, scores], axis=1, keys=['artists','scores'])
out.columns = pd.MultiIndex.from_product(
    out.columns.levels,
    names=['type','measure']
)

# 3) (Valgfrit) Ryk type først, så mål
out = out.swaplevel(axis=1).sort_index(axis=1, level=0)

# 4) Gem eller vis
out.to_csv('centrality_top5_pivot.csv', encoding='utf-8')
out

measure,betweenness,betweenness,closeness,closeness,degree,degree,eigenvector,eigenvector
type,artists,scores,artists,scores,artists,scores,artists,scores
decade,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1960s,"Bobby Goldsboro, Joanie Sommers, Peter Nero, A...","0.429, 0.411, 0.250, 0.250, 0.161","Joanie Sommers, Bobby Goldsboro, Jimmy Dean, P...","0.889, 0.800, 0.615, 0.615, 0.615","Joanie Sommers, Bobby Goldsboro, Jimmy Dean, P...","0.875, 0.750, 0.375, 0.375, 0.375","Joanie Sommers, Peter Nero, Morgana King, Jimm...","0.653, 0.528, 0.438, 0.223, 0.187"
1970s,"Shaun Cassidy, Jeannie C. Riley, The Partridge...","0.500, 0.333, 0.000, 0.000, 0.000","Lynn Anderson, Shaun Cassidy, Jeannie C. Riley...","1.000, 0.800, 0.800, 0.667, 0.667","Lynn Anderson, Shaun Cassidy, Jeannie C. Riley...","1.000, 0.750, 0.750, 0.500, 0.500","Lynn Anderson, Jeannie C. Riley, Loggins & Mes...","0.688, 0.609, 0.266, 0.244, 0.159"
1980s,"Sheena Easton, Amy Holland","0.000, 0.000","Sheena Easton, Amy Holland","1.000, 1.000","Sheena Easton, Amy Holland","1.000, 1.000","Sheena Easton, Amy Holland","0.707, 0.707"
1990s,"Bobby McFerrin, Janis Joplin, Neil Diamond, Bl...","0.063, 0.054, 0.042, 0.039, 0.038","Neil Diamond, Anne Murray, Kenny Rogers, Célin...","0.828, 0.819, 0.811, 0.802, 0.794","Neil Diamond, Anne Murray, Kenny Rogers, Célin...","0.792, 0.779, 0.766, 0.753, 0.740","Kenny Rogers, Anne Murray, Neil Diamond, Shirl...","0.351, 0.337, 0.327, 0.317, 0.220"
2000s,"Corinne Bailey Rae, Santana, Meat Loaf, Duffy,...","0.035, 0.028, 0.024, 0.023, 0.022","Diana Ross, Barry Manilow, Tom Jones, Eric Cla...","0.812, 0.802, 0.802, 0.785, 0.764","Diana Ross, Barry Manilow, Tom Jones, Eric Cla...","0.768, 0.758, 0.753, 0.727, 0.696","Frank Sinatra, Nat King Cole, Louis Armstrong,...","0.415, 0.352, 0.314, 0.291, 0.267"
2010s,"John Legend, Ed Sheeran, Bruce Springsteen, Ri...","0.032, 0.031, 0.030, 0.027, 0.026","Ed Sheeran, Bruce Springsteen, Johnny Mathis, ...","0.791, 0.773, 0.768, 0.764, 0.756","Ed Sheeran, Bruce Springsteen, Johnny Mathis, ...","0.735, 0.706, 0.699, 0.691, 0.676","Johnny Mathis, Tony Bennett, Dionne Warwick, H...","0.429, 0.389, 0.302, 0.219, 0.211"
2020s,"Taylor Swift, Lana Del Rey, Chicago, Luke Comb...","0.127, 0.108, 0.063, 0.059, 0.055","Taylor Swift, Lana Del Rey, Dua Lipa, SZA, Pos...","0.699, 0.692, 0.679, 0.661, 0.655","Lana Del Rey, Taylor Swift, Dua Lipa, SZA, Pos...","0.583, 0.583, 0.556, 0.528, 0.472","SZA, Dua Lipa, Doja Cat, Post Malone, Lana Del...","0.370, 0.339, 0.304, 0.273, 0.227"


In [26]:
import networkx as nx
import numpy as np
import pandas as pd
from collections import Counter

# ---------- helpers ---------------------------------------------------
def gini(array):
    """Gini coefficient for inequality of degree distribution."""
    if len(array) == 0:
        return np.nan
    array = np.sort(np.array(array, dtype=float))
    n = len(array)
    cum = np.cumsum(array)
    return (n + 1 - 2 * cum.sum() / cum[-1]) / n

def safe_metric(func, *args, **kwargs):
    """Return NaN instead of blowing up on disconnected graphs."""
    try:
        return func(*args, **kwargs)
    except (nx.NetworkXError, ZeroDivisionError):
        return np.nan

# ---------- what we want to measure ----------------------------------
def decade_metrics(G, top_n=5):
    """
    Compute a dictionary of graph statistics that are easy to interpret
    in a social-science write-up.
    """
    n = G.number_of_nodes()
    m = G.number_of_edges()
    deg   = dict(G.degree())
    deg_w = dict(G.degree(weight="weight"))
    degrees = np.array(list(deg.values()))

    # giant component for path-based metrics
    if n > 0:
        GCC = max(nx.connected_components(G), key=len)
        sub = G.subgraph(GCC)
    else:
        sub = G

    return {
        "nodes"             : n,
        "edges"             : m,
        "avg_degree"        : degrees.mean()          if n else np.nan,
        "median_degree"     : np.median(degrees)      if n else np.nan,
        "degree_gini"       : gini(degrees),          # inequality measure
        "density"           : nx.density(G),
        "avg_clustering"    : safe_metric(nx.average_clustering, G, weight=None),
        "assortativity_deg" : safe_metric(nx.degree_assortativity_coefficient, G),
        "avg_path_length"   : safe_metric(nx.average_shortest_path_length, sub),
        "diameter"          : safe_metric(nx.diameter, sub),
        # ---- community detection (greedy modularity) ----
        **{
            "n_communities": len(comms := list(nx.algorithms.community.greedy_modularity_communities(G))),
            "modularity"   : nx.algorithms.community.modularity(G, comms)
        },
        # top-N weighted-degree writers for slide copy
        "top_writers"       : ", ".join(
            f"{wid}:{int(deg_w[wid])}"
            for wid, _ in Counter(deg_w).most_common(top_n)
        )
    }

# ---------------------------------------------------------------------
# 1. RUN over all decades
# ---------------------------------------------------------------------
records = []
for decade, G in artist_graphs_peak.items():
    if G.number_of_nodes() == 0:
        print(f"Skipping null graph for {decade}")
        continue
    rec = decade_metrics(G)
    rec["decade"] = decade
    records.append(rec)

metrics_df = (pd.DataFrame(records)
                .sort_values("decade")
                .set_index("decade")
                .round(3))

display(metrics_df)          # Jupyter display; or print(metrics_df)

# ---------------------------------------------------------------------
# 2. OPTIONAL – export for website / slides
# ---------------------------------------------------------------------
metrics_df.to_csv("decade_network_metrics_flest_udgivelser.csv")

  return (xy * (M - ab)).sum() / np.sqrt(vara * varb)


Unnamed: 0_level_0,nodes,edges,avg_degree,median_degree,degree_gini,density,avg_clustering,assortativity_deg,avg_path_length,diameter,n_communities,modularity,top_writers
decade,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1960s,9,15,3.333,3.0,0.281,0.417,0.589,-0.558,1.639,3,2,0.222,"e7495426-6e14-4429-b647-dbe700ad57d4:27, bc4ca..."
1970s,5,7,2.8,3.0,0.143,0.7,0.767,-0.5,1.3,2,2,-0.219,"354812d4-2dfb-4611-9a8f-8f8e795e48bf:33, 79251..."
1980s,2,1,1.0,1.0,0.0,1.0,0.0,,1.0,1,1,0.0,"7b004920-b04e-4ff2-b2e5-55d8f1cc0522:4, 8439a3..."
1990s,78,1143,29.308,30.5,0.334,0.381,0.743,-0.2,1.646,3,3,0.086,"c3d14b41-a48d-488f-bfed-ce0597bb0b1f:437, 05ec..."
2000s,195,5959,61.118,58.0,0.372,0.315,0.718,-0.162,1.727,4,3,0.141,"197450cd-0124-4164-b723-3c22dd16494d:2260, fbe..."
2010s,137,2682,39.153,35.0,0.353,0.288,0.686,-0.142,1.761,3,3,0.282,"48896dee-a985-424d-9849-84802f7e79c9:900, 8be0..."
2020s,73,568,15.562,15.0,0.379,0.216,0.597,-0.11,1.974,4,3,0.214,"6f1a58bf-9b1b-49cf-a44a-6cefad7ae04f:139, 2729..."
