# Complexity72h - Busy Bees (Ecological Networks)

## Load data, build graphs, and compute metrics

In [None]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
from functions import *

folder = "violetavivi/complexity72h_2025/data"
# Every sub-directory of `folder` is treated as one dataset; sorting keeps the order deterministic.
datasets = [f"{folder}/{d}" for d in sorted(os.listdir(folder)) if os.path.isdir(f"{folder}/{d}")]
if not datasets:
    # Fail early with a clear message instead of a NameError on `fig` further down.
    raise FileNotFoundError(f"No dataset directories found in: {folder}")

# NOTE: only the first dataset is processed -- the loop body ends with `break`.
for dataset in datasets:
    try:
        # Load species labels.
        labels = load_labels("matrix_A.csv", folder=dataset)
        # Matrices and vectors.
        A = load_matrix("matrix_A.csv", folder=dataset)
        A_sd = load_matrix("matrix_A_sd.csv", folder=dataset)
        B = load_matrix("matrix_B.csv", folder=dataset)
        B_sd = load_matrix("matrix_B_sd.csv", folder=dataset)
        env = load_vector("env.csv", folder=dataset)
        rainfall = load_vector("rainfall_anomaly.csv", folder=dataset)
        matrices = build_matrices(A, B, P=env)
        # Years the dataset comprises, truncated to one entry per matrix.
        years = load_vector("r_vector.csv", folder=dataset, quotechar='"', usecols=0, dtype=int)[:len(matrices)]
        # Build temporal graph.
        TG = build_temporal_graph(matrices)
        # Compute metrics to verify correlation.
        centralizations = compute_centralizations(TG)
        biedges = compute_biedges(TG, years=years)
        nmis = compute_nmis(TG)
        dcnmis = compute_nmis(TG, dc=True)
        # Plot temporal graph. The first year has no previous year to compare
        # with, so it carries no $H$ value; `None` (rather than 0) marks that
        # slot so a genuine score of 0 in a later year is still displayed.
        names = [
            f"{year} ($R$={int(r)})" if h is None else f"{year} ($R$={int(r)}, $H$={h:.2f})"
            for year, h, r in zip(years, [None] + dcnmis, env[:len(matrices)])
        ]
        fig = draw_graph(TG, labels=labels, title=dataset.split("/")[-1], names=names)
    except Exception:
        print(f"Error loading data for dataset: {dataset}")
        # Re-raise so the failure stays visible; swap for `continue` to skip
        # broken datasets instead.
        raise
    else:
        print(f"Loaded dataset: {dataset}")
    break

# fig.tight_layout()
fig.savefig("graph.png", format="png", dpi=300)
fig.savefig("graph.svg", format="svg")
fig

## Verify correlation between different metrics

### Degree-corrected NMI and environmental data

In [None]:
# One row per year: degree-corrected NMI (padded with a leading 0) next to
# the rainfall series.
df = pd.DataFrame(
    data={"dcnmi": [0] + dcnmis, "rainfall": rainfall},
    index=pd.RangeIndex(2015, 2015 + len(TG)),
)

# Min-max scale each column so both series share one axis.
df = (df - df.min()) / (df.max() - df.min())

fig = df.plot(marker="o")
fig.set_title("DCNMI and rainfall (Normalized)")
fig.grid(True, color="lightgray", linestyle="--", linewidth=0.5)
fig.set_yticks([tick / 10 for tick in range(11)])

df.corr()

### Degree centralization and environmental data

> **NOTE:** degree centralization is pretty much the same as graph entropy!

In [None]:
# One row per year: the environmental vector next to the in-degree
# centralization of each snapshot.
df = pd.DataFrame(
    data={"env": env, "centralization_in": centralizations["in_degree"]},
    index=pd.RangeIndex(2015, 2015 + len(TG)),
)

# Min-max scale each column so both series share one axis.
df = (df - df.min()) / (df.max() - df.min())

fig = df.plot(marker="o")
fig.set_title("Graph in-degree centralization and rainfall (Normalized)")
fig.grid(True, color="lightgray", linestyle="--", linewidth=0.5)
fig.set_yticks([tick / 10 for tick in range(11)])

df.corr()

### Bidirectional edges and environmental data

In [None]:
# One row per year: the environmental vector next to the bidirectional-edge
# metric of each snapshot.
df = pd.DataFrame(
    data={"env": env, "bidirectional": biedges},
    index=pd.RangeIndex(2015, 2015 + len(TG)),
)

# Min-max scale each column so both series share one axis.
df = (df - df.min()) / (df.max() - df.min())

fig = df.plot(marker="o")
fig.set_title("Bidirectional edges and rainfall (Normalized)")
fig.grid(True, color="lightgray", linestyle="--", linewidth=0.5)
fig.set_yticks([tick / 10 for tick in range(11)])

df.corr()

### Share of positive and negative edges per year

In [None]:
# Per-snapshot share of positive and negative edges (zero-weight edges are
# ignored, so the two shares sum to 1 whenever any signed edge exists).
pos_edges, neg_edges = [], []

for G in TG:
    weights = [G.edges[e]["weight"] for e in G.edges]
    n_pos = sum(1 for w in weights if w > 0)
    n_neg = sum(1 for w in weights if w < 0)
    total = n_pos + n_neg
    # A snapshot without signed edges would divide by zero; report it as an
    # empty bar instead.
    pos_edges.append(n_pos / total if total else 0.0)
    neg_edges.append(n_neg / total if total else 0.0)

df = pd.DataFrame(
    {"positive_edges": pos_edges, "negative_edges": neg_edges},
    index=range(2015, 2015+len(TG)),
)

fig = df.plot.bar(
    stacked=True,
    figsize=(12, 6),
    title="Positive and negative edges per year",
    # The plotted values are shares of the signed edges, not raw counts.
    ylabel="Fraction of edges",
    xlabel="Year",
    color=[tab10[0], tab10[3]],
)

fig.grid(True, color="lightgray", linestyle="--", linewidth=0.5)
# An empty label list hides the x tick labels.
fig.set_xticklabels([])

___

In [None]:
import pandas as pd

# Scratch cell: uncomment any of the series below to compare them on one
# normalized plot and inspect their pairwise correlations.
df = pd.DataFrame(
    {
        # "entropy": [graph_entropy(G) for G in TG],
        # "rainfall": rainfall,
        # "rainfall_anomaly": [rainfall[i] - rainfall[i-1] if i > 0 else 0 for i in range(len(rainfall))],
        # "env": env,
        # "centralization": centralizations["degree"],
        # "centralization_in": centralizations["in_degree"],
        # "centralization_out": centralizations["out_degree"],
        # "bidirectional": biedges,
        # "nmi": [0] + nmis,  # Start with 0 for the first year as there's no previous year to compare.
        # "dcnmi": [0] + dcnmis,  # Start with 0 for the first year as there's no previous year
    },
    index=range(2015, 2015+len(TG)),
)

# Min-max scale every enabled column. To scale only some of them, guard the
# assignment with e.g. `if col in ["rainfall", "rainfall_anomaly", "env"]:`.
for col in df.columns:
    df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())

# With every series commented out the frame has no columns, and plotting it
# raises "TypeError: no numeric data to plot"; skip the plot in that case.
if not df.columns.empty:
    fig = df.plot(marker="o")
    fig.grid(True, color="lightgray", linestyle="--", linewidth=0.5)
    fig.set_yticks([0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1])

df.corr()