In [1]:
import pandas as pd
import networkx as nx
import numpy as np

# Input workbooks, one per year analysed. The paths are relative, so they are
# resolved against the kernel's working directory. build_trade_graph reads the
# "Trades" sheet of each file.
FILE_2012 = "resourcetradeearth-all-all-243-2012.xlsx"
FILE_2022 = "resourcetradeearth-all-all-243-2022.xlsx"

# Name of the column holding the traded quantity, in units of 1000 kg.
WEIGHT_COL = "Weight (1000kg)"  # i.e. tonnes

def build_trade_graph(excel_path: str, year: int) -> tuple[nx.DiGraph, pd.DataFrame]:
    """
    Build a directed, weighted trade graph for a single year.

    Reads the "Trades" sheet of ``excel_path``, keeps the rows whose "Year"
    equals ``year`` and sums WEIGHT_COL over every exporter -> importer pair.

    Parameters
    ----------
    excel_path : str
        Path of the Excel workbook to read.
    year : int
        Value matched against the "Year" column; also stored as the graph
        attribute ``G.graph["year"]``.

    Returns
    -------
    (G, edges)
        G : nx.DiGraph whose nodes are ISO3 codes. Every node carries a
            ``name`` attribute; every edge carries ``weight`` (the summed
            WEIGHT_COL) and ``distance`` (1 / weight).
        edges : DataFrame with one row per (Exporter ISO3, Importer ISO3)
            pair and the columns "Exporter ISO3", "Exporter",
            "Importer ISO3", "Importer", WEIGHT_COL. Pairs whose total is
            <= 0 are listed here (and their endpoints become nodes) but
            they get no edge in G.

    Raises
    ------
    KeyError
        If the sheet lacks "Year" or any of the columns listed above.
    """
    columns = ["Exporter ISO3", "Exporter", "Importer ISO3", "Importer", WEIGHT_COL]

    trades = pd.read_excel(excel_path, sheet_name="Trades")
    trades = trades.loc[trades["Year"] == year, columns].copy()

    # Missing or unparseable weights are treated as zero trade.
    trades[WEIGHT_COL] = pd.to_numeric(trades[WEIGHT_COL], errors="coerce").fillna(0.0)

    # Aggregate on the ISO3 pair only. Grouping on the name columns as well
    # would split one pair into several rows whenever a country name is spelled
    # inconsistently, and the later edge insert would then overwrite (not add)
    # the weight. It would also drop rows whose name is missing.
    edges = (
        trades.groupby(["Exporter ISO3", "Importer ISO3"], as_index=False)
              .agg({"Exporter": "first", "Importer": "first", WEIGHT_COL: "sum"})
    )[columns]

    G = nx.DiGraph(year=year)

    # Nodes: every country seen on either side of a pair, in order of first
    # appearance (exporters first), with its display name attached.
    exporters = edges[["Exporter ISO3", "Exporter"]].rename(
        columns={"Exporter ISO3": "ISO3", "Exporter": "Name"}
    )
    importers = edges[["Importer ISO3", "Importer"]].rename(
        columns={"Importer ISO3": "ISO3", "Importer": "Name"}
    )
    nodes = pd.concat([exporters, importers], ignore_index=True).drop_duplicates()
    G.add_nodes_from(
        (iso, {"name": name}) for iso, name in zip(nodes["ISO3"], nodes["Name"])
    )

    # Edges: only strictly positive flows, which also keeps 1 / weight finite.
    traded = edges[edges[WEIGHT_COL] > 0]
    G.add_edges_from(
        (
            exporter,
            importer,
            {
                "weight": float(total),           # trade volume (tonnes)
                "distance": 1.0 / float(total),   # for weighted shortest paths / betweenness
            },
        )
        for exporter, importer, total in zip(
            traded["Exporter ISO3"], traded["Importer ISO3"], traded[WEIGHT_COL]
        )
    )

    return G, edges


def top_exporters_by_weight(G: nx.DiGraph, topn: int = 10) -> pd.DataFrame:
    """
    Rank exporters by their total outgoing edge weight.

    Sums the ``weight`` attribute over each node's outgoing edges and returns
    the ``topn`` largest totals, sorted in descending order, as a DataFrame
    with the columns "ISO3", "Exporter" and "Export weight (tonnes)".
    Nodes without any outgoing edge do not appear in the result.
    """
    totals = {}
    for exporter, _importer, attrs in G.edges(data=True):
        shipped = attrs["weight"]
        if exporter not in totals:
            totals[exporter] = 0.0
        totals[exporter] += shipped

    # One column per field; row order follows the order exporters were first seen.
    ranking = pd.DataFrame({
        "ISO3": list(totals.keys()),
        "Exporter": [G.nodes[iso].get("name") for iso in totals],
        "Export weight (tonnes)": list(totals.values()),
    })
    ranking = ranking.sort_values("Export weight (tonnes)", ascending=False)
    return ranking.head(topn)


def centralities(G: nx.DiGraph, topn: int = 10) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Return the ``topn`` nodes by degree centrality and by weighted betweenness.

    Both returned frames share the columns "ISO3", "Country",
    "Degree centrality", "Weighted betweenness", "In-degree" and "Out-degree".

    Returns
    -------
    (top_deg, top_bet)
        top_deg : sorted descending by degree centrality, ties broken by
            in-degree and then out-degree.
        top_bet : sorted descending by weighted betweenness, ties broken by
            degree centrality.
    """
    # Degree centrality counts in- plus out-degree, normalised by (n - 1).
    degree_score = nx.degree_centrality(G)
    # Shortest paths use the "distance" edge attribute as their length.
    betweenness_score = nx.betweenness_centrality(G, weight="distance", normalized=True)

    node_ids = list(G.nodes())
    in_deg = dict(G.in_degree())
    out_deg = dict(G.out_degree())

    table = pd.DataFrame({
        "ISO3": node_ids,
        "Country": [G.nodes[node].get("name") for node in node_ids],
        "Degree centrality": [degree_score[node] for node in node_ids],
        "Weighted betweenness": [betweenness_score[node] for node in node_ids],
        "In-degree": [in_deg[node] for node in node_ids],
        "Out-degree": [out_deg[node] for node in node_ids],
    })

    degree_keys = ["Degree centrality", "In-degree", "Out-degree"]
    betweenness_keys = ["Weighted betweenness", "Degree centrality"]

    by_degree = table.sort_values(degree_keys, ascending=False).head(topn)
    by_betweenness = table.sort_values(betweenness_keys, ascending=False).head(topn)
    return by_degree, by_betweenness


# ---- Build the graphs and rankings for 2012 and 2022 ----
G2012, E2012 = build_trade_graph(FILE_2012, 2012)
G2022, E2022 = build_trade_graph(FILE_2022, 2022)

top_exp_2012, top_exp_2022 = (
    top_exporters_by_weight(graph, 10) for graph in (G2012, G2022)
)

(top_deg_2012, top_bet_2012), (top_deg_2022, top_bet_2022) = (
    centralities(graph, 10) for graph in (G2012, G2022)
)

# Columns shown for each kind of centrality table.
_degree_cols = ["ISO3", "Country", "Degree centrality", "In-degree", "Out-degree"]
_betweenness_cols = ["ISO3", "Country", "Weighted betweenness", "Degree centrality"]

# (heading, table) pairs, printed in order: exporters for both years first,
# then the two centrality tables for each year.
_reports = [
    ("Top exporters 2012:\n", top_exp_2012),
    ("\nTop exporters 2022:\n", top_exp_2022),
    ("\nTop degree centrality 2012:\n", top_deg_2012[_degree_cols]),
    ("\nTop weighted betweenness 2012:\n", top_bet_2012[_betweenness_cols]),
    ("\nTop degree centrality 2022:\n", top_deg_2022[_degree_cols]),
    ("\nTop weighted betweenness 2022:\n", top_bet_2022[_betweenness_cols]),
]
for _heading, _table in _reports:
    print(_heading, _table)

Top exporters 2012:
     ISO3       Exporter  Export weight (tonnes)
62   IND          India            8.116887e+06
152  VNM        Vietnam            6.202719e+06
137  THA       Thailand            5.513687e+06
111  PAK       Pakistan            2.725304e+06
148  USA  United States            1.752402e+06
147  URY        Uruguay            7.133573e+05
20   BRA         Brazil            6.686038e+05
28   CHN          China            6.152690e+05
67   ITA          Italy            5.515180e+05
94   MMR        Myanmar            4.113910e+05

Top exporters 2022:
     ISO3       Exporter  Export weight (tonnes)
65   IND          India            1.652398e+07
140  THA       Thailand            6.745331e+06
155  VNM        Vietnam            3.648794e+06
114  PAK       Pakistan            3.043730e+06
25   CHN          China            1.512240e+06
150  USA  United States            1.309622e+06
99   MMR        Myanmar            1.056571e+06
78   KHM       Cambodia            6.234023e+

In [7]:
import numpy as np
import pandas as pd
import networkx as nx

def global_network_metrics(G: nx.DiGraph) -> dict:
    """
    Compute a small set of global metrics for a directed trade network.

    Returns a dict with these keys, in this order:
    - "nodes", "edges": node and edge counts
    - "density": directed density from ``nx.density``
    - "reciprocity": overall directed reciprocity, or NaN when the graph has
      no edges
    - "transitivity_undirected", "avg_clustering_undirected": computed on the
      undirected projection of G (common convention), or NaN when the graph
      has fewer than 3 nodes
    """
    node_count = G.number_of_nodes()
    edge_count = G.number_of_edges()

    metrics = {
        "nodes": node_count,
        "edges": edge_count,
        "density": nx.density(G),
    }

    # Overall reciprocity is undefined for an edgeless graph: nx.reciprocity
    # raises NetworkXError there (it does not return None), so guard on the
    # edge count and report NaN instead of crashing.
    metrics["reciprocity"] = float(nx.reciprocity(G)) if edge_count > 0 else np.nan

    # Clustering / transitivity computed on the undirected projection; a
    # triangle needs at least three nodes.
    if node_count >= 3:
        undirected = G.to_undirected()
        metrics["transitivity_undirected"] = nx.transitivity(undirected)
        metrics["avg_clustering_undirected"] = nx.average_clustering(undirected)
    else:
        metrics["transitivity_undirected"] = np.nan
        metrics["avg_clustering_undirected"] = np.nan

    return metrics


def compare_metrics(G2012: nx.DiGraph, G2022: nx.DiGraph) -> pd.DataFrame:
    """
    Tabulate the global metrics of two graphs side by side.

    The result is indexed by metric name and has the columns "2012", "2022"
    and "change" (the "2022" value minus the "2012" value). The column labels
    are fixed; they do not depend on which graphs are passed in.
    """
    table = pd.DataFrame({
        "2012": global_network_metrics(G2012),
        "2022": global_network_metrics(G2022),
    })
    table["change"] = table["2022"] - table["2012"]
    return table


# Compare the two yearly networks; relies on G2012 / G2022 built in the first cell.
metrics_comparison = compare_metrics(G2012=G2012, G2022=G2022)
print(metrics_comparison)


                                  2012         2022      change
nodes                       221.000000   223.000000    2.000000
edges                      3535.000000  3986.000000  451.000000
density                       0.072707     0.080515    0.007809
reciprocity                   0.290240     0.334170    0.043929
transitivity_undirected       0.354080     0.401275    0.047196
avg_clustering_undirected     0.672891     0.688680    0.015789
