# Supply Chain Graph Demo

A quick, self-contained walkthrough that loads the supply chain CSV, projects a product co-purchase graph, and visualizes the result. Run the optional install cell below if you do not already have the graph/plotting libraries in your environment.

In [None]:
# Optional: install plotting deps if they are missing in your environment
%pip -q install networkx matplotlib seaborn


In [None]:
from pathlib import Path
import itertools

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use("seaborn-v0_8")
%matplotlib inline

DATA_PATH = Path("data/DataCoSupplyChainDataset.csv")
assert DATA_PATH.exists(), "Expected data/DataCoSupplyChainDataset.csv to be present"

# The CSV uses latin-1 encoding
df = pd.read_csv(DATA_PATH, encoding="latin-1")
df.shape

## Quick look at the catalog

In [None]:
# Top-10 products ranked by the number of distinct orders that contain them.
# value_counts() would tally rows, so a product appearing on several rows of
# the same order would be counted more than once; counting unique Order Ids
# keeps the figure consistent with the "orders_with_product" label.
product_counts = (
    df.groupby("Product Name")["Order Id"]
    .nunique()
    .sort_values(ascending=False)
    .head(10)
)
display(product_counts.to_frame(name="orders_with_product"))

# Distinct orders placed per customer, summarized by min / mean / median / max.
orders_per_customer = df.groupby("Order Customer Id")["Order Id"].nunique()
orders_per_customer.describe()[["min", "mean", "50%", "max"]]

## Build a co-purchase edge list

We treat every order as a basket of products and connect products that co-occur in the same order. To keep things light, we limit the analysis to orders with 2–10 unique products and cap the number of baskets we process at the first 4,000.

In [None]:
# Reduce the frame to (order, product) pairs; rows missing either value are dropped.
orders = df.loc[:, ["Order Id", "Product Name"]].dropna()

# One basket per order: its distinct product names, in first-seen order.
baskets_by_order = orders.groupby("Order Id")["Product Name"].apply(
    lambda names: names.drop_duplicates().tolist()
)

# Retain baskets holding 2-10 distinct products, then cap at the first 4k for speed.
basket_sizes = baskets_by_order.str.len()
basket_series = baskets_by_order[basket_sizes.between(2, 10)].head(4000)
len(basket_series)

In [None]:
# Build weighted edges: weight = number of baskets in which a product pair co-occurs.
# Products are sorted before pairing so that (a, b) and (b, a) share one key.
edge_weights = {}
for products in basket_series:
    for pair in itertools.combinations(sorted(products), 2):
        edge_weights[pair] = edge_weights.get(pair, 0) + 1

# Columns are declared explicitly so that an empty edge list still produces a
# frame with a "weight" column; without them sort_values("weight") raises
# KeyError when no basket qualified.
edges_df = pd.DataFrame(
    [(a, b, w) for (a, b), w in edge_weights.items()],
    columns=["source", "target", "weight"],
).sort_values("weight", ascending=False)
edges_df.head()

## Build the graph and compute simple rankings

In [None]:
# Focus on the stronger co-purchases to keep the visualization readable:
# pairs seen together at least twice, limited to the 120 heaviest rows of
# the (already weight-sorted) edge list.
top_edges = edges_df.query("weight >= 2").head(120)

# Build the undirected graph straight from the edge table instead of looping
# over rows with iterrows(); each edge carries its co-occurrence count as the
# "weight" attribute.
G = nx.from_pandas_edgelist(
    top_edges, source="source", target="target", edge_attr="weight"
)

# Rank products by degree centrality as computed by networkx.
degree_centrality = nx.degree_centrality(G)
ranked = (
    pd.Series(degree_centrality, name="degree_centrality")
    .sort_values(ascending=False)
    .head(10)
)
display(ranked.to_frame())
len(G), G.number_of_edges()

## Visualize the co-purchase network

In [None]:
# Draw the co-purchase graph on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 10))

# Seeded spring layout; the same positions are reused by the community plot.
pos = nx.spring_layout(G, k=0.35, seed=42)

# Edge weights drive line width; degree centrality drives node size.
weights = [w for _, _, w in G.edges(data="weight")]
nodes = list(G)
node_sizes = [80 + 400 * degree_centrality[n] for n in nodes]

nx.draw_networkx_edges(
    G,
    pos,
    ax=ax,
    alpha=0.3,
    width=[0.5 * w for w in weights],
    edge_color="#7aa6c7",
)
nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_size=node_sizes,
    node_color="#ef8a62",
    alpha=0.8,
    linewidths=0.5,
    edgecolors="#333",
)
nx.draw_networkx_labels(G, pos, ax=ax, font_size=8)

ax.set_title("Product co-purchase network (top edges)")
ax.set_axis_off()
plt.show()

## Communities (greedy modularity)

NetworkX includes a fast, approximate community detector. We color nodes by detected community to reveal product clusters that frequently co-occur.

In [None]:
# Detect communities, then map every product to the index of its community.
communities = list(nx.algorithms.community.greedy_modularity_communities(G))
community_map = {
    member: community_id
    for community_id, members in enumerate(communities)
    for member in members
}

# One colour per community; the modulo guards the palette lookup.
palette = sns.color_palette("tab10", n_colors=max(1, len(communities)))
node_colors = [palette[community_map[n] % len(palette)] for n in G.nodes()]

# Reuse the layout, edge weights and node sizes from the previous plot so the
# two figures are directly comparable.
fig, ax = plt.subplots(figsize=(12, 10))
nx.draw_networkx_edges(
    G,
    pos,
    ax=ax,
    alpha=0.25,
    width=[0.4 * w for w in weights],
    edge_color="#a6bddb",
)
nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_size=node_sizes,
    node_color=node_colors,
    alpha=0.85,
    linewidths=0.5,
    edgecolors="#333",
)
nx.draw_networkx_labels(G, pos, ax=ax, font_size=8)

ax.set_title("Communities in the co-purchase graph (greedy modularity)")
ax.set_axis_off()
plt.show()

# Summary table: community index alongside its member count.
pd.DataFrame(
    {
        "community": list(range(len(communities))),
        "size": list(map(len, communities)),
    }
)

## Next steps

- Swap in a different projection (e.g., higher weight threshold or more baskets) to see the structure change.
- Export `edges_df` to Neo4j and compare the NetworkX metrics with those from the Neo4j Graph Data Science (GDS) library (PageRank, Louvain).
- Use the top central products as candidates for cross-sell bundles.