## 03c - Output: Network density

- **Project:** _Families, households, networks: Rethinking the relational structure of families through large-scale network data_ <br>
- **Authors:** Nicolás Soler (ORCID 0009-0001-4239-9396), Tom Emery, Agnieszka Kanas <br>
- **Last updated:** January 2026 <br>
- **Full research article published in journal:** _Demography_ (2026)

In [None]:
import yaml
import polars as pl
import pandas as pd
import seaborn as sns
import seaborn.objects as so
from seaborn import axes_style
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Load YAML configuration
# (provides the input and output paths read later via config["data"] / config["output"])
path_config = 'config.yml'
# Open explicitly as UTF-8 so parsing does not depend on the platform's default encoding
with open(path_config, 'r', encoding='utf-8') as f:
    # safe_load only builds plain Python objects (dicts, lists, scalars)
    config = yaml.safe_load(f)

In [None]:
# Read densities for each ego household with a child aged 0-4

# Column types enforced while parsing the sample CSV: integer household id,
# floating-point density for each of the three density columns
dtypes_density = {
    "id_hhd": pl.Int64,
    "density_2": pl.Float64,
    "density_3": pl.Float64,
    "density_4": pl.Float64,
}

# Lazy scan: the query below is only executed on .collect()
sample_scan = pl.scan_csv(
    config["data"]["sample"],
    separator=",",
    encoding="utf8",
    schema_overrides=dtypes_density,
)

density = (
    sample_scan
    # Rows flagged as ego child only
    .filter(pl.col("is_ego_child") == 1)
    # Household id plus the density columns, in the order declared above
    .select(list(dtypes_density))
    # Keep only one row per ego-household with child 0-4
    # (keep="any": no guarantee about which duplicate row is retained)
    .unique(subset=["id_hhd"], keep="any")
    .collect()
)

# 1 - Prepare dataframe

In [None]:
# Dataframe for density plot

# Wide columns to reshape: wide_to_long splits each name at "_" into the
# stub ("density") and a suffix that becomes the "dist" column
list_cols = ["density_2", "density_3", "density_4"]
stub = "density"

# The reshape is done in pandas, so convert the needed columns first
density_wide = density.select([*list_cols, "id_hhd"]).to_pandas()

# One row per (id_hhd, dist) with a single "density" value column
density_long = pd.wide_to_long(
    density_wide,
    stubnames=stub,
    i="id_hhd",
    j="dist",
    sep="_",
)

# Back to Polars; include_index turns the (id_hhd, dist) index into columns
density_plot = pl.from_pandas(density_long, include_index=True)

# Number of rows sharing the same density value within the same dist
n_same_value = pl.col("dist").len().over(["dist", "density"])

# Keep only density values with at least 10 observations per dist
density_plot = (
    density_plot
    .sort(["id_hhd", "dist"])
    .with_columns(len=n_same_value)
    .filter(pl.col("len") >= 10)
    .sort("id_hhd")
)

# 2 - Plot figure

In [None]:
# Initialise figure with matplotlib.
# The Figure is created directly and handed to seaborn through .on(fig_4)
# below, so fig_4 is the object every later adjustment is applied to.
fig_4 = mpl.figure.Figure(figsize=(8, 3), layout="tight", dpi=400)

# Plot main content with seaborn objects (so)
p = (
    # Density on the x axis, one colour per network distance
    so.Plot(data=density_plot, x="density", color="dist")
    # One subplot per distance value (dist comes from density_2 / _3 / _4)
    .facet("dist")
    # Add histogram: counts in 50 bins, fully opaque bars
    .add(
        so.Bars(alpha=1),
        so.Hist(bins=50, stat="count"),
    )
    # Add KDE with half the default bandwidth
    # NOTE(review): so.KDE yields density values rather than counts, so this
    # line may fall below the y-axis lower limit of 10 set further down;
    # confirm that it is actually visible in the rendered figure.
    .add(
        so.Line(),
        so.KDE(bw_adjust=0.5),
    )
    # Adjust axes
    .scale(
        # Fixed tick positions; the tick labels set after plotting must stay
        # aligned with these lists
        x=so.Continuous().tick(at=[0, 0.2, 0.4, 0.6, 0.8, 1]),
        y=so.Continuous().tick(at=[10, 20000, 40000, 60000, 80000, 100000]),
        # Colorblind-friendly palette
        color=["#DC267F", "#FE6100", "#FFB000"],
    )
    # Axes limits
    .limit(x=(0, 1), y=(10, 100000))
    # Label axes; the title callable receives each facet's dist value
    .label(
        x="Density",
        y="Count",
        title="By distance {}".format,
    )
    # Configure theme
    .theme({
        **axes_style("whitegrid"),
        "axes.edgecolor": "black",
        "grid.linestyle": ":",
        "legend.frameon": False,
        "legend.fontsize": 20,
    })
    # Plot on figure
    .on(fig_4)
).plot()

# Tick labels, set on the first subplot of the figure.
# fig_4 is used instead of the plotter's private _figure attribute: it is the
# same figure (passed through .on above) and does not rely on seaborn internals.
first_ax = fig_4.axes[0]
first_ax.xaxis.set_ticklabels(["0", "0.2", "0.4", "0.6", "0.8", "1"])
first_ax.yaxis.set_ticklabels(["10", "20k", "40k", "60k", "80k", "100k"])

# No legend
fig_4.legends.clear()

# Supertitle
# fig_4.suptitle("Distribution of network density at cumulative network distance", size = 12, y = 0.95)

# NOTE(review): fig_4 was not created through pyplot, so plt.show() may not
# render it; confirm how the figure is meant to be displayed in the notebook.
plt.show()

In [None]:
# Store: write the plotted figure to the path configured under
# output -> fig_4_density, at 400 dpi (the same dpi fig_4 was created with)
p.save(config["output"]["fig_4_density"], dpi = 400)