## 03e - Output: Household size

- **Project:** _Families, households, networks: Rethinking the relational structure of families through large-scale network data_ <br>
- **Authors:** Nicolás Soler (ORCID 0009-0001-4239-9396), Tom Emery, Agnieszka Kanas <br>
- **Last updated:** January 2026 <br>
- **Full research article published in journal:** _Demography_ (2026)

In [None]:
import yaml
import polars as pl
import polars.selectors as cs
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as patches

In [None]:
# Load the YAML configuration; later cells read the input sample location and
# the figure output location from it.
path_config = 'config.yml'
# Decode explicitly as UTF-8 so that parsing does not depend on the platform's
# default locale encoding (which is not UTF-8 on every system).
with open(path_config, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

In [None]:
# Read household size from the sample file.
# Column types enforced while parsing; the keys double as the list of columns kept.
dtypes_size = {
    "id_hhd": pl.Int64,
    "is_ego_child": pl.Int64,
    "hhd_size": pl.Int64,
}

# Build the query lazily, one step at a time; nothing is read until .collect().
hhdsize = pl.scan_csv(
    config["data"]["sample"],
    separator=",",
    encoding="utf8",
    schema_overrides=dtypes_size,
)
# Rows where the ego is flagged as a child
hhdsize = hhdsize.filter(pl.col("is_ego_child").eq(1))
hhdsize = hhdsize.select(list(dtypes_size))
# Keep only one row per ego-household with child 0-4.
# keep="any" makes no promise about which duplicate row survives.
hhdsize = hhdsize.unique(subset=["id_hhd"], keep="any")
# The flag is constant after the filter above, so it is dropped before materialising.
hhdsize = hhdsize.drop("is_ego_child").collect()

In [None]:
# Keep only household size values that occur at least 10 times.
# filter_10obs: one row per hhd_size with its number of observations in column "len".
filter_10obs = hhdsize.group_by("hhd_size").agg(pl.len())
hhdsize_plot = (
    hhdsize
    # Attach the per-size count to every household row
    .join(filter_10obs, on="hhd_size", how="left")
    .filter(pl.col("len") >= 10)
    # The count was only needed for filtering
    .drop("len")
)

In [None]:
# Restrict the plotting data to ego-households whose size is below 15
hhdsize_plot = hhdsize_plot.filter(pl.col("hhd_size").lt(15))

In [None]:
# Number of households left in the plotting data (size <15, after the filters above)
hhdsize_plot.height

# 1 - Plot figure

In [None]:
# Plot histogram of household size (bins of width 1)
bar_colour = "#785EF0"
x_ticks = list(range(0, 16))
# First tick sits at the lower y-limit (10), then one tick every 25,000 up to 200,000
y_ticks = [10] + [25000 * step for step in range(1, 9)]

fig_7, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))

# Optional dotted horizontal gridlines, currently disabled:
# ax.grid(True, axis="y", color = "#DED3D1", linestyle = ":")
sns.histplot(data=hhdsize_plot, x="hhd_size", binwidth=1, color=bar_colour, ax=ax)

# Axes: fixed ranges, with tick labels identical to the tick positions
ax.set_xlim(0, 15)
ax.set_xticks(ticks=x_ticks, labels=x_ticks)
ax.set_ylim(10, 225000)
ax.set_yticks(ticks=y_ticks, labels=y_ticks)
ax.set_xlabel("Household size")

# Annotate the largest household size. It is taken from `hhdsize`, i.e. before
# the ">= 10 observations" and "< 15" filters were applied to the plotted data.
ax.annotate("Maximum:", xy=(10.85, 8000), fontsize=11, color="black")
ax.annotate(
    hhdsize["hhd_size"].max(),
    xy=(13.5, 7700),
    fontsize=11,
    weight="bold",
    color=bar_colour,
)


In [None]:
# Store the figure at the location configured under output -> fig_7_hhdsize.
# fig_7 is already the Figure returned by plt.subplots, so it is saved directly;
# calling get_figure() on it would only hand back the same object.
fig_7.savefig(config["output"]["fig_7_hhdsize"], bbox_inches="tight", dpi=400)