In [2]:
# Download EU-LFS aggregates for telework (Eurostat) – dataset: lfsa_ehomp
# Output: data/external/eurostat_lfsa_ehomp_ES_EU27_2015_2024.csv  (tidy: one row per year)
#
# NOTE: This downloads PUBLIC AGGREGATES (not SUF microdata).
#       It fetches Spain (ES) and EU27_2020 for 2015–2024.

from pathlib import Path
import pandas as pd
import eurostat

# --- Settings ---
geo_list = ["ES", "EU27_2020"]
year_min, year_max = 2015, 2024

# --- Output folder ---
datadir = Path("../data/external")
datadir.mkdir(parents=True, exist_ok=True)
out_csv = datadir / "eurostat_lfsa_ehomp_ES_EU27_2015_2024.csv"

# --- Download ---
df = eurostat.get_data_df(
    "lfsa_ehomp",                 # Employed persons working from home (% of employment)
    filter_pars={"geo": geo_list},
    flags=False
).reset_index(drop=False)

# --- Normalize geo column name (eurostat lib may return 'geo\\TIME_PERIOD') ---
geo_col = next(c for c in df.columns if c.lower().startswith("geo"))
df = df.rename(columns={geo_col: "geo"})

# --- Wide years -> long tidy ---
year_cols = [c for c in df.columns if str(c).isdigit()]
id_vars = [c for c in df.columns if c not in year_cols]
tidy = df.melt(id_vars=id_vars, value_vars=year_cols, var_name="year", value_name="value")

# --- Types & year filter ---
tidy["year"] = pd.to_numeric(tidy["year"], errors="coerce")
tidy = tidy.dropna(subset=["year"])
tidy = tidy[(tidy["year"] >= year_min) & (tidy["year"] <= year_max)]

# --- Optional: keep common cuts if present (Total sex, 20–64, % of employment) ---
def _safe_filter(d, col, allowed):
    if col in d.columns:
        d = d[d[col].astype(str).str.upper().isin([a.upper() for a in allowed])]
    return d

# Common dimension names used by Eurostat in this table (may vary by vintage)
tidy = _safe_filter(tidy, "sex", ["T"])             # Total
tidy = _safe_filter(tidy, "age", ["Y20-64"])        # 20–64
tidy = _safe_filter(tidy, "unit", ["PC_EMP", "PC"]) # percent of employment (keep either label)
tidy = _safe_filter(tidy, "wstatus", ["EMP"])       # employed total (if present)

# --- Keep only needed columns + sort ---
keep_cols = ["geo", "year", "value"] + [c for c in ["sex", "age", "unit", "wstatus"] if c in tidy.columns]
tidy = tidy[keep_cols].sort_values(["geo", "year"]).reset_index(drop=True)

# --- Save ---
tidy.to_csv(out_csv, index=False)
print(f"Saved: {out_csv}  rows={len(tidy)}  cols={list(tidy.columns)}")


Saved: ..\data\external\eurostat_lfsa_ehomp_ES_EU27_2015_2024.csv  rows=60  cols=['geo', 'year', 'value', 'sex', 'age', 'unit', 'wstatus']
