In [1]:
# Import necessary libraries

import pandas as pd
import requests


In [2]:
def eurostat_series(dataset, params, freq, value_name="value"):
    """Download a single Eurostat time series as a tidy DataFrame.

    Parameters
    ----------
    dataset : str
        Eurostat dataset code, appended to the dissemination API base URL.
    params : mapping
        Query filters sent to the API. Together they must narrow every
        non-time dimension down to one category. The mapping is copied,
        so the caller's object is never mutated.
    freq : str
        pandas period frequency used to parse the time labels. For "M",
        labels such as ``2020M01`` are first rewritten to ``2020-01``.
    value_name : str, default "value"
        Name of the column that holds the observations.

    Returns
    -------
    pandas.DataFrame
        Columns ``[value_name, "time"]`` with a fresh 0..n-1 index, ordered
        by period label. ``time`` is the last day of each period at
        midnight. Periods without an observation are dropped.

    Raises
    ------
    ConnectionError
        If the request fails, is redirected to the Eurostat "sorry" page,
        or the body is not valid JSON.
    ValueError
        If the response still contains more than one category in a
        dimension other than time.
    """
    params = dict(params)
    params["format"] = "JSON"
    EUROSTAT_BASE = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data"
    try:
        r = requests.get(f"{EUROSTAT_BASE}/{dataset}", params=params, timeout=30)
        if "sorry.ec.europa.eu" in r.url:
            raise ConnectionError(
                "Eurostat API unavailable (redirected to sorry.ec.europa.eu)."
            )
        r.raise_for_status()
        j = r.json()
    except (requests.RequestException, ValueError) as exc:
        raise ConnectionError(
            "Eurostat API unavailable or returned invalid JSON."
        ) from exc

    # The lookup below uses a period's position on the time axis as its
    # position in the flat value array. That only holds when every other
    # dimension has exactly one category, so reject ambiguous responses
    # rather than silently reading the wrong cells. This check sits outside
    # the try block so the ValueError is not re-labelled as a ConnectionError.
    unpinned = [
        dim
        for dim, size in zip(j.get("id") or [], j.get("size") or [])
        if dim != "time" and size > 1
    ]
    if unpinned:
        raise ValueError(
            "Eurostat query must select a single category for every "
            f"non-time dimension; still open: {', '.join(unpinned)}."
        )

    time_index = j["dimension"]["time"]["category"]["index"]
    values = j.get("value", {})
    if isinstance(values, list):
        # JSON-stat also allows a dense array; normalise it to the sparse
        # {"position": value} form handled below.
        values = {str(pos): val for pos, val in enumerate(values)}
    df = (
        pd.DataFrame(
            [(t, values.get(str(i))) for t, i in time_index.items()],
            columns=["period", value_name],
        )
        .dropna()
        .sort_values("period")
    )
    periods = df.pop("period").astype(str)
    if freq == "M":
        # Rewrite "2020M01" as "2020-01" before handing it to PeriodIndex.
        periods = periods.str.replace(r"^(\d{4})M(\d{2})$", r"\1-\2", regex=True)
    # End-of-period timestamp, truncated to midnight.
    df["time"] = (
        pd.PeriodIndex(periods, freq=freq).to_timestamp(how="end").normalize()
    )
    return df.reset_index(drop=True)


def eurostat_components(
    dataset,
    base_params,
    items,
    freq,
    dim="na_item",
    how="inner",
):
    """Fetch several series from one dataset and join them on ``time``.

    One request is made per entry of ``items`` by adding ``{dim: code}`` to
    ``base_params`` and calling ``eurostat_series``.

    Parameters
    ----------
    dataset : str
        Eurostat dataset code.
    base_params : mapping
        Filters common to every request.
    items : list, tuple or mapping
        Codes to download. A list/tuple uses each code as its own column
        name; a mapping goes from code to output column name.
    freq : str
        Period frequency forwarded to ``eurostat_series``.
    dim : str, default "na_item"
        Name of the dimension that the codes belong to.
    how : str, default "inner"
        Join type passed to ``DataFrame.merge``.

    Returns
    -------
    pandas.DataFrame
        One column per item plus ``time``, sorted by ``time`` with a reset
        index. With no items, an empty frame holding only a ``time`` column.
    """
    # A bare sequence of codes means "name each column after its code".
    if isinstance(items, (list, tuple)):
        code_to_column = dict(zip(items, items))
    else:
        code_to_column = items

    series_frames = [
        eurostat_series(
            dataset,
            dict(base_params, **{dim: code}),
            freq,
            value_name=column,
        )
        for code, column in code_to_column.items()
    ]
    if len(series_frames) == 0:
        return pd.DataFrame(columns=["time"])

    # Fold the remaining frames into the first one, left to right.
    remaining = iter(series_frames)
    merged = next(remaining)
    for other in remaining:
        merged = merged.merge(other, on="time", how=how)
    return merged.sort_values("time").reset_index(drop=True)


In [3]:
# Filters shared by the national-accounts queries in this cell.
base_params = {"geo": "PT", "unit": "CP_MEUR"}

# GDP and its components (annual, current prices), grouped by the identity
# each code is used in further down.
components = [
    # headline GDP
    "B1GQ",
    # expenditure approach
    "P31_S14_S15", "P31_S13", "P32_S13", "P51G", "P52_P53", "P6", "P7",
    # income approach
    "D1", "B2A3G", "D2", "D3",
    # production approach
    "B1G", "D21", "D31",
]

y = eurostat_components("nama_10_gdp", base_params, components, freq="Y")

# Each gdp_eo_* column is headline GDP minus the total rebuilt from one
# approach, i.e. the residual of that accounting identity.
expenditure_side = (
    y["P31_S14_S15"]
    + y["P31_S13"]
    + y["P32_S13"]
    + y["P51G"]
    + y["P52_P53"]
    + y["P6"]
    - y["P7"]
)
y["gdp_eo_exp"] = y["B1GQ"] - expenditure_side

income_side = y["D1"] + y["B2A3G"] + y["D2"] - y["D3"]
y["gdp_eo_inc"] = y["B1GQ"] - income_side

production_side = y["B1G"] + y["D21"] - y["D31"]
y["gdp_eo_prod"] = y["B1GQ"] - production_side

# B1G by industry (annual, current prices)
nace_components = [
    "TOTAL", "A", "B-E", "F", "G-I", "J", "K", "L", "M_N", "O-Q", "R-U",
]

# Outer join: keep a year even when only some industries report a value.
b1g_by_nace = eurostat_components(
    "nama_10_a10",
    {**base_params, "na_item": "B1G"},
    nace_components,
    freq="Y",
    dim="nace_r2",
    how="outer",
)


In [4]:
import plotly.graph_objects as go

# Expenditure approach: every demand component as a percentage of GDP,
# drawn as one stacked bar per year.
df = (
    y.dropna(subset=["B1GQ"])
    .sort_values("time")
    .assign(
        public_consumption=lambda d: d["P31_S13"] + d["P32_S13"],
        net_exports=lambda d: d["P6"] - d["P7"],
        # Annual observations carry a 31 December stamp; shift them back six
        # months (presumably so each bar sits mid-year on the date axis).
        time=lambda d: d["time"] - pd.DateOffset(months=6),
    )
)

# (column, legend label) pairs in stacking order.
components = [
    ("P31_S14_S15", "Consumo privado (P31_S14_S15)"),
    ("public_consumption", "Consumo público (P31_S13+P32_S13)"),
    ("P51G", "Investimento físico (P51G)"),
    ("P52_P53", "Variação de existências e valorações (P52_P53)"),
    ("net_exports", "Exportações líquidas (P6-P7)"),
]

colors = dict(
    P31_S14_S15="rgba(37, 99, 235, 0.6)",
    public_consumption="rgba(16, 185, 129, 0.6)",
    P51G="rgba(249, 115, 22, 0.6)",
    P52_P53="rgba(239, 68, 68, 0.6)",
    net_exports="rgba(107, 114, 128, 0.6)",
)

df = df.assign(
    **{f"{col}_share": df[col] / df["B1GQ"] * 100 for col, _ in components}
)

fig = go.Figure(
    data=[
        go.Bar(
            x=df["time"],
            y=df[f"{col}_share"],
            name=label,
            marker_color=colors[col],
        )
        for col, label in components
    ]
)

fig.update_layout(
    # "relative" stacks positive and negative shares on opposite sides of zero.
    barmode="relative",
    template="plotly_white",
    title="Portugal: componentes do PIB pela despesa (% do PIB)",
    title_x=0.5,
    legend_title_text="",
    xaxis_title="",
    yaxis_title="",
    legend=dict(
        orientation="h",
        x=0.5,
        y=-0.2,
        xanchor="center",
        yanchor="top",
        bgcolor="rgba(0,0,0,0)",
        borderwidth=0,
    ),
)
fig.update_xaxes(dtick="M36", tickformat="%Y", hoverformat="%Y")
fig.update_yaxes(tickformat=".1f", ticksuffix="%")
fig.show()


In [5]:
import plotly.graph_objects as go

# Income approach: each income component as a percentage of GDP, drawn as
# one stacked bar per year.
df = (
    y.dropna(subset=["B1GQ"])
    .sort_values("time")
    .assign(
        D2_D3=lambda d: d["D2"] - d["D3"],
        # Annual observations carry a 31 December stamp; shift them back six
        # months (presumably so each bar sits mid-year on the date axis).
        time=lambda d: d["time"] - pd.DateOffset(months=6),
    )
)

# (column, legend label) pairs in stacking order.
components = [
    ("D1", "Remuneração dos empregados (D1)"),
    ("B2A3G", "Excedente bruto de exploração e rendimento misto (B2A3G)"),
    ("D2_D3", "Impostos líquidos sobre a produção e importações (D2-D3)"),
]

colors = dict(
    D1="rgba(37, 99, 235, 0.6)",
    B2A3G="rgba(16, 185, 129, 0.6)",
    D2_D3="rgba(249, 115, 22, 0.6)",
)

df = df.assign(
    **{f"{col}_share": df[col] / df["B1GQ"] * 100 for col, _ in components}
)

fig = go.Figure(
    data=[
        go.Bar(
            x=df["time"],
            y=df[f"{col}_share"],
            name=label,
            marker_color=colors[col],
        )
        for col, label in components
    ]
)

fig.update_layout(
    # "relative" stacks positive and negative shares on opposite sides of zero.
    barmode="relative",
    template="plotly_white",
    title="Portugal: componentes do PIB pela ótica do rendimento (% do PIB)",
    title_x=0.5,
    legend_title_text="",
    xaxis_title="",
    yaxis_title="",
    legend=dict(
        orientation="h",
        x=0.5,
        y=-0.2,
        xanchor="center",
        yanchor="top",
        bgcolor="rgba(0,0,0,0)",
        borderwidth=0,
    ),
)
fig.update_xaxes(dtick="M36", tickformat="%Y", hoverformat="%Y")
fig.update_yaxes(tickformat=".1f", ticksuffix="%")
fig.show()


In [19]:
import plotly.graph_objects as go

# Production approach: value added by industry plus net taxes on products,
# each as a percentage of GDP, drawn as one stacked bar per year.
df = (
    b1g_by_nace.merge(
        y[["time", "B1GQ", "D21", "D31"]], on="time", how="inner"
    )
    .sort_values("time")
    .assign(
        net_taxes=lambda d: d["D21"] - d["D31"],
        # Annual observations carry a 31 December stamp; shift them back six
        # months (presumably so each bar sits mid-year on the date axis).
        time=lambda d: d["time"] - pd.DateOffset(months=6),
    )
)

# Industries that were actually downloaded; the TOTAL aggregate is left out
# so it is not stacked on top of its own parts.
sector_components = [
    code
    for code in nace_components
    if code in df.columns and code != "TOTAL"
]

sector_labels = dict(
    [
        ("A", "Agric., silvic. e pesca (A)"),
        ("B-E", "Indústria (B-E)"),
        ("F", "Construção (F)"),
        ("G-I", "Comércio, transp. e aloj. (G-I)"),
        ("J", "Informação e comunicação (J)"),
        ("K", "Atividades fin. e de seguros (K)"),
        ("L", "Atividades imobiliárias (L)"),
        ("M_N", "Atividades prof. e admin. (M_N)"),
        ("O-Q", "Admin. púb., educ. e saúde (O-Q)"),
        ("R-U", "Artes e outros serviços (R-U)"),
    ]
)

# (column, legend label) pairs in stacking order; unlabeled codes fall back
# to the raw code.
components = [
    (code, sector_labels.get(code, code)) for code in sector_components
] + [("net_taxes", "Impostos líq. nos prod. (D21-D31)")]

palette = [
    "rgba(37, 99, 235, 0.6)",
    "rgba(16, 185, 129, 0.6)",
    "rgba(249, 115, 22, 0.6)",
    "rgba(239, 68, 68, 0.6)",
    "rgba(107, 114, 128, 0.6)",
    "rgba(14, 165, 233, 0.6)",
    "rgba(168, 85, 247, 0.6)",
    "rgba(234, 179, 8, 0.6)",
    "rgba(20, 184, 166, 0.6)",
    "rgba(236, 72, 153, 0.6)",
    "rgba(148, 163, 184, 0.6)",
]

# Repeat the palette often enough that every component gets a colour.
palette = palette * (len(components) // len(palette) + 1)
colors = {code: shade for (code, _), shade in zip(components, palette)}

df = df.assign(
    **{f"{col}_share": df[col] / df["B1GQ"] * 100 for col, _ in components}
)

fig = go.Figure(
    data=[
        go.Bar(
            x=df["time"],
            y=df[f"{col}_share"],
            name=label,
            marker_color=colors[col],
        )
        for col, label in components
    ]
)

fig.update_layout(
    # "relative" stacks positive and negative shares on opposite sides of zero.
    barmode="relative",
    template="plotly_white",
    title="Portugal: componentes do PIB pela ótica da produção (% do PIB)",
    title_x=0.5,
    legend_title_text="",
    xaxis_title="",
    yaxis_title="",
    legend=dict(
        x=1.02,
        y=0.5,
        xanchor="left",
        yanchor="middle",
        bgcolor="rgba(0,0,0,0)",
        borderwidth=0,
    ),
)
fig.update_xaxes(dtick="M36", tickformat="%Y", hoverformat="%Y")
fig.update_yaxes(tickformat=".1f", ticksuffix="%")
fig.show()


In [27]:
# Benchmark years; each one becomes a column of the summary table built in this cell.
years = [2004, 2014, 2024]


def series_by_year(df, col, years):
    """Return the values of ``col`` for the requested calendar years.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``time`` column and the column ``col``.
        The frame is not modified.
    col : str
        Column to extract.
    years : list of int
        Calendar years to keep, in output order.

    Returns
    -------
    pandas.Series
        Indexed by year (index name ``year``) in the order of ``years``;
        years with no row in ``df`` come back as NaN.
    """
    with_year = df.assign(year=df["time"].dt.year)
    wanted = with_year.loc[with_year["year"].isin(years)]
    return wanted.set_index("year")[col].reindex(years)


def add_row(rows, approach, name, code, values, years):
    """Append one table record to ``rows`` in place.

    The record holds the three descriptive fields followed by one entry per
    year, keyed by the year as a string and read from ``values.loc[year]``.
    Nothing is returned.
    """
    record = {"Abordagem": approach, "Componente": name, "Código": code}
    record.update({str(yr): values.loc[yr] for yr in years})
    rows.append(record)


# GDP level in the benchmark years; also the denominator of every share below.
gdp_series = series_by_year(y[["time", "B1GQ"]], "B1GQ", years)


def _share_of_gdp(frame, col):
    """Column ``col`` of ``frame`` in the benchmark years, as a % of GDP."""
    return series_by_year(frame, col, years) / gdp_series * 100


# First row reports the GDP level itself; all later rows are shares of it.
rows = []
add_row(rows, "-", "PIB a preços correntes", "B1GQ", gdp_series, years)

# --- Expenditure approach -------------------------------------------------
exp_df = y[
    ["time", "B1GQ", "P31_S14_S15", "P31_S13", "P32_S13", "P51G", "P52_P53", "P6", "P7"]
].assign(
    public_consumption=lambda d: d["P31_S13"] + d["P32_S13"],
    net_exports=lambda d: d["P6"] - d["P7"],
)

# (column, display name, code shown in the table)
exp_components = [
    ("P31_S14_S15", "Consumo privado", "P31_S14_S15"),
    ("public_consumption", "Consumo público", "P31_S13+P32_S13"),
    ("P51G", "Investimento físico", "P51G"),
    ("P52_P53", "Variação de existências e valorações", "P52_P53"),
    ("net_exports", "Exportações líquidas", "P6-P7"),
]

for col, name, code in exp_components:
    add_row(rows, "Despesa", name, code, _share_of_gdp(exp_df, col), years)

# --- Income approach ------------------------------------------------------
inc_df = y[["time", "B1GQ", "D1", "B2A3G", "D2", "D3"]].assign(
    D2_D3=lambda d: d["D2"] - d["D3"]
)

inc_components = [
    ("D1", "Remuneração dos empregados", "D1"),
    ("B2A3G", "Excedente bruto de exploração e rendimento misto", "B2A3G"),
    ("D2_D3", "Impostos líquidos sobre a produção e importações", "D2-D3"),
]

for col, name, code in inc_components:
    add_row(rows, "Rendimento", name, code, _share_of_gdp(inc_df, col), years)

# --- Production approach --------------------------------------------------
prod_df = (
    b1g_by_nace.merge(
        y[["time", "B1GQ", "D21", "D31"]], on="time", how="inner"
    )
    .sort_values("time")
    .assign(net_taxes=lambda d: d["D21"] - d["D31"])
)

# Industries that were actually downloaded, without the TOTAL aggregate.
sector_components = [
    code
    for code in nace_components
    if code in prod_df.columns and code != "TOTAL"
]

sector_labels = dict(
    [
        ("A", "Agricultura, silvicultura e pesca"),
        ("B-E", "Indústria"),
        ("F", "Construção"),
        ("G-I", "Comércio, transportes e alojamento"),
        ("J", "Informação e comunicação"),
        ("K", "Atividades financeiras e de seguros"),
        ("L", "Atividades imobiliárias"),
        ("M_N", "Atividades profissionais e administrativas"),
        ("O-Q", "Administração pública, educação e saúde"),
        ("R-U", "Artes e outros serviços"),
    ]
)

for code in sector_components:
    add_row(
        rows,
        "Produção",
        sector_labels.get(code, code),
        code,
        _share_of_gdp(prod_df, code),
        years,
    )

add_row(
    rows,
    "Produção",
    "Impostos líquidos sobre os produtos",
    "D21-D31",
    _share_of_gdp(prod_df, "net_taxes"),
    years,
)

# One decimal place for every numeric cell, including the GDP level row.
table = pd.DataFrame(rows).round(1)
table


Unnamed: 0,Abordagem,Componente,Código,2004,2014,2024
0,-,PIB a preços correntes,B1GQ,152248.4,173186.7,289428.0
1,Despesa,Consumo privado,P31_S14_S15,63.7,66.1,60.9
2,Despesa,Consumo público,P31_S13+P32_S13,20.4,18.4,16.9
3,Despesa,Investimento físico,P51G,23.4,15.1,20.4
4,Despesa,Variação de existências e valorações,P52_P53,0.4,0.2,-0.1
5,Despesa,Exportações líquidas,P6-P7,-7.9,0.1,1.8
6,Rendimento,Remuneração dos empregados,D1,47.5,44.3,47.7
7,Rendimento,Excedente bruto de exploração e rendimento misto,B2A3G,40.4,42.9,39.2
8,Rendimento,Impostos líquidos sobre a produção e importações,D2-D3,12.1,12.8,13.1
9,Produção,"Agricultura, silvicultura e pesca",A,2.6,2.0,2.0


In [28]:
# Labor income share (annual)
labor_params = {"geo": "PT", "unit": "THS_PER"}

# EMP_DC and SAL_DC feed the EMP/SAL adjustment factor below (presumably
# total employment and employees; confirm against the dataset metadata).
labor_inputs = eurostat_components(
    "nama_10_pe", labor_params, ["EMP_DC", "SAL_DC"], freq="Y"
)

labor_share_columns = [
    "time",
    "B1GQ",
    "D1",
    "D2",
    "D3",
    "GVAfp",
    "EMP_DC",
    "SAL_DC",
    "LIS_na",
    "LIS_a",
    "LIS_bad",
]

labor_share = (
    y.merge(labor_inputs, on="time", how="inner")
    .sort_values("time")
    .assign(
        # GDP net of (taxes less subsidies on production and imports).
        GVAfp=lambda d: d["B1GQ"] - (d["D2"] - d["D3"]),
        # Unadjusted share: compensation of employees over GVAfp.
        LIS_na=lambda d: d["D1"] / d["GVAfp"],
        # Adjusted share: scaled by the EMP_DC / SAL_DC ratio.
        LIS_a=lambda d: d["LIS_na"] * (d["EMP_DC"] / d["SAL_DC"]),
        # Naive variant with GDP as denominator, kept for comparison.
        LIS_bad=lambda d: d["D1"] / d["B1GQ"],
    )
    .loc[:, labor_share_columns]
)

labor_share


Unnamed: 0,time,B1GQ,D1,D2,D3,GVAfp,EMP_DC,SAL_DC,LIS_na,LIS_a,LIS_bad
0,1995-12-31,91015.8,42592.0,12188.8,1889.3,80716.3,4528.98,3592.31,0.527675,0.665263,0.467963
1,1996-12-31,96626.8,45839.5,13108.4,2245.3,85763.7,4604.87,3636.76,0.534486,0.676767,0.474397
2,1997-12-31,103306.6,49059.8,13723.8,2144.9,91727.7,4725.55,3735.36,0.534842,0.676621,0.474895
3,1998-12-31,110683.7,52787.6,15261.9,2409.2,97831.0,4858.13,3856.25,0.539579,0.679766,0.476923
4,1999-12-31,119603.3,57079.5,16858.8,2832.7,105577.2,4933.24,3941.86,0.540642,0.676614,0.47724
5,2000-12-31,128414.4,61820.4,17437.4,2392.1,113369.1,5041.86,4028.61,0.545302,0.682453,0.481413
6,2001-12-31,135775.0,65394.8,18383.8,2663.1,120054.3,5130.09,4090.76,0.54471,0.683103,0.481641
7,2002-12-31,142554.3,68422.3,19981.4,2818.6,125391.5,5149.93,4132.89,0.545669,0.67995,0.479974
8,2003-12-31,146067.9,69832.1,20416.1,3000.8,128652.6,5100.19,4088.21,0.542796,0.677158,0.47808
9,2004-12-31,152248.4,72361.7,21246.0,2820.2,133822.6,5064.18,4102.18,0.540729,0.667535,0.475287


In [59]:
import plotly.express as px

# Legend text for each labour-share variant.
series_labels = {
    "LIS_na": "não ajustada: D1/GVAfp",
    "LIS_a": "ajustada: (D1/GVAfp)(EMP/SAL)",
    "LIS_bad": "ingénua/incorrecta: D1/PIB",
}
# Order in which the lines are melted, and therefore drawn and listed.
series_order = [series_labels[key] for key in ("LIS_a", "LIS_na", "LIS_bad")]

df = (
    labor_share[["time", "LIS_na", "LIS_a", "LIS_bad"]]
    .dropna()
    .sort_values("time")
    # Annual observations carry a 31 December stamp; shift them back six
    # months (presumably so each marker sits mid-year on the date axis).
    .assign(time=lambda d: d["time"] - pd.DateOffset(months=6))
)

df_long = (
    df.rename(columns=series_labels)
    .melt(
        id_vars="time",
        value_vars=series_order,
        var_name="series",
        value_name="lis",
    )
    # Ratios to percentages.
    .assign(lis=lambda d: d["lis"] * 100)
)

fig = px.line(
    df_long,
    x="time",
    y="lis",
    color="series",
    title="Portugal: parte do rendimento do trabalho (labor share)",
    template="plotly_white",
    color_discrete_map={
        series_labels["LIS_na"]: "red",
        series_labels["LIS_a"]: "blue",
        series_labels["LIS_bad"]: "orange",
    },
    markers=True,
    line_shape="linear",
)

# White-filled circular markers with a visible outline.
fig.update_traces(
    line=dict(width=2.5),
    marker=dict(
        size=6,
        color="white",
        symbol="circle",
        line=dict(width=2.0),
    ),
)

legend_style = dict(
    x=0.98,
    y=0.02,
    xanchor="right",
    yanchor="bottom",
    bgcolor="rgba(0,0,0,0)",
    borderwidth=0,
)
fig.update_layout(
    title_x=0.5,
    legend_title_text="",
    xaxis_title="",
    yaxis_title="",
    legend=legend_style,
)
fig.update_xaxes(dtick="M36", tickformat="%Y", hoverformat="%Y")
fig.update_yaxes(tickformat=".1f", ticksuffix="%", range=[25, 85])
fig.show()


In [61]:
# Symmetric input-output table (2017, coarse CPA level)
url = "https://ec.europa.eu/eurostat/api/dissemination/statistics/1.0/data/naio_10_cp1700"
params = {
    "geo": "PT",
    "unit": "MIO_EUR",
    "stk_flow": "TOTAL",
    "freq": "A",
    "time": "2017",
    "format": "JSON",
}

# Same failure handling as eurostat_series: a redirect to the "sorry" page,
# an HTTP error or a non-JSON body all surface as one clear ConnectionError
# instead of an opaque decode error further down.
try:
    resp = requests.get(url, params=params, timeout=30)
    if "sorry.ec.europa.eu" in resp.url:
        raise ConnectionError(
            "Eurostat API unavailable (redirected to sorry.ec.europa.eu)."
        )
    resp.raise_for_status()
    siot_json = resp.json()
except (requests.RequestException, ValueError) as exc:
    raise ConnectionError(
        "Eurostat API unavailable or returned invalid JSON."
    ) from exc


def jsonstat_to_df(j):
    """Flatten a JSON-stat dataset into a long DataFrame.

    Parameters
    ----------
    j : dict
        Parsed JSON-stat dataset. ``j["id"]`` lists the dimensions in
        storage order. Each ``category.index`` may be a mapping from code
        to position or a plain list of codes. ``value`` may be a sparse
        mapping from flat position to value, or a dense list.

    Returns
    -------
    pandas.DataFrame
        One row per combination of categories (row-major over ``j["id"]``),
        one column per dimension plus a float ``value`` column. Cells with
        no observation are NaN.
    """

    def _ordered_codes(dim_id):
        # Order codes by their declared position rather than by the order in
        # which the keys happen to appear in the JSON object.
        positions = j["dimension"][dim_id]["category"]["index"]
        if isinstance(positions, dict):
            return sorted(positions, key=positions.get)
        return list(positions)

    dims = list(j["id"])
    cats = [_ordered_codes(d) for d in dims]
    index = pd.MultiIndex.from_product(cats, names=dims)

    # Fill a plain list and build the Series once; per-element .iloc
    # assignment is far slower on large tables.
    cells = [float("nan")] * len(index)
    raw = j.get("value") or {}
    pairs = raw.items() if isinstance(raw, dict) else enumerate(raw)
    for k, v in pairs:
        if v is not None:
            cells[int(k)] = v

    values = pd.Series(cells, index=index, dtype="float64", name="value")
    return values.reset_index()


df = jsonstat_to_df(siot_json)

# CPA product codes present on each axis; other categories on those axes
# are filtered out by the "CPA_" prefix test.
siot_dimensions = siot_json["dimension"]
use_codes = [
    code
    for code in siot_dimensions["prd_use"]["category"]["index"].keys()
    if code.startswith("CPA_")
]
ava_codes = [
    code
    for code in siot_dimensions["prd_ava"]["category"]["index"].keys()
    if code.startswith("CPA_")
]
# Keep products found on both axes, in prd_use order, so the result is square.
ava_lookup = set(ava_codes)
common_codes = [code for code in use_codes if code in ava_lookup]

is_product_cell = df["prd_use"].isin(common_codes) & df["prd_ava"].isin(
    common_codes
)
product_matrix = df.loc[is_product_cell].pivot(
    index="prd_ava", columns="prd_use", values="value"
)
# Put rows and columns in the same product order.
siot = product_matrix.loc[common_codes, common_codes]

siot


prd_use,CPA_A01,CPA_A02,CPA_A03,CPA_B,CPA_B05,CPA_B06,CPA_B07,CPA_B08,CPA_B09,CPA_C10-12,...,CPA_R91,CPA_R92,CPA_R93,CPA_S94,CPA_S95,CPA_S96,CPA_T,CPA_T97,CPA_T98,CPA_U
prd_ava,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CPA_A01,813.22,13.19,0.00,0.0,,,,,,4271.37,...,,,0.59,0.00,0.00,25.89,0.0,,,0.0
CPA_A02,0.37,133.00,0.00,0.0,,,,,,0.00,...,,,0.00,0.00,0.00,0.00,0.0,,,0.0
CPA_A03,0.00,0.00,59.37,0.0,,,,,,75.38,...,,,0.00,0.00,0.00,0.00,0.0,,,0.0
CPA_B,1.15,0.03,0.00,209.8,,,,,,15.03,...,,,0.16,0.00,0.02,0.03,0.0,,,0.0
CPA_B05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CPA_S96,0.00,0.00,0.00,0.0,,,,,,0.00,...,,,37.76,9.93,0.00,17.53,0.0,,,0.0
CPA_T,0.00,0.00,0.00,0.0,,,,,,0.00,...,,,0.00,0.00,0.00,0.00,0.0,,,0.0
CPA_T97,,,,,,,,,,,...,,,,,,,,,,
CPA_T98,,,,,,,,,,,...,,,,,,,,,,
