In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly
import plotly.express as px

In [2]:
# Weights of the different goods categories in the price index (Eurostat PRC_HICP_INW).
# Source: https://ec.europa.eu/eurostat/databrowser/view/PRC_HICP_INW__custom_2846029/default/table?lang=en
weights = pd.read_csv(
    'data/prc_hicp_inw__custom_2846029_page_tabular.tsv',
    sep="\t",
)


# Clean weights
def extract_coicop(col, position):
    """Return the `position`-th comma-separated field of every string in `col`.

    The Eurostat key column packs several dimensions into one string
    (e.g. ``freq,coicop,geo``); this picks a single one of them, such as
    the COICOP code.
    """
    fields = col.str.split(",", expand=True)
    return fields[position]

# Raw string: the Eurostat key column name contains a literal backslash, and "\T"
# is not a valid escape sequence (newer Python versions warn about it).
WEIGHTS_KEY_COLUMN = r"freq,coicop,geo\TIME_PERIOD"

# Strip the headers first so the key column is looked up and dropped under the same name.
weights.columns = [c.strip() for c in weights.columns]
# The COICOP code is the second field of the "freq,coicop,geo" key.
weights["coicop"] = extract_coicop(weights[WEIGHTS_KEY_COLUMN], 1)
weights = weights.drop(WEIGHTS_KEY_COLUMN, axis=1)
weights.head()

Unnamed: 0,2019,2020,2021,2022,coicop
0,90.2,82.93,94.82,121.47,AP
1,3.85,3.63,4.21,4.56,APF
2,86.36,79.3,90.6,116.92,APM
3,30.45,26.48,30.47,31.62,AP_NNRG
4,59.76,56.45,64.35,89.85,AP_NRG


In [3]:
# Monthly inflation data
# https://ec.europa.eu/eurostat/databrowser/view/PRC_HICP_MMOR__custom_2846191/default/table?lang=en
monthly_inflation = pd.read_csv('data/prc_hicp_mmor__custom_2846191_page_tabular.tsv', sep="\t")

# Clean monthly inflation data
# Raw string: the Eurostat key column name contains a literal backslash, and "\T"
# is not a valid escape sequence (newer Python versions warn about it).
INFLATION_KEY_COLUMN = r"freq,unit,coicop,geo\TIME_PERIOD"

# Strip the headers first so the key column is looked up and dropped under the same name.
monthly_inflation.columns = [c.strip() for c in monthly_inflation.columns]
# The COICOP code is the third field of the "freq,unit,coicop,geo" key.
monthly_inflation["coicop"] = extract_coicop(monthly_inflation[INFLATION_KEY_COLUMN], 2)
monthly_inflation = monthly_inflation.drop(INFLATION_KEY_COLUMN, axis=1)
monthly_inflation.head()

Unnamed: 0,2021-08,2021-09,2021-10,2021-11,2021-12,2022-01,2022-02,2022-03,2022-04,2022-05,coicop
0,0.5,10.7,-7.2,7.8,29.2,-10.7,-5.0,3.0,24.3,:,AP
1,0.0,0.0,0.0,0.0,0.0,1.3,0.0,0.0,0.0,:,APF
2,0.5,11.2,-7.5,8.2,30.3,-11.1,-5.2,3.2,25.4,:,APM
3,0.0,0.0,0.0,0.0,0.1,0.4,0.3,2.1,0.7,:,AP_NNRG
4,0.7,15.0,-9.7,10.9,39.3,-14.6,-7.2,3.4,34.8,:,AP_NRG


In [4]:
# COICOP classifier: maps each code to its hierarchy level and description.
# Source: https://ec.europa.eu/eurostat/ramon/nomenclatures/index.cfm?TargetUrl=LST_CLS_DLD&StrLanguageCode=EN&StrNom=CL_COICOP&StrLayoutCode=LINEAR
coicop = (
    pd.read_excel('data/COICOP.xls', skiprows=2)
    # Headers may carry surrounding whitespace; normalise them before selecting columns.
    .rename(columns=str.strip)
    # Only the code, the level and the English description are used below.
    .drop(columns=["DE_DESC", "FR_DESC", "DESC_AGGR", "INT_CODE"])
)
coicop.head()

Unnamed: 0,CODE_DIFF,LEVEL,EN_DESC
0,TOTAL,1,Total
1,TOT_X_CP041_042,1,Total except actual rents
2,CP00,1,All-items HICP
3,CP01,1,Food and non-alcoholic beverages
4,CP011,2,Food


# Treemap

In [22]:
MAX_LEVEL = 4  # Deepest COICOP hierarchy level shown in the treemap

# Attach the classifier columns (CODE_DIFF, LEVEL, EN_DESC) to every inflation row.
merged = monthly_inflation.merge(coicop, left_on="coicop", right_on="CODE_DIFF")

# Keep only "CP..." codes that sit exactly at the requested level of detail.
is_cp_code = merged["coicop"].str[:2].eq("CP")
is_requested_level = merged["LEVEL"].eq(MAX_LEVEL)

# reset_index() deliberately keeps the old index as a leading "index" column:
# later cells pick the month columns by position (merged.columns[1:10]).
merged = merged[is_cp_code & is_requested_level].reset_index()

# Add hierarchical description
def get_level_description(codes, level=1, classifier=None):
    """Get the COICOP description of the ancestor at `level` for every code in `codes`.

    The ancestor code is the first ``3 + level`` characters of a code
    (level 1 of "CP0111" is "CP01", level 2 is "CP011"); its English
    description is looked up in the classifier table.

    Parameters
    ----------
    codes : pandas.Series of str
        COICOP codes, e.g. ``merged["coicop"]``.
    level : int
        Hierarchy level to describe.
    classifier : pandas.DataFrame, optional
        Table with ``CODE_DIFF`` and ``EN_DESC`` columns. Defaults to the
        notebook-level `coicop` frame.

    Returns
    -------
    pandas.Series
        One description per input code, carrying the same index as `codes`,
        so it can be assigned straight back as a column. Codes whose ancestor
        is missing from the classifier get NaN instead of being dropped
        (dropping rows would shift every later description onto the wrong row).
    """
    if classifier is None:
        classifier = coicop
    # A code -> description lookup; the first entry wins if a code is listed twice.
    descriptions = (
        classifier.drop_duplicates(subset="CODE_DIFF")
        .set_index("CODE_DIFF")["EN_DESC"]
    )
    level_code = codes.str.slice(0, 3 + level)
    # map() keeps the index and length of `codes`, unlike an inner merge.
    return level_code.map(descriptions).rename("EN_DESC")

# One description column per hierarchy level, named L1_DESC ... L{MAX_LEVEL}_DESC.
for level in range(1, MAX_LEVEL + 1):
    level_descriptions = get_level_description(merged["coicop"], level=level)
    merged[f"L{level}_DESC"] = level_descriptions
    
# Multiply percentage growths by index weight
def get_coicop_weight(codes, month: str, weight_table=None):
    """Get the price index weight of each COICOP code for the year of `month`.

    Parameters
    ----------
    codes : pandas.Series (or other sequence) of str
        COICOP codes to look up.
    month : str
        Month formatted ``YYYY-MM``; its first four characters select the
        year column of the weight table.
    weight_table : pandas.DataFrame, optional
        Table with a ``coicop`` column and one column per year. Defaults to
        the notebook-level `weights` frame.

    Returns
    -------
    pandas.Series
        One weight per input code, in input order and with the input's index,
        so it lines up element-wise with the data it is multiplied with.
        Codes without a weight yield NaN rather than being dropped.

    Raises
    ------
    KeyError
        If the weight table has no column for the requested year.
    """
    year = month[0:4]
    if weight_table is None:
        weight_table = weights
    # A code -> weight lookup; the first entry wins if a code is listed twice.
    year_weights = (
        weight_table.drop_duplicates(subset="coicop")
        .set_index("coicop")[year]
    )
    # map() keeps the index and length of `codes`, unlike an inner merge.
    return pd.Series(codes).map(year_weights).rename(year)

In [64]:
def get_adjacent_month(month, delta, months=None):
    """Return the month `delta` steps away from `month`, or None if there is none.

    Parameters
    ----------
    month : str
        A month present in `months`.
    delta : int
        Number of steps to move; negative values go back in time.
    months : sequence of str, optional
        Ordered list of available months. Defaults to the month columns of
        `merged` (column positions 1 to 9).

    Returns
    -------
    str or None
        The neighbouring month, or None when the step leaves the list at
        either end.

    Raises
    ------
    ValueError
        If `month` is not one of the available months.
    """
    if months is None:
        months = merged.columns[1:10].tolist()
    else:
        months = list(months)
    target_idx = months.index(month) + delta
    # Check both ends explicitly: a negative index would otherwise wrap around
    # and return a month from the end of the list.
    if not 0 <= target_idx < len(months):
        return None
    return months[target_idx]

def plot_month(month, show=True):
    """Build the treemap of price growth by spending category for one month.

    Every row of `merged` becomes a leaf nested under its L1..L{MAX_LEVEL}
    descriptions. Leaf area is the month's percentage growth, with negative
    growth clipped to zero, multiplied by the category's index weight for
    that year. The chart is written to ``figures/{month}.html`` (the folder
    is created if needed) and optionally displayed.

    Parameters
    ----------
    month : str
        Name of a month column of `merged`, formatted ``YYYY-MM``.
    show : bool
        Whether to also display the figure in the notebook.
    """
    percs = merged[month]
    # Price decreases are clipped to zero so that every area is non-negative.
    increases = np.maximum(percs.to_numpy(), 0.0)
    vals = increases * get_coicop_weight(merged["coicop"], month=month)

    TREEMAP_LEVELS = [f"L{level}_DESC" for level in range(1, MAX_LEVEL+1)]
    df = pd.DataFrame({
        "val": vals,
        "perc": percs,
        **{level_col: merged[level_col] for level_col in TREEMAP_LEVELS},
    })

    # Title and links
    # Headline figure: the month's growth of the all-items index (code CP00).
    perc_growth = monthly_inflation[monthly_inflation["coicop"] == "CP00"][month].iloc[0]
    prev_month = get_adjacent_month(month, -1)
    prev_month_link = f'<a href="https://taivoai-public.s3.eu-central-1.amazonaws.com/inflation_charts/{prev_month}.html">{prev_month}</a>'
    next_month = get_adjacent_month(month, 1)
    next_month_link = f'<a href="https://taivoai-public.s3.eu-central-1.amazonaws.com/inflation_charts/{next_month}.html">{next_month}</a>'

    # The Prev/Next links are only included when such a month exists.
    fig_title = f"""
<b>Consumer price growth by spending category, {month} ({perc_growth}% month total) </b><br>
{"Prev: " + prev_month_link if prev_month is not None else ""}
{"Next: " + next_month_link if next_month is not None else ""}
| Data: <a href="https://ec.europa.eu/eurostat/databrowser/view/PRC_HICP_MMOR__custom_2846191/default/table?lang=en">Eurostat</a>
| Author: <a href="https://taivo.ai/estonia-inflation-2022">Taivo Pungas</a>
    """

    fig = px.treemap(df, path=[px.Constant("all"), *TREEMAP_LEVELS], values="val",
                    title=fig_title)
    fig.update_traces(root_color="lightgrey")
    fig.update_layout(margin = dict(t=100, l=25, r=25, b=25), font=dict(size=13))

    # Create the output folder on demand so a fresh checkout can run the notebook end to end.
    output_dir = Path("figures")
    output_dir.mkdir(parents=True, exist_ok=True)
    plotly.io.write_html(fig, str(output_dir / f"{month}.html"))

    if show:
        fig.show()

    
# Preview a single month inline; its HTML file is written as well.
plot_month("2022-03", show=True)

In [65]:
# Write the HTML chart for every month column without displaying it inline.
for month in merged.columns[1:10].tolist():
    print(month)
    plot_month(month, show=False)

2021-08
2021-09
2021-10
2021-11
2021-12
2022-01
2022-02
2022-03
2022-04
