<a href="https://colab.research.google.com/github/noproblama25/inflation/blob/main/Inflation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Investigating inflation trends in the Netherlands

# 1. Inflation (target)


### 1.1 Datasets

Source (ECB): https://data.ecb.europa.eu/data/datasets/ICP/ICP.M.BE.N.000000.4.ANR

In [None]:
pip install eurostat

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.dates as mdates
import eurostat
from datetime import datetime
import requests

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import seaborn as sns

import json
from pandas import json_normalize
from io import StringIO

In [None]:
# Directory that holds the ECB Data Portal CSV exports.
# NOTE(review): "/content" looks like a Google Colab session path - change this
# single value when the files live elsewhere.
ECB_EXPORT_DIR = "/content/inflatie"

# One export file per area (DE, NL, EU, BE); the area code is the file-name suffix.
df_de = pd.read_csv(f"{ECB_EXPORT_DIR}/ECB Data Portal_20251029152601_DE.csv")
df_nl = pd.read_csv(f"{ECB_EXPORT_DIR}/ECB Data Portal_20251029152915_NL.csv")
df_eu = pd.read_csv(f"{ECB_EXPORT_DIR}/ECB Data Portal_20251029153138_EU.csv")
df_be = pd.read_csv(f"{ECB_EXPORT_DIR}/ECB Data Portal_20251029153237_BE.csv")

In [None]:
# Display the column labels of the DE export.
df_de.columns

In [None]:
# Step 1: capture the column labels of every export before renaming.
current_columns_de = df_de.columns
current_columns_nl = df_nl.columns
current_columns_eu = df_eu.columns
current_columns_be = df_be.columns

# Step 2: the column to rename is the third one (position 2, zero-based).
column_to_rename_de = current_columns_de[2]
column_to_rename_nl = current_columns_nl[2]
column_to_rename_be = current_columns_be[2]
column_to_rename_eu = current_columns_eu[2]

# Step 3: short per-area labels that replace the original column names.
new_column_name_de = "Inflation_DE"
new_column_name_nl = "Inflation_NL"
new_column_name_be = "Inflation_BE"
new_column_name_eu = "Inflation_EU"

# Step 4: rename() returns a new frame, so each name is rebound to the result.
df_de = df_de.rename(columns={column_to_rename_de: new_column_name_de})
df_nl = df_nl.rename(columns={column_to_rename_nl: new_column_name_nl})
df_be = df_be.rename(columns={column_to_rename_be: new_column_name_be})
df_eu = df_eu.rename(columns={column_to_rename_eu: new_column_name_eu})


In [None]:
# Tag every row of each frame with the code of the area it belongs to.
for frame, area_code in ((df_de, 'DE'), (df_nl, 'NL'), (df_be, 'BE'), (df_eu, 'EU')):
    frame['Country_Code'] = area_code

In [None]:
# Stack the four frames on top of each other and renumber the rows from 0.
frames_to_stack = [df_de, df_nl, df_be, df_eu]
df_all = pd.concat(frames_to_stack, axis=0, ignore_index=True)

In [None]:
# Reshape to long format: one row per (DATE, Country_Code) observation with a
# single 'Inflation' value column.
inflation_value_columns = ['Inflation_DE', 'Inflation_NL', 'Inflation_BE', 'Inflation_EU']
df_melted = (
    df_all
    .melt(
        id_vars=['DATE', 'TIME PERIOD', 'Country_Code'],
        value_vars=inflation_value_columns,
        var_name='Country_Inflation_Column',
        value_name='Inflation',
    )
    # Country_Code already identifies the area, so the melted column name is not needed.
    .drop(columns=['Country_Inflation_Column'])
    # Parse the DATE strings into datetimes.
    .assign(DATE=lambda long_frame: pd.to_datetime(long_frame['DATE']))
    # Remove every record whose Inflation value is blank/null.
    .dropna(subset=['Inflation'])
)

In [None]:
# Line chart of the inflation series, one line per Country_Code.
# (matplotlib.dates is already imported as mdates in the notebook's import cell.)
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df_melted, x='DATE', y='Inflation', hue='Country_Code', ax=ax)
ax.set_title('Inflation Trends Over Time by Country')
ax.set_xlabel('Date')
ax.set_ylabel('Inflation (%)')
ax.grid(False)

# One major tick per year, labelled with the four-digit year only ('%Y').
ax.xaxis.set_major_locator(mdates.YearLocator(1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

# Limit the x-axis to the first and last date present in the data.
ax.set_xlim(df_melted['DATE'].min(), df_melted['DATE'].max())

# Tilt the year labels so adjacent ticks do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

### Observations: the Netherlands had elevated inflation in these periods:

Hypothesis: There are common underlying factors that cause NL to repeatedly overshoot BE/DE inflation across different time periods and different shock types.

Testing: Is it the same mechanism (e.g., wage indexation, housing constraints, energy dependence) that amplifies inflation in NL regardless of whether the shock is:

Euro transition (2001-2003)  
Financial crisis (2009)  
Eurozone debt crisis (2013)  
Energy crisis (2022)  



### 1.2 Isolating periods of elevated inflation

In [None]:
def _index_by_date(frame):
    """Return ``frame`` indexed by its DATE column parsed as datetimes.

    A frame whose index is already named 'DATE' is returned unchanged, so this
    cell can be executed again without raising ``KeyError: 'DATE'``.
    """
    if frame.index.name == 'DATE':
        return frame
    return frame.assign(DATE=pd.to_datetime(frame['DATE'])).set_index('DATE')


# Parse DATE and move it into the index of every per-area frame.
df_de, df_be, df_nl, df_eu = (
    _index_by_date(frame) for frame in (df_de, df_be, df_nl, df_eu)
)

# Place the frames side by side, keeping only the dates present in all four.
df_all = pd.concat([df_de, df_nl, df_be, df_eu], axis=1, ignore_index=False, join='inner')

# Every input frame contributes its own 'TIME PERIOD' and 'Country_Code'
# column; dropping by label removes all of the duplicated copies at once.
df_all = df_all.drop(columns=['TIME PERIOD', 'Country_Code'])

# Drop all records where DATE <= 1996-12-31
df_all = df_all.reset_index()
df_all = df_all[df_all['DATE'] > '1996-12-31']


Isolating periods where NL inflation was more than 0.5 percentage points above the average of DE and BE

In [None]:
# Label every row with its calendar quarter, then average each series per quarter.
df_all['quarter'] = df_all['DATE'].dt.to_period('Q')
inflation_columns = ['Inflation_NL', 'Inflation_BE', 'Inflation_DE', 'Inflation_EU']
df_quarterly = df_all.groupby('quarter')[inflation_columns].mean().reset_index()

# Spread of NL over the simple average of BE and DE.
peer_average = (df_quarterly['Inflation_BE'] + df_quarterly['Inflation_DE']) / 2
df_quarterly['NL_vs_avg'] = df_quarterly['Inflation_NL'] - peer_average

# A quarter counts as elevated when the spread is strictly greater than 0.5.
df_quarterly['elevated'] = df_quarterly['NL_vs_avg'].gt(0.5)

# The running count of flag flips gives consecutive quarters with the same
# flag a shared group id.
flag_changed = df_quarterly['elevated'].ne(df_quarterly['elevated'].shift())
df_quarterly['period_group'] = flag_changed.cumsum()

# For each run of elevated quarters: first quarter, last quarter, number of
# quarters, and the mean spread.
elevated_quarters = df_quarterly.loc[df_quarterly['elevated']]
elevated_periods = elevated_quarters.groupby('period_group').agg({
    'quarter': ['first', 'last', 'count'],
    'NL_vs_avg': 'mean'
})


In [None]:
# Display the summary table of elevated periods.
elevated_periods

### 1.3 Extracting components that can explain contributions to inflation


In [None]:
# Codes passed as the 'geo' filter of every Eurostat request below.
COUNTRIES = ['NL', 'BE', 'DE']
# Inclusive year bounds handed to filter_dates for every downloaded dataset.
START_YEAR = 1997
END_YEAR = 2025

def filter_dates(df, start_year=1997, end_year=2025):
    """Return the rows of ``df`` whose time column lies within a year range.

    The time column is the first of ``'TIME_PERIOD'``, ``'time'`` or
    ``'freq\\TIME_PERIOD'`` that exists in ``df``.

    Args:
        df: DataFrame to filter. It is never modified.
        start_year: First year to keep (inclusive).
        end_year: Last year to keep (inclusive).

    Returns:
        ``df`` itself when it is empty or has none of the recognised time
        columns; otherwise a filtered copy. Rows whose time value cannot be
        parsed as a date are excluded.
    """
    if df.empty:
        return df

    # Find time column
    time_col = next(
        (col for col in ('TIME_PERIOD', 'time', 'freq\\TIME_PERIOD') if col in df.columns),
        None,
    )
    if time_col is None:
        return df

    # Keep the parsed years in a standalone Series. Writing a temporary 'year'
    # column into df would alter the caller's frame and overwrite (then drop)
    # a genuine 'year' column if one exists.
    years = pd.to_datetime(df[time_col], errors='coerce').dt.year
    in_range = (years >= start_year) & (years <= end_year)

    return df[in_range].copy()

def get_all_eurostat_data():
    """Download the Eurostat datasets used in this analysis.

    Every dataset is requested with ``COUNTRIES`` as the 'geo' filter plus its
    own extra filters, then passed through ``filter_dates`` with
    ``START_YEAR`` and ``END_YEAR``.

    Returns:
        dict mapping a short dataset name to a DataFrame, in the order the
        datasets are listed below. A dataset that cannot be downloaded or
        filtered maps to an empty DataFrame, and a warning naming the dataset
        and the error is issued so the failure is visible.
    """
    import warnings

    # (result key, Eurostat dataset code, filters applied in addition to 'geo')
    dataset_specs = [
        # HICP components
        ('hicp_components', 'prc_hicp_midx',
         {'coicop': ['CP00', 'NRG', 'FOOD', 'SERV', 'IGD_NNRG']}),
        # Labor market
        ('unemployment', 'une_rt_m',
         {'s_adj': 'SA', 'age': 'TOTAL', 'sex': 'T'}),
        ('minimum_wage', 'earn_mw_cur', {}),
        ('labor_costs', 'lc_lci_r2_q',
         {'s_adj': 'SCA', 'nace_r2': 'B-S'}),
        ('unit_labor_costs', 'lc_ulc_r2_q',
         {'s_adj': 'SCA', 'nace_r2': 'B-S'}),
        # Economic growth
        ('gdp_growth', 'namq_10_gdp',
         {'s_adj': 'SCA', 'na_item': 'B1GQ', 'unit': 'CLV_PCH_PRE'}),
        ('productivity', 'nama_10_lp_ulc',
         {'na_item': 'LPR_HW', 'unit': 'PCH_PRE'}),
    ]

    datasets = {}
    for key, dataset_code, extra_filters in dataset_specs:
        try:
            df = eurostat.get_data_df(
                dataset_code,
                filter_pars={'geo': COUNTRIES, **extra_filters},
            )
            datasets[key] = filter_dates(df, START_YEAR, END_YEAR)
        except Exception as exc:
            # Best effort: one failing dataset must not abort the others.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            warnings.warn(
                f"Eurostat dataset '{dataset_code}' ({key}) could not be loaded: {exc!r}"
            )
            datasets[key] = pd.DataFrame()

    return datasets

# Run the Eurostat download once and keep the full result dict.
data = get_all_eurostat_data()

# Bind the frames used further on to short names.
hicp, unemployment, min_wage, gdp = (
    data[dataset_name]
    for dataset_name in ('hicp_components', 'unemployment', 'minimum_wage', 'gdp_growth')
)


In [None]:
def get_ecb_inflation_data(countries=('NL', 'BE', 'DE'), start_year=1997):
    """Get HICP inflation from ECB Data Portal API.

    One CSV request is sent per (country, component) pair for the ICP series
    key ``M.{country}.N.{code}.4.ANR``.

    Args:
        countries: Iterable of country codes inserted into the series key.
        start_year: Year used for the ``startPeriod`` query parameter
            (sent as ``{start_year}-01``).

    Returns:
        All successfully downloaded series concatenated into one DataFrame
        with added ``country`` and ``component`` columns, or an empty
        DataFrame when no series could be downloaded. A series that fails
        (request error, non-200 status, unreadable CSV) is skipped and a
        warning naming it is issued.
    """
    import warnings

    # NOTE(review): apart from '000000' these codes are the same strings used
    # as Eurostat 'coicop' filters in get_all_eurostat_data - confirm the ECB
    # ICP dataflow accepts them; rejected codes now surface as warnings.
    components = {
        '000000': 'Total',
        'NRG': 'Energy',
        'FOOD': 'Food',
        'SERV': 'Services',
        'IGD_NNRG': 'Goods_excl_energy'
    }

    # Identical for every request, so built once outside the loops.
    params = {
        'startPeriod': f'{start_year}-01',
        'format': 'csvdata'
    }

    all_data = []

    for country in countries:
        for code, label in components.items():
            series_key = f"M.{country}.N.{code}.4.ANR"
            url = f"https://data-api.ecb.europa.eu/service/data/ICP/{series_key}"

            try:
                response = requests.get(url, params=params, timeout=30)
                if response.status_code != 200:
                    warnings.warn(
                        f"ECB series {series_key} skipped: HTTP {response.status_code}"
                    )
                    continue
                df = pd.read_csv(StringIO(response.text))
            except Exception as exc:
                # Best effort: skip this series but report why. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                warnings.warn(f"ECB series {series_key} skipped: {exc!r}")
                continue

            df['country'] = country
            df['component'] = label
            all_data.append(df)

    if not all_data:
        return pd.DataFrame()

    return pd.concat(all_data, ignore_index=True)

# Download the ECB series for the same countries and start year that are
# configured for the Eurostat extraction (COUNTRIES, START_YEAR).
ecb_inflation = get_ecb_inflation_data(countries=COUNTRIES, start_year=START_YEAR)



In [None]:
# Load VAT.csv and tobacco_excise_cleaned.csv. Both paths are relative, so the
# files are looked up in the kernel's current working directory.

vat = pd.read_csv('VAT.csv')
tobacco_excise = pd.read_csv('tobacco_excise_cleaned.csv')