In [1]:
import pandas as pd


# Source: INSEE table TF106, published as a semicolon-delimited CSV file.
url = "https://www.insee.fr/fr/statistiques/fichier/4648335/TF106.csv"
df = pd.read_csv(url, delimiter=";")

In [2]:
# Keep only the rows whose NOMENCLATURE code is exactly three characters long
# (the ECOICOP group level).
df = df[df['NOMENCLATURE'].str.len().eq(3)]

In [3]:
# Drop the rows whose NOMENCLATURE code starts with "13".
df = df[~df['NOMENCLATURE'].str.startswith('13')]

Now let's turn to the Eurostat inflation statistics (dataset `PRC_HICP_MANR`).

In [4]:
import pandas as pd
import eurostat

In [5]:
# Download the PRC_HICP_MANR dataset from Eurostat, restricted to France ('FR')
# and to periods from 2021-03 onwards.
df_inflation = eurostat.get_data_df(
    code="PRC_HICP_MANR",
    filter_pars={'geo': 'FR', 'startPeriod': "2021-03"},
)
# Raw string: the column name contains a literal backslash, and "\T" is not a
# valid escape sequence (recent Python versions warn about it in a regular
# string literal). The resulting key is unchanged.
df_inflation = df_inflation.rename(columns={r'geo\TIME_PERIOD': 'geo'})

In [6]:
# Retain only the rows whose coicop code is "CP" followed by exactly three digits
# (the ECOICOP group level).
is_ecoicop_group = df_inflation['coicop'].str.match(r'^CP\d{3}$')
df_inflation = df_inflation[is_ecoicop_group]

In [7]:
# Strip the two-character "CP" prefix so that only the digits remain in coicop.
df_inflation['coicop'] = df_inflation['coicop'].str[2:]

In [9]:
# Restrict the table to the group code and the two March columns.
kept_columns = ['coicop', '2022-03', '2023-03']
df_inflation = df_inflation.loc[:, kept_columns]

In [10]:
# Compound the two yearly rates (given in percent) into one cumulated rate,
# also in percent: ((1 + r1/100) * (1 + r2/100) - 1) * 100.
factor_2022 = 1 + (df_inflation['2022-03'] / 100)
factor_2023 = 1 + (df_inflation['2023-03'] / 100)
df_inflation['Cumulated'] = (factor_2022 * factor_2023 - 1) * 100

In [12]:
# Inner join of the INSEE table with the Eurostat table on the group code
# (NOMENCLATURE on the left, coicop on the right).
merged = df.merge(df_inflation, left_on='NOMENCLATURE', right_on="coicop")


In [13]:
# Total CONSO for each distinct DECUC value.
sum_by_decuc = merged.groupby('DECUC')['CONSO'].agg('sum')

In [14]:
# PROP is each row's CONSO expressed as a fraction (not a percentage) of the
# CONSO total of its DECUC group.
merged['PROP'] = merged['CONSO'].div(merged['DECUC'].map(sum_by_decuc))

Let's check that the `PROP` shares sum to 1 within each `DECUC` group.

In [15]:
# Sanity check: the PROP shares of every DECUC group should add up to 1.
sumPROP = merged.groupby('DECUC')['PROP'].agg('sum')
sumPROP

DECUC
1      1.0
10     1.0
2      1.0
3      1.0
4      1.0
5      1.0
6      1.0
7      1.0
8      1.0
9      1.0
TOT    1.0
Name: PROP, dtype: float64

In [16]:
# Contribution of each group to the 2022-03 rate: the rate weighted by PROP.
merged['weight_Group_inflation_2022_03'] = merged['2022-03'].mul(merged['PROP'])

In [17]:
# Contribution of each group to the 2023-03 rate: the rate weighted by PROP.
merged['weight_Group_inflation_2023_03'] = merged['2023-03'].mul(merged['PROP'])

In [18]:
# Contribution of each group to the cumulated rate: Cumulated weighted by PROP.
merged['weight_Group_inflation_Cumulated'] = merged['Cumulated'].mul(merged['PROP'])

In [19]:
# Weighted 2022-03 inflation per DECUC group, as a one-column DataFrame.
sumInflationMarch2022 = merged.groupby('DECUC')[['weight_Group_inflation_2022_03']].sum()

In [20]:
# Weighted 2023-03 inflation per DECUC group, as a one-column DataFrame.
sumInflationMarch2023 = merged.groupby('DECUC')[['weight_Group_inflation_2023_03']].sum()

In [21]:
# Weighted cumulated inflation per DECUC group, as a one-column DataFrame.
sumCumulated = merged.groupby('DECUC')[['weight_Group_inflation_Cumulated']].sum()

Let's compute each decile's difference from the overall population (the `TOT` row).

In [22]:
# Gap between each DECUC group's weighted 2022-03 inflation and the value of the
# 'TOT' row; the 'TOT' row itself is dropped afterwards.
sumInflationMarch2022['diffTOT'] = sumInflationMarch2022["weight_Group_inflation_2022_03"].sub(
    sumInflationMarch2022.loc['TOT', "weight_Group_inflation_2022_03"]
)
sumInflationMarch2022 = sumInflationMarch2022.drop(index='TOT')

In [23]:
# Gap between each DECUC group's weighted 2023-03 inflation and the value of the
# 'TOT' row; the 'TOT' row itself is dropped afterwards.
sumInflationMarch2023['diffTOT'] = sumInflationMarch2023["weight_Group_inflation_2023_03"].sub(
    sumInflationMarch2023.loc['TOT', "weight_Group_inflation_2023_03"]
)
sumInflationMarch2023 = sumInflationMarch2023.drop(index='TOT')

In [24]:
# Gap between each DECUC group's weighted cumulated inflation and the value of
# the 'TOT' row; the 'TOT' row itself is dropped afterwards.
sumCumulated['diffTOT'] = sumCumulated["weight_Group_inflation_Cumulated"].sub(
    sumCumulated.loc['TOT', "weight_Group_inflation_Cumulated"]
)
sumCumulated = sumCumulated.drop(index='TOT')

In [25]:
# Put the three per-DECUC tables side by side. The clashing diffTOT columns of
# the first two tables receive the "_2022" / "_2023" suffixes; the diffTOT of
# the cumulated table keeps its plain name.
sumAllThree = (
    sumInflationMarch2022
    .merge(sumInflationMarch2023, on="DECUC", suffixes=["_2022", "_2023"])
    .merge(sumCumulated, on="DECUC")
)

In [26]:
# Order the rows by the numeric value of the DECUC label rather than as text.
sumAllThree = sumAllThree.sort_index(key=pd.to_numeric)

In [None]:
import matplotlib.pyplot as plt
# Bar chart of the 2022-03 gap between each decile and the 'TOT' reference.

# Colour each bar by sign: red when the gap is positive, blue otherwise.
colors = ['red' if x > 0 else 'blue' for x in sumAllThree['diffTOT_2022']]

# legend=False: a single series is plotted, so no legend is drawn.
sumAllThree.plot.bar(y='diffTOT_2022', color=colors, legend=False)

# set axis labels and title
plt.xlabel('Decile')
plt.ylabel('Excess inflation in absolute points')
plt.title('Excess Inflation by Decile between March 2021 and March 2022')

# set the y-axis scale to a specific range
plt.ylim(-1, 1)

# Build the "D<n>" tick labels from the index of the frame that is plotted.
# sumAllThree is sorted numerically, whereas sumInflationMarch2022 keeps the
# text order ('1', '10', '2', ...), which would put the labels on the wrong bars.
ticks = ['D' + tick for tick in sumAllThree.index]
plt.xticks(range(len(ticks)), ticks)

# show the plot
plt.show()

In [None]:
# Bar chart of the 2023-03 gap between each decile and the 'TOT' reference.

# Colour each bar by sign: red when the gap is positive, blue otherwise.
colors = ['red' if x > 0 else 'blue' for x in sumAllThree['diffTOT_2023']]

# legend=False: a single series is plotted, so no legend is drawn.
sumAllThree.plot.bar(y='diffTOT_2023', color=colors, legend=False)

# set axis labels and title
plt.xlabel('Decile')
plt.ylabel('Excess inflation in absolute points')
plt.title('Excess Inflation by Decile between March 2022 and March 2023')

# set the y-axis scale to a specific range
plt.ylim(-1, 1)

# Build the "D<n>" tick labels from the index of the frame that is plotted.
# sumAllThree is sorted numerically, whereas sumInflationMarch2022 keeps the
# text order ('1', '10', '2', ...), which would put the labels on the wrong bars.
ticks = ['D' + tick for tick in sumAllThree.index]
plt.xticks(range(len(ticks)), ticks)

# show the plot
plt.show()

In [None]:
# Bar chart of the cumulated gap between each decile and the 'TOT' reference.

# Colour each bar by sign: red when the gap is positive, blue otherwise.
colors = ['red' if x > 0 else 'blue' for x in sumAllThree['diffTOT']]

# legend=False: a single series is plotted, so no legend is drawn.
sumAllThree.plot.bar(y='diffTOT', color=colors, legend=False)

# set axis labels and title
plt.xlabel('Decile')
plt.ylabel('Excess inflation in absolute points')
plt.title('Excess Inflation by Decile between March 2021 and March 2023')

# set the y-axis scale to a specific range
plt.ylim(-1, 1)

# Build the "D<n>" tick labels from the index of the frame that is plotted.
# sumAllThree is sorted numerically, whereas sumInflationMarch2022 keeps the
# text order ('1', '10', '2', ...), which would put the labels on the wrong bars.
ticks = ['D' + tick for tick in sumAllThree.index]
plt.xticks(range(len(ticks)), ticks)

# show the plot
plt.show()