In [1]:
import pandas as pd


url = "https://www.insee.fr/fr/statistiques/fichier/4648335/TF106.csv"

# Semicolon-separated CSV fetched straight from the INSEE website.
# NOMENCLATURE is forced to text so that the codes keep any leading zeros and
# the later `.str` filters and the string-keyed merge do not depend on
# pandas' dtype inference.
df = pd.read_csv(url, sep=";", dtype={"NOMENCLATURE": str})

In [2]:
# Keep only the rows whose NOMENCLATURE code is exactly three characters long
# (the ECOICOP "group" level).
is_three_char_code = df['NOMENCLATURE'].str.len().eq(3)
df = df[is_three_char_code]

In [3]:
# Drop every row whose NOMENCLATURE code begins with "13".
starts_with_13 = df['NOMENCLATURE'].str.startswith('13')
df = df[~starts_with_13]

In [4]:
import eurostat

In [6]:
# Download the Eurostat dataset PRC_HICP_MANR, restricted to geo == 'FR' and to
# the single period 2022-03 (start and end period are the same month).
df_inflation = eurostat.get_data_df(
    code="PRC_HICP_MANR",
    filter_pars={'geo': 'FR', 'startPeriod': "2022-03", 'endPeriod': "2022-03"},
)
# The column label contains a literal backslash. A raw string is used because
# "\T" is not a valid escape sequence (SyntaxWarning on Python 3.12+); the
# resulting key is unchanged.
df_inflation = df_inflation.rename(columns={r'geo\TIME_PERIOD': 'geo'})

In [7]:
# Keep only the coicop codes made of "CP" followed by exactly three digits,
# i.e. the ECOICOP group level.
is_cp_group = df_inflation['coicop'].str.match(r'^CP\d{3}$')
df_inflation = df_inflation.loc[is_cp_group]

In [8]:
# Strip the leading "CP" from every coicop code.
# `.assign` builds a new frame instead of writing into the filtered frame
# (no SettingWithCopyWarning), and anchoring the pattern on the prefix makes
# the cell safe to re-run: an already-stripped code is left unchanged.
df_inflation = df_inflation.assign(
    coicop=df_inflation['coicop'].str.replace(r'^CP', '', regex=True)
)

In [9]:
# Narrow the frame down to the code column and the 2022-03 value column.
kept_columns = ['coicop', '2022-03']
df_inflation = df_inflation[kept_columns]

In [11]:
# Inner join: each row of `df` is paired with the `df_inflation` row(s) whose
# coicop equals its NOMENCLATURE code; rows without a match are dropped.
merged = df.merge(
    df_inflation,
    how='inner',
    left_on='NOMENCLATURE',
    right_on='coicop',
)

In [12]:
# Total CONSO per DECUC value (Series indexed by DECUC).
sum_by_decuc = merged['CONSO'].groupby(merged['DECUC']).sum()

In [13]:
# PROP = share of each row's CONSO in the CONSO total of its DECUC.
# This is a plain fraction (not multiplied by 100).
decuc_total = merged['DECUC'].map(sum_by_decuc)
merged['PROP'] = merged['CONSO'].div(decuc_total)

In [14]:
# Weight each row's 2022-03 value by its PROP share.
merged['weight_Group_inflation_2022_03'] = merged['2022-03'].mul(merged['PROP'])

In [15]:
# Sum the weighted values within each DECUC; the result is a one-column
# DataFrame indexed by DECUC.
sumInflationMarch2022 = (
    merged
    .groupby('DECUC')[['weight_Group_inflation_2022_03']]
    .sum()
)