These data come from INSEE's Budget des Familles survey and are protected by the Statistical Secrecy Committee. To reproduce the analysis, you must first request access to the data.

In [1]:
import pandas as pd

In [2]:
# Load the semicolon-separated C05 table, indexed by the IDENT_MEN column
# (presumably the household identifier — confirm against the survey docs).
df = pd.read_csv(
    '../BDF/Csv/C05.csv',
    index_col="IDENT_MEN",
    sep=";",
)

In [3]:
# Second input table, also indexed by IDENT_MEN so it can be joined to `df`
# (presumably the list of student households — confirm).
df_liste_menages = pd.read_csv(
    'menagesEtudiant2.csv',
    index_col="IDENT_MEN",
)

In [4]:
# Inner join on the shared IDENT_MEN index: only rows whose identifier
# appears in both tables are kept.
df = pd.merge(df_liste_menages, df, left_index=True, right_index=True)

We need to remove the columns starting with C13 (taxes) and C14 (subsidies), which are not consumption, as well as the total column CTOT and the household weight pondmen, which are not needed here.

In [5]:
# Flag every column whose name starts with C13 or C14.
is_c13_or_c14 = df.columns.str.startswith('C13') | df.columns.str.startswith('C14')
columns_to_delete = df.columns[is_c13_or_c14]

# Remove the flagged columns together with the CTOT and pondmen columns.
df = df.drop(columns=[*columns_to_delete, "CTOT", "pondmen"])

In [6]:
# Collapse the frame to a single row holding the (unweighted) mean of every
# column. The mean Series is turned directly into a one-row DataFrame instead
# of being appended to the full frame and sliced back out.
column_averages = df.mean()

# The resulting row carries the default label 0; the later `df_prop[0]`
# lookup (after the transpose) relies on that label.
df = column_averages.to_frame().T

In [7]:
# Divide every value by its row total so that each row sums to 1.
row_totals = df.sum(axis=1)
df_prop = df.div(row_totals, axis=0)


In [8]:
# Remove every 'C' character from the column names.
df_prop = df_prop.rename(columns=lambda name: name.replace('C', ''))


We now have the share of each consumption category in the average budget of the selected households.

In [9]:
import eurostat

In [10]:
# Download Eurostat dataset PRC_HICP_MANR restricted to geo=FR, for the
# periods 1997-01 through 2023-06.
df_inflation_raw = eurostat.get_data_df(
    code="PRC_HICP_MANR",
    filter_pars={'geo': 'FR', 'startPeriod': "1997-01", "endPeriod": "2023-06"},
)

# The source column name contains a literal backslash. A raw string keeps the
# exact same key while avoiding the invalid "\T" escape sequence, which
# recent Python versions warn about.
df_inflation_raw = df_inflation_raw.rename(columns={r'geo\TIME_PERIOD': 'geo'})

In [11]:
# Mapping table between df_prop column names and df_inflation codes.
# Everything is read as text (dtype="str").
df_correspondances = pd.read_csv(
    "../correspondances.csv",
    sep=";",
    dtype="str",
)

In [12]:
# Index the mapping table by the df_prop column names it refers to.
df_correspondances = df_correspondances.set_index('colonnes de df_prop')

In [13]:
# Keep only the rows whose coicop code starts with 'CP'.
# An explicit copy is taken because the result is modified afterwards;
# mutating a bare slice of df_inflation_raw raises SettingWithCopyWarning.
is_cp_code = df_inflation_raw['coicop'].str.startswith('CP')
df_inflation = df_inflation_raw[is_cp_code].copy()

In [14]:
# Strip the literal 'CP' marker from the coicop codes.
# `assign` builds a new frame instead of writing into what may be a slice of
# df_inflation_raw, which is what triggered SettingWithCopyWarning.
df_inflation = df_inflation.assign(
    coicop=df_inflation['coicop'].str.replace('CP', '', regex=False)
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inflation['coicop'] = df_inflation['coicop'].str.replace('CP', '')


In [15]:
# Discard the freq, unit and geo columns; only coicop and the period
# columns are needed from here on.
df_inflation = df_inflation.drop(columns=["freq", "unit", "geo"])


In [16]:
# Use the coicop code as the row index.
df_inflation = df_inflation.set_index(keys='coicop')

In [17]:
# Flip rows and columns: the former column names become the index.
df_prop = df_prop.T

In [18]:
# Preview the first five rows.
df_prop.head(n=5)

Unnamed: 0,0
1111,0.002373
1112,0.010616
1113,0.003708
1114,0.005331
1115,0.003013


In [19]:
# Preview the first five rows.
df_inflation.head(n=5)

Unnamed: 0_level_0,1997-01,1997-02,1997-03,1997-04,1997-05,1997-06,1997-07,1997-08,1997-09,1997-10,...,2022-09,2022-10,2022-11,2022-12,2023-01,2023-02,2023-03,2023-04,2023-05,2023-06
coicop,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.8,1.7,1.1,1.0,0.9,1.0,1.1,1.6,1.4,1.1,...,6.2,7.1,7.1,6.7,7.0,7.3,6.7,6.9,6.0,5.3
1,2.5,1.5,0.1,0.5,0.4,1.0,1.5,2.8,2.9,3.0,...,10.6,12.9,13.0,12.9,14.1,15.8,16.9,15.7,14.9,14.3
11,2.8,1.6,0.2,0.5,0.3,0.8,1.4,2.7,2.8,2.9,...,10.8,13.2,13.2,13.1,14.4,16.1,17.2,15.9,15.0,14.3
111,1.9,2.1,1.9,1.7,1.8,1.7,1.6,1.6,1.4,1.3,...,9.9,11.2,11.8,12.1,13.4,14.4,15.4,15.8,14.7,14.1
1111,0.9,1.2,1.5,0.9,1.1,0.9,0.9,0.4,-0.3,1.1,...,12.2,14.1,15.8,16.1,17.2,19.3,21.2,22.9,20.2,18.8


In [20]:
# Attach the mapping table: df_prop's index is matched against the
# 'colonnes de df_prop' key of df_correspondances.
df_prop = pd.merge(
    df_prop,
    df_correspondances,
    left_index=True,
    right_on="colonnes de df_prop",
)

In [21]:
# Rename the mapped-code column to 'coicop', the name used by df_inflation.
df_prop = df_prop.rename(
    columns={"colonnes de df_inflation correspondante": "coicop"}
)


In [22]:
# Index by coicop so that df_prop lines up with df_inflation row by row.
df_prop = df_prop.set_index(keys="coicop")

In [23]:
# Inspect the dtype of every column of df_inflation.
df_inflation.dtypes

1997-01    float64
1997-02    float64
1997-03    float64
1997-04    float64
1997-05    float64
            ...   
2023-02    float64
2023-03    float64
2023-04    float64
2023-05    float64
2023-06    float64
Length: 318, dtype: object

In [24]:
# Colonne '0' de df_prop
# Column 0 of df_prop, indexed by coicop.
df_prop_column = df_prop[0]

# Multiply every column of df_inflation by that column, aligning rows on
# the coicop index (axis=0).
result_df = df_inflation.mul(df_prop_column, axis=0)

In [26]:
# Drop every row that contains at least one missing value.
# NOTE(review): the values from df_prop that belonged to dropped rows are not
# redistributed, so the remaining ones may no longer sum to 1 — confirm
# whether renormalisation is intended.
result_df = result_df.dropna(axis=0, how="any")

In [28]:
# Add up the remaining rows, giving one total per column.
result_df = result_df.sum(axis=0)

In [31]:
# Wrap the totals in a DataFrame so the column and index can be named.
result_df = pd.DataFrame(data=result_df)

In [33]:
# Name the index 'date'.
result_df = result_df.rename_axis(index='date')

In [35]:
# Give the column labelled 0 a descriptive name.
result_df = result_df.rename({0: 'inflationEtudiant'}, axis="columns")

In [36]:
# Write the result to CSV. The path is a constant, so a plain string is used
# rather than an f-string with no placeholders.
result_df.to_csv("inflationEtudiantBudgetMoyen.csv")