These data come from a survey conducted by INSEE, Budget des Familles, which are protected by the Statistical Secrecy Committee. To reproduce the analysis, one must request access to this data.

In [196]:
import pandas as pd
date_analysis = "2022-09"
df_inflation_by_household = pd.read_csv(f'BDF/computed_inflation_by_household_{date_analysis}.csv', index_col="IDENT_MEN")

# Household

In [197]:
df_menage = pd.read_csv("BDF/Csv/MENAGE.csv", sep=";", encoding='latin1')

  df_menage = pd.read_csv("BDF/Csv/MENAGE.csv", sep=";", encoding='latin1')


In [198]:
df_menage = df_menage.set_index("IDENT_MEN")

In [199]:
df_dep_men = pd.read_csv("BDF/Csv/DEPMEN.csv", sep=";", encoding='latin1')

  df_dep_men = pd.read_csv("BDF/Csv/DEPMEN.csv", sep=";", encoding='latin1')


In [200]:
df_dep_men = df_dep_men.set_index("IDENT_MEN")

In [201]:
df_menage = pd.merge(left=df_menage, right=df_dep_men, left_index=True, right_index=True)

In [202]:
variables = ["AGEPR", "TUU", "DNIVIE1", "Stalog", "TYPMEN5"]
df_filtered = df_menage.loc[:, variables]

# Cleaning variables

In [203]:
df_filtered['TYPMEN5'] = df_filtered['TYPMEN5'].replace({
    1: 'Personne seule',
    2: 'Famille monoparentale',
    3: 'Couple sans enfant',
    4: 'Couple avec au moins un enfant',
    5: 'Autre type de ménage (ménage complexe)'
})

In [204]:
def clean_variable(df, variable, variable_ref, numerical, labels=[],  bins=[]):
    df[f'{variable}_category'] = pd.cut(x=df[variable], bins=bins, labels=labels, right=True) if numerical else df[variable]
    df = pd.concat([df, pd.get_dummies(data=df[f"{variable}_category"])], axis=1)
    df = df.drop([variable, f"{variable}_category", variable_ref], axis=1)
    return df


In [205]:
df_filtered = clean_variable(df=df_filtered, variable="TYPMEN5", variable_ref="Personne seule", numerical=False)

Age

In [206]:
df_filtered = clean_variable(df=df_filtered, variable="AGEPR", labels=["Moins de 30 ans", "De 30 à 44 ans", "De 45 à 59 ans", "De 60 à 74 ans", "75 ans et plus"], variable_ref = "De 45 à 59 ans", bins = [0, 30, 45, 60, 75, 102], numerical=True)

Type menage

TUUU

In [207]:
df_filtered = clean_variable(df=df_filtered, variable="TUU", variable_ref = "Ville moyenne", numerical=True, bins=[0,4,6,7,9], labels=["Rural et petites villes", "Ville moyenne", "Grande ville", "Agglomération parisienne"])

Qintiles niveau de vie

In [208]:
df_filtered = clean_variable(df=df_filtered, variable="DNIVIE1", variable_ref = "D4-8", numerical=True, bins=[0,4,8,11], labels=["D1-3", "D4-8", "D8-10"])

In [209]:
df_filtered = df_filtered.dropna()

Stalog

In [210]:
df_filtered = clean_variable(df=df_filtered, variable="Stalog", variable_ref = "Locataire", numerical=True, bins=[0,3,6], labels=["Propriétaire", "Locataire"])

# Regression


In [211]:
df_filtered = pd.merge(df_filtered, df_inflation_by_household, left_index=True, right_index=True)

In [212]:
df_filtered = df_filtered.dropna()


In [213]:
y = df_filtered['inflation']
df_filtered = df_filtered.drop(columns=['inflation'])
X = df_filtered


In [214]:
import statsmodels.api as sm
X = sm.add_constant(X)

In [215]:
model = sm.OLS(y, X)

In [216]:
results = model.fit()


In [217]:
results.summary().tables[1]


0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.5037,0.062,89.061,0.000,5.383,5.625
Autre type de ménage (ménage complexe),0.0301,0.088,0.341,0.733,-0.143,0.203
Couple avec au moins un enfant,0.1902,0.053,3.591,0.000,0.086,0.294
Couple sans enfant,0.3603,0.051,7.094,0.000,0.261,0.460
Famille monoparentale,0.1085,0.059,1.826,0.068,-0.008,0.225
Moins de 30 ans,-0.5171,0.067,-7.723,0.000,-0.648,-0.386
De 30 à 44 ans,-0.1902,0.048,-3.994,0.000,-0.284,-0.097
De 60 à 74 ans,0.3416,0.053,6.501,0.000,0.239,0.445
75 ans et plus,0.8534,0.067,12.677,0.000,0.721,0.985
