These data come from the Budget des Familles (BDF) survey conducted by INSEE and are protected by the Statistical Secrecy Committee (Comité du secret statistique). To reproduce the analysis, you must first request access to the data.

In [449]:
import pandas as pd
# Date tag embedded in the name of the precomputed household-inflation file.
date_analysis = "2022-08"

# One row per household; the household identifier becomes the index so the
# frame can later be joined on it.
df_inflation_by_household = pd.read_csv(
    f'BDF/computed_inflation_by_household_{date_analysis}.csv',
    index_col="IDENT_MEN",
)

# Household

In [450]:
# Load the household table. The saved output of this cell echoes the call, as
# pandas does when it emits a warning about it (presumably a mixed-type
# DtypeWarning). low_memory=False makes pandas infer each column's dtype from
# the whole file instead of chunk by chunk, which removes that warning.
df_menage = pd.read_csv(
    "BDF/Csv/MENAGE.csv",
    sep=";",
    encoding='latin1',
    low_memory=False,
)

  df_menage = pd.read_csv("BDF/Csv/MENAGE.csv", sep=";", encoding='latin1')


In [451]:
# Index households by their identifier so tables can be joined on the index.
# Not re-runnable on its own: once IDENT_MEN is the index, it is no longer a column.
df_menage = df_menage.set_index(keys="IDENT_MEN")

In [452]:
# Load the DEPMEN table. The saved output of this cell echoes the call, as
# pandas does when it emits a warning about it (presumably a mixed-type
# DtypeWarning). low_memory=False makes pandas infer each column's dtype from
# the whole file instead of chunk by chunk, which removes that warning.
df_dep_men = pd.read_csv(
    "BDF/Csv/DEPMEN.csv",
    sep=";",
    encoding='latin1',
    low_memory=False,
)

  df_dep_men = pd.read_csv("BDF/Csv/DEPMEN.csv", sep=";", encoding='latin1')


In [453]:
# Index by household identifier, matching df_menage, so both can be merged on the index.
# Not re-runnable on its own: once IDENT_MEN is the index, it is no longer a column.
df_dep_men = df_dep_men.set_index(keys="IDENT_MEN")

In [454]:
# Inner join on the household index: only households present in both tables are kept.
df_menage = df_menage.merge(
    df_dep_men,
    how="inner",
    left_index=True,
    right_index=True,
)

In [455]:
# Explanatory variables kept for the regression.
variables = ["AGEPR", "TUU", "DNIVIE1", "Stalog", "TYPMEN5", "TYPVOIS"]
# Explicit copy: df_filtered is assigned to in the following cells, and an
# independent frame guarantees those writes never reach df_menage nor raise
# SettingWithCopyWarning.
df_filtered = df_menage.loc[:, variables].copy()

# Cleaning variables

In [456]:
# Map the TYPMEN5 codes 1..5 to readable labels; any other value is left as is.
typmen5_labels = dict(enumerate(
    [
        'Personne seule',
        'Famille monoparentale',
        'Couple sans enfant',
        'Couple avec au moins un enfant',
        'Autre type de ménage (ménage complexe)',
    ],
    start=1,
))
df_filtered['TYPMEN5'] = df_filtered['TYPMEN5'].replace(typmen5_labels)

In [457]:
def clean_variable(df, variable, variable_ref, numerical, labels=None, bins=None):
    """Replace one column of ``df`` by indicator (dummy) columns, omitting a reference level.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``variable``. It is left untouched; a new frame is returned.
    variable : str
        Name of the column to encode. It is removed from the result.
    variable_ref : str
        Category used as the reference level: its indicator column is dropped so
        the remaining indicators are not collinear with a regression constant.
    numerical : bool
        If True, ``variable`` is first binned with ``pd.cut`` using ``bins`` and
        ``labels``. If False, its values are used directly as categories.
    labels : list, optional
        Labels of the bins (one fewer than the number of edges). Required when
        ``numerical`` is True.
    bins : list, optional
        Bin edges. Intervals are closed on the left and open on the right.
        Required when ``numerical`` is True.

    Returns
    -------
    pandas.DataFrame
        The other columns of ``df`` followed by one 0/1 integer column per
        non-reference category.

    Raises
    ------
    ValueError
        If ``numerical`` is True and ``bins`` or ``labels`` is missing.
    KeyError
        If ``variable`` or ``variable_ref`` is not among the columns.

    Notes
    -----
    Rows whose value is missing or falls outside ``bins`` get 0 in every
    indicator column, so they cannot be told apart from the reference category.
    """
    if numerical:
        if bins is None or labels is None:
            raise ValueError("`bins` and `labels` are required when `numerical` is True")
        categories = pd.cut(x=df[variable], bins=bins, labels=labels, right=False)
    else:
        categories = df[variable]

    # Integer indicators: pandas >= 2 would otherwise produce booleans, which
    # turn a mixed float/bool design matrix into object dtype downstream.
    dummies = pd.get_dummies(data=categories, dtype=int)

    # Build the result from new objects only, so the caller's frame is not modified.
    encoded = pd.concat([df, dummies], axis=1)
    return encoded.drop(columns=[variable, variable_ref])


In [458]:
# Household type is already categorical (numerical=False): one indicator per
# label, with single-person households as the omitted reference.
df_filtered = clean_variable(
    df=df_filtered,
    variable="TYPMEN5",
    numerical=False,
    variable_ref="Personne seule",
)

Age

In [459]:
# Age bands for AGEPR; intervals are left-closed ([0, 30), [30, 45), ...).
# The last band is open-ended: with a finite upper edge of 102 and right-open
# bins, anyone aged 102 or more would get no band and be silently counted with
# the reference group instead of "75 ans et plus".
df_filtered = clean_variable(
    df=df_filtered,
    variable="AGEPR",
    numerical=True,
    bins=[0, 30, 45, 60, 75, float("inf")],
    labels=["Moins de 30 ans", "De 30 à 44 ans", "De 45 à 59 ans", "De 60 à 74 ans", "75 ans et plus"],
    variable_ref="De 45 à 59 ans",
)

Type menage

TUU

In [460]:
# Group TUU codes into four bands with left-closed bins: [0, 4), [4, 6), [6, 7), [7, 9).
# "Ville moyenne" is the omitted reference category.
df_filtered = clean_variable(
    df=df_filtered,
    variable="TUU",
    numerical=True,
    bins=[0, 4, 6, 7, 9],
    labels=["Rural et petites villes", "Ville moyenne", "Grande ville", "Agglomération parisienne"],
    variable_ref="Ville moyenne",
)

Standard-of-living deciles (DNIVIE1)

In [461]:
# Group DNIVIE1 into three bands with left-closed bins: [0, 4), [4, 8), [8, 11).
# The middle band therefore stops at 7, so it is labelled "D4-7" (the former
# "D4-8" label overlapped with "D8-10"). It is the omitted reference category,
# so the columns kept in the result ("D1-3", "D8-10") are unchanged.
df_filtered = clean_variable(
    df=df_filtered,
    variable="DNIVIE1",
    numerical=True,
    bins=[0, 4, 8, 11],
    labels=["D1-3", "D4-7", "D8-10"],
    variable_ref="D4-7",
)

In [462]:
# Remove every household with a missing value in any remaining column. The
# indicator columns built so far never contain NaN, so in practice this filters
# on the variables not yet encoded (Stalog, TYPVOIS).
df_filtered = df_filtered.dropna(axis=0, how="any")

Stalog

In [463]:
# Two tenure groups from the Stalog codes, with left-closed bins: [0, 3) and [3, 6).
# "Locataire" is the omitted reference category.
# NOTE(review): a code of 6 or more falls outside the bins and ends up with the
# reference group — confirm this is intended.
df_filtered = clean_variable(
    df=df_filtered,
    variable="Stalog",
    numerical=True,
    bins=[0, 3, 6],
    labels=["Propriétaire", "Locataire"],
    variable_ref="Locataire",
)

In [464]:
# Two neighbourhood groups from the TYPVOIS codes, with left-closed bins: [0, 3) and [3, 6).
# "Immeubles" is the omitted reference category.
df_filtered = clean_variable(
    df=df_filtered,
    variable="TYPVOIS",
    numerical=True,
    bins=[0, 3, 6],
    labels=["Pavillonnaire", "Immeubles"],
    variable_ref="Immeubles",
)

# Regression


In [465]:
# Attach the household-level inflation figures; the inner join on the household
# index keeps only households present in both frames.
df_filtered = df_filtered.merge(
    right=df_inflation_by_household,
    how="inner",
    left_index=True,
    right_index=True,
)

In [466]:
# The regression needs complete rows: drop households with any missing value
# after the merge with the inflation data.
df_filtered = df_filtered.dropna(axis=0, how="any")


In [467]:
# Dependent variable: the household's inflation.
y = df_filtered.loc[:, 'inflation']
# Regressors: every remaining column. df_filtered itself loses the target
# column, and X is another name for that same frame.
df_filtered = df_filtered.drop('inflation', axis=1)
X = df_filtered


In [468]:
import statsmodels.api as sm
# Add the intercept column in first position; if a constant column already
# exists, it is left alone, so re-running this cell does not add a second one.
X = sm.add_constant(X, prepend=True, has_constant='skip')

In [469]:
# Ordinary least squares of household inflation on the indicator variables.
model = sm.OLS(endog=y, exog=X)

In [470]:
# Estimate the model with statsmodels' defaults (the recorded summary reports a
# "nonrobust" covariance type).
# NOTE(review): that summary also shows skewed, heavy-tailed residuals
# (Omnibus / Jarque-Bera); consider whether robust standard errors are needed.
results = model.fit()


In [472]:
# Display the regression table. Each coefficient is read relative to the
# reference category that was dropped for its variable.
results.summary()


0,1,2,3
Dep. Variable:,inflation,R-squared:,0.128
Model:,OLS,Adj. R-squared:,0.127
Method:,Least Squares,F-statistic:,166.0
Date:,"Tue, 13 Jun 2023",Prob (F-statistic):,0.0
Time:,22:43:37,Log-Likelihood:,-40931.0
No. Observations:,16978,AIC:,81890.0
Df Residuals:,16962,BIC:,82020.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.6603,0.083,68.351,0.000,5.498,5.823
Autre type de ménage (ménage complexe),-0.0524,0.105,-0.500,0.617,-0.258,0.153
Couple avec au moins un enfant,0.1252,0.064,1.972,0.049,0.001,0.250
Couple sans enfant,0.3185,0.061,5.242,0.000,0.199,0.438
Famille monoparentale,0.0773,0.070,1.100,0.271,-0.060,0.215
Moins de 30 ans,-0.5547,0.083,-6.672,0.000,-0.718,-0.392
De 30 à 44 ans,-0.2010,0.056,-3.566,0.000,-0.311,-0.090
De 60 à 74 ans,0.1871,0.062,3.032,0.002,0.066,0.308
75 ans et plus,0.3874,0.077,5.023,0.000,0.236,0.539

0,1,2,3
Omnibus:,4919.436,Durbin-Watson:,2.015
Prob(Omnibus):,0.0,Jarque-Bera (JB):,26232.834
Skew:,1.295,Prob(JB):,0.0
Kurtosis:,8.511,Cond. No.,10.2
