In [2]:
import numpy as np
import pandas as pd
import os
import csv
import ast
import seaborn as sns
from matplotlib import pyplot as plt
import wquantiles 

from tqdm import tqdm
from wquantiles import quantile
from openfisca_survey_manager.utils import asof

from openfisca_france_indirect_taxation import FranceIndirectTaxationTaxBenefitSystem
from openfisca_france_indirect_taxation.surveys import SurveyScenario
from openfisca_france_indirect_taxation.examples.utils_example import (
    dataframe_by_group,
    df_weighted_average_grouped,
    wavg)
from openfisca_france_indirect_taxation.build_survey_data.utils import weighted_sum
from openfisca_france_indirect_taxation.projects.TVA_Herve_IPP.new_calage_bdf_cn import new_get_inflators_by_year
from openfisca_france_indirect_taxation.projects.TVA_Herve_IPP.Utils import weighted_quantiles
from openfisca_france_indirect_taxation.utils import assets_directory, get_input_data_frame

In [3]:
simulated_variables = ['aise',
'depenses_totales',
'depenses_tot',
'rev_disponible',
 'niveau_de_vie',
 'niveau_vie_decile',
 'ocde10',
 'pondmen',
 'nactifs',
 'npers',
 'identifiant_menage']

In [4]:
# Première simulation : données BdF 2017 brutes
year = 2017
data_year = 2017
tax_benefit_system = FranceIndirectTaxationTaxBenefitSystem()

survey_scenario = SurveyScenario.create(
    tax_benefit_system = tax_benefit_system,
    year = year,
    data_year = data_year
    )

In [175]:
def taux_epargne(survey_scenario):
    depenses_tot = survey_scenario.compute_aggregate(variable = 'depenses_tot', use_baseline = False, period = year)
    rev_disp_tot = survey_scenario.compute_aggregate(variable = 'rev_disponible', use_baseline = False, period = year)
    taux_epargne_tot = 1 - depenses_tot / rev_disp_tot
     
    df = survey_scenario.create_data_frame_by_entity(simulated_variables, period = 2017)['menage']
    df_by_decile = df_weighted_average_grouped(dataframe = df, groupe = 'niveau_vie_decile', varlist =['depenses_tot','rev_disponible'])
    df_by_decile['taux_epargne'] = 1 - df_by_decile['depenses_tot'] / df_by_decile['rev_disponible']

    return([depenses_tot,rev_disp_tot,taux_epargne_tot,df_by_decile])
    

In [176]:
df_brut_by_decile = taux_epargne(survey_scenario)[3]
df_brut_by_decile

Unnamed: 0_level_0,depenses_tot,rev_disponible,taux_epargne
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,16625.372415,12015.018932,-0.383716
2.0,18761.176763,18776.227846,0.000802
3.0,20128.629852,22042.378099,0.086821
4.0,22750.77932,25739.88178,0.116127
5.0,23942.214775,28714.357431,0.166194
6.0,27006.557205,32428.782916,0.167204
7.0,29289.681016,37092.999439,0.210372
8.0,32579.120602,42136.181623,0.226814
9.0,35864.902187,50048.298501,0.283394
10.0,48580.17463,76150.455123,0.36205


In [5]:
bdf_brut_2017 = survey_scenario.create_data_frame_by_entity(simulated_variables, period = 2017)['menage']
bdf_brut_2017['epargne'] = (bdf_brut_2017['rev_disponible'] > bdf_brut_2017['depenses_tot']).astype(float)
bdf_brut_2017['aise_1_2_3'] = (bdf_brut_2017['aise'] <= 3).astype(float)
bdf_brut_2017_by_decile = df_weighted_average_grouped(dataframe = bdf_brut_2017, groupe = 'niveau_vie_decile', varlist = ['rev_disponible','niveau_de_vie','epargne','aise_1_2_3'])
bdf_brut_2017_by_decile

Unnamed: 0_level_0,rev_disponible,niveau_de_vie,epargne,aise_1_2_3
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,12015.018932,7759.958752,0.362391,0.555877
2.0,18776.227846,12213.33393,0.606507,0.648576
3.0,22042.378099,14727.650104,0.66632,0.717547
4.0,25739.88178,16995.327595,0.699768,0.798693
5.0,28714.357431,19088.682334,0.724057,0.839383
6.0,32428.782916,21296.039939,0.745249,0.876706
7.0,37092.999439,23766.502561,0.781013,0.911315
8.0,42136.181623,27037.907354,0.78993,0.928622
9.0,50048.298501,32009.629118,0.856748,0.9496
10.0,76150.455123,48912.69445,0.871183,0.979489


In [6]:
# Deuxième simulation : données BdF callées sur la compta nat en 2017 
inflators_by_year = new_get_inflators_by_year(rebuild = False, year_range = range(2017, 2025), data_year = data_year)
inflation_kwargs = dict(inflator_by_variable = inflators_by_year[year])

survey_scenario = SurveyScenario.create(
    inflation_kwargs =  inflation_kwargs,
    tax_benefit_system = tax_benefit_system,
    year = year,
    data_year = data_year
    )

In [182]:
df_cale_by_decile = taux_epargne(survey_scenario)[3]
df_cale_by_decile

Unnamed: 0_level_0,depenses_tot,rev_disponible,taux_epargne
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,21426.338557,14599.208011,-0.467637
2.0,23894.268637,22814.61709,-0.047323
3.0,25748.476216,26783.250618,0.038635
4.0,29196.376843,31276.013087,0.066493
5.0,31542.8204,34890.238635,0.095941
6.0,36029.864614,39403.562384,0.085619
7.0,37516.550538,45070.958138,0.167611
8.0,42793.08228,51198.827453,0.164178
9.0,45639.467491,60812.681652,0.249507
10.0,59054.32645,92528.887655,0.361774


In [7]:
bdf_cale_2017 = survey_scenario.create_data_frame_by_entity(simulated_variables, period = 2017)['menage']
bdf_cale_2017['epargne'] = (bdf_cale_2017['rev_disponible'] > bdf_cale_2017['depenses_tot']).astype(float)
bdf_cale_2017['aise_1_2_3'] = (bdf_cale_2017['aise'] <= 3).astype(float)
bdf_cale_2017_by_decile = df_weighted_average_grouped(dataframe = bdf_cale_2017, groupe = 'niveau_vie_decile', varlist = ['rev_disponible','niveau_de_vie','epargne','aise_1_2_3'])

In [None]:
# Comparaison avec l'ERFS 
erfs_path = "C:/Users/veve1/OneDrive/Documents/ENSAE 3A/Memoire MiE/Data/erfs_fpr/2017/csv"
erfs_menage_2017 = pd.read_csv(os.path.join(erfs_path,"fpr_menage_2017.csv"), sep = ";")
erfs_mrf17 = pd.read_csv(os.path.join(erfs_path,"fpr_mrf17e17t4.csv"), sep = ";") 

erfs_2017 = erfs_menage_2017.merge(erfs_mrf17, how = "left", left_on='ident17', right_on = 'ident17')
erfs_2017.columns = erfs_2017.columns.str.lower()
erfs_2017.rename({'wprm' : 'pondmen'}, axis = 1, inplace= True)

erfs_2017['niveau_de_vie'] = erfs_2017['revdispm'] / erfs_2017['nb_uci']
erfs_2017['niveau_vie_decile'] = weighted_quantiles(erfs_2017['niveau_de_vie'], labels = np.arange(1,11), weights = erfs_2017['pondmen'], return_quantiles=False)
erfs_2017_by_decile = df_weighted_average_grouped(erfs_2017, groupe = 'niveau_vie_decile', varlist = ['revdispm','niveau_de_vie','pondmen'])

In [131]:
bdf_brut_2017_by_decile

Unnamed: 0_level_0,rev_disponible,niveau_de_vie,epargne,aise_1_2_3
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,12015.018932,7759.958752,0.362391,0.555877
2.0,18776.227846,12213.33393,0.606507,0.648576
3.0,22042.378099,14727.650104,0.66632,0.717547
4.0,25739.88178,16995.327595,0.699768,0.798693
5.0,28714.357431,19088.682334,0.724057,0.839383
6.0,32428.782916,21296.039939,0.745249,0.876706
7.0,37092.999439,23766.502561,0.781013,0.911315
8.0,42136.181623,27037.907354,0.78993,0.928622
9.0,50048.298501,32009.629118,0.856748,0.9496
10.0,76150.455123,48912.69445,0.871183,0.979489


In [140]:
erfs_2017_by_decile

Unnamed: 0_level_0,revdispm,niveau_de_vie,pondmen
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,10253.188974,6707.186622,933.366422
2.0,19062.038954,12460.852376,848.820444
3.0,22606.187196,15090.238057,826.891437
4.0,26038.912132,17339.439824,784.582197
5.0,29340.806896,19484.71284,806.298726
6.0,33467.879331,21785.178555,770.483543
7.0,37979.628313,24456.084581,759.360545
8.0,43456.888879,27921.908165,765.799384
9.0,51963.296945,33582.202336,694.372846
10.0,88051.156627,56994.081424,741.289297


In [188]:
bdf_cale_2017_by_decile

Unnamed: 0_level_0,rev_disponible,niveau_de_vie,epargne,aise_1_2_3
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,14599.208011,9428.969917,0.369413,0.555877
2,22814.61709,14840.176618,0.600163,0.648576
3,26783.250618,17895.271651,0.651841,0.717547
4,31276.013087,20650.681035,0.684043,0.798693
5,34890.238635,23194.274268,0.707745,0.839383
6,39403.562384,25876.390129,0.716664,0.876706
7,45070.958138,28878.199605,0.75893,0.911315
8,51198.827453,32853.21787,0.758794,0.928622
9,60812.681652,38894.257006,0.825088,0.9496
10,92528.887655,59432.83214,0.85414,0.979489


In [12]:
input_bdf_2017 = get_input_data_frame(2017)
input_bdf_2017_grouped = input_bdf_2017.filter(like='poste_').groupby(lambda col: '_'.join(col.split('_')[:3]), axis=1).sum()

to_drop = [col for col in input_bdf_2017_grouped.columns.tolist() if col.startswith('poste_17') or col.startswith('poste_18')]
input_bdf_2017_grouped.drop(columns= to_drop, axis = 1, inplace= True)
input_bdf_2017_grouped = pd.concat([input_bdf_2017[['pondmen','ident_men','rev_disponible','ocde10','aise','stalog','typmen']], input_bdf_2017_grouped], axis=1)

## Méthode 1 : redressement des revenus sur la base d’un filtre de cohérence revenu/consommation

Tiré de Bellamy et al. (2009): "Le redressement du revenu basé sur le filtre de cohérence revenu - consommation est le
suivant :
    (i) sur l’ensemble du fichier de l’enquête les très faibles revenus sont redressés : le
niveau de vie d’un ménage déclarant moins de 300 € par mois et par UC est
ramené automatiquement à 300 € par mois et par UC. Ce cas concerne 16
ménages sur les 10 240 de l’échantillon.
    (ii) On définit ensuite une "consommation courante" en éliminant les
consommations exceptionnelles.
    (iii) Si cette consommation courante est supérieure de plus de 20 % au revenu
courant (revenu constaté une fois ce dernier éventuellement mis à 300 € par
mois et par UC en (i)), soit pour 15,3 % des ménages, on effectue le test
suivant, analogue à celui utilisé par Loisy (1999).

Si le ménage :
1 - déclare être « à l’aise » financièrement, que « ça va » ou que « c’est juste, mais il
faut faire attention »
et
2 - déclare mettre de l’argent de côté ou que revenus et dépenses s’équilibrent
alors on remonte le revenu calculé en (i) au niveau de la consommation courante (ii). Dans
les autres cas, on laisse le revenu (i) et la consommation inchangés. Au total 10,7 % des
ménages sont affectés par ce redressement. On notera que ce traitement touche a priori
tous les ménages, quel que soit leur niveau de vie initial."

In [None]:
input_bdf_2017_grouped.drop('poste_04_2', axis= 1, inplace= True)        # on retire les loyers imputés des dépenses
input_bdf_2017_grouped = input_bdf_2017_grouped.apply(pd.to_numeric, errors='coerce')

liste_poste = [col for col in input_bdf_2017_grouped.columns.tolist() if col.startswith('poste')]
for poste in liste_poste:
   results = weighted_quantiles(input_bdf_2017_grouped['{}'.format(poste)], labels = np.arange(1,11), weights = input_bdf_2017_grouped['pondmen'], return_quantiles=True)
   input_bdf_2017_grouped['conso_courante_{}'.format(poste)] = input_bdf_2017_grouped['{}'.format(poste)].clip(upper=results[1][9])

input_bdf_2017_grouped['depenses_tot'] = input_bdf_2017_grouped[liste_poste].sum(axis = 1) 
input_bdf_2017_grouped['conso_courante_totale'] = input_bdf_2017_grouped.filter(like = 'conso_courante').sum(axis = 1)

input_bdf_2017_grouped['rev_disponible_2'] = input_bdf_2017_grouped['rev_disponible'].clip(lower= 3600 * input_bdf_2017_grouped['ocde10'])   

In [53]:
len(input_bdf_2017_grouped.loc[input_bdf_2017_grouped['rev_disponible_2'] > input_bdf_2017_grouped['rev_disponible']])

137

Il y a 137 ménages dont le niveau de vie est inférieur à 300€ par mois par UC.

In [54]:
input_bdf_2017_grouped['TEST'] = input_bdf_2017_grouped['conso_courante_totale'] > 1.2 * input_bdf_2017_grouped['rev_disponible_2']
input_bdf_2017_grouped['rev_disponible_3'] = input_bdf_2017_grouped['TEST']*(input_bdf_2017_grouped['aise'] <= 3)*input_bdf_2017_grouped['conso_courante_totale'] + input_bdf_2017_grouped['TEST'] * (input_bdf_2017_grouped['aise']>3) * input_bdf_2017_grouped['rev_disponible_2'] + (1 - input_bdf_2017_grouped['TEST']) * input_bdf_2017_grouped['rev_disponible_2']

In [55]:
input_bdf_2017_grouped['TEST'].sum(axis = 0)

825

Il y a 825 ménages dont la consommation courante est supérieure de plus de 20% à leur revenu disponible

In [56]:
(input_bdf_2017_grouped['TEST']*(input_bdf_2017_grouped['aise'] <= 3)).sum()

559

Dont 559 qui déclare être « à l’aise » financièrement, que « ça va » ou que « c’est juste, mais il
faut faire attention ».

In [None]:
for rev in ['','_2','_3']:
    input_bdf_2017_grouped['niveau_de_vie{}'.format(rev)] = input_bdf_2017_grouped['rev_disponible{}'.format(rev)] / input_bdf_2017_grouped['ocde10']
    
input_bdf_2017_grouped['niveau_vie_decile_3'] = weighted_quantiles(input_bdf_2017_grouped['niveau_de_vie_3'], labels = np.arange(1,11), weights = input_bdf_2017_grouped['pondmen'], return_quantiles=False)
input_bdf_2017_by_decile = df_weighted_average_grouped(input_bdf_2017_grouped, groupe = 'niveau_vie_decile_3', varlist = ['niveau_de_vie_3','rev_disponible_3','depenses_tot','pondmen'])

input_bdf_2017_by_decile['taux_epargne'] = 1 - input_bdf_2017_by_decile['depenses_tot'] / input_bdf_2017_by_decile['rev_disponible_3'] 

In [60]:
input_bdf_2017_by_decile

Unnamed: 0_level_0,niveau_de_vie_3,rev_disponible_3,depenses_tot,pondmen,taux_epargne
niveau_vie_decile_3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,8872.168247,13905.247431,14752.4506,3140.21438,-0.060927
2.0,12777.672843,19592.399149,17969.821377,2991.408651,0.082817
3.0,15195.757493,23047.613399,20788.119763,2962.8348,0.098036
4.0,17341.16251,25745.871152,22751.391082,3043.587586,0.116309
5.0,19355.708373,29333.302928,24169.493902,2897.232694,0.176039
6.0,21500.351553,32503.878114,27367.187653,2882.988366,0.158033
7.0,23910.130301,36833.738639,29895.06726,3045.108348,0.188378
8.0,27111.663798,42041.328835,33119.224533,2959.890188,0.212222
9.0,32034.729314,49925.746328,35975.475574,2848.817929,0.27942
10.0,49165.826911,76506.585547,48717.300346,2793.34144,0.363227


In [141]:
erfs_2017_by_decile

Unnamed: 0_level_0,revdispm,niveau_de_vie,pondmen
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,10253.188974,6707.186622,933.366422
2.0,19062.038954,12460.852376,848.820444
3.0,22606.187196,15090.238057,826.891437
4.0,26038.912132,17339.439824,784.582197
5.0,29340.806896,19484.71284,806.298726
6.0,33467.879331,21785.178555,770.483543
7.0,37979.628313,24456.084581,759.360545
8.0,43456.888879,27921.908165,765.799384
9.0,51963.296945,33582.202336,694.372846
10.0,88051.156627,56994.081424,741.289297


## Méthode 2 : Imputation des données de consommation par strates

### Méthode 2.1 : En utilisant le revenu disponible de BdF (brut)

In [13]:
to_drop = [col for col in input_bdf_2017.columns.tolist() if col.startswith('poste_17') or col.startswith('poste_18')]
input_bdf_2017.drop(to_drop, axis = 1, inplace = True)
input_bdf_2017['niveau_de_vie'] = input_bdf_2017['rev_disponible'] / input_bdf_2017['ocde10']
input_bdf_2017['niveau_de_vie'] = input_bdf_2017['niveau_de_vie'].astype(float)
input_bdf_2017['pondmen'] = input_bdf_2017['pondmen'].astype(float)

input_bdf_2017['niveau_vie_decile'] = weighted_quantiles(data = input_bdf_2017['niveau_de_vie'], labels = np.arange(1,11), weights = input_bdf_2017['pondmen'], return_quantiles=False)
input_bdf_2017['stalog_2'] = input_bdf_2017['stalog'].replace({1:1, 3:2, 4:2, 5:2})

In [14]:
def assign_strate_ines(row):
    if row['typmen'] == 5:
        return 'T5'
    elif row['typmen'] == 2:
        return f'T2_D{int(row["niveau_vie_decile"])}'
    elif row['typmen'] in [1, 3, 4]:
        return f'T{int(row["typmen"])}_D{int(row["niveau_vie_decile"])}_S{int(row["stalog_2"])}'
    else:
        return 'Other'

input_bdf_2017['strate_ines'] = input_bdf_2017.apply(assign_strate_ines, axis=1)

In [15]:
input_bdf_2017['strate_ines'].nunique()

71

In [16]:
input_bdf_2017[['niveau_vie_decile','typmen','stalog','strate_ines']]

Unnamed: 0,niveau_vie_decile,typmen,stalog,strate_ines
0,7.0,3,1,T3_D7_S1
1,10.0,4,2,T4_D10_S2
2,7.0,4,1,T4_D7_S1
3,2.0,1,3,T1_D2_S2
4,7.0,4,1,T4_D7_S1
...,...,...,...,...
12076,5.0,1,3,T1_D5_S2
12077,10.0,4,2,T4_D10_S2
12078,4.0,4,1,T4_D4_S1
12079,8.0,3,1,T3_D8_S1


In [17]:
input_bdf_2017['strate_ines'].value_counts()

T3_D10_S1    416
T2_D1        387
T2_D2        322
T3_D9_S1     307
T1_D1_S2     307
            ... 
T2_D9         55
T3_D1_S1      51
T4_D3_S1      51
T2_D10        48
T4_D1_S1      43
Name: strate_ines, Length: 71, dtype: int64

In [18]:
liste_var = ['rev_disponible'] + [col for col in input_bdf_2017.columns.tolist() if col.startswith('poste_')]

input_bdf_2017_by_strate = df_weighted_average_grouped(input_bdf_2017, groupe = 'strate_ines', varlist = liste_var)
for poste in [col for col in input_bdf_2017.columns.tolist() if col.startswith('poste_')] : 
    input_bdf_2017_by_strate['part_{}'.format(poste)] = input_bdf_2017_by_strate['{}'.format(poste)] / input_bdf_2017_by_strate['rev_disponible']
    
part_conso_by_strat = input_bdf_2017_by_strate.filter(like = 'part').reset_index()

  input_bdf_2017_by_strate['part_{}'.format(poste)] = input_bdf_2017_by_strate['{}'.format(poste)] / input_bdf_2017_by_strate['rev_disponible']


In [19]:
new_input_bdf_2017 = input_bdf_2017.merge(part_conso_by_strat, how = 'left', left_on = 'strate_ines', right_on = 'strate_ines')
for poste in [col for col in input_bdf_2017.columns.tolist() if col.startswith('poste_')] : 
    new_input_bdf_2017['new_{}'.format(poste)] = new_input_bdf_2017['part_{}'.format(poste)] * new_input_bdf_2017['rev_disponible']
    
liste_new_poste = [col for col in new_input_bdf_2017.columns.tolist() if col.startswith('new_poste')]
new_input_bdf_2017['depenses_tot'] = new_input_bdf_2017[liste_new_poste].sum(axis = 1)

new_input_bdf_2017_by_decile = df_weighted_average_grouped(dataframe = new_input_bdf_2017, groupe = 'niveau_vie_decile', varlist =['depenses_tot','rev_disponible'])
new_input_bdf_2017_by_decile['taux_epargne'] = 1 - new_input_bdf_2017_by_decile['depenses_tot'] / new_input_bdf_2017_by_decile['rev_disponible']

  new_input_bdf_2017['new_{}'.format(poste)] = new_input_bdf_2017['part_{}'.format(poste)] * new_input_bdf_2017['rev_disponible']


In [20]:
new_input_bdf_2017_by_decile

Unnamed: 0_level_0,depenses_tot,rev_disponible,taux_epargne
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,18605.736666,11579.071758,-0.606842
2.0,21203.822108,18776.383284,-0.129281
3.0,23233.597076,22042.586374,-0.054032
4.0,26527.755716,25740.164076,-0.030598
5.0,28080.530965,28714.647747,0.022083
6.0,31619.497785,32429.025493,0.024963
7.0,34487.678857,37093.212019,0.070243
8.0,38562.396927,42136.38701,0.08482
9.0,42921.945324,50048.483498,0.142393
10.0,57520.698573,76519.241506,0.248285


In [21]:
def imputation_depenses_ines(input_df):
    to_drop = [col for col in input_df.columns.tolist() if col.startswith('poste_17') or col.startswith('poste_18')]
    input_df.drop(to_drop, axis = 1, inplace = True)
    
    input_df['niveau_de_vie'] = input_df['rev_disponible'] / input_df['ocde10']
    input_df['niveau_de_vie'] = input_df['niveau_de_vie'].astype(float)
    input_df['pondmen'] = input_df['pondmen'].astype(float)
    input_df['niveau_vie_decile'] = weighted_quantiles(data = input_df['niveau_de_vie'], labels = np.arange(1,11), weights = input_df['pondmen'], return_quantiles=False)
    input_df['stalog_2'] = input_df['stalog'].replace({1:1, 3:2, 4:2, 5:2})
    
    input_df['strate_ines'] = input_df.apply(assign_strate_ines, axis=1)
    liste_var = ['rev_disponible'] + [col for col in input_df.columns.tolist() if col.startswith('poste_')]

    input_df_by_strate = df_weighted_average_grouped(input_df, groupe = 'strate_ines', varlist = liste_var)
    for poste in [col for col in input_df.columns.tolist() if col.startswith('poste_')] : 
        input_df_by_strate['part_{}'.format(poste)] = input_df_by_strate['{}'.format(poste)] / input_df_by_strate['rev_disponible']
    part_conso_by_strat = input_df_by_strate.filter(like = 'part').reset_index()
    
    new_input_df = input_df.merge(part_conso_by_strat, how = 'left', left_on = 'strate_ines', right_on = 'strate_ines')
    for poste in [col for col in input_df.columns.tolist() if col.startswith('poste_')] : 
        new_input_df['new_{}'.format(poste)] = new_input_df['part_{}'.format(poste)] * new_input_df['rev_disponible']
        
    liste_new_poste = [col for col in new_input_df.columns.tolist() if col.startswith('new_poste')]
    new_input_df['depenses_tot'] = new_input_df[liste_new_poste].sum(axis = 1)

    new_input_df_by_decile = df_weighted_average_grouped(dataframe = new_input_df, groupe = 'niveau_vie_decile', varlist =['depenses_tot','rev_disponible'])
    new_input_df_by_decile['taux_epargne'] = 1 - new_input_df_by_decile['depenses_tot'] / new_input_df_by_decile['rev_disponible']
    
    return(new_input_df,new_input_df_by_decile)

### Méthode 2.2 : En utilisant les consos et revenus BdF calés en 2017

In [22]:
input_bdf_cale_2017 = survey_scenario.create_data_frame_by_entity(liste_var + ['ocde10','pondmen','stalog','typmen'], period = 2017)['menage']

In [23]:
new_input_bdf_cale_2017, new_input_bdf_cale_2017_by_decile = imputation_depenses_ines(input_bdf_cale_2017)

  input_df_by_strate['part_{}'.format(poste)] = input_df_by_strate['{}'.format(poste)] / input_df_by_strate['rev_disponible']
  new_input_df['new_{}'.format(poste)] = new_input_df['part_{}'.format(poste)] * new_input_df['rev_disponible']


In [24]:
new_input_bdf_cale_2017_by_decile

Unnamed: 0_level_0,depenses_tot,rev_disponible,taux_epargne
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,20721.93751,14069.24448,-0.472854
2.0,23707.599907,22814.61709,-0.039141
3.0,25635.818912,26783.250618,0.042841
4.0,29297.191408,31276.013087,0.06327
5.0,31639.180262,34890.238635,0.09318
6.0,35881.453326,39403.562384,0.089386
7.0,37357.819939,45070.958138,0.171133
8.0,43128.24187,51198.827453,0.157632
9.0,46018.035831,60812.681652,0.243282
10.0,59414.528094,92976.841634,0.360975


In [28]:
new_input_bdf_cale_2017_by_decile[['depenses_tot','rev_disponible']].sum(axis = 0)

depenses_tot      352801.807058
rev_disponible    419296.235171
dtype: float64

In [29]:
1 - 352801/ 419296

0.15858725101121884

In [171]:
new_input_bdf_2017_by_decile

Unnamed: 0_level_0,depenses_tot,rev_disponible,taux_epargne
niveau_vie_decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,18605.736666,11579.071758,-0.606842
2.0,21203.822108,18776.383284,-0.129281
3.0,23233.597076,22042.586374,-0.054032
4.0,26527.755716,25740.164076,-0.030598
5.0,28080.530965,28714.647747,0.022083
6.0,31619.497785,32429.025493,0.024963
7.0,34487.678857,37093.212019,0.070243
8.0,38562.396927,42136.38701,0.08482
9.0,42921.945324,50048.483498,0.142393
10.0,57520.698573,76519.241506,0.248285


### Méthode 2.3 : En utilisant les revenus de l'ERFS