In [1]:
import numpy
import pandas as pd
import os
import csv
import ast
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

from wquantiles import quantile
from openfisca_survey_manager.utils import asof

from openfisca_france_indirect_taxation import FranceIndirectTaxationTaxBenefitSystem
from openfisca_france_indirect_taxation.examples.utils_example import (
    wavg,
    collapse,
    dataframe_by_group,
    graph_builder_bar,
    df_weighted_average_grouped)
from openfisca_france_indirect_taxation.build_survey_data.utils import weighted_sum
from openfisca_france_indirect_taxation.almost_ideal_demand_system.utils import add_niveau_vie_decile
from openfisca_france_indirect_taxation.surveys import SurveyScenario
from openfisca_france_indirect_taxation.calibration import get_inflators_by_year_energy
from openfisca_france_indirect_taxation.utils import assets_directory, get_input_data_frame

In [2]:
# Reference year of the input survey data; it is passed as `data_year` to the
# calibration helpers and to SurveyScenario.create in the cells below.
data_year = 2017
# NOTE(review): the rebuild of the energy inflators is commented out, so this
# cell does not define `inflators_by_year`.
#inflators_by_year = get_inflators_by_year_energy(rebuild = True, year_range = range(2017, 2020), data_year = data_year)

In [34]:
 simulated_variables = [
     # Names of the variables requested from each simulation (order is kept as-is).
     'tva_taux_plein', 'tva_taux_intermediaire', 'tva_taux_reduit', 'tva_taux_super_reduit',
     'tva_total',
     'rev_disponible',
     'pondmen',
     'depenses_tot', 'loyer_impute',
     ]

In [35]:
# Build the per-year inflators inside this cell: the only other definition in the
# preceding cells is commented out, so on a fresh kernel `inflators_by_year`
# would not exist when the loop below reads it.
inflators_by_year = get_inflators_by_year_energy(rebuild = True, year_range = range(2017, 2020), data_year = data_year)

# One row per simulated year, one column per aggregate of interest.
agregates = pd.DataFrame(columns=['tva_total','tva_taux_super_reduit','tva_taux_reduit','tva_taux_intermediaire','tva_taux_plein','depenses_tot','loyer_impute'])
for year in [2017,2018,2019] :
    # Each year is simulated from the same survey data (data_year) with that year's inflators.
    inflation_kwargs = dict(inflator_by_variable = inflators_by_year[year])
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year
        )
    # Sums computed by `niveau_vie_decile` group, then added up over the groups.
    df_sum = dataframe_by_group(survey_scenario, category = 'niveau_vie_decile', variables = simulated_variables, aggfunc = 'sum')
    agregates.loc[year] = df_sum.sum()


In [36]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1012287000000.0,174934000000.0
2018,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1038613000000.0,178082700000.0
2019,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1064471000000.0,181852100000.0


In [9]:
from openfisca_france_indirect_taxation.calibration import (get_bdf_aggregates_energy, 
                                                            get_cn_aggregates_energy,
                                                            get_inflators_bdf_to_cn_energy,
                                                            get_inflators_cn_to_cn_energy,
                                                            get_inflators_energy
                                                            )

In [10]:
data_cn = get_cn_aggregates_energy(data_year)
data_bdf = get_bdf_aggregates_energy(data_year)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [26]:
data_cn

Unnamed: 0,conso_CN_2017
loyer_impute,174972900000.0
depenses_electricite,27205000000.0
depenses_gaz_ville,12994650000.0
depenses_combustibles_liquides,6088465000.0
depenses_combustibles_solides,1976545000.0
depenses_carburants,36523970000.0
depenses_tot,1186224000000.0
rev_disponible,1388087000000.0
rev_disp_loyerimput,1213114000000.0


In [27]:
data_bdf

Unnamed: 0,bdf_aggregates
depenses_carburants,31655040000.0
depenses_combustibles_liquides,3546686000.0
depenses_combustibles_solides,1584850000.0
depenses_electricite,23613100000.0
depenses_gaz_ville,9198009000.0
depenses_tot,922354600000.0
loyer_impute,135074300000.0
rev_disponible,986128800000.0
rev_disp_loyerimput,1121203000000.0


In [11]:
data_bdf.loc['depenses_tot']

bdf_aggregates    9.223546e+11
Name: depenses_tot, dtype: float64

In [12]:
data_cn.loc['depenses_tot']

conso_CN_2017    1.186224e+12
Name: depenses_tot, dtype: float64

In [22]:
# Ratio of the national-accounts total to the survey total for `depenses_tot`.
# The scalars are taken with .iloc[0]: calling float() on a one-element Series
# is deprecated in recent pandas.
inflator_depenses_tot_bdf_to_cn = data_cn.loc['depenses_tot'].iloc[0] / data_bdf.loc['depenses_tot'].iloc[0]
# Sanity check against the packaged helper; floats are compared with a tolerance
# instead of bit-for-bit equality.
numpy.isclose(inflator_depenses_tot_bdf_to_cn, get_inflators_bdf_to_cn_energy(2017).get('depenses_tot'))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


True

In [21]:
# Simulated total for 2017 (depenses_tot + loyer_impute) divided by the
# `depenses_tot` row of data_cn. Single (row, column) lookups replace chained
# indexing, and .iloc[0] replaces the deprecated float(one-element Series).
(agregates.loc[2017, 'depenses_tot'] + agregates.loc[2017, 'loyer_impute']) / data_cn.loc['depenses_tot'].iloc[0]

1.000840269461446

In [29]:
# Same check as for 2017, against the 2018 aggregates returned by the energy helper.
data_cn_2018 = get_cn_aggregates_energy(2018)
# .iloc[0] extracts the single value of the row without relying on the
# deprecated float(one-element Series) conversion.
(agregates.loc[2018, 'depenses_tot'] + agregates.loc[2018, 'loyer_impute']) / data_cn_2018.loc['depenses_tot'].iloc[0]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


0.9996893440873598

In [32]:
depenses = get_input_data_frame(data_year)

# Total consumption: add up every column whose first 8 characters are one of
# the 12 aggregated categories poste_01 ... poste_12.
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{:02d}'.format(numero) for numero in range(1, 13)]
depenses['depenses_tot'] = 0
for element in liste_variables:
    if element[:8] in postes_agreges:
        depenses['depenses_tot'] += depenses[element]


In [94]:
depenses[['poste_04_2_1','loyer_impute']]

Unnamed: 0,poste_04_2_1,loyer_impute
0,3732,3732
1,10944,10944
2,14424,14424
3,0,0
4,4908,4908
...,...,...
12076,0,0
12077,15660,15660
12078,3996,3996
12079,10956,10956


In [56]:
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

In [59]:
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{}'.format(index) for index in
        ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]]

# Household-level spending per aggregated category (poste_01 ... poste_12).
for poste in postes_agreges:
    depenses[poste] = 0
    for element in liste_variables:
        # Skip the aggregate column itself: when this cell is run a second time
        # `poste` is already one of the columns listed in `liste_variables`, and
        # adding it to its own running total would inflate the result.
        if element[:8] == poste and element != poste:
            depenses[poste] += depenses[element]

# Weighted totals: sum over households of pondmen * category spending.
depenses_bdf_by_poste_agreges = [(depenses['pondmen'] * depenses[poste]).sum() for poste in postes_agreges]

In [86]:
depenses_bdf_by_poste_agreges

[126558517616.19775,
 22769282779.833675,
 39373557642.18781,
 263341119399.63644,
 44626189503.00444,
 14561448588.157013,
 128156427135.21976,
 20978921247.8749,
 74025777235.73172,
 6137827496.527738,
 56919092435.17008,
 124906429309.42918]

In [87]:
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

On reconstruit les différentes fonctions qui servent à réaliser le calage et le vieillissement des données à partir de la comptabilité nationale (examples.calage_bdf_cn_bis).

**Par poste agrégé**

In [49]:
def get_bdf_aggregates(data_year = None):
    """
    Weighted survey totals for the 12 aggregated categories poste_01 ... poste_12.

    For each category, every input column whose first 8 characters equal the
    category name is added up per row, multiplied by `pondmen` and summed.

    Parameters
    ----------
    data_year : year handed to `get_input_data_frame`; must not be None.

    Returns
    -------
    pandas.DataFrame indexed by category name, with a single float column
    'bdf_aggregates'. A category without any matching column gets 0.

    Raises
    ------
    AssertionError if `data_year` is None.
    """
    assert data_year is not None
    # Load the requested year (the year used to be hard-coded to 2017).
    depenses = get_input_data_frame(data_year)
    liste_variables = depenses.columns.tolist()
    postes_agreges = ['poste_{:02d}'.format(numero) for numero in range(1, 13)]

    totals_by_poste = {}
    for poste in postes_agreges:
        # Accumulate in a local Series so the loaded frame is left untouched.
        depenses_poste = 0
        for element in liste_variables:
            # A column named exactly like the category is not one of its components.
            if element[:8] == poste and element != poste:
                depenses_poste = depenses_poste + depenses[element]
        totals_by_poste[poste] = (depenses_poste * depenses['pondmen']).sum()

    return pd.DataFrame({'bdf_aggregates': pd.Series(totals_by_poste, dtype = float)})

In [4]:
get_bdf_aggregates(data_year= 2017)

Unnamed: 0,bdf_aggregates
poste_01,126558500000.0
poste_02,22769280000.0
poste_03,39373560000.0
poste_04,263341100000.0
poste_05,44626190000.0
poste_06,14561450000.0
poste_07,128803900000.0
poste_08,22916580000.0
poste_09,71440590000.0
poste_10,6137827000.0


**Par poste agrégé**

In [48]:
def get_cn_aggregates(target_year = None):
    """
    Read the national-accounts consumption totals of the 12 aggregated
    categories for one year.

    The sheet "MEURcour" of legislation/conso_eff_fonction_2023.xls (under
    `assets_directory`) is read with the header on row index 4. Codes have a
    leading 'CP' removed and are stripped; only the codes '01' ... '12' are kept.

    Parameters
    ----------
    target_year : year whose column (looked up as the string of the year) is
        extracted; must not be None.

    Returns
    -------
    pandas.DataFrame indexed by 'poste_01' ... (index name 'Code') with one
    column 'conso_CN_<target_year>', equal to the sheet values times 1e6
    (the sheet name suggests millions of current euros -- TODO confirm).

    Raises
    ------
    AssertionError if `target_year` is None.
    """
    assert target_year is not None

    parametres_fiscalite_file_path = os.path.join(
        assets_directory,
        'legislation',
        'conso_eff_fonction_2023.xls'
        )
    year_column = '{}'.format(target_year)
    cn_column = 'conso_CN_{}'.format(target_year)

    masses_cn_data_frame = pd.read_excel(parametres_fiscalite_file_path, sheet_name = "MEURcour", header = 4)
    masses_cn_data_frame = masses_cn_data_frame.rename(columns = {'Unnamed: 0': 'Code', 'Unnamed: 1': 'Label'})

    # Normalise the codes: drop the leading 'CP', then surrounding blanks.
    codes = masses_cn_data_frame['Code'].str.replace(r'^CP', '', regex = True).str.strip()
    codes_postes_agreges = ['{:02d}'.format(numero) for numero in range(1, 13)]
    selection = codes.isin(codes_postes_agreges)

    # Build a fresh frame instead of writing into a filtered slice of the sheet;
    # that pattern was the source of the SettingWithCopyWarning.
    masses_cn_12postes_data_frame = pd.DataFrame({cn_column: masses_cn_data_frame.loc[selection, year_column]})
    masses_cn_12postes_data_frame.index = pd.Index(
        ['poste_{}'.format(code) for code in codes[selection]],
        name = 'Code',
        )

    return masses_cn_12postes_data_frame * 1e6

In [6]:
masses_cn_data_frame= get_cn_aggregates(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [7]:
masses_cn_data_frame

Unnamed: 0_level_0,conso_CN_2017
Code,Unnamed: 1_level_1
poste_01,147641700000.0
poste_02,44372500000.0
poste_03,46684800000.0
poste_04,338541000000.0
poste_05,51822700000.0
poste_06,48770100000.0
poste_07,156293500000.0
poste_08,45865300000.0
poste_09,83158500000.0
poste_10,8878900000.0


**Par poste agrégé**

In [10]:
def get_inflators_bdf_to_cn(data_year):
    """
    Calibration ratios for `data_year`: national-accounts total divided by the
    survey total, for every category present in both tables.

    Returns a dict {category name: ratio}.
    """
    cn_column = 'conso_CN_{}'.format(data_year)
    bdf_column = 'conso_bdf{}'.format(data_year)

    national_accounts = get_cn_aggregates(data_year)
    survey_totals = get_bdf_aggregates(data_year).rename(columns = {'bdf_aggregates': bdf_column})
    # Index-on-index merge: only categories found in both tables are kept.
    both = national_accounts.merge(survey_totals, left_index = True, right_index = True)

    ratios = both[cn_column] / both[bdf_column]
    return ratios.to_dict()

In [50]:
# Calibration table for `data_year`: national-accounts totals, survey totals and
# their ratio, one row per aggregated category.
data_cn = get_cn_aggregates(data_year)
data_bdf = get_bdf_aggregates(data_year)
masses = (
    data_cn
    .merge(data_bdf, left_index = True, right_index = True)
    .rename(columns = {'bdf_aggregates': 'conso_bdf{}'.format(data_year)})
    )
masses['inflateur'] = masses['conso_CN_{}'.format(data_year)] / masses['conso_bdf{}'.format(data_year)]

masses = masses.sort_index()
# The file name follows `data_year` (same name as before when data_year is 2017),
# so a table computed for another year cannot overwrite the 2017 one.
masses.to_csv(os.path.join(assets_directory, 'inflateurs', 'inflators_bdf_cn_{}_postes_agreges.csv'.format(data_year)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [11]:
get_inflators_bdf_to_cn(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


{'poste_01': 1.1665884112813272,
 'poste_02': 1.9487877782123153,
 'poste_03': 1.1856891476318707,
 'poste_04': 1.2855607235657076,
 'poste_05': 1.1612620431442182,
 'poste_06': 3.349261559022724,
 'poste_07': 1.2134216757352296,
 'poste_08': 2.001402062048447,
 'poste_09': 1.1640230623944932,
 'poste_10': 1.4465867613618872,
 'poste_11': 1.4897672533444115,
 'poste_12': 0.9759995382485754}

**Par poste agrégé**

In [11]:
def get_inflators_cn_to_cn(target_year, data_year):
    '''
        Compute the ageing inflator from the national-accounts totals: for each
        category of the data year, target-year total divided by data-year total.
    '''
    base_column = 'conso_CN_{}'.format(data_year)
    target_column = 'conso_CN_{}'.format(target_year)

    base_totals = get_cn_aggregates(data_year)[base_column].to_dict()
    target_totals = get_cn_aggregates(target_year)[target_column].to_dict()

    # Keys come from the data year; a category missing in the target year raises KeyError.
    return {
        poste: target_totals[poste] / base_total
        for poste, base_total in base_totals.items()
        }

In [15]:
get_inflators_cn_to_cn(target_year= 2018, data_year = 2017)

{'poste_01': 1.0150987153358435,
 'poste_02': 1.0423685841455856,
 'poste_03': 0.9772688326821578,
 'poste_04': 1.025518327174552,
 'poste_05': 1.0049322015255864,
 'poste_06': 1.0117284975835603,
 'poste_07': 1.0617492090202088,
 'poste_08': 1.014965562200618,
 'poste_09': 1.0272780293054828,
 'poste_10': 1.0132110959690952,
 'poste_11': 1.0643354301254067,
 'poste_12': 1.0632043436466656}

**Par poste agrégé**

In [12]:
def get_inflators(target_year,data_year):
    '''
    Compute, for every variable of the tax-benefit system, the product of the
    calibration ratio (bdf to cn for the data year) and the ageing ratio
    (cn data year to cn target year) of its aggregated category.

    A variable belongs to a category when its first 8 characters equal the
    category key; variables matching no category are left out of the result.
    '''
    calibration_ratios = get_inflators_bdf_to_cn(data_year)
    ageing_ratios = get_inflators_cn_to_cn(target_year,data_year)

    variable_names = list(FranceIndirectTaxationTaxBenefitSystem().variables.keys())

    ratio_by_variable = {}
    for name in variable_names:
        prefix = name[:8]
        if prefix in ageing_ratios:
            ratio_by_variable[name] = calibration_ratios[prefix] * ageing_ratios[prefix]
    return ratio_by_variable

In [14]:
inflators_postes_agreges = get_inflators(2019,2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


**Par poste agrégé**

In [6]:
def get_inflators_by_year(rebuild = False, year_range = None, data_year = None):
    """
    Inflators per variable for each year of `year_range`, cached in
    inflateurs/inflators_by_postes_agreges_by_year.csv under `assets_directory`.

    Parameters
    ----------
    rebuild : anything other than False recomputes the inflators with
        `get_inflators` and rewrites the CSV cache; False reads the cache.
    year_range : iterable of years; defaults to range(2000, 2020).
    data_year : year of the survey data, forwarded to `get_inflators`
        (only used when rebuilding).

    Returns
    -------
    dict {year: {variable name: inflator}}. When reading the cache, a year
    without any row in the file maps to an empty dict.
    """
    if year_range is None:
        year_range = range(2000, 2020)

    inflators_file_path = os.path.join(assets_directory, 'inflateurs', 'inflators_by_postes_agreges_by_year.csv')

    if rebuild is not False:
        inflators_by_year = dict()
        for target_year in year_range:
            inflators_by_year[target_year] = get_inflators(target_year = target_year, data_year = data_year)

        # The context manager guarantees the cache is flushed and closed before
        # returning; newline='' is what the csv module expects for its files.
        with open(inflators_file_path, 'w', newline = '') as inflators_file:
            writer_inflators = csv.writer(inflators_file)
            # Iterate over the dict just built rather than over `year_range`
            # again, so a one-shot iterable still gets all its rows written.
            for year, inflators in inflators_by_year.items():
                for key, value in inflators.items():
                    writer_inflators.writerow([key, value, year])

        return inflators_by_year

    # Cache layout (no header): column 0 = variable name (used as index),
    # column 1 = inflator, column 2 = year.
    inflators_from_csv = pd.read_csv(inflators_file_path, index_col = 0, header = None)
    re_build_inflators = dict()
    for year in year_range:
        inflators_from_csv_by_year = inflators_from_csv[inflators_from_csv[2] == year]
        re_build_inflators[year] = inflators_from_csv_by_year[1].to_dict()

    return re_build_inflators

In [21]:
data_year = 2017
# Inflators by aggregated category, read back from the CSV cache (rebuild = False).
inflators_by_year = get_inflators_by_year(rebuild = False, year_range = range(2017, 2023), data_year = data_year)
simulated_variables = [
    'tva_taux_plein', 'tva_taux_intermediaire', 'tva_taux_reduit', 'tva_taux_super_reduit',
    'tva_total',
    'rev_disponible',
    'pondmen',
    'depenses_tot', 'loyer_impute',
    ]

# One row per simulated year.
agregates = pd.DataFrame(
    columns = [
        'tva_total',
        'tva_taux_super_reduit', 'tva_taux_reduit', 'tva_taux_intermediaire', 'tva_taux_plein',
        'depenses_tot', 'loyer_impute',
        ]
    )
for year in range(2017, 2023):
    inflation_kwargs = {'inflator_by_variable': inflators_by_year[year]}
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year,
        )
    # Sums by `niveau_vie_decile` group, then added up over the groups.
    df_sum = dataframe_by_group(
        survey_scenario,
        category = 'niveau_vie_decile',
        variables = simulated_variables,
        aggfunc = 'sum',
        )
    agregates.loc[year] = df_sum.sum()



In [22]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,99859650000.0,339828100.0,8960419000.0,11412280000.0,79147120000.0,954855800000.0,135044300000.0
2018,102705700000.0,345054600.0,9117203000.0,11954890000.0,81288590000.0,985654400000.0,135044300000.0
2019,104942200000.0,351989300.0,9324577000.0,12467850000.0,82797750000.0,1007516000000.0,135044300000.0
2020,95092040000.0,322120600.0,9573070000.0,9809840000.0,75387010000.0,934195600000.0,135044300000.0
2021,102655800000.0,355069000.0,9747673000.0,11032240000.0,81520830000.0,999409300000.0,135044300000.0
2022,112490200000.0,375688400.0,10376920000.0,13992240000.0,87745340000.0,1099850000000.0,135044300000.0


In [36]:
from openfisca_france_indirect_taxation.projects.TVA_Herve_IPP.new_calage_bdf_cn import (
    new_get_cn_aggregates,
    new_get_bdf_aggregates,
    new_get_inflators_bdf_to_cn,
    new_get_inflators_cn_to_cn,
    new_get_inflators,
    new_get_inflators_by_year,
    new_get_inflators_bdf_to_cn)

In [39]:
data_cn = new_get_cn_aggregates(data_year)
liste_postes_cn = data_cn.index.tolist()

data_bdf = new_get_bdf_aggregates(data_year)
liste_postes_bdf = data_bdf.index.tolist()

# For every national-accounts label, add up the survey aggregates whose label
# contains it (substring match). The totals are collected in a dict and turned
# into a frame once, instead of inserting one column per label into a DataFrame.
# .iloc[0] reads the single value of each survey row; float() on a one-element
# Series is deprecated in recent pandas.
bdf_totals_by_poste_cn = {
    poste: sum(
        float(data_bdf.loc[element].iloc[0])
        for element in liste_postes_bdf
        if poste in element
        )
    for poste in liste_postes_cn
    }
data_bdf_postes_cn = pd.DataFrame({'bdf_aggregates': pd.Series(bdf_totals_by_poste_cn, dtype = float)})

masses = (
    data_cn
    .merge(data_bdf_postes_cn, left_index = True, right_index = True)
    .rename(columns = {'bdf_aggregates': 'conso_bdf{}'.format(data_year)})
    )

masses['inflator'] = masses['conso_CN_{}'.format(data_year)] / masses['conso_bdf{}'.format(data_year)]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  data_bdf_postes_cn[poste] = 0


In [45]:
masses.sort_index(inplace=True)
masses.to_csv(os.path.join(assets_directory, 'inflateurs', 'inflators_bdf_cn_2017.csv'))

In [9]:
data_year = 2017
target_year = 2019

In [20]:
def new_get_inflators(target_year,data_year) :
    """
    Inflator per `poste_*` variable of the tax-benefit system: calibration ratio
    (bdf to cn, data year) times ageing ratio (cn data year to cn target year).

    A key applies to a variable when the key is a substring of the variable
    name; only keys present in both ratio dicts are considered. When several
    keys match the same variable, the last one in the ageing dict's order wins.
    """
    calibration_ratios = new_get_inflators_bdf_to_cn(data_year)
    ageing_ratios = new_get_inflators_cn_to_cn(target_year,data_year)

    variable_names = list(FranceIndirectTaxationTaxBenefitSystem().variables.keys())
    # Keys usable for the product, kept in the iteration order of the ageing dict.
    shared_keys = [key for key in ageing_ratios if key in calibration_ratios]

    ratio_by_variable = {}
    for name in variable_names:
        if not name.startswith('poste_'):
            continue
        for key in shared_keys:
            if key in name:
                ratio_by_variable[name] = calibration_ratios[key] * ageing_ratios[key]

    return ratio_by_variable

In [26]:
inflators = new_get_inflators(2019,2017)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  data_bdf_postes_cn[poste] = 0


In [23]:
data_year = 2017
# Inflators recomputed by the packaged helper (rebuild = True).
inflators_by_year = new_get_inflators_by_year(rebuild = True, year_range = range(2017, 2023), data_year = data_year)
simulated_variables = [
    'tva_taux_plein', 'tva_taux_intermediaire', 'tva_taux_reduit', 'tva_taux_super_reduit',
    'tva_total',
    'rev_disponible',
    'pondmen',
    'depenses_tot', 'loyer_impute',
    ]

# One row per simulated year.
agregates = pd.DataFrame(
    columns = [
        'tva_total',
        'tva_taux_super_reduit', 'tva_taux_reduit', 'tva_taux_intermediaire', 'tva_taux_plein',
        'depenses_tot', 'loyer_impute',
        ]
    )
for year in range(2017, 2023):
    inflation_kwargs = {'inflator_by_variable': inflators_by_year[year]}
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year,
        )
    # Sums by `niveau_vie_decile` group, then added up over the groups.
    df_sum = dataframe_by_group(
        survey_scenario,
        category = 'niveau_vie_decile',
        variables = simulated_variables,
        aggfunc = 'sum',
        )
    agregates.loc[year] = df_sum.sum()

In [24]:
agregates['Total_depenses'] = agregates['depenses_tot'] + agregates['loyer_impute']

In [25]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute,Total_depenses
2017,103340800000.0,291658800.0,8497522000.0,13328140000.0,81223490000.0,974350400000.0,135044300000.0,1109395000000.0
2018,106194800000.0,290634600.0,8608283000.0,14009090000.0,83286790000.0,1005746000000.0,135044300000.0,1140790000000.0
2019,108223700000.0,291524000.0,8771526000.0,14755370000.0,84405290000.0,1027281000000.0,135044300000.0,1162325000000.0
2020,98845840000.0,282879200.0,9215032000.0,10291000000.0,79056920000.0,952170300000.0,135044300000.0,1087215000000.0
2021,107186300000.0,298787600.0,9273624000.0,11883360000.0,85730500000.0,1021302000000.0,135044300000.0,1156346000000.0
2022,116810100000.0,305966200.0,9664235000.0,16790060000.0,90049850000.0,1122780000000.0,135044300000.0,1257824000000.0


In [228]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,102772500000.0,353672900.0,9686963000.0,12062920000.0,80668910000.0,1046474000000.0,135044300000.0
2018,105403500000.0,356908900.0,9821486000.0,12603330000.0,82621750000.0,1074177000000.0,135044300000.0
2019,107594900000.0,362254200.0,10015600000.0,13065020000.0,84152050000.0,1096664000000.0,135044300000.0
