In [206]:
import numpy
import pandas as pd
import os
import csv
import ast
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

from wquantiles import quantile
from openfisca_survey_manager.utils import asof

from openfisca_france_indirect_taxation import FranceIndirectTaxationTaxBenefitSystem
from openfisca_france_indirect_taxation.examples.utils_example import (
    wavg,
    collapse,
    dataframe_by_group,
    graph_builder_bar,
    df_weighted_average_grouped)
from openfisca_france_indirect_taxation.build_survey_data.utils import weighted_sum
from openfisca_france_indirect_taxation.almost_ideal_demand_system.utils import add_niveau_vie_decile
from openfisca_france_indirect_taxation.surveys import SurveyScenario
from openfisca_france_indirect_taxation.calibration import get_inflators_by_year_energy
from openfisca_france_indirect_taxation.utils import assets_directory, get_input_data_frame

In [33]:
data_year = 2017
inflators_by_year = get_inflators_by_year_energy(rebuild = True, year_range = range(2017, 2020), data_year = data_year)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [34]:
 simulated_variables = [
        'tva_taux_plein',
        'tva_taux_intermediaire',
        'tva_taux_reduit',
        'tva_taux_super_reduit',
        'tva_total',
        'rev_disponible',
        'pondmen',
        'depenses_tot',
        'loyer_impute'
        ]

In [35]:
agregates = pd.DataFrame(columns=['tva_total','tva_taux_super_reduit','tva_taux_reduit','tva_taux_intermediaire','tva_taux_plein','depenses_tot','loyer_impute'])
for year in [2017,2018,2019] :
    inflation_kwargs = dict(inflator_by_variable = inflators_by_year[year])
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year
        )
    df_sum = dataframe_by_group(survey_scenario, category = 'niveau_vie_decile', variables = simulated_variables, aggfunc = 'sum')
    agregates.loc[year] = df_sum.sum()


In [36]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1012287000000.0,174934000000.0
2018,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1038613000000.0,178082700000.0
2019,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1064471000000.0,181852100000.0


In [9]:
from openfisca_france_indirect_taxation.calibration import (get_bdf_aggregates_energy, 
                                                            get_cn_aggregates_energy,
                                                            get_inflators_bdf_to_cn_energy,
                                                            get_inflators_cn_to_cn_energy,
                                                            get_inflators_energy
                                                            )

In [10]:
data_cn = get_cn_aggregates_energy(data_year)
data_bdf = get_bdf_aggregates_energy(data_year)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [26]:
data_cn

Unnamed: 0,conso_CN_2017
loyer_impute,174972900000.0
depenses_electricite,27205000000.0
depenses_gaz_ville,12994650000.0
depenses_combustibles_liquides,6088465000.0
depenses_combustibles_solides,1976545000.0
depenses_carburants,36523970000.0
depenses_tot,1186224000000.0
rev_disponible,1388087000000.0
rev_disp_loyerimput,1213114000000.0


In [27]:
data_bdf

Unnamed: 0,bdf_aggregates
depenses_carburants,31655040000.0
depenses_combustibles_liquides,3546686000.0
depenses_combustibles_solides,1584850000.0
depenses_electricite,23613100000.0
depenses_gaz_ville,9198009000.0
depenses_tot,922354600000.0
loyer_impute,135074300000.0
rev_disponible,986128800000.0
rev_disp_loyerimput,1121203000000.0


In [11]:
data_bdf.loc['depenses_tot']

bdf_aggregates    9.223546e+11
Name: depenses_tot, dtype: float64

In [12]:
data_cn.loc['depenses_tot']

conso_CN_2017    1.186224e+12
Name: depenses_tot, dtype: float64

In [22]:
inflator_depenses_tot_bdf_to_cn =  float(data_cn.loc['depenses_tot']) / float(data_bdf.loc['depenses_tot'])
inflator_depenses_tot_bdf_to_cn == get_inflators_bdf_to_cn_energy(2017).get('depenses_tot')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


True

In [21]:
(agregates.loc[2017]['depenses_tot'] + agregates.loc[2017]['loyer_impute']) / float(data_cn.loc['depenses_tot'])

1.000840269461446

In [29]:
data_cn_2018 = get_cn_aggregates_energy(2018)
(agregates.loc[2018]['depenses_tot'] + agregates.loc[2018]['loyer_impute']) / float(data_cn_2018.loc['depenses_tot'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


0.9996893440873598

In [32]:
depenses = get_input_data_frame(data_year)

# Construct depenses_tot for total consumption
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{}'.format(index) for index in
        ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]]
depenses['depenses_tot'] = 0
for element in liste_variables:
    for poste in postes_agreges:
        if element[:8] == poste:
            depenses['depenses_tot'] += depenses[element]


In [94]:
depenses[['poste_04_2_1','loyer_impute']]

Unnamed: 0,poste_04_2_1,loyer_impute
0,3732,3732
1,10944,10944
2,14424,14424
3,0,0
4,4908,4908
...,...,...
12076,0,0
12077,15660,15660
12078,3996,3996
12079,10956,10956


In [56]:
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

In [59]:
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{}'.format(index) for index in
        ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]]

for poste in postes_agreges:
    depenses[poste] = 0
    for element in liste_variables:
        if element[:8] == poste:
            depenses[poste] += depenses[element]

depenses_bdf_by_poste_agreges = [(depenses['pondmen'] * depenses[poste]).sum() for poste in postes_agreges]

In [86]:
depenses_bdf_by_poste_agreges

[126558517616.19775,
 22769282779.833675,
 39373557642.18781,
 263341119399.63644,
 44626189503.00444,
 14561448588.157013,
 128156427135.21976,
 20978921247.8749,
 74025777235.73172,
 6137827496.527738,
 56919092435.17008,
 124906429309.42918]

In [87]:
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

On reconstruit les différentes fonctions qui servent à réaliser le calage et le veillissement des données à partir de la comptabilité nationale (examples.calage_bdf_cn_bis)

**Par poste agrégé**

In [82]:
def new_get_bdf_aggregates(data_year = None):
    assert data_year is not None
    depenses = get_input_data_frame(2017)
    liste_variables = depenses.columns.tolist()
    postes_agreges = ['poste_{}'.format(index) for index in ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]]
    for poste in postes_agreges:
        depenses[poste] = 0
        for element in liste_variables:
            if element[:8] == poste:
                depenses[poste] += depenses[element]
            
    depenses_by_poste_agrege = depenses[postes_agreges]      
    depenses_by_poste_agrege = pd.concat([depenses_by_poste_agrege, depenses['pondmen']], axis = 1)
    bdf_aggregates_by_poste_agrege = pd.DataFrame()
    for poste_agrege in postes_agreges:
        bdf_aggregates_by_poste_agrege.loc[poste_agrege, 'bdf_aggregates'] = (depenses_by_poste_agrege[poste_agrege] * depenses_by_poste_agrege['pondmen']).sum()
        
    return bdf_aggregates_by_poste_agrege

**Par poste**

In [259]:
def new_get_bdf_aggregates(data_year = None):
    assert data_year is not None
    depenses = get_input_data_frame(2017)
    liste_variables = depenses.columns.tolist()
    liste_postes = [element for element in liste_variables if element[:6] == 'poste_']

    bdf_aggregates_by_poste = pd.DataFrame()
    for poste in liste_postes:
        bdf_aggregates_by_poste.loc[poste, 'bdf_aggregates'] = (depenses[poste] * depenses['pondmen']).sum()
        
    return bdf_aggregates_by_poste

**Par poste agrégé**

In [185]:
def new_get_cn_aggregates(target_year = None):
    assert target_year is not None

    parametres_fiscalite_file_path = os.path.join(
            assets_directory,
            'legislation',
            'conso-eff-fonction.xls'
            )

    masses_cn_data_frame = pd.read_excel(parametres_fiscalite_file_path, sheet_name = "M€cour", header = 3)
    masses_cn_data_frame = masses_cn_data_frame.loc[:, ['Code', target_year]].copy()
    masses_cn_data_frame['Code'] = masses_cn_data_frame['Code'].str.strip()
    
    codes_postes_agreges = ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]
    masses_cn_12postes_data_frame = masses_cn_data_frame.loc[masses_cn_data_frame['Code'].isin(codes_postes_agreges)]
    masses_cn_12postes_data_frame.loc[:,'Code'] = masses_cn_12postes_data_frame['Code'].astype(str).apply(lambda x: f"poste_{x}")
    masses_cn_12postes_data_frame.set_index('Code', inplace = True)
    masses_cn_12postes_data_frame.rename(columns= {target_year: 'conso_CN_{}'.format(target_year)}, inplace= True)
    masses_cn_12postes_data_frame

    return masses_cn_12postes_data_frame*1e6

In [188]:
cn_aggregates = new_get_cn_aggregates(2019)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [189]:
cn_aggregates

Unnamed: 0_level_0,conso_CN_2019
Code,Unnamed: 1_level_1
poste_01,165553300000.0
poste_02,48174500000.0
poste_03,44858770000.0
poste_04,332112200000.0
poste_05,57867900000.0
poste_06,50518470000.0
poste_07,178799100000.0
poste_08,29240110000.0
poste_09,100317500000.0
poste_10,6174029000.0


In [154]:
bdf_aggregates_by_poste_agrege

Unnamed: 0,bdf_aggregates
poste_01,126558500000.0
poste_02,22769280000.0
poste_03,39373560000.0
poste_04,263341100000.0
poste_05,44626190000.0
poste_06,14561450000.0
poste_07,128156400000.0
poste_08,20978920000.0
poste_09,74025780000.0
poste_10,6137827000.0


**Par poste**

In [335]:
def remove_prefixes(lst):
    lst_sorted = sorted(lst, key = len, reverse = True)
    filtered = []
    
    for item in lst_sorted:
        if not any(item != other and item in other for other in filtered):
            filtered.append(item)
    
    return filtered

In [373]:
def new_get_cn_aggregates(target_year) :
    target_year = 2017
    masses_cn_data_frame = pd.read_excel(parametres_fiscalite_file_path, sheet_name = "M€cour", header = 3)
    masses_cn_data_frame = masses_cn_data_frame.loc[:, ['Code', target_year]].copy()
    masses_cn_data_frame['Code'] = masses_cn_data_frame['Code'].str.strip()
    masses_cn_data_frame.dropna(inplace = True)
    masses_cn_data_frame.loc[:,'Code'] = masses_cn_data_frame['Code'].astype(str).apply(lambda x: f"poste_{x}")
    
    liste_postes_cn = remove_prefixes(masses_cn_data_frame['Code'].tolist())
    liste_postes_cn.remove('poste_01..12+15 (HS)')
    liste_12postes = ["poste_0{}".format(i) for i in range(1, 10)] + ["poste_10", "poste_11", "poste_12"]
    liste_postes_cn = [element for element in liste_postes_cn if element[:8] in liste_12postes]
    
    masses_cn_postes_data_frame = masses_cn_data_frame.loc[masses_cn_data_frame['Code'].isin(liste_postes_cn)]
    masses_cn_postes_data_frame['Code'] = masses_cn_postes_data_frame['Code'].str.replace(r'\.', '_', regex = True)
    masses_cn_postes_data_frame.set_index('Code', inplace = True)
    masses_cn_postes_data_frame.rename(columns= {target_year: 'conso_CN_{}'.format(target_year)}, inplace= True)
    
    return masses_cn_postes_data_frame*1e6

In [374]:
masses_cn_data_frame= new_get_cn_aggregates(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  masses_cn_postes_data_frame['Code'] = masses_cn_postes_data_frame['Code'].str.replace(r'\.', '_', regex = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


**Par poste agrégé**

In [190]:
def new_get_inflators_bdf_to_cn(data_year):    
    data_cn = new_get_cn_aggregates(data_year)
    data_bdf = new_get_bdf_aggregates(data_year)
    masses = data_cn.merge(data_bdf, left_index = True, right_index = True)
    masses.rename(columns = {'bdf_aggregates': 'conso_bdf{}'.format(data_year)}, inplace = True)
    
    return (masses['conso_CN_{}'.format(data_year)] / masses['conso_bdf{}'.format(data_year)]).to_dict()

In [191]:
new_get_inflators_bdf_to_cn(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


{'poste_01': 1.2664817589447321,
 'poste_02': 1.977376293990095,
 'poste_03': 1.166414689202266,
 'poste_04': 1.201416014032622,
 'poste_05': 1.3269834744912101,
 'poste_06': 3.4084634299616585,
 'poste_07': 1.2843478604965384,
 'poste_08': 1.387541363831978,
 'poste_09': 1.2989267332351238,
 'poste_10': 0.9635287083818541,
 'poste_11': 1.5246735899530455,
 'poste_12': 1.2005692487494686}

**Par poste**

In [361]:
masses_cn_dataframe = new_get_cn_aggregates(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  masses_cn_postes_data_frame['Code'] = masses_cn_postes_data_frame['Code'].str.replace(r'\.', '_', regex = True)


In [327]:
masses_bdf = new_get_bdf_aggregates(data_year)

In [487]:
masses_bdf.index.tolist()

['poste_01_10_1',
 'poste_01_10_2',
 'poste_01_1_1_1_1',
 'poste_01_1_1_3_3',
 'poste_01_1_1_4_1',
 'poste_01_1_1_4_2',
 'poste_01_1_1_4_3',
 'poste_01_1_2_1_1',
 'poste_01_1_2_3_1',
 'poste_01_1_2_4_1',
 'poste_01_1_2_4_2',
 'poste_01_1_2_5_1',
 'poste_01_1_2_6_1',
 'poste_01_1_2_6_3',
 'poste_01_1_3_1_1',
 'poste_01_1_3_1_2',
 'poste_01_1_3_2_1_a',
 'poste_01_1_3_2_1_b',
 'poste_01_1_3_2_2',
 'poste_01_1_4_1_1_a',
 'poste_01_1_4_1_1_b',
 'poste_01_1_4_1_1_c',
 'poste_01_1_4_2_1',
 'poste_01_1_4_2_2',
 'poste_01_1_4_3_1',
 'poste_01_1_4_4_1',
 'poste_01_1_5_1_1',
 'poste_01_1_5_2_1_a',
 'poste_01_1_5_2_1_b',
 'poste_01_1_5_2_2_a',
 'poste_01_1_5_2_2_b',
 'poste_01_1_6_1_1_a',
 'poste_01_1_6_1_1_b',
 'poste_01_1_6_1_1_c',
 'poste_01_1_6_1_1_d',
 'poste_01_1_6_2_1_a',
 'poste_01_1_6_2_1_b',
 'poste_01_1_6_2_1_c',
 'poste_01_1_6_2_1_d',
 'poste_01_1_6_2_1_e',
 'poste_01_1_7_1_1_a',
 'poste_01_1_7_1_1_b',
 'poste_01_1_7_1_1_c',
 'poste_01_1_7_1_1_d',
 'poste_01_1_7_2_1_a',
 'poste_01_1_7_

In [380]:
data_cn = new_get_cn_aggregates(data_year)
liste_postes_cn = data_cn.index.tolist()

data_bdf = new_get_bdf_aggregates(data_year)
data_bdf_postes_cn = pd.DataFrame()
liste_postes_bdf = data_bdf.index.tolist()

data_bdf_postes_cn = pd.DataFrame(index=[0])
for poste in liste_postes_cn:
    data_bdf_postes_cn[poste] = 0
    for element in liste_postes_bdf:
        if poste in element:
            data_bdf_postes_cn[poste] += float(data_bdf.loc[element])
data_bdf_postes_cn = data_bdf_postes_cn.transpose()
data_bdf_postes_cn.rename(columns={0 : 'bdf_aggregates'}, inplace = True)

data_bdf_postes_cn.loc[data_bdf_postes_cn['bdf_aggregates'] == 0].drop(axis=0, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  masses_cn_postes_data_frame['Code'] = masses_cn_postes_data_frame['Code'].str.replace(r'\.', '_', regex = True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [486]:
data_bdf_postes_cn.loc[data_bdf_postes_cn['bdf_aggregates'] == 0]

Unnamed: 0,bdf_aggregates
poste_02_3,0.0
poste_03_2_2,0.0
poste_04_4_2,0.0
poste_04_4_3,0.0
poste_04_4_4,0.0
poste_05_1_3,0.0
poste_06_2-3,0.0
poste_07_3_4,0.0
poste_08_3,0.0
poste_09_2_1-2,0.0


In [482]:
masses = data_cn.merge(data_bdf_postes_cn, left_index = True, right_index = True)
masses.rename(columns = {'bdf_aggregates': 'conso_bdf{}'.format(data_year)}, inplace = True)
    
(masses['conso_CN_{}'.format(data_year)] / masses['conso_bdf{}'.format(data_year)]).to_dict()

{'poste_01_1_1': 1.1843793935121434,
 'poste_01_1_2': 1.3356837498863037,
 'poste_01_1_3': 1.0040233732271853,
 'poste_01_1_4': 1.2437565166624243,
 'poste_01_1_5': 1.237578547559778,
 'poste_01_1_6': 1.2812127630094925,
 'poste_01_1_7': 1.5522240918244112,
 'poste_01_1_8': 1.470298944466745,
 'poste_01_1_9': 1.763475420438462,
 'poste_01_2_1': 1.1630531027753708,
 'poste_01_2_2': 1.5773449779752688,
 'poste_02_1_1': 2.5340902866274195,
 'poste_02_1_2': 1.4977499240834986,
 'poste_02_1_3': 2.3816096695451905,
 'poste_02_2': 1.777794484636511,
 'poste_02_3': inf,
 'poste_03_1_1': 0.7088167681211979,
 'poste_03_1_2': 1.1973287253552254,
 'poste_03_1_3': 3.8398627059930983,
 'poste_03_1_4': 2.684634994631903,
 'poste_03_2_1': 1.0934565891908745,
 'poste_03_2_2': inf,
 'poste_04_1': 0.948391445334531,
 'poste_04_2': 1.2953827866905072,
 'poste_04_3': 3.785387159664177,
 'poste_04_4_1': 0.1849819669668938,
 'poste_04_4_2': inf,
 'poste_04_4_3': inf,
 'poste_04_4_4': inf,
 'poste_04_5_1': 1.

**Par poste agrégé**

In [192]:
def new_get_inflators_cn_to_cn(target_year, data_year):
    '''
        Calcule l'inflateur de vieillissement à partir des masses de comptabilité nationale.
    '''
    data_year_cn_aggregates = new_get_cn_aggregates(data_year)['conso_CN_{}'.format(data_year)].to_dict()
    target_year_cn_aggregates = new_get_cn_aggregates(target_year)['conso_CN_{}'.format(target_year)].to_dict()

    return dict(
        (key, target_year_cn_aggregates[key] / data_year_cn_aggregates[key])
        for key in list(data_year_cn_aggregates.keys())
        )

In [195]:
new_get_inflators_cn_to_cn(target_year= 2018, data_year = 2017)

{'poste_01': 1.014635242505159,
 'poste_02': 1.0431652046134192,
 'poste_03': 0.9767986235913612,
 'poste_04': 1.024969688721906,
 'poste_05': 0.9623770158830857,
 'poste_06': 1.0060181134552564,
 'poste_07': 1.0575058299967532,
 'poste_08': 1.0051190140712254,
 'poste_09': 1.0184559963619217,
 'poste_10': 1.0116124980617938,
 'poste_11': 1.0593448924816955,
 'poste_12': 1.0397346956949207}

**Par poste agrégé**

In [224]:
def new_get_inflators(target_year,data_year):
    '''
    Fonction qui calcule les ratios de calage (bdf sur cn pour année de données) et de vieillissement
    à partir des masses de comptabilité nationale et des masses de consommation de bdf.
    '''
    inflators_bdf_to_cn = new_get_inflators_bdf_to_cn(data_year)
    inflators_cn_to_cn = new_get_inflators_cn_to_cn(target_year,data_year)
    
    tax_benefit_system = FranceIndirectTaxationTaxBenefitSystem()
    liste_variables = list(tax_benefit_system.variables.keys())
    ratio_by_variable = dict()
    for element in liste_variables:
        for key in list(inflators_cn_to_cn.keys()):
            if element[:8] == key:
                ratio_by_variable[element] = inflators_bdf_to_cn[key] * inflators_cn_to_cn[key]

    return ratio_by_variable

In [225]:
ratio_by_variable = new_get_inflators(2019,2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [220]:
inflators_cn_to_cn = new_get_inflators_cn_to_cn(2019,data_year)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


**Par poste agrégé**

In [210]:
def new_get_inflators_by_year(rebuild = False, year_range = None, data_year = None):
    if year_range is None:
        year_range = range(2000, 2020)

    if rebuild is not False:
        inflators_by_year = dict()
        for target_year in year_range:
            inflators = new_get_inflators(target_year = target_year, data_year = data_year)
            inflators_by_year[target_year] = inflators

        writer_inflators = csv.writer(open(os.path.join(assets_directory, 'inflateurs', 'new_inflators_by_year.csv'), 'w'))
        for year in year_range:
            for key, value in list(inflators_by_year[year].items()):
                writer_inflators.writerow([key, value, year])

        return inflators_by_year
    else:
        re_build_inflators = dict()
        inflators_from_csv = pd.read_csv(os.path.join(assets_directory, 'inflateurs', 'new_inflators_by_year.csv'),
            index_col = 0, header = None)
        for year in year_range:
            inflators_from_csv_by_year = inflators_from_csv[inflators_from_csv[2] == year]
            inflators_to_dict = pd.DataFrame.to_dict(inflators_from_csv_by_year)
            inflators = inflators_to_dict[1]
            re_build_inflators[year] = inflators

        return re_build_inflators

In [226]:
new_get_inflators_by_year(rebuild = True, year_range = range(2017,2020), data_year = 2017)

{2017: {'poste_01_1_1_1_1': 1.2664817589447321,
  'poste_01_1_1_3_3': 1.2664817589447321,
  'poste_01_1_1_4_1': 1.2664817589447321,
  'poste_01_1_1_4_2': 1.2664817589447321,
  'poste_01_1_1_4_3': 1.2664817589447321,
  'poste_01_1_2_1_1': 1.2664817589447321,
  'poste_01_1_2_3_1': 1.2664817589447321,
  'poste_01_1_2_4_1': 1.2664817589447321,
  'poste_01_1_2_4_2': 1.2664817589447321,
  'poste_01_1_2_5_1': 1.2664817589447321,
  'poste_01_1_2_6_1': 1.2664817589447321,
  'poste_01_1_2_6_3': 1.2664817589447321,
  'poste_01_1_3_1_1': 1.2664817589447321,
  'poste_01_1_3_1_2': 1.2664817589447321,
  'poste_01_1_3_2_1_a': 1.2664817589447321,
  'poste_01_1_3_2_1_b': 1.2664817589447321,
  'poste_01_1_3_2_2': 1.2664817589447321,
  'poste_01_1_4_1_1_a': 1.2664817589447321,
  'poste_01_1_4_1_1_b': 1.2664817589447321,
  'poste_01_1_4_1_1_c': 1.2664817589447321,
  'poste_01_1_4_2_1': 1.2664817589447321,
  'poste_01_1_4_2_2': 1.2664817589447321,
  'poste_01_1_4_3_1': 1.2664817589447321,
  'poste_01_1_4_4_

In [227]:
data_year = 2017
inflators_by_year = new_get_inflators_by_year(rebuild = False, year_range = range(2017, 2020), data_year = data_year)
simulated_variables = [
        'tva_taux_plein',
        'tva_taux_intermediaire',
        'tva_taux_reduit',
        'tva_taux_super_reduit',
        'tva_total',
        'rev_disponible',
        'pondmen',
        'depenses_tot',
        'loyer_impute'
        ]
 
agregates = pd.DataFrame(columns=['tva_total','tva_taux_super_reduit','tva_taux_reduit','tva_taux_intermediaire','tva_taux_plein','depenses_tot','loyer_impute'])
for year in [2017,2018,2019] :
    inflation_kwargs = dict(inflator_by_variable = inflators_by_year[year])
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year
        )
    df_sum = dataframe_by_group(survey_scenario, category = 'niveau_vie_decile', variables = simulated_variables, aggfunc = 'sum')
    agregates.loc[year] = df_sum.sum()



In [228]:
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,102772500000.0,353672900.0,9686963000.0,12062920000.0,80668910000.0,1046474000000.0,135044300000.0
2018,105403500000.0,356908900.0,9821486000.0,12603330000.0,82621750000.0,1074177000000.0,135044300000.0
2019,107594900000.0,362254200.0,10015600000.0,13065020000.0,84152050000.0,1096664000000.0,135044300000.0
