In [1]:
import numpy
import pandas as pd
import os
import csv
import ast
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

from wquantiles import quantile
from openfisca_survey_manager.utils import asof

from openfisca_france_indirect_taxation import FranceIndirectTaxationTaxBenefitSystem
from openfisca_france_indirect_taxation.examples.utils_example import (
    wavg,
    collapse,
    dataframe_by_group,
    graph_builder_bar,
    df_weighted_average_grouped)
from openfisca_france_indirect_taxation.build_survey_data.utils import weighted_sum
from openfisca_france_indirect_taxation.almost_ideal_demand_system.utils import add_niveau_vie_decile
from openfisca_france_indirect_taxation.surveys import SurveyScenario
from openfisca_france_indirect_taxation.calibration import get_inflators_by_year_energy
from openfisca_france_indirect_taxation.utils import assets_directory, get_input_data_frame

In [2]:
# Year passed as `data_year` to the survey scenario and to the aggregate helpers below.
data_year = 2017
# NOTE(review): `inflators_by_year` is read by the aggregation loop further down, but this
# assignment is commented out, so a fresh kernel has no definition for it at that point —
# confirm whether it should be re-enabled.
#inflators_by_year = get_inflators_by_year_energy(rebuild = True, year_range = range(2017, 2020), data_year = data_year)

In [34]:
 simulated_variables = [
        'tva_taux_plein',
        'tva_taux_intermediaire',
        'tva_taux_reduit',
        'tva_taux_super_reduit',
        'tva_total',
        'rev_disponible',
        'pondmen',
        'depenses_tot',
        'loyer_impute'
        ]

In [35]:
# One row per simulated year: the decile-level sums are added up into national totals.
colonnes_agregats = [
    'tva_total',
    'tva_taux_super_reduit',
    'tva_taux_reduit',
    'tva_taux_intermediaire',
    'tva_taux_plein',
    'depenses_tot',
    'loyer_impute',
    ]
agregates = pd.DataFrame(columns = colonnes_agregats)
for year in range(2017, 2020):
    # Inflate the survey data to `year` before building the scenario.
    inflation_kwargs = {'inflator_by_variable': inflators_by_year[year]}
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year,
        )
    df_sum = dataframe_by_group(
        survey_scenario,
        category = 'niveau_vie_decile',
        variables = simulated_variables,
        aggfunc = 'sum',
        )
    agregates.loc[year] = df_sum.sum()


In [36]:
# Show the yearly totals filled in by the aggregation loop.
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1012287000000.0,174934000000.0
2018,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1038613000000.0,178082700000.0
2019,79076340000.0,146193700.0,7305056000.0,8793053000.0,62832030000.0,1064471000000.0,181852100000.0


In [9]:
from openfisca_france_indirect_taxation.calibration import (get_bdf_aggregates_energy, 
                                                            get_cn_aggregates_energy,
                                                            get_inflators_bdf_to_cn_energy,
                                                            get_inflators_cn_to_cn_energy,
                                                            get_inflators_energy
                                                            )

In [10]:
# Reference aggregates for `data_year`: national accounts (CN) on one side, survey (bdf) on the other.
data_cn = get_cn_aggregates_energy(data_year)
data_bdf = get_bdf_aggregates_energy(data_year)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [26]:
# Inspect the aggregates returned by get_cn_aggregates_energy.
data_cn

Unnamed: 0,conso_CN_2017
loyer_impute,174972900000.0
depenses_electricite,27205000000.0
depenses_gaz_ville,12994650000.0
depenses_combustibles_liquides,6088465000.0
depenses_combustibles_solides,1976545000.0
depenses_carburants,36523970000.0
depenses_tot,1186224000000.0
rev_disponible,1388087000000.0
rev_disp_loyerimput,1213114000000.0


In [27]:
# Inspect the aggregates returned by get_bdf_aggregates_energy.
data_bdf

Unnamed: 0,bdf_aggregates
depenses_carburants,31655040000.0
depenses_combustibles_liquides,3546686000.0
depenses_combustibles_solides,1584850000.0
depenses_electricite,23613100000.0
depenses_gaz_ville,9198009000.0
depenses_tot,922354600000.0
loyer_impute,135074300000.0
rev_disponible,986128800000.0
rev_disp_loyerimput,1121203000000.0


In [11]:
# 'depenses_tot' row of the bdf aggregates.
data_bdf.loc['depenses_tot']

bdf_aggregates    9.223546e+11
Name: depenses_tot, dtype: float64

In [12]:
# 'depenses_tot' row of the CN aggregates.
data_cn.loc['depenses_tot']

conso_CN_2017    1.186224e+12
Name: depenses_tot, dtype: float64

In [22]:
# Recompute the bdf -> CN ratio for total expenditure by hand and check that it matches the
# packaged helper. `.iloc[0]` extracts the scalar explicitly: calling float() on a
# one-element Series is deprecated in recent pandas. The helper is called with `data_year`
# (instead of a literal 2017) so both sides of the comparison use the same year.
inflator_depenses_tot_bdf_to_cn = (
    float(data_cn.loc['depenses_tot'].iloc[0]) / float(data_bdf.loc['depenses_tot'].iloc[0])
    )
inflator_depenses_tot_bdf_to_cn == get_inflators_bdf_to_cn_energy(data_year).get('depenses_tot')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


True

In [21]:
# Simulated total (expenditure + imputed rent) relative to the CN total for 2017.
# Single-step `.loc[row, col]` replaces chained indexing, and `.iloc[0]` replaces the
# deprecated float() call on a one-element Series.
(agregates.loc[2017, 'depenses_tot'] + agregates.loc[2017, 'loyer_impute']) / float(data_cn.loc['depenses_tot'].iloc[0])

1.000840269461446

In [29]:
# Same check for 2018: simulated total (expenditure + imputed rent) over the CN total.
# Single-step `.loc[row, col]` replaces chained indexing, and `.iloc[0]` replaces the
# deprecated float() call on a one-element Series.
data_cn_2018 = get_cn_aggregates_energy(2018)
(agregates.loc[2018, 'depenses_tot'] + agregates.loc[2018, 'loyer_impute']) / float(data_cn_2018.loc['depenses_tot'].iloc[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


0.9996893440873598

In [32]:
depenses = get_input_data_frame(data_year)

# Total consumption: add up every input column whose first 8 characters name one of the
# twelve aggregated postes (poste_01 ... poste_12).
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{:02d}'.format(numero) for numero in range(1, 13)]
depenses['depenses_tot'] = 0
for element in liste_variables:
    if element[:8] in postes_agreges:
        depenses['depenses_tot'] += depenses[element]


In [94]:
# Put poste_04_2_1 and loyer_impute side by side to compare them.
depenses[['poste_04_2_1','loyer_impute']]

Unnamed: 0,poste_04_2_1,loyer_impute
0,3732,3732
1,10944,10944
2,14424,14424
3,0,0
4,4908,4908
...,...,...
12076,0,0
12077,15660,15660
12078,3996,3996
12079,10956,10956


In [56]:
# NOTE(review): `postes_agreges_simul` is not defined in the visible cells — confirm where
# it comes from, otherwise this cell fails on a fresh kernel.
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

In [59]:
# Weighted survey total for each of the twelve aggregated postes (poste_01 ... poste_12).
liste_variables = depenses.columns.tolist()
postes_agreges = ['poste_{}'.format(index) for index in
        ["0{}".format(i) for i in range(1, 10)] + ["10", "11", "12"]]

for poste in postes_agreges:
    depenses[poste] = 0
    for element in liste_variables:
        # Skip the aggregate column itself: when this cell is re-run, `poste` is already a
        # column of `depenses`, and adding it to itself would double the running total.
        if element[:8] == poste and element != poste:
            depenses[poste] += depenses[element]

# Household weights (`pondmen`) scale the survey values up to totals.
depenses_bdf_by_poste_agreges = [(depenses['pondmen'] * depenses[poste]).sum() for poste in postes_agreges]

In [86]:
# Weighted totals, in the order poste_01 ... poste_12.
depenses_bdf_by_poste_agreges

[126558517616.19775,
 22769282779.833675,
 39373557642.18781,
 263341119399.63644,
 44626189503.00444,
 14561448588.157013,
 128156427135.21976,
 20978921247.8749,
 74025777235.73172,
 6137827496.527738,
 56919092435.17008,
 124906429309.42918]

In [87]:
# NOTE(review): `postes_agreges_simul` is not defined in the visible cells — confirm where
# it comes from, otherwise this cell fails on a fresh kernel.
postes_agreges_simul.sum()

poste_agrege_01    1.265307e+11
poste_agrege_02    2.276444e+10
poste_agrege_03    3.936474e+10
poste_agrege_04    1.282405e+11
poste_agrege_05    4.461621e+10
poste_agrege_06    1.455828e+10
poste_agrege_07    1.281280e+11
poste_agrege_08    2.097434e+10
poste_agrege_09    7.400954e+10
poste_agrege_10    6.136412e+09
poste_agrege_11    5.690687e+10
poste_agrege_12    1.248788e+11
dtype: float64

On reconstruit les différentes fonctions qui servent à réaliser le calage et le vieillissement des données à partir de la comptabilité nationale (examples.calage_bdf_cn_bis)

**Par poste agrégé**

In [103]:
def new_get_bdf_aggregates(data_year = None):
    """
    Weighted survey (bdf) totals for the twelve aggregated postes.

    Parameters
    ----------
    data_year : int
        Year of the input data; forwarded to ``get_input_data_frame``. The earlier
        implementation hard-coded 2017 here and silently ignored this argument.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'poste_01' ... 'poste_12', with a single float column
        'bdf_aggregates' holding ``sum(pondmen * expenditure)`` per poste.

    Raises
    ------
    AssertionError
        If ``data_year`` is None.
    """
    assert data_year is not None
    depenses = get_input_data_frame(data_year)
    liste_variables = depenses.columns.tolist()
    postes_agreges = ['poste_{:02d}'.format(numero) for numero in range(1, 13)]

    bdf_aggregates = dict()
    for poste_agrege in postes_agreges:
        depenses[poste_agrege] = 0
        for element in liste_variables:
            # `element != poste_agrege` guards against adding the aggregate column to
            # itself, should the input frame already carry it (e.g. a reused frame —
            # NOTE(review): confirm whether get_input_data_frame can return a shared object).
            if element[:8] == poste_agrege and element != poste_agrege:
                depenses[poste_agrege] += depenses[element]
        bdf_aggregates[poste_agrege] = (depenses[poste_agrege] * depenses['pondmen']).sum()

    # Build the result in one go instead of enlarging an empty frame one row at a time.
    return pd.DataFrame({'bdf_aggregates': pd.Series(bdf_aggregates, dtype = float)})

**Par poste**

In [3]:
def new_get_bdf_aggregates(data_year = None):
    """
    Weighted survey (bdf) total of every 'poste_*' column of the input data.

    Parameters
    ----------
    data_year : int
        Year of the input data; forwarded to ``get_input_data_frame``. The earlier
        implementation hard-coded 2017 here and silently ignored this argument.

    Returns
    -------
    pandas.DataFrame
        Indexed by poste column name, with a single float column 'bdf_aggregates'
        holding ``sum(pondmen * expenditure)`` for that poste.

    Raises
    ------
    AssertionError
        If ``data_year`` is None.
    """
    assert data_year is not None
    depenses = get_input_data_frame(data_year)
    liste_postes = [element for element in depenses.columns.tolist() if element[:6] == 'poste_']

    # One weighted sum per poste, assembled at once rather than row by row.
    bdf_aggregates = {
        poste: (depenses[poste] * depenses['pondmen']).sum()
        for poste in liste_postes
        }
    return pd.DataFrame({'bdf_aggregates': pd.Series(bdf_aggregates, dtype = float)})

In [5]:
# Reload the raw input data; use the notebook-level `data_year` rather than a second,
# hard-coded copy of the year.
depenses = get_input_data_frame(data_year)

In [6]:
from openfisca_survey_manager.survey_collections import SurveyCollection
from openfisca_survey_manager import default_config_files_directory as config_files_directory

In [8]:
# Keep only the bdf code / COICOP code pairs.
# NOTE(review): `new_bdf` is not defined or imported in the visible cells — confirm its origin.
coicop_poste_bdf = new_bdf()[['code_bdf', 'code_coicop']].copy()

In [11]:
# Columns of `conso` that have no entry in the code_bdf list.
# NOTE(review): `conso` is not defined in the visible cells — confirm its origin.
set(conso.columns).difference(set(coicop_poste_bdf.code_bdf))

set()

In [12]:
# bdf codes that are not columns of `conso`.
# NOTE(review): `conso` is not defined in the visible cells — confirm its origin.
set(coicop_poste_bdf.code_bdf).difference(set(conso.columns))

{'c02311', 'c04421', 'c12131'}

In [13]:
# Mapping bdf code -> COICOP code, built from the rows where both codes are present.
coicop_by_poste_bdf = (
    coicop_poste_bdf
    .dropna()
    .set_index('code_bdf')
    .to_dict()
    )['code_coicop']

In [14]:
# Mapped bdf codes that are not columns of `conso`.
# NOTE(review): `conso` is not defined in the visible cells — confirm its origin.
set(coicop_by_poste_bdf.keys()).difference(set(conso.columns))

{'c02311', 'c04421', 'c12131'}

In [15]:
# Columns of `conso` that have no COICOP mapping.
# NOTE(review): `conso` is not defined in the visible cells — confirm its origin.
set(conso.columns).difference(list(coicop_by_poste_bdf.keys()))

set()

In [7]:
# bdf aggregates for 2017, using whichever definition of the helper was executed last.
new_get_bdf_aggregates(data_year= 2017)

Unnamed: 0,bdf_aggregates
poste_01_1_1_1,8.156951e+08
poste_01_1_1_2,1.423809e+10
poste_01_1_1_3,1.778719e+09
poste_01_1_1_4,4.174176e+09
poste_01_1_1_5,1.529143e+09
...,...
poste_17_5_1_1,2.863564e+10
poste_17_6_1_1,0.000000e+00
poste_17_7_1_1,9.390755e+10
poste_17_7_2_2,1.574634e+09


**Par poste agrégé**

In [None]:
def new_get_cn_aggregates(target_year = None):
    """
    National-accounts (CN) consumption masses for the twelve aggregated postes.

    Reads sheet "MEURcour" of 'conso_eff_fonction_2023.xls' (under
    ``assets_directory/legislation``), keeps the code column and the column of
    ``target_year``, strips the leading 'CP' from the codes and keeps only the codes
    '01' ... '12'.

    Parameters
    ----------
    target_year : int or str
        Year whose column is read from the workbook.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'poste_01' ... 'poste_12' (index name 'Code'), with a single column
        'conso_CN_<target_year>'. Values are the workbook figures multiplied by 1e6
        (presumably million euros converted to euros — TODO confirm the unit).

    Raises
    ------
    AssertionError
        If ``target_year`` is None.
    """
    assert target_year is not None

    parametres_fiscalite_file_path = os.path.join(
        assets_directory,
        'legislation',
        'conso_eff_fonction_2023.xls'
        )
    year_column = '{}'.format(target_year)

    masses_cn_data_frame = pd.read_excel(parametres_fiscalite_file_path, sheet_name = "MEURcour", header = 4)
    masses_cn_data_frame = masses_cn_data_frame.rename(columns = {'Unnamed: 0': 'Code', 'Unnamed: 1': 'Label'})
    masses_cn_data_frame = masses_cn_data_frame.loc[:, ['Code', year_column]].copy()
    # Same order as before: drop the leading 'CP' first, then trim whitespace.
    masses_cn_data_frame['Code'] = (
        masses_cn_data_frame['Code'].str.replace(r'^CP', '', regex = True).str.strip()
        )

    codes_postes_agreges = ['{:02d}'.format(numero) for numero in range(1, 13)]
    # `.copy()` makes the selection an independent frame: the later column assignment then
    # no longer writes into a slice of `masses_cn_data_frame` (the source of the
    # SettingWithCopyWarning this function used to emit).
    masses_cn_12postes_data_frame = masses_cn_data_frame.loc[
        masses_cn_data_frame['Code'].isin(codes_postes_agreges)
        ].copy()
    masses_cn_12postes_data_frame['Code'] = (
        masses_cn_12postes_data_frame['Code'].astype(str).apply(lambda x: f"poste_{x}")
        )
    masses_cn_12postes_data_frame = (
        masses_cn_12postes_data_frame
        .set_index('Code')
        .rename(columns = {year_column: 'conso_CN_{}'.format(target_year)})
        )

    return masses_cn_12postes_data_frame * 1e6

In [154]:
# CN aggregates for 2017, using whichever definition of the helper was executed last.
new_get_cn_aggregates(target_year = 2017)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0_level_0,conso_CN_2017
Code,Unnamed: 1_level_1
poste_01_1_1,2.318610e+10
poste_01_1_2,3.057370e+10
poste_01_1_3,7.408100e+09
poste_01_1_4,1.860840e+10
poste_01_1_5,3.151800e+09
...,...
poste_12_1_2,1.243350e+10
poste_12_1_3,7.077300e+09
poste_12_1_4,8.169700e+09
poste_12_1_9,1.834800e+09


**Par poste**

In [8]:
def remove_prefixes(lst):
    """
    Return the strings of `lst` that are not contained in a longer string already kept.

    Strings are examined from longest to shortest (ties keep their input order). A string
    is dropped when it differs from, and is a substring of, one of the strings kept so far.
    Exact duplicates are therefore all kept.
    """
    kept = []
    for candidate in sorted(lst, key = len, reverse = True):
        contained = False
        for longer in kept:
            if candidate != longer and candidate in longer:
                contained = True
                break
        if contained:
            continue
        kept.append(candidate)
    return kept

In [9]:
def format_poste(code):
    """
    Insert underscores into a compact poste code: 'poste_0111' -> 'poste_01_1_1'.

    After the 'poste_' prefix, the first two characters stay together and every following
    character becomes its own '_'-separated level. Strings that do not start with
    'poste_' are returned unchanged.
    """
    prefix = "poste_"
    if not code.startswith(prefix):
        return code
    compact = code[len(prefix):]
    levels = [compact[:2]]
    levels.extend(compact[2:])
    return prefix + "_".join(levels)

In [10]:
# Recoding of some national-accounts codes applied before the codes are formatted.
# The four-digit codes on the left are rewritten as five-digit codes under 0942 / 0963,
# which the per-poste CN helper then sums back into poste_09_4_2 and poste_09_6_3.
ajust_postes_cn = {
    'CP0942' : 'CP09421', # Hire, maintenance and repair of major durables for recreation (S)
    'CP0943' : 'CP09422', # Hire and repair of games, toys and hobby articles (S)
    'CP0944' : 'CP09423', # Hire and repair of equipment for sport, camping and open-air recreation (S)
    'CP0946' : 'CP09424', # Recreational and sporting services (S)
    'CP0945' : 'CP09631', # Veterinary and other services for pets
    'CP0963' : 'CP09632', # Photographic services
}


In [11]:
def _regroup_cn_postes(masses, codes, label):
    """Replace the rows whose 'Code' is in `codes` by one row `label` holding their sum."""
    to_regroup = masses['Code'].isin(codes)
    # Work on an independent copy so that the row added below is not written into a slice.
    regrouped = masses.loc[~to_regroup].copy()
    new_index = masses.index.max() + 1
    regrouped.loc[new_index] = masses.loc[to_regroup].sum(numeric_only = True)
    regrouped.loc[new_index, 'Code'] = label
    return regrouped


def new_get_cn_aggregates(target_year) :
    """
    National-accounts (CN) consumption masses at the finest poste level available.

    Reads sheet "MEURcour" of 'conso_eff_fonction_2023.xls' (under
    ``assets_directory/legislation``), recodes the codes listed in ``ajust_postes_cn``,
    rewrites every code as 'poste_XX_Y_Z...' and keeps only the most detailed postes of
    the twelve consumption functions (poste_01 ... poste_12).

    Parameters
    ----------
    target_year : int or str
        Year whose column is read from the workbook.

    Returns
    -------
    pandas.DataFrame
        Indexed by poste name (index name 'Code'), with a single column
        'conso_CN_<target_year>'. Values are the workbook figures multiplied by 1e6
        (presumably million euros converted to euros — TODO confirm the unit).
    """
    parametres_fiscalite_file_path = os.path.join(
        assets_directory,
        'legislation',
        'conso_eff_fonction_2023.xls'
        )
    year_column = '{}'.format(target_year)

    masses_cn_data_frame = pd.read_excel(parametres_fiscalite_file_path, sheet_name = "MEURcour", header = 4)
    masses_cn_data_frame = masses_cn_data_frame.rename(columns = {'Unnamed: 0': 'Code', 'Unnamed: 1': 'Label'})
    masses_cn_data_frame = masses_cn_data_frame.loc[:, ['Code', year_column]].copy()
    masses_cn_data_frame = masses_cn_data_frame.replace(to_replace = ajust_postes_cn)
    # Same order as before: drop the leading 'CP' first, then trim whitespace.
    masses_cn_data_frame['Code'] = (
        masses_cn_data_frame['Code'].str.replace(r'^CP', '', regex = True).str.strip()
        )

    masses_cn_data_frame = masses_cn_data_frame.dropna()
    masses_cn_data_frame['Code'] = (
        masses_cn_data_frame['Code'].astype(str).apply(lambda x: format_poste(f"poste_{x}"))
        )

    # Keep these aggregates at the higher level so that they line up with the bdf postes.
    masses_cn_data_frame = masses_cn_data_frame[
        ~masses_cn_data_frame['Code'].isin(['poste_05_1_1', 'poste_05_1_2', 'poste_05_2_1', 'poste_05_2_2'])
        ]
    # Group some consumption postes under a single label.
    masses_cn_data_frame = _regroup_cn_postes(
        masses_cn_data_frame,
        ['poste_09_4_2_1', 'poste_09_4_2_2', 'poste_09_4_2_3', 'poste_09_4_2_4'],
        'poste_09_4_2',
        )
    masses_cn_data_frame = _regroup_cn_postes(
        masses_cn_data_frame,
        ['poste_09_6_3_1', 'poste_09_6_3_2'],
        'poste_09_6_3',
        )

    # Only the most detailed codes are kept, restricted to the twelve consumption functions.
    # That restriction already discards 'poste__Z', so the former explicit
    # `list.remove('poste__Z')` (which raised ValueError when the code was absent) is gone.
    liste_12postes = ['poste_{:02d}'.format(numero) for numero in range(1, 13)]
    liste_postes_cn = [
        element
        for element in remove_prefixes(masses_cn_data_frame['Code'].tolist())
        if element[:8] in liste_12postes
        ]

    masses_cn_postes_data_frame = (
        masses_cn_data_frame
        .loc[masses_cn_data_frame['Code'].isin(liste_postes_cn)]
        .set_index('Code')
        .rename(columns = {year_column: 'conso_CN_{}'.format(target_year)})
        )

    return masses_cn_postes_data_frame * 1e6

In [12]:
# CN masses for 2017.
masses_cn_data_frame= new_get_cn_aggregates(2017)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [13]:
# Inspect the CN masses returned by new_get_cn_aggregates.
masses_cn_data_frame

Unnamed: 0_level_0,conso_CN_2017
Code,Unnamed: 1_level_1
poste_01_1_1,2.318610e+10
poste_01_1_2,3.057370e+10
poste_01_1_3,7.408100e+09
poste_01_1_4,1.860840e+10
poste_01_1_5,3.151800e+09
...,...
poste_12_1_4,8.169700e+09
poste_12_1_9,1.834800e+09
poste_12_2,1.456770e+10
poste_09_4_2,1.119570e+10


**Par poste agrégé**

In [105]:
def new_get_inflators_bdf_to_cn(data_year):
    """
    Calibration ratios CN / bdf for `data_year`.

    The CN and bdf aggregates are joined on their index (inner join), and the CN mass is
    divided by the bdf mass for every poste present on both sides.

    Returns
    -------
    dict
        ``{poste: conso_CN / conso_bdf}``.
    """
    cn_column = 'conso_CN_{}'.format(data_year)
    bdf_column = 'conso_bdf{}'.format(data_year)
    masses = (
        new_get_cn_aggregates(data_year)
        .merge(new_get_bdf_aggregates(data_year), left_index = True, right_index = True)
        .rename(columns = {'bdf_aggregates': bdf_column})
        )
    ratios = masses[cn_column] / masses[bdf_column]
    return ratios.to_dict()

In [106]:
# bdf -> CN calibration ratios for 2017.
new_get_inflators_bdf_to_cn(2017)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


{'poste_01': 1.1665884112813272,
 'poste_02': 1.9487877782123153,
 'poste_03': 1.1856891476318707,
 'poste_04': 1.2855607235657076,
 'poste_05': 1.1612620431442182,
 'poste_06': 3.349261559022724,
 'poste_07': 1.2134216757352296,
 'poste_08': 1.8861808870994206,
 'poste_09': 1.1872884360101217,
 'poste_10': 1.4465867613618872,
 'poste_11': 1.4897672533444115,
 'poste_12': 0.9759995382485754}

**Par poste**

In [14]:
# CN masses for 2017 (note the name: `masses_cn_dataframe`, without the second underscore).
masses_cn_dataframe = new_get_cn_aggregates(2017)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [16]:
# bdf masses for `data_year`.
masses_bdf = new_get_bdf_aggregates(data_year)

In [27]:
def new_get_inflators_bdf_to_cn(data_year):
    """
    Calibration ratios CN / bdf for `data_year`, at the level of the CN postes.

    The bdf postes are more detailed than the CN ones: for every CN poste, the bdf masses
    of all bdf postes whose name contains the CN poste name are added up, and the CN mass
    is divided by that sum.

    Returns
    -------
    dict
        ``{cn_poste: conso_CN / conso_bdf}``. A CN poste with no (or zero) matching bdf
        spending yields ``inf`` (or ``nan`` when the CN mass is zero too); callers must
        handle those values.
    """
    data_cn = new_get_cn_aggregates(data_year)
    data_bdf = new_get_bdf_aggregates(data_year)
    cn_column = 'conso_CN_{}'.format(data_year)
    bdf_column = 'conso_bdf{}'.format(data_year)

    bdf_masses = data_bdf['bdf_aggregates']
    liste_postes_bdf = bdf_masses.index.tolist()

    # One sum per CN poste, collected in a dict and turned into a frame once. This replaces
    # the column-by-column growth of a frame (pandas fragmentation warning) and the
    # deprecated float() call on a one-element Series.
    bdf_by_cn_poste = dict()
    for poste in data_cn.index.tolist():
        matching = [poste in element for element in liste_postes_bdf]
        bdf_by_cn_poste[poste] = float(bdf_masses.loc[matching].sum())
    data_bdf_postes_cn = pd.DataFrame({bdf_column: pd.Series(bdf_by_cn_poste, dtype = float)})

    masses = data_cn.merge(data_bdf_postes_cn, left_index = True, right_index = True)

    return (masses[cn_column] / masses[bdf_column]).to_dict()

In [36]:
# bdf -> CN calibration ratios for 2017.
new_get_inflators_bdf_to_cn(2017)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  data_bdf_postes_cn[poste] = 0


{'poste_01_1_1': 1.0288551574737594,
 'poste_01_1_2': 1.1102187472242049,
 'poste_01_1_3': 0.8727957960706688,
 'poste_01_1_4': 1.1286142021082546,
 'poste_01_1_5': 1.2236731357224044,
 'poste_01_1_6': 1.0329001909792201,
 'poste_01_1_7': 1.2007684207369826,
 'poste_01_1_8': 1.5008816768838418,
 'poste_01_1_9': 3.488342685296373,
 'poste_01_2_1': 0.9699320608559848,
 'poste_01_2_2': 1.2735049139537253,
 'poste_01_2_3': 0.9170285005986617,
 'poste_01_2_5': 1.274563224757769,
 'poste_01_2_6': 1.641844644858161,
 'poste_01_2_9': 8.058526866017846,
 'poste_02_1_1': 1.9019808982449435,
 'poste_02_1_2': 2.2521652776790826,
 'poste_02_1_3': 2.1193103847911035,
 'poste_02_1_9': 0.1580365381603684,
 'poste_02_3': 1.7635919583735522,
 'poste_02_4': inf,
 'poste_03_1_1': 1.0327877067847278,
 'poste_03_1_2': 1.3568074584587102,
 'poste_03_1_3': 3.2910993124722863,
 'poste_03_1_4': 0.8911875760453848,
 'poste_03_2_1': 0.8886778953372103,
 'poste_03_2_2': 1.7516907728529518,
 'poste_04_1_1': 1.02888

**Par poste agrégé**

In [192]:
def new_get_inflators_cn_to_cn(target_year, data_year):
    '''
        Compute the ageing inflators from the national-accounts masses:
        for every poste of `data_year`, mass(target_year) / mass(data_year).
    '''
    masses_data_year = new_get_cn_aggregates(data_year)['conso_CN_{}'.format(data_year)].to_dict()
    masses_target_year = new_get_cn_aggregates(target_year)['conso_CN_{}'.format(target_year)].to_dict()

    inflators = dict()
    for poste, masse_data_year in masses_data_year.items():
        inflators[poste] = masses_target_year[poste] / masse_data_year
    return inflators

In [195]:
# Ageing inflators from 2017 to 2018.
new_get_inflators_cn_to_cn(target_year= 2018, data_year = 2017)

{'poste_01': 1.014635242505159,
 'poste_02': 1.0431652046134192,
 'poste_03': 0.9767986235913612,
 'poste_04': 1.024969688721906,
 'poste_05': 0.9623770158830857,
 'poste_06': 1.0060181134552564,
 'poste_07': 1.0575058299967532,
 'poste_08': 1.0051190140712254,
 'poste_09': 1.0184559963619217,
 'poste_10': 1.0116124980617938,
 'poste_11': 1.0593448924816955,
 'poste_12': 1.0397346956949207}

**Par poste** 

In [29]:
def new_get_inflators_cn_to_cn(target_year, data_year):
    '''
        Compute the ageing inflators from the national-accounts masses:
        for every poste of `data_year`, mass(target_year) / mass(data_year).
    '''
    base_column = 'conso_CN_{}'.format(data_year)
    target_column = 'conso_CN_{}'.format(target_year)
    base_masses = new_get_cn_aggregates(data_year)[base_column].to_dict()
    target_masses = new_get_cn_aggregates(target_year)[target_column].to_dict()

    return {
        poste: target_masses[poste] / base_masses[poste]
        for poste in base_masses
        }

In [37]:
# Ageing inflators from 2017 to 2019.
new_get_inflators_cn_to_cn(2019, 2017)

{'poste_01_1_1': 1.072629722117993,
 'poste_01_1_2': 0.989716651893621,
 'poste_01_1_3': 0.9865957532970667,
 'poste_01_1_4': 1.044899077835816,
 'poste_01_1_5': 1.0867440827463672,
 'poste_01_1_6': 1.0380438590345407,
 'poste_01_1_7': 1.091947540048363,
 'poste_01_1_8': 1.015887041350934,
 'poste_01_1_9': 1.0372410140994242,
 'poste_01_2_1': 0.987624466571835,
 'poste_01_2_2': 1.0386036960985627,
 'poste_01_2_3': 1.038658849395383,
 'poste_01_2_5': 1.0345940577671109,
 'poste_01_2_6': 1.0442169267224035,
 'poste_01_2_9': 1.0390111136312088,
 'poste_02_1_1': 1.004154106315712,
 'poste_02_1_2': 0.9460651708835022,
 'poste_02_1_3': 1.1343291544411844,
 'poste_02_1_9': 1.1073541842772612,
 'poste_02_3': 1.1043116329761264,
 'poste_02_4': 1.1736073875484063,
 'poste_03_1_1': 1.0224358974358974,
 'poste_03_1_2': 0.9672618448703613,
 'poste_03_1_3': 1.0077945665060666,
 'poste_03_1_4': 1.0488151658767773,
 'poste_03_2_1': 1.0030912327670674,
 'poste_03_2_2': 1.0765987350667603,
 'poste_04_1_

**Par poste agrégé**

In [32]:
def new_get_inflators(target_year,data_year):
    '''
    Compute, for each variable of the tax-benefit system, the product of the calibration
    ratio (bdf to CN for the data year) and of the ageing ratio (CN data year to CN target
    year), from the national-accounts masses and the bdf consumption masses.

    A variable gets a ratio when its first 8 characters are the name of a poste for which
    an ageing inflator exists.
    '''
    calage = new_get_inflators_bdf_to_cn(data_year)
    vieillissement = new_get_inflators_cn_to_cn(target_year,data_year)

    noms_variables = list(FranceIndirectTaxationTaxBenefitSystem().variables.keys())

    ratio_by_variable = {}
    for nom_variable in noms_variables:
        poste_agrege = nom_variable[:8]
        if poste_agrege in vieillissement:
            ratio_by_variable[nom_variable] = calage[poste_agrege] * vieillissement[poste_agrege]

    return ratio_by_variable

In [38]:
# Intermediate results kept at notebook level to inspect the matching step by hand:
# calibration ratios for 2017 and ageing ratios from 2017 to 2019.
inflators_bdf_to_cn = new_get_inflators_bdf_to_cn(2017)
inflators_cn_to_cn = new_get_inflators_cn_to_cn(2019,2017)

  data_bdf_postes_cn[poste] = 0


In [48]:
# Names of all variables declared by the tax-benefit system.
tax_benefit_system = FranceIndirectTaxationTaxBenefitSystem()
liste_variables = list(tax_benefit_system.variables.keys())

In [61]:
# Attach to every variable the combined ratio of the poste it belongs to.
# The test is `key in element` (poste name contained in the variable name), as in
# `new_get_inflators`; the reversed test `element in key` can never match a variable
# name that is longer than the poste key.
ratio_by_variable = dict()
for element in liste_variables:
    for key in list(inflators_cn_to_cn.keys()):
        if key in element:
            ratio_by_variable[element] = inflators_bdf_to_cn[key] * inflators_cn_to_cn[key]

In [63]:
# Displays the full list of variable names; consider slicing it for a shorter output.
liste_variables

['age_carte_grise',
 'age_vehicule',
 'aides_logement',
 'bat_49_74',
 'bat_ap_74',
 'bat_av_49',
 'cataeu',
 'dip14pr',
 'ident_men',
 'identifiant_menage',
 'isolation_fenetres',
 'isolation_murs',
 'isolation_toit',
 'log_indiv',
 'majorite_double_vitrage',
 'ocde10',
 'ouest_sud',
 'paris',
 'petite_ville',
 'pondmen',
 'rural',
 'situacj',
 'situapr',
 'stalog',
 'strate',
 'surfhab_d',
 'tchof',
 'tuu',
 'typmen',
 'vag',
 'vp_deplacements_pro',
 'vp_domicile_travail',
 'zeat',
 'age',
 'age_group_pr',
 'agepr',
 'birth',
 'nactifs',
 'nadultes',
 'nenfants',
 'npers',
 'role_menage',
 'brde_m2_depenses_tot',
 'brde_m2_rev_disponible',
 'brde_transports_depenses_tot',
 'brde_transports_rev_disponible',
 'cmu',
 'eligibilite_tarifs_sociaux_energies',
 'froid',
 'froid_3_deciles',
 'froid_4_criteres',
 'froid_4_criteres_3_deciles',
 'froid_cout',
 'froid_impaye',
 'froid_installation',
 'froid_isolation',
 'precarite_energetique_depenses_tot',
 'precarite_energetique_rev_disponible

In [None]:
# NOTE(review): bare string literal, apparently a scratch example of a variable name to
# compare with the inflator keys — confirm whether this cell can be removed.
'poste_01_1_1_2'

In [64]:
# Poste names for which an ageing inflator exists.
list(inflators_cn_to_cn.keys())

['poste_01_1_1',
 'poste_01_1_2',
 'poste_01_1_3',
 'poste_01_1_4',
 'poste_01_1_5',
 'poste_01_1_6',
 'poste_01_1_7',
 'poste_01_1_8',
 'poste_01_1_9',
 'poste_01_2_1',
 'poste_01_2_2',
 'poste_01_2_3',
 'poste_01_2_5',
 'poste_01_2_6',
 'poste_01_2_9',
 'poste_02_1_1',
 'poste_02_1_2',
 'poste_02_1_3',
 'poste_02_1_9',
 'poste_02_3',
 'poste_02_4',
 'poste_03_1_1',
 'poste_03_1_2',
 'poste_03_1_3',
 'poste_03_1_4',
 'poste_03_2_1',
 'poste_03_2_2',
 'poste_04_1_1',
 'poste_04_1_2',
 'poste_04_2_1',
 'poste_04_2_2',
 'poste_04_3_1',
 'poste_04_3_2',
 'poste_04_4_1',
 'poste_04_4_2',
 'poste_04_4_3',
 'poste_04_4_4',
 'poste_04_5_1',
 'poste_04_5_2',
 'poste_04_5_3',
 'poste_04_5_4',
 'poste_04_5_5',
 'poste_05_1',
 'poste_05_2',
 'poste_05_3_1',
 'poste_05_3_2',
 'poste_05_3_3',
 'poste_05_4',
 'poste_05_5_1',
 'poste_05_5_2',
 'poste_05_5_3',
 'poste_05_6_1',
 'poste_05_6_2',
 'poste_06_1',
 'poste_06_2',
 'poste_06_3',
 'poste_06_4',
 'poste_07_1_1',
 'poste_07_1_2',
 'poste_07_1_3'

**Par poste**

In [65]:
def new_get_inflators(target_year,data_year):
    '''
    Compute, for each variable of the tax-benefit system, the product of the calibration
    ratio (bdf to CN for the data year) and of the ageing ratio (CN data year to CN target
    year), from the national-accounts masses and the bdf consumption masses.

    A variable gets the ratio of a poste when the poste name is contained in the variable
    name; if several postes match, the last one in the inflator dictionary wins.
    '''
    calage = new_get_inflators_bdf_to_cn(data_year)
    vieillissement = new_get_inflators_cn_to_cn(target_year,data_year)

    systeme = FranceIndirectTaxationTaxBenefitSystem()
    noms_variables = list(systeme.variables.keys())
    postes = list(vieillissement.keys())

    return {
        nom_variable: calage[poste] * vieillissement[poste]
        for nom_variable in noms_variables
        for poste in postes
        if poste in nom_variable
        }

In [67]:
# Combined calibration-and-ageing ratios, from 2017 data to target year 2019.
new_get_inflators(2019,2017)

  data_bdf_postes_cn[poste] = 0


{'poste_01_1_1_2': 1.1035806216607424,
 'poste_01_1_1_4': 1.1035806216607424,
 'poste_01_1_1_5': 1.1035806216607424,
 'poste_01_1_1_3': 1.1035806216607424,
 'poste_01_1_1_1': 1.1035806216607424,
 'poste_01_1_2_1': 1.0988019813722705,
 'poste_01_1_2_3': 1.0988019813722705,
 'poste_01_1_2_2': 1.0988019813722705,
 'poste_01_1_2_5': 1.0988019813722705,
 'poste_01_1_2_4': 1.0988019813722705,
 'poste_01_1_2_7': 1.0988019813722705,
 'poste_01_1_2_6': 1.0988019813722705,
 'poste_01_1_3_0': 0.8610966258988545,
 'poste_01_1_3_2': 0.8610966258988545,
 'poste_01_1_3_1': 0.8610966258988545,
 'poste_01_1_3_3': 0.8610966258988545,
 'poste_01_1_3_4': 0.8610966258988545,
 'poste_01_1_4_1': 1.1792879390153206,
 'poste_01_1_4_2': 1.1792879390153206,
 'poste_01_1_4_3': 1.1792879390153206,
 'poste_01_1_4_4': 1.1792879390153206,
 'poste_01_1_4_6': 1.1792879390153206,
 'poste_01_1_4_5': 1.1792879390153206,
 'poste_01_1_4_7': 1.1792879390153206,
 'poste_01_1_5_1': 1.3298195394620154,
 'poste_01_1_5_3': 1.3298

**Par poste agrégé**

In [68]:
def new_get_inflators_by_year(rebuild = False, year_range = None, data_year = None):
    """
    Inflators for every year of `year_range`, either recomputed or read back from disk.

    Parameters
    ----------
    rebuild : bool
        When not False, the inflators are recomputed with ``new_get_inflators`` and
        written to 'inflateurs/new_inflators_by_year.csv' under ``assets_directory``
        (one ``variable, value, year`` row per inflator, no header). Otherwise the
        inflators are read from that file.
    year_range : iterable of int, optional
        Target years; defaults to ``range(2000, 2020)``.
    data_year : int, optional
        Year of the survey data; only used when rebuilding.

    Returns
    -------
    dict
        ``{year: {variable: inflator}}``.
    """
    if year_range is None:
        year_range = range(2000, 2020)

    inflators_file_path = os.path.join(assets_directory, 'inflateurs', 'new_inflators_by_year.csv')

    if rebuild is not False:
        inflators_by_year = dict()
        for target_year in year_range:
            inflators_by_year[target_year] = new_get_inflators(target_year = target_year, data_year = data_year)

        # The context manager closes (and therefore flushes) the file, so that a later read
        # of the CSV sees every row; `newline = ''` is what the csv module expects for
        # files handed to csv.writer.
        with open(inflators_file_path, 'w', newline = '') as inflators_file:
            writer_inflators = csv.writer(inflators_file)
            for year in year_range:
                for key, value in inflators_by_year[year].items():
                    writer_inflators.writerow([key, value, year])

        return inflators_by_year

    # The file has no header: column 0 (variable name) becomes the index, column 1 holds
    # the inflator and column 2 the year.
    inflators_from_csv = pd.read_csv(inflators_file_path, index_col = 0, header = None)
    re_build_inflators = dict()
    for year in year_range:
        inflators_from_csv_by_year = inflators_from_csv[inflators_from_csv[2] == year]
        re_build_inflators[year] = inflators_from_csv_by_year[1].to_dict()

    return re_build_inflators

In [69]:
# Recompute the inflators for 2017-2022 and rewrite the CSV cache.
new_get_inflators_by_year(rebuild = True, year_range = range(2017,2023), data_year = 2017)

  data_bdf_postes_cn[poste] = 0


{2017: {'poste_01_1_1_2': 1.0288551574737594,
  'poste_01_1_1_4': 1.0288551574737594,
  'poste_01_1_1_5': 1.0288551574737594,
  'poste_01_1_1_3': 1.0288551574737594,
  'poste_01_1_1_1': 1.0288551574737594,
  'poste_01_1_2_1': 1.1102187472242049,
  'poste_01_1_2_3': 1.1102187472242049,
  'poste_01_1_2_2': 1.1102187472242049,
  'poste_01_1_2_5': 1.1102187472242049,
  'poste_01_1_2_4': 1.1102187472242049,
  'poste_01_1_2_7': 1.1102187472242049,
  'poste_01_1_2_6': 1.1102187472242049,
  'poste_01_1_3_0': 0.8727957960706688,
  'poste_01_1_3_2': 0.8727957960706688,
  'poste_01_1_3_1': 0.8727957960706688,
  'poste_01_1_3_3': 0.8727957960706688,
  'poste_01_1_3_4': 0.8727957960706688,
  'poste_01_1_4_1': 1.1286142021082546,
  'poste_01_1_4_2': 1.1286142021082546,
  'poste_01_1_4_3': 1.1286142021082546,
  'poste_01_1_4_4': 1.1286142021082546,
  'poste_01_1_4_6': 1.1286142021082546,
  'poste_01_1_4_5': 1.1286142021082546,
  'poste_01_1_4_7': 1.1286142021082546,
  'poste_01_1_5_1': 1.223673135722

In [78]:
# Re-run the aggregation with the new inflators, read back from the CSV cache.
data_year = 2017
inflators_by_year = new_get_inflators_by_year(
    rebuild = False,
    year_range = range(2017, 2023),
    data_year = data_year,
    )
simulated_variables = [
    'tva_taux_plein',
    'tva_taux_intermediaire',
    'tva_taux_reduit',
    'tva_taux_super_reduit',
    'tva_total',
    'rev_disponible',
    'pondmen',
    'depenses_tot',
    'loyer_impute',
    ]

# One row per simulated year: the decile-level sums are added up into national totals.
colonnes_agregats = [
    'tva_total',
    'tva_taux_super_reduit',
    'tva_taux_reduit',
    'tva_taux_intermediaire',
    'tva_taux_plein',
    'depenses_tot',
    'loyer_impute',
    ]
agregates = pd.DataFrame(columns = colonnes_agregats)
for year in range(2017, 2023):
    inflation_kwargs = {'inflator_by_variable': inflators_by_year[year]}
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year,
        )
    df_sum = dataframe_by_group(
        survey_scenario,
        category = 'niveau_vie_decile',
        variables = simulated_variables,
        aggfunc = 'sum',
        )
    agregates.loc[year] = df_sum.sum()

  variable_name, inflator_by_variable[variable_name] * self.compute_aggregate(
  simulation.set_input(variable_name, period, inflator * array)  # insert inflated array


In [79]:
# Show the yearly totals obtained with the new inflators.
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,0.0,337356400.0,11105530000.0,0.0,153226500000.0,974350400000.0,135044300000.0
2018,0.0,331061300.0,11337880000.0,0.0,165004900000.0,1005746000000.0,135044300000.0
2019,0.0,329748600.0,11680230000.0,0.0,172033600000.0,1027281000000.0,135044300000.0
2020,0.0,305558200.0,12694760000.0,0.0,152473600000.0,952170300000.0,135044300000.0
2021,0.0,324574700.0,12951000000.0,0.0,185376100000.0,1021302000000.0,135044300000.0
2022,0.0,335417900.0,14017880000.0,0.0,229338100000.0,1122780000000.0,135044300000.0


In [228]:
# NOTE(review): this output comes from a different execution (In [228]) than the cell
# above — confirm which run it documents.
agregates

Unnamed: 0,tva_total,tva_taux_super_reduit,tva_taux_reduit,tva_taux_intermediaire,tva_taux_plein,depenses_tot,loyer_impute
2017,102772500000.0,353672900.0,9686963000.0,12062920000.0,80668910000.0,1046474000000.0,135044300000.0
2018,105403500000.0,356908900.0,9821486000.0,12603330000.0,82621750000.0,1074177000000.0,135044300000.0
2019,107594900000.0,362254200.0,10015600000.0,13065020000.0,84152050000.0,1096664000000.0,135044300000.0


In [82]:
from openfisca_france_indirect_taxation.variables.base import get_legislation_data_frames

In [83]:
# Legislation tables: tax categories and COICOP codes.
categories_fiscales_data_frame, codes_coicop_data_frame = get_legislation_data_frames()

In [86]:
# Distinct values of the 'categorie_fiscale' column.
set(categories_fiscales_data_frame['categorie_fiscale'].tolist())

{'',
 'alcools_forts',
 'assurance_sante',
 'assurance_transport',
 'autres_assurances',
 'biere',
 'cigares',
 'cigarettes',
 'tabac_a_rouler',
 'ticpe',
 'tva_taux_intermediaire',
 'tva_taux_plein',
 'tva_taux_reduit',
 'tva_taux_super_reduit',
 'vin'}