In [1]:
import numpy
import pandas as pd
import os
import csv
import ast
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

from wquantiles import quantile
from openfisca_survey_manager.utils import asof

from openfisca_france_indirect_taxation import FranceIndirectTaxationTaxBenefitSystem
from openfisca_france_indirect_taxation.examples.utils_example import (
    wavg,
    collapse,
    dataframe_by_group,
    graph_builder_bar,
    df_weighted_average_grouped)
from openfisca_france_indirect_taxation.build_survey_data.utils import weighted_sum
from openfisca_france_indirect_taxation.almost_ideal_demand_system.utils import add_niveau_vie_decile
from openfisca_france_indirect_taxation.surveys import SurveyScenario
from openfisca_france_indirect_taxation.calibration import get_inflators_by_year_energy
from openfisca_france_indirect_taxation.utils import assets_directory, get_input_data_frame

In [2]:
from openfisca_france_indirect_taxation.projects.TVA_Herve_IPP.new_calage_bdf_cn import (
    new_get_bdf_aggregates,
    new_get_cn_aggregates,
    new_get_inflators_bdf_to_cn,
    new_get_inflators_cn_to_cn,
    new_get_inflators,
    new_get_inflators_by_year)

In [29]:
data_year = 2017
inflators_by_year = new_get_inflators_by_year(rebuild = True, year_range = range(2017, 2025), data_year = data_year)
simulated_variables = ['tva_taux_plein',
'tva_taux_intermediaire',
 'tva_taux_reduit',
 'tva_taux_super_reduit',
 'tva_total',
 'depenses_tva_exonere',
 'depenses_tva_taux_plein',
 'depenses_tva_taux_intermediaire',
 'depenses_tva_taux_reduit',
 'depenses_tva_taux_super_reduit',
 'rev_disponible',
 'rev_disp_yc_loyerimpute',
 'loyer_impute',
 'depenses_tot',
 'depenses_totales',
 'pondmen',
]

agregates = pd.DataFrame(columns = simulated_variables)
for year in range(2017,2025) :
    inflation_kwargs = dict(inflator_by_variable = inflators_by_year[year])
    survey_scenario = SurveyScenario.create(
        inflation_kwargs = inflation_kwargs,
        year = year,
        data_year = data_year
        )
    df_sum = dataframe_by_group(survey_scenario, category = 'niveau_vie_decile', variables = simulated_variables, aggfunc = 'sum')
    agregates.loc[year] = df_sum.sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
  data_bdf_postes_cn[poste] = 0


In [30]:
agregates['Conso BDF'] = agregates['depenses_tot'] + agregates['loyer_impute']
agregates.reset_index(inplace= True)
agregates.rename(columns = {'index' : 'year'}, inplace= True)
agregates

Unnamed: 0,year,tva_taux_plein,tva_taux_intermediaire,tva_taux_reduit,tva_taux_super_reduit,tva_total,depenses_tva_exonere,depenses_tva_taux_plein,depenses_tva_taux_intermediaire,depenses_tva_taux_reduit,depenses_tva_taux_super_reduit,rev_disponible,rev_disp_yc_loyerimpute,loyer_impute,depenses_tot,depenses_totales,pondmen,Conso BDF
0,2017,78760440000.0,11829560000.0,10856930000.0,291658800.0,101738600000.0,182858200000.0,472562700000.0,130125200000.0,208255600000.0,14180170000.0,1197964000000.0,1387077000000.0,189112700000.0,1007982000000.0,825123700000.0,84458590000.0,1197095000000.0
1,2018,81122040000.0,12416410000.0,11087010000.0,290634600.0,104916100000.0,190775200000.0,486732300000.0,136580500000.0,212669100000.0,14130380000.0,1234271000000.0,1426805000000.0,192533900000.0,1040887000000.0,850112300000.0,84458590000.0,1233421000000.0
2,2019,82566550000.0,13078850000.0,11358690000.0,291524000.0,107295600000.0,192948300000.0,495399300000.0,143867400000.0,217880400000.0,14173620000.0,1278664000000.0,1475381000000.0,196717000000.0,1064269000000.0,871320600000.0,84458590000.0,1260986000000.0
3,2020,76785790000.0,9625282000.0,10921140000.0,282879200.0,97615100000.0,190675800000.0,460714800000.0,105878100000.0,209487400000.0,13753320000.0,1289747000000.0,1489995000000.0,200247800000.0,980509300000.0,789833500000.0,84458590000.0,1180757000000.0
4,2021,83839460000.0,11399290000.0,11137130000.0,298787600.0,106674700000.0,199930900000.0,503036800000.0,125392200000.0,213630400000.0,14526770000.0,1358404000000.0,1562187000000.0,203783400000.0,1056517000000.0,856586100000.0,84458590000.0,1260300000000.0
5,2022,88622120000.0,15429470000.0,12276360000.0,305966200.0,116633900000.0,213606100000.0,531732700000.0,169724100000.0,235482900000.0,14875790000.0,1434894000000.0,1642992000000.0,208097600000.0,1165422000000.0,951815500000.0,84458590000.0,1373519000000.0
6,2023,92622920000.0,16942310000.0,13441580000.0,311221600.0,123318000000.0,254669200000.0,555737600000.0,186365400000.0,257833800000.0,15131300000.0,1560302000000.0,1773999000000.0,213696800000.0,1269737000000.0,1015068000000.0,84458590000.0,1483434000000.0
7,2024,95288880000.0,17429960000.0,13828460000.0,320179500.0,126867500000.0,261999300000.0,571733300000.0,191729600000.0,265255100000.0,15566820000.0,1605212000000.0,1825060000000.0,219847600000.0,1306284000000.0,1044285000000.0,84458590000.0,1526132000000.0


In [31]:
cn_agregates_by_year = pd.DataFrame()
for year in range(2017,2024):
    cn_agregates = new_get_cn_aggregates(year)
    cn_agregates.drop(['poste_02_4', 'poste_04_4_3','rev_disp_yc_loyerimpute','rev_disponible'], axis= 0, inplace = True)
    cn_agregates_by_year = pd.concat([cn_agregates_by_year,pd.DataFrame(cn_agregates.sum())])
cn_agregates_by_year
cn_agregates_by_year.reset_index(inplace = True)
cn_agregates_by_year['year'] = cn_agregates_by_year['index'].apply(lambda x : x[-4:]).astype(int)
cn_agregates_by_year.drop('index', axis = 1, inplace= True)
cn_agregates_by_year.rename(columns = {0 : 'Conso CN'}, inplace= True)
cn_agregates_by_year

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,Conso CN,year
0,1197358000000.0,2017
1,1233693000000.0,2018
2,1261263000000.0,2019
3,1181017000000.0,2020
4,1260578000000.0,2021
5,1373821000000.0,2022
6,1483760000000.0,2023


In [35]:
to_compare = agregates.loc[:,['Conso BDF']].merge(cn_agregates_by_year, left_index= True, right_index= True)
to_compare.set_index('year', inplace = True)
to_compare = to_compare* 1e-9 
to_compare = to_compare.round(1)

In [36]:
to_compare['Ecart BDF / CN'] = (to_compare['Conso CN'] / to_compare['Conso BDF'] -1)* 100
to_compare

Unnamed: 0_level_0,Conso BDF,Conso CN,Ecart BDF / CN
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,1197.1,1197.4,0.025061
2018,1233.4,1233.7,0.024323
2019,1261.0,1261.3,0.023791
2020,1180.8,1181.0,0.016938
2021,1260.3,1260.6,0.023804
2022,1373.5,1373.8,0.021842
2023,1483.4,1483.8,0.026965


In [9]:
to_compare['Growth_rate_CN'] = to_compare['Conso CN'].pct_change()
to_compare

Unnamed: 0,Conso BDF,Conso CN,year,Ecart BDF / CN,Growth_rate_CN
0,1197095000000.0,1197358000000.0,2017,0.022011,
1,1233421000000.0,1233693000000.0,2018,0.022007,0.030346
2,1260986000000.0,1261263000000.0,2019,0.022001,0.022348
3,1180757000000.0,1181017000000.0,2020,0.02201,-0.063624
4,1260300000000.0,1260578000000.0,2021,0.022014,0.067366
5,1373519000000.0,1373821000000.0,2022,0.021994,0.089835
6,1483434000000.0,1483760000000.0,2023,0.02198,0.080024


**Par postes agrégés**

In [10]:
from openfisca_france_indirect_taxation.projects.TVA_Herve_IPP.new_calage_bdf_cn_by_postes_agreges import (get_bdf_aggregates,
    get_cn_aggregates,
    get_inflators_bdf_to_cn,
    get_inflators_cn_to_cn,
    get_inflators_by_year)

In [11]:
aggregates_13_postes = get_cn_aggregates(2023)
aggregates_13_postes


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0_level_0,conso_CN_2023
Code,Unnamed: 1_level_1
poste_01,184382400000.0
poste_02,51220700000.0
poste_03,44200500000.0
poste_04,404336800000.0
poste_05,58979900000.0
poste_06,56155500000.0
poste_07,191751400000.0
poste_08,52949400000.0
poste_09,102602300000.0
poste_10,10540300000.0


In [12]:
cn_agregates = new_get_cn_aggregates(2023)
to_substract = cn_agregates.loc[cn_agregates.index.isin(['poste_02_4', 'poste_04_4_3','loyer_impute'])]
to_substract.rename(index ={'loyer_impute' : 'poste_04_2'}, columns = {'conso_CN_2023' : 'to_substract'}, inplace = True)
to_substract = to_substract.groupby(to_substract.index.str[:8]).sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [13]:
aggregates_13_postes = aggregates_13_postes.merge(to_substract, left_index= True, right_index= True, how = 'left')
aggregates_13_postes.fillna(0, inplace = True)

In [14]:
aggregates_13_postes['right_conso_CN_2023'] = aggregates_13_postes['conso_CN_2023'] - aggregates_13_postes['to_substract']
aggregates_13_postes.drop(columns=['conso_CN_2023','to_substract'], axis= 1,  inplace= True)
aggregates_13_postes.rename(columns={'right_conso_CN_2023' : 'conso_CN_2023'}, inplace = True)

aggregates_13_postes.index = aggregates_13_postes.index.to_series().apply(lambda x :x.replace('poste_' ,'poste_agrege_'))

In [15]:
data_year = 2017
inflators = new_get_inflators(2023,2017)
simulated_variables = ["poste_agrege_0{}".format(i) for i in range(1, 10)] + ["poste_agrege_10", "poste_agrege_11", "poste_agrege_12", "poste_agrege_13"]

postes_agreges = pd.DataFrame(columns=["poste_agrege_0{}".format(i) for i in range(1, 10)] + ["poste_agrege_10", "poste_agrege_11", "poste_agrege_12", "poste_agrege_13"])
inflation_kwargs = dict(inflator_by_variable = inflators)
survey_scenario = SurveyScenario.create(
    inflation_kwargs = inflation_kwargs,
    year = 2023,
    data_year = data_year
    )
df_sum = dataframe_by_group(survey_scenario, category = 'niveau_vie_decile', variables = simulated_variables, aggfunc = 'sum')
postes_agreges = df_sum.sum()
conso_by_poste = pd.DataFrame(postes_agreges.T).reset_index().rename(columns={'index' : 'Code', 0 : 'conso_OFF_IT_2023'}).set_index('Code')

  data_bdf_postes_cn[poste] = 0


In [None]:
aggregates_13_postes = aggregates_13_postes.merge(conso_by_poste, left_index= True, right_index= True)
aggregates_13_postes = aggregates_13_postes * 1e-9
aggregates_13_postes = aggregates_13_postes.round(2)
aggregates_13_postes.rename({'poste_agrege_13' : 'poste_agrege_12' },axis = 0, inplace = True)
aggregates_13_postes = aggregates_13_postes.groupby(aggregates_13_postes.index).sum()

In [55]:
label_postes_agreges = ['Alimentation', 'Alcools et tabacs', 'Habillement et chaussures', 'Logement', 'Ameublement et équipement ménager', 'Santé',
                        'Transports', 'Communication' , 'Loisirs et cultures' , 'Education', 'Restauration et hôtellerie', 'Services et biens divers']
liste_postes_agreges = ["poste_agrege_0{}".format(i) for i in range(1, 10)] + ["poste_agrege_{}".format(i) for i in range(10, 13)]
label_dict = dict(zip(liste_postes_agreges,label_postes_agreges))

In [59]:
aggregates_13_postes.rename(label_dict, axis = 0, inplace = True)

In [60]:
aggregates_13_postes

Unnamed: 0_level_0,conso_CN_2023,conso_OFF_IT_2023
Code,Unnamed: 1_level_1,Unnamed: 2_level_1
Alimentation,184.38,184.34
Alcools et tabacs,46.13,46.12
Habillement et chaussures,44.2,44.19
Logement,183.76,183.72
Ameublement et équipement ménager,58.98,58.97
Santé,56.16,56.14
Transports,191.75,191.71
Communication,52.95,52.94
Loisirs et cultures,102.6,102.58
Education,10.54,10.54
