In [1]:
# Optional setup: uncomment the lines below to install the OpenFisca packages
# used in this notebook straight from GitHub (openfisca-senegal comes from its
# `ceq` branch, the others from `master`).
# !pip install -U git+https://github.com/openfisca/openfisca-ceq.git@master#egg=OpenFisca-CEQ
# !pip install -U git+https://github.com/openfisca/openfisca-senegal.git@ceq#egg=OpenFisca-Senegal
# !pip install -U git+https://github.com/openfisca/openfisca-mali.git@master#egg=OpenFisca-Mali
# !pip install -U git+https://github.com/openfisca/openfisca-cote-d-ivoire.git@master#egg=OpenFisca-COTE-D-IVOIRE

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
from openfisca_ceq.tools.survey_scenario import build_ceq_survey_scenario
from openfisca_ceq.tools.indirect_taxation.tax_benefit_system_indirect_taxation_completion import indirect_tax_by_country
from openfisca_ceq.tools.data import year_by_country
from openfisca_ceq.tools.data_ceq_correspondence import (
    ceq_input_by_harmonized_variable,
    ceq_intermediate_by_harmonized_variable,
    non_ceq_input_by_harmonized_variable,
    )

# Build country scenario

## Country selection

In [3]:
# Country to study: used below as the key into year_by_country and as the
# legislation_country passed to build_ceq_survey_scenario.
country = "senegal"

In [4]:
# Look up the year associated with the selected country, then build the
# CEQ survey scenario for that country and year.
year = year_by_country[country]
survey_scenario = build_ceq_survey_scenario(
    legislation_country = country,
    year = year,
    )

In [5]:
# Alphabetical list of the tax-benefit system variables whose entity is the household.
household_variables = sorted(
    name
    for name, variable in survey_scenario.tax_benefit_system.variables.items()
    if variable.entity.key == 'household'
    )
display(household_variables)

['agricultural_inputs_subsidies',
 'alimony',
 'all_income_excluding_transfers',
 'autoconsumption',
 'cash_transfers',
 'consumable_income',
 'consumption',
 'contributions_health',
 'contributions_pensions',
 'corporate_income_tax',
 'customs_duties',
 'decile_consumable_income_per_capita',
 'decile_disposable_income_per_capita',
 'decile_final_income_per_capita',
 'decile_gross_income_per_capita',
 'decile_market_income_per_capita',
 'decile_market_income_plus_pensions_per_capita',
 'decile_survey_income_per_capita',
 'depenses_ht_hd_poste_10_1_1_1',
 'depenses_ht_hd_poste_11_1_1_1_1',
 'depenses_ht_hd_poste_12_1_3_item_1',
 'depenses_ht_hd_poste_12_1_3_item_2',
 'depenses_ht_hd_poste_12_3_1_1_item_1',
 'depenses_ht_hd_poste_12_3_1_1_item_2',
 'depenses_ht_hd_poste_12_7_1_2_1',
 'depenses_ht_hd_poste_1_1_1_1_1_item_1',
 'depenses_ht_hd_poste_1_1_1_1_1_item_2',
 'depenses_ht_hd_poste_1_1_1_4_1_item_1',
 'depenses_ht_hd_poste_1_1_1_4_1_item_2',
 'depenses_ht_hd_poste_1_1_1_4_1_item_3'

In [6]:
# Tax-benefit system variables whose entity is the person, kept in the order
# in which the tax-benefit system lists them (not sorted).
person_variables = [
    name
    for name, variable in survey_scenario.tax_benefit_system.variables.items()
    if variable.entity.key == 'person'
    ]
display(person_variables)

['age',
 'date_naissance',
 'est_cadre',
 'est_celibataire',
 'est_divorce',
 'est_marie',
 'est_veuf',
 'nombre_enfants',
 'statut_marital',
 'actions_interets',
 'benefices_non_salarie',
 'conjoint_a_des_revenus',
 'jetons_et_autres_remunerations',
 'lots',
 'obligations',
 'pension_retraite',
 'produits_des_comptes',
 'revenu_non_salarie',
 'revenus_fonciers_brut',
 'salaire',
 'salaire_brut',
 'person_weight',
 'accidents_du_travail',
 'cotisations_employeur',
 'cotisations_salariales',
 'famille',
 'retraite_employeur',
 'retraite_salarie',
 'salaire_imposable',
 'salaire_super_brut',
 'sante_employeur',
 'sante_salarie',
 'contribution_forfaitaire_charge_employeur',
 'contribution_globale_fonciere',
 'contribution_globale_unique',
 'droit_progressif',
 'droit_progressif_pension_retraite',
 'droit_progressif_salaire',
 'droit_proportionnel',
 'droit_proportionnel_autres_revenus',
 'droit_proportionnel_salaire',
 'impot_revenus',
 'nombre_de_parts',
 'pension_net_a_payer',
 'reduct

In [7]:
# Compute the two requested variables; the result is a dict holding one
# dataframe per entity.
dataframe_by_entity = survey_scenario.create_data_frame_by_entity(
    ['salaire_super_brut', 'consumption'],
    )

In [8]:
# Entities for which a dataframe was built (the dict is keyed by entity name).
dataframe_by_entity.keys()

dict_keys(['household', 'person'])

In [9]:
# Pull the person-level and household-level dataframes out of the dict.
person_dataframe, household_dataframe = (
    dataframe_by_entity['person'],
    dataframe_by_entity['household'],
    )

In [10]:
# Preview the first 30 rows of the person-level dataframe.
person_dataframe.head(30)

Unnamed: 0,salaire_super_brut
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
5,0.0
6,0.0
7,0.0
8,0.0
9,0.0


In [11]:
# Export both dataframes as CSV files; the paths are relative, so the files
# are written to the current working directory.
person_dataframe.to_csv('person.csv')
household_dataframe.to_csv('household.csv')

In [12]:
# Same variables as before, but requested as a single merged dataframe
# (merge = True) that also carries the entity index columns (index = True).
merged_dataframe = survey_scenario.create_data_frame_by_entity(
    ['salaire_super_brut', 'consumption'],
    merge = True,
    index = True,
    )

In [13]:
# Display the merged dataframe built in the previous cell.
merged_dataframe

Unnamed: 0,salaire_super_brut,household_id,household_role,household_position,consumption
0,0.0,0,2,0,705904.625
1,0.0,0,2,1,705904.625
2,0.0,0,1,2,705904.625
3,0.0,0,2,3,705904.625
4,0.0,0,2,4,705904.625
...,...,...,...,...,...
55011,0.0,5952,2,10,1185516.250
55012,0.0,5952,0,11,1185516.250
55013,0.0,5952,2,12,1185516.250
55014,0.0,5952,3,13,1185516.250


In [14]:
from openfisca_ceq.tools.results.inequality import (
    inequality_table,
    incidence_table,
    concentration_share,
    taxpayers_share,
    net_payers_beneficiaries,
    )
from openfisca_ceq.tools.tax_benefit_system_ceq_completion import labor_type_by_index

# Income concept, grouping variable and tax/transfer variables used by the tables below.
income_variable = 'gross_income'
by_variable = 'labor_type'
tax_variables = [
    'personal_income_tax',
    'customs_duties',
    'value_added_tax',
    'education_net_transfers',
    ]

table_by_indicator_by_scenario = {}

# Each table is grouped by labor type; its index is relabelled through
# labor_type_by_index before being displayed.
display(
    incidence_table(survey_scenario, income_variable, tax_variables, by_variable)
    .rename(index = labor_type_by_index)
    )
display(
    concentration_share(survey_scenario, tax_variables, by_variable)
    .rename(index = labor_type_by_index)
    )
display(
    taxpayers_share(survey_scenario, tax_variables, by_variable)
    .rename(index = labor_type_by_index)
    )


Unnamed: 0_level_0,personal_income_tax,customs_duties,value_added_tax,education_net_transfers
labor_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Inactive,0.03,0.02,0.07,0.09
Formal public wage worker,0.18,0.01,0.05,0.05
Formal private wage worker,0.14,0.01,0.06,0.05
Informal wage worker,0.01,0.02,0.08,0.09
Informal independent worker,0.01,0.01,0.06,0.05
Agricultural worker,0.0,0.02,0.05,0.1


Unnamed: 0_level_0,personal_income_tax,customs_duties,value_added_tax,education_net_transfers
labor_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Inactive,0.28,0.37,0.38,0.38
Formal public wage worker,0.36,0.04,0.05,0.05
Formal private wage worker,0.28,0.04,0.06,0.04
Informal wage worker,0.02,0.09,0.1,0.09
Informal independent worker,0.04,0.21,0.22,0.15
Agricultural worker,0.03,0.25,0.19,0.29


Unnamed: 0_level_0,personal_income_tax,customs_duties,value_added_tax,education_net_transfers
labor_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Inactive,0.18,0.99,1.0,0.61
Formal public wage worker,0.99,0.99,1.0,0.58
Formal private wage worker,0.99,1.0,1.0,0.49
Informal wage worker,0.07,1.0,1.0,0.58
Informal independent worker,0.1,1.0,1.0,0.5
Agricultural worker,0.03,0.99,1.0,0.59


In [15]:
# Net payers / net beneficiaries grouped by gross-income-per-capita decile.
# NOTE(review): the saved output of this cell is a TypeError
# ("<lambda>() got an unexpected keyword argument 'axis'"); it is presumably
# raised inside net_payers_beneficiaries itself - confirm against the
# installed openfisca-ceq and pandas versions.
net_payers_beneficiaries(survey_scenario, by_variable = 'decile_gross_income_per_capita')

TypeError: <lambda>() got an unexpected keyword argument 'axis'

In [None]:
# Sum direct taxes, indirect taxes and education net transfers within each
# group of `by_variable` (defined in an earlier cell), then derive the net
# contribution of each group as taxes paid minus transfers received.
tax_benefits_variables = ['direct_taxes', 'indirect_taxes', 'education_net_transfers']
series = (
    survey_scenario.compute_pivot_table(
        aggfunc = "sum",
        values = tax_benefits_variables,
        index = by_variable,
        period = survey_scenario.year,
        concat_axis = 1)
    .eval("net_contribution = direct_taxes + indirect_taxes - education_net_transfers")
    .round(2)
    )

In [None]:
# Display the table computed in the previous cell.
series

In [26]:
import numpy as np

# Weighted share of net payers and of net beneficiaries within each
# gross-income-per-capita decile.
by_variable = "decile_gross_income_per_capita"
# Expression evaluated row by row: taxes paid minus transfers received.
net_contribution = 'direct_taxes + indirect_taxes - education_net_transfers'
tax_benefits_variables = ['direct_taxes', 'indirect_taxes', 'education_net_transfers']
# The entity (and hence the weight) is taken from the first variable only;
# assumes all listed variables are defined on that same entity - TODO confirm.
entity_key = survey_scenario.tax_benefit_system.variables[tax_benefits_variables[0]].entity.key
weight_variable = survey_scenario.weight_variable_by_entity[entity_key]
series = (
    (
        survey_scenario.create_data_frame_by_entity(
            variables = tax_benefits_variables + [by_variable, weight_variable],
            period = survey_scenario.year,
            )
        )[entity_key]
    # Boolean flags: strictly positive / strictly negative net contribution.
    .eval("net_payer = ({} > 0)".format(net_contribution))
    .eval("net_beneficiaries = ({} < 0)".format(net_contribution))
    .groupby(by_variable)
    .apply(
        # Return a Series rather than a DataFrame: pd.DataFrame cannot be built
        # from a dict of scalars without an index (it raises ValueError), while
        # one Series per group is stacked by groupby.apply into a table with
        # one row per group and one column per share.
        lambda x: pd.Series(dict(
            net_payer = np.average(x.net_payer, weights = x[weight_variable]),
            net_beneficiaries = np.average(x.net_beneficiaries, weights = x[weight_variable]),
            ))
        )
    )

TypeError: __init__() got an unexpected keyword argument 'axis'

In [19]:
# Display the net payer / net beneficiary shares.
# NOTE(review): the saved output below is stale - it is indexed by labor_type
# and carries an execution count (19) lower than the preceding cell's (26),
# which groups by decile_gross_income_per_capita. Re-run the notebook from a
# fresh kernel to refresh it.
series

labor_type
0    [0.6462248568471919, 0.35377514315280806]
1    [0.8125343726557145, 0.18746562734428548]
2    [0.8986536496167497, 0.10134635038325038]
3    [0.6243707579099239, 0.37562924209007603]
4    [0.7068826460276888, 0.29311735397231115]
5    [0.5341756624967835, 0.46538933186507736]
dtype: object