In [None]:
# !pip install -U git+https://github.com/openfisca/openfisca-ceq.git@master#egg=OpenFisca-CEQ
# !pip install -U git+https://github.com/openfisca/openfisca-senegal.git@ceq#egg=OpenFisca-Senegal
# !pip install -U git+https://github.com/openfisca/openfisca-mali.git@master#egg=OpenFisca-Mali
# !pip install -U git+https://github.com/openfisca/openfisca-cote-d-ivoire.git@master#egg=OpenFisca-COTE-D-IVOIRE

In [2]:
from matplotlib import pyplot as plt
%matplotlib inline
import pandas as pd
from openfisca_ceq.tools.survey_scenario import build_ceq_survey_scenario
from openfisca_ceq.tools.indirect_taxation.tax_benefit_system_indirect_taxation_completion import indirect_tax_by_country
from openfisca_ceq.tools.data import year_by_country
from openfisca_ceq.tools.data_ceq_correspondence import (
    ceq_input_by_harmonized_variable,
    ceq_intermediate_by_harmonized_variable,
    non_ceq_input_by_harmonized_variable,
    )

# Build country scenario

## Country selection

In [3]:
# Country under study: it keys the survey-year lookup (`year_by_country`) and is
# passed as `legislation_country` when the survey scenario is built below.
country = "mali"

In [4]:
# Survey year registered for the selected country, then the baseline
# (non-inflated) CEQ survey scenario for that country and year.
year = year_by_country[country]
survey_scenario = build_ceq_survey_scenario(
    legislation_country = country,
    year = year,
    )

# Aggregates check

## Income variables

### Disaggregated income variables

In [5]:
# Merge the three harmonized -> OpenFisca name mappings; on a key clash the
# later mapping wins, exactly as successive `update` calls would.
ceq_by_harmonized_variable = {
    **ceq_input_by_harmonized_variable,
    **ceq_intermediate_by_harmonized_variable,
    **non_ceq_input_by_harmonized_variable,
    }
# These variables are added as identity entries (the key is also the value).
ceq_by_harmonized_variable.update({
    name: name
    for name in (
        'autres_revenus_du_capital_brut',
        'pension_retraite_brut',
        'revenu_foncier_brut',
        'revenu_non_salarie_brut',
        'salaire_brut',
        'salaire_super_brut',
        )
    })
ceq_by_harmonized_variable

{'rev_i_autoconsommation': 'autoconsumption',
 'rev_i_autres': 'other_income',
 'rev_i_autres_transferts': 'gifts_sales_durables',
 'rev_i_loyers_imputes': 'imputed_rent',
 'rev_i_transferts_publics': 'direct_transfers',
 'rev_i_agricoles': 'revenu_agricole',
 'rev_i_autres_revenus_capital': 'autres_revenus_du_capital',
 'rev_i_independants_Ntaxe': 'revenu_informel_non_salarie',
 'rev_i_independants_taxe': 'revenu_non_salarie',
 'rev_i_locatifs': 'revenu_locatif',
 'rev_i_pensions': 'pension_retraite',
 'rev_i_salaires_formels': 'salaire',
 'rev_i_salaires_informels': 'revenu_informel_salarie',
 'autres_revenus_du_capital_brut': 'autres_revenus_du_capital_brut',
 'pension_retraite_brut': 'pension_retraite_brut',
 'revenu_foncier_brut': 'revenu_foncier_brut',
 'revenu_non_salarie_brut': 'revenu_non_salarie_brut',
 'salaire_brut': 'salaire_brut',
 'salaire_super_brut': 'salaire_super_brut'}

In [6]:
# One (harmonized name, OpenFisca name, aggregate / 1e9) record per mapped variable.
data = []
for harmonized_variable, openfisca_variable in ceq_by_harmonized_variable.items():
    aggregate = survey_scenario.compute_aggregate(openfisca_variable, period = year) / 1e9
    data.append((harmonized_variable, openfisca_variable, aggregate))

In [7]:
# Tabulate the records and show the aggregates rounded to whole units.
revenus_columns = ["harmonized", "openfisca", "aggregate"]
revenus = pd.DataFrame(data, columns = revenus_columns)
revenus.round(0).astype({"aggregate": int})

Unnamed: 0,harmonized,openfisca,aggregate
0,rev_i_autoconsommation,autoconsumption,258
1,rev_i_autres,other_income,282
2,rev_i_autres_transferts,gifts_sales_durables,71
3,rev_i_loyers_imputes,imputed_rent,148
4,rev_i_transferts_publics,direct_transfers,0
5,rev_i_agricoles,revenu_agricole,606
6,rev_i_autres_revenus_capital,autres_revenus_du_capital,0
7,rev_i_independants_Ntaxe,revenu_informel_non_salarie,465
8,rev_i_independants_taxe,revenu_non_salarie,45
9,rev_i_locatifs,revenu_locatif,0


### Aggregated income variables

#### CEQ aggregated income variables

In [8]:
# CEQ variables whose aggregates are tabulated in the next cell.
ceq_variables = [
    "market_income",
    "market_income_plus_pensions",
    "pensions",
    "contributions_pensions",
    "net_market_income",
    "direct_taxes",
    "other_contributions",
    "contributions_health",
    ]

In [9]:
# Aggregate of each CEQ variable, divided by 1e9, indexed by variable name.
ceq_aggregate_values = [
    survey_scenario.compute_aggregate(ceq_variable, period = year) / 1e9
    for ceq_variable in ceq_variables
    ]
ceq_aggregates = pd.DataFrame(
    data = ceq_aggregate_values,
    index = ceq_variables,
    columns = ['aggregate'],
    )
ceq_aggregates.round(0).astype({"aggregate": int})

Unnamed: 0,aggregate
market_income,2160
market_income_plus_pensions,2152
pensions,0
contributions_pensions,7
net_market_income,2142
direct_taxes,7
other_contributions,7
contributions_health,3


#### Direct check

In [10]:
# Start from every mapped OpenFisca variable, then drop the ones that must not
# enter the total. `list.remove` is kept on purpose: it raises if a name is
# missing, which flags a change in the mappings.
income_components_summed_in_total = list(ceq_by_harmonized_variable.values())
for excluded_component in (
    "direct_transfers",
    "salaire",
    "salaire_brut",
    "pension_retraite",
    'autres_revenus_du_capital',
    'revenu_locatif',
    'revenu_non_salarie',
    ):
    income_components_summed_in_total.remove(excluded_component)

# Sum of the remaining components, net of pension contributions, divided by 1e9.
components_total = sum(
    survey_scenario.compute_aggregate(income_component, period = survey_scenario.year)
    for income_component in income_components_summed_in_total
    )
pension_contributions = survey_scenario.compute_aggregate(
    "contributions_pensions",
    period = survey_scenario.year,
    )
total_income = (components_total - pension_contributions) / 1e9


In [11]:
from numpy.testing import assert_almost_equal

# The CEQ aggregate must match the hand-built total to 5 decimals.
# On failure, display both figures side by side to inspect the gap.
market_income_plus_pensions = ceq_aggregates.loc['market_income_plus_pensions'].values
assert_almost_equal(market_income_plus_pensions, total_income, decimal = 5)


#### Wages by public/private sector

In [12]:
"Count : {}".format(round(
    survey_scenario.compute_aggregate("secteur_public", period = year)
    ))

'Count : 77306.0'

In [13]:
# Sum of the three wage variables, split by the `secteur_public` flag,
# divided by 1e9 and rounded to integers.
wage_variables = ["salaire", "salaire_brut", "salaire_super_brut"]
wages_by_sector = survey_scenario.compute_pivot_table(
    columns = ["secteur_public"],
    values = wage_variables,
    aggfunc = "sum",
    period = year,
    concat_axis = 0,
    )
(wages_by_sector / 1e9).round(0).astype(int)

secteur_public,False,True
salaire,40,28
salaire_brut,48,33
salaire_super_brut,57,39


## Education variables

### Counts (in millions)

In [14]:
# Counts by education level (columns) and public-school flag (rows),
# divided by 1e6 and rounded to two decimals.
education_counts = survey_scenario.compute_pivot_table(
    columns = ['eleve_enseignement_niveau'],
    index = ['eleve_enseignement_public'],
    aggfunc = 'count',
    period = survey_scenario.year,
    )
print((education_counts / 1e6).round(2))

eleve_enseignement_niveau  -1.0   0.0   1.0   2.0   3.0
eleve_enseignement_public                              
1.0                        12.4  0.11  1.77  1.27  0.12


In [15]:
# For each education level list the `<level>_person` variable followed by the
# `<level>` variable, then the overall net transfer.
education_levels = [
    'pre_school',
    'primary_education',
    'secondary_education',
    'tertiary_education',
    ]
variables = [
    name
    for level in education_levels
    for name in (level + '_person', level)
    ] + ['education_net_transfers']

print("In kind education tranfers")
for variable in variables:
    # Aggregate divided by 1e9 and rounded to the nearest integer.
    aggregate = int(round(
        survey_scenario.compute_aggregate(variable, period = survey_scenario.year) / 1e9
        ))
    print("{}: {} billions FCFA".format(variable, aggregate))

In kind education tranfers
pre_school_person: 0 billions FCFA
pre_school: 0 billions FCFA
primary_education_person: 119 billions FCFA
primary_education: 119 billions FCFA
secondary_education_person: 179 billions FCFA
secondary_education: 179 billions FCFA
tertiary_education_person: 16 billions FCFA
tertiary_education: 16 billions FCFA
education_net_transfers: 314 billions FCFA


## Inflating income and consumption

In [16]:
# Same scenario as the baseline, built with `inflate = True`.
# NOTE: in the recorded run this call raised an AssertionError inside
# `inflate_variables_sum_to_target`, because the consumption aggregates summed to 0.
inflated_survey_scenario = build_ceq_survey_scenario(legislation_country = country, year = year, inflate = True)

AssertionError: 

In [None]:
# Leftover post-mortem debugging of the AssertionError raised by the previous cell;
# the recorded session shows the consumption `poste_*` aggregates at 0.0 (total 0.0).
# TODO: drop this cell once the inflation step runs cleanly.
%debug

> [0;32m/home/mbenjelloul/openfisca/openfisca-ceq/openfisca_ceq/tools/survey_scenario.py[0m(152)[0;36minflate_variables_sum_to_target[0;34m()[0m
[0;32m    150 [0;31m[0;34m[0m[0m
[0m[0;32m    151 [0;31m        [0mtotal[0m [0;34m=[0m [0msum[0m[0;34m([0m[0maggregate_by_variable[0m[0;34m.[0m[0mvalues[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 152 [0;31m        [0;32massert[0m [0mtotal[0m [0;34m!=[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    153 [0;31m        share_by_variable = dict(
[0m[0;32m    154 [0;31m            [0;34m([0m[0mtarget_variable[0m[0;34m,[0m [0maggregate_by_variable[0m[0;34m[[0m[0mtarget_variable[0m[0;34m][0m [0;34m/[0m [0mtotal[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  u


> [0;32m/home/mbenjelloul/openfisca/openfisca-ceq/openfisca_ceq/tools/survey_scenario.py[0m(127)[0;36minflate_to_match_gross_valued_added_and_consumption[0;34m()[0m
[0;32m    125 [0;31m            [0mtarget_variables[0m [0;34m=[0m [0mconsumption_variables[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    126 [0;31m            [0mtarget[0m [0;34m=[0m [0mconsumption_target[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 127 [0;31m            [0mperiod[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0myear[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    128 [0;31m            )
[0m[0;32m    129 [0;31m[0;34m[0m[0m
[0m


ipdb>  self.year


2011


ipdb>  consumption_variables


['poste_1_1_1_1_1', 'poste_1_1_1_4_1_item_1', 'poste_1_1_1_4_1_item_2', 'poste_1_1_1_4_1_item_3', 'poste_1_1_1_4_3', 'poste_1_1_2_item_1', 'poste_1_1_2_item_2', 'poste_1_1_3_item_1', 'poste_1_1_3_item_2', 'poste_1_1_4_item_1', 'poste_1_1_4_item_2', 'poste_1_1_5_item_1', 'poste_1_1_5_item_2', 'poste_1_1_7_item_1', 'poste_1_1_7_item_2', 'poste_1_1_7_item_3', 'poste_1_1_7_item_4', 'poste_1_1_8', 'poste_1_1_9', 'poste_1_2', 'poste_2', 'poste_3_1_2', 'poste_3_2_1', 'poste_4_1_1_item_1', 'poste_4_1_1_item_2', 'poste_4_3_2_2_1_item_1', 'poste_4_3_2_2_1_item_2', 'poste_4_4_1_1_1', 'poste_4_4_1_2_1', 'poste_4_5_1_1_1', 'poste_4_5_2_1_1', 'poste_4_5_3_1_1', 'poste_4_5_4_1_1', 'poste_5_1', 'poste_5_2', 'poste_5_3_1_item_1', 'poste_5_3_1_item_2', 'poste_5_3_1_item_3', 'poste_5_3_1_item_4', 'poste_5_3_1_item_5', 'poste_5_3_1_item_6', 'poste_5_3_1_item_7', 'poste_5_3_1_item_8', 'poste_5_3_1_item_9', 'poste_5_3_2_item_1', 'poste_5_3_2_item_2', 'poste_5_4_1', 'poste_5_5_1_item_1', 'poste_5_5_1_item_2'

ipdb>  consumption_target


4283357000000.0


ipdb>  d


> [0;32m/home/mbenjelloul/openfisca/openfisca-ceq/openfisca_ceq/tools/survey_scenario.py[0m(152)[0;36minflate_variables_sum_to_target[0;34m()[0m
[0;32m    150 [0;31m[0;34m[0m[0m
[0m[0;32m    151 [0;31m        [0mtotal[0m [0;34m=[0m [0msum[0m[0;34m([0m[0maggregate_by_variable[0m[0;34m.[0m[0mvalues[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 152 [0;31m        [0;32massert[0m [0mtotal[0m [0;34m!=[0m [0;36m0[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    153 [0;31m        share_by_variable = dict(
[0m[0;32m    154 [0;31m            [0;34m([0m[0mtarget_variable[0m[0;34m,[0m [0maggregate_by_variable[0m[0;34m[[0m[0mtarget_variable[0m[0;34m][0m [0;34m/[0m [0mtotal[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  total


0.0


ipdb>  aggregate_by_variable


{'poste_1_1_1_1_1': 0.0, 'poste_1_1_1_4_1_item_1': 0.0, 'poste_1_1_1_4_1_item_2': 0.0, 'poste_1_1_1_4_1_item_3': 0.0, 'poste_1_1_1_4_3': 0.0, 'poste_1_1_2_item_1': 0.0, 'poste_1_1_2_item_2': 0.0, 'poste_1_1_3_item_1': 0.0, 'poste_1_1_3_item_2': 0.0, 'poste_1_1_4_item_1': 0.0, 'poste_1_1_4_item_2': 0.0, 'poste_1_1_5_item_1': 0.0, 'poste_1_1_5_item_2': 0.0, 'poste_1_1_7_item_1': 0.0, 'poste_1_1_7_item_2': 0.0, 'poste_1_1_7_item_3': 0.0, 'poste_1_1_7_item_4': 0.0, 'poste_1_1_8': 0.0, 'poste_1_1_9': 0.0, 'poste_1_2': 0.0, 'poste_2': 0.0, 'poste_3_1_2': 0.0, 'poste_3_2_1': 0.0, 'poste_4_1_1_item_1': 0.0, 'poste_4_1_1_item_2': 0.0, 'poste_4_3_2_2_1_item_1': 0.0, 'poste_4_3_2_2_1_item_2': 0.0, 'poste_4_4_1_1_1': 0.0, 'poste_4_4_1_2_1': 0.0, 'poste_4_5_1_1_1': 0.0, 'poste_4_5_2_1_1': 0.0, 'poste_4_5_3_1_1': 0.0, 'poste_4_5_4_1_1': 0.0, 'poste_5_1': 0.0, 'poste_5_2': 0.0, 'poste_5_3_1_item_1': 0.0, 'poste_5_3_1_item_2': 0.0, 'poste_5_3_1_item_3': 0.0, 'poste_5_3_1_item_4': 0.0, 'poste_5_3_1_ite

In [None]:
from openfisca_ceq.tools.data.income_targets import read_target

# Target read for the country versus the sum of the inflated scenario's income
# variables; both are divided by 1e9 before being displayed side by side.
gross_value_added = read_target(country, "gross_value_added") / 1e9
inflated_income_aggregates = [
    inflated_survey_scenario.compute_aggregate(income_variable, period = year) / 1e9
    for income_variable in inflated_survey_scenario.income_variables
    ]
gross_value_added_computed = sum(inflated_income_aggregates)
display("{} == {}".format(gross_value_added, gross_value_added_computed))

## Tax variables

### Direct taxes variables

In [None]:
from openfisca_ceq.tools.data.tax_targets import detailed_taxes_by_country, build_country_result

# Detailed direct-tax variables registered for the selected country.
direct_taxes_variables = detailed_taxes_by_country[country]

# One column per scenario, built the same way for both so the two cannot drift
# apart: `aggregate` is the baseline, `inflated_aggregate` the inflated scenario.
scenario_by_column = [
    ('aggregate', survey_scenario),
    ('inflated_aggregate', inflated_survey_scenario),
    ]
direct_taxes = pd.concat(
    [
        pd.DataFrame(
            index = direct_taxes_variables,
            columns = [column],
            data = [
                scenario.compute_aggregate(variable, period = year) / 1e9
                for variable in direct_taxes_variables
                ],
            )
        for column, scenario in scenario_by_column
        ],
    axis = 1,
    )

# Cast both columns to int. Previously only `aggregate` was cast, so the two
# comparable columns were displayed in different formats (int vs float).
direct_taxes.round().astype(int)

In [None]:
# Country result table built from the baseline and inflated scenarios,
# with the country-specific details included.
build_country_result(
    survey_scenario,
    inflated_survey_scenario,
    add_country_details = True,
    )

### Indirect tax variables

In [None]:
# For each indirect tax of the country (in order), collect every variable of the
# tax-benefit system whose name contains the tax name.
system_variable_names = list(survey_scenario.tax_benefit_system.variables.keys())
indirect_tax_variables = [
    name
    for tax in indirect_tax_by_country[country]
    for name in system_variable_names
    if tax in name
    ]
indirect_tax_variables

In [None]:
# Aggregate of each indirect-tax variable, divided by 1e9, indexed by variable name.
indirect_tax_aggregates = [
    survey_scenario.compute_aggregate(variable, period = year) / 1e9
    for variable in indirect_tax_variables
    ]
taxes_and_tariffs = pd.DataFrame(
    data = indirect_tax_aggregates,
    index = indirect_tax_variables,
    columns = ['aggregate'],
    )
taxes_and_tariffs.round().astype({"aggregate": int})

### Verify consumption

In [None]:
# Aggregate of the `consumption` variable, divided by 1e9.
consumption_aggregate = survey_scenario.compute_aggregate('consumption', period = year)
consumption = consumption_aggregate / 1e9
consumption

In [None]:
# Sum of the aggregates whose variable name contains 'tva_'.
# Unfinished check: an `assert consumption == ...` against this sum was sketched
# by the author but left disabled.
is_tva_variable = taxes_and_tariffs.index.str.contains('tva_')
sum(taxes_and_tariffs['aggregate'][is_tva_variable])

In [None]:
# Sum of every `depenses_ht_hd_poste*` aggregate plus `droits_douane` and `tva`,
# divided by 1e9. Unfinished check: an `assert consumption == ...` against this
# figure was sketched by the author but left disabled.
poste_expenses = sum(
    survey_scenario.compute_aggregate(variable, period = year)
    for variable in survey_scenario.tax_benefit_system.variables
    if "depenses_ht_hd_poste" in variable
    )
droits_douane_aggregate = survey_scenario.compute_aggregate("droits_douane", period = year)
tva_aggregate = survey_scenario.compute_aggregate("tva", period = year)
(poste_expenses + droits_douane_aggregate + tva_aggregate) / 1e9

# Inflate incomes   

## Income variables

In [None]:
# Income components whose aggregates are summed in the following cells, once on
# the baseline scenario and once on the inflated scenario, to compare totals.
income_variables = [
    'autoconsumption',
    'other_income',
    'gifts_sales_durables',
    'imputed_rent',
    'revenu_agricole',
    'autres_revenus_du_capital',
    'revenu_informel_non_salarie',
    'revenu_non_salarie',
    'revenu_locatif',
    'pension_retraite',
    'salaire_super_brut',
    ]

In [None]:
# Total of the listed income components on the baseline scenario, divided by 1e9.
baseline_income_aggregates = [
    survey_scenario.compute_aggregate(income_variable, period = year)
    for income_variable in income_variables
    ]
sum(baseline_income_aggregates) / 1e9

In [None]:
# Total of the listed income components on the inflated scenario, divided by 1e9.
inflated_income_component_aggregates = [
    inflated_survey_scenario.compute_aggregate(income_variable, period = year)
    for income_variable in income_variables
    ]
sum(inflated_income_component_aggregates) / 1e9

# Indicators

In [None]:
from openfisca_survey_manager.statshelpers import lorenz
from openfisca_ceq.tools.results.inequality import inequality_table

# Inequality table for the baseline scenario (`lorenz` is used in a later cell).
baseline_inequality = inequality_table(survey_scenario)
display(baseline_inequality)

In [None]:
# Aggregate of `impots_indirects` on the baseline scenario, divided by 1e9.
impots_indirects_aggregate = survey_scenario.compute_aggregate("impots_indirects", period = year)
impots_indirects_aggregate / 1e9

In [None]:
# Aggregate of `direct_taxes` on the baseline scenario, divided by 1e9.
direct_taxes_aggregate = survey_scenario.compute_aggregate("direct_taxes", period = year)
direct_taxes_aggregate / 1e9

In [None]:
# Aggregate of `personal_income_tax` on the baseline scenario, divided by 1e9.
personal_income_tax_aggregate = survey_scenario.compute_aggregate("personal_income_tax", period = year)
personal_income_tax_aggregate / 1e9

In [None]:
from openfisca_ceq import list_variables_from_directory

In [None]:
# Rebind `ceq_variables` to the sorted variable names listed from the directory,
# then show those whose aggregate is exactly zero.
ceq_variables = sorted(list_variables_from_directory(survey_scenario.tax_benefit_system))
zero_aggregate_variables = [
    name
    for name in ceq_variables
    if survey_scenario.compute_aggregate(name, period = year) == 0
    ]
zero_aggregate_variables

In [None]:
# Lorenz curve of per-person market income, weighting each household by
# its weight times its number of people.
variable = "market_income"
nb_persons = survey_scenario.calculate_variable("number_of_people_per_household", period = year)
household_weight = survey_scenario.calculate_variable("household_weight", period = year)
weights = household_weight * nb_persons
per_person_values = survey_scenario.calculate_variable(variable, period = year) / nb_persons

# NOTE(review): the result is unpacked as (y, x) and then plotted as (x, y);
# confirm this matches the order returned by `lorenz`, otherwise the axes are swapped.
y, x = lorenz(per_person_values, weights = weights)
plt.plot(x, y)

In [None]:
# Aggregate of `direct_transfers` on the baseline scenario, divided by 1e9.
direct_transfers_aggregate = survey_scenario.compute_aggregate('direct_transfers', period = year)
direct_transfers_aggregate / 1e9

In [None]:
# Display the current `ceq_variables`. Note the name is bound twice in this
# notebook (a short hand-written list, later the sorted directory listing), so
# what is shown depends on which cell ran last.
ceq_variables

In [None]:
# Aggregate of `customs_duties`, shown unscaled (no division by 1e9 here).
customs_duties_aggregate = survey_scenario.compute_aggregate('customs_duties', period = year)
customs_duties_aggregate

In [None]:
# Rebuild the inflated scenario (same arguments as the earlier build cell).
inflated_survey_scenario = build_ceq_survey_scenario(
    legislation_country = country,
    year = year,
    inflate = True,
    )

In [None]:
# Sorted names of the tax-benefit-system variables starting with 'poste_'.
poste_variable_names = [
    name
    for name in survey_scenario.tax_benefit_system.variables
    if name.startswith('poste_')
    ]
sorted(poste_variable_names)

In [None]:
# Aggregate of `indirect_taxes`, shown unscaled (no division by 1e9 here).
indirect_taxes_aggregate = survey_scenario.compute_aggregate('indirect_taxes', period = year)
indirect_taxes_aggregate

Direct taxes