In [120]:
import numpy as np
import pandas as pd
import seaborn as sn
import os
import sklearn as sk
import statsmodels.api as sm

class country:
    def __init__(self,name,code_country,year,z_used='NA'):
        self.country_name = name
        self.year = year
        self.code_country = code_country
        url_base = 'https://github.com/mermozhk/Poverty/raw/main/'
        #print('/'.join([url_base,name,year,'ehcvm_welfare_'+''.join([code_country,year])])+'.dta')
        self.welfare_data = pd.read_stata('/'.join([url_base,name,year,'ehcvm_welfare_'+''.join([code_country,year])])+'.dta')
        self.welfare_data['dtot_corrected']=self.welfare_data['dtot']/self.welfare_data['def_spa']
        
        self.conso_data = pd.read_stata('/'.join([url_base,name,year,'ehcvm_conso_'+''.join([code_country,year])])+'.dta')
        self.menage_data = pd.read_stata('/'.join([url_base,name,year,'ehcvm_menage_'+''.join([code_country,year])])+'.dta')
        self.individu_data = pd.read_stata('/'.join([url_base,name,year,'ehcvm_individu_'+''.join([code_country,year])])+'.dta')
        
        self.health_data = self.conso_data.loc[(self.conso_data['codpr']>=681) & (self.conso_data['codpr']<=692)]
        self.health_expenditure = pd.DataFrame(self.health_data.groupby(['hhid','vague','grappe','menage']).sum().drop(columns=['year','hhweight','codpr'])).reset_index()
        
        self.health_merged_data = pd.merge(self.welfare_data,
                   self.health_expenditure,
                   on = 'hhid',
                   how='outer').replace(np.nan, 0)
        self.health_merged_data['constant'] = 1
        
        self.health_merged_data['not_health']=self.health_merged_data['dtot']-self.health_merged_data['depan']
        
       
            
            
            
        if z_used=='NA':
            self.poverty_merged_data = pd.merge(self.individu_data,
                   self.welfare_data[['hhid','zref','dtot_corrected','hhsize']],
                   on = 'hhid',
                   how='outer')
        else:
            self.poverty_merged_data = pd.merge(self.individu_data,
                   self.welfare_data[['hhid','dtot_corrected','hhsize']],
                   on = 'hhid',
                   how='outer')
            self.poverty_merged_data['zref']=float(z_used)
            
        self.poverty_merged_data['constant'] = 1
        
        
    def catastrophic_expenditure(self,proportion,data):
            self.health_merged_data['health_expenditure_proportion'] = ((self.health_merged_data['depan']/self.health_merged_data['dtot'])>=proportion).map({False:0, True:1})
            return self.estimation_procedure(index='health_expenditure_proportion',data=self.health_merged_data)
         
    
    
    def estimation_procedure(self,index,data):
        wls_model = sm.WLS(data[index],data['constant'], weights=data['hhweight'])
        return wls_model.fit()
        
    def prevalence(self,data):
        data['prevalence']=(data['dtot_corrected']<
                                       (data['zref']*data['hhsize'])).map({False:0, True:1})
        return self.estimation_procedure('prevalence',data=data)
    
    def gap(self,data):
        data['gap'] = (1-data['dtot_corrected']/(data['zref']*data['hhsize']))*data['prevalence']
        return self.estimation_procedure('gap',data=data)
    
    def severity(self,data):
        if not('prevalence' in self.merge_data.columns):
            
            data['prevalence']=(data['dtot_corrected']<
                                       (data['zref']*data['hhsize'])).map({False:0, True:1})
            
        data['severity'] = ((1-data['dtot_corrected']/(data['zref']*data['hhsize']))**2)*data['prevalence']
        
        return self.estimation_procedure('severity',data=data)
    
    def aart_welfare_index(self,data):
        data['aart_welfare'] = (data['zref']*data['hhsize'])/data['dtot_corrected']
        return self.estimation_procedure('aart_welfare',data=data)
    
    def aart_poverty_index(self,data):
        if not('aart_welfare' in data.columns):
            data['aart_welfare'] = (data['zref']*data['hhsize'])/data['dtot_corrected']
        
        data['aart_poverty'] = data.apply(lambda x: max(x.aart_welfare,1),axis=1)
        return self.estimation_procedure('aart_poverty',data=data)
        

In [121]:
Togo = country(name='Togo',code_country='TGO',year='2018')

  self.health_expenditure = pd.DataFrame(self.health_data.groupby(['hhid','vague','grappe','menage']).sum().drop(columns=['year','hhweight','codpr'])).reset_index()


AttributeError: 'country' object has no attribute 'heath_merged_data'

In [114]:
Togo.aart_welfare_index().summary()

0,1,2,3
Dep. Variable:,aart_welfare,R-squared:,-0.0
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:48:00,Log-Likelihood:,-32650.0
No. Observations:,27482,AIC:,65300.0
Df Residuals:,27481,BIC:,65310.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,1.1067,0.004,248.581,0.000,1.098,1.115

0,1,2,3
Omnibus:,9930.978,Durbin-Watson:,0.284
Prob(Omnibus):,0.0,Jarque-Bera (JB):,65632.542
Skew:,1.585,Prob(JB):,0.0
Kurtosis:,9.875,Cond. No.,1.0


In [116]:
Togo.catastrophic_expenditure(.05).summary()

0,1,2,3
Dep. Variable:,health_expenditure_proportion,R-squared:,-0.0
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:48:16,Log-Likelihood:,-4229.2
No. Observations:,6171,AIC:,8460.0
Df Residuals:,6170,BIC:,8467.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,0.2745,0.006,48.311,0.000,0.263,0.286

0,1,2,3
Omnibus:,915.246,Durbin-Watson:,2.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1376.22
Skew:,1.152,Prob(JB):,1.4399999999999998e-299
Kurtosis:,2.786,Cond. No.,1.0


In [119]:
Togo.health_merged_data['dtot']-Togo.health_merged_data['depan']

0       3.596460e+06
1       3.833842e+06
2       2.907922e+06
3       1.347160e+06
4       2.534826e+06
            ...     
6166    7.135184e+05
6167    7.319176e+05
6168    1.177033e+06
6169    5.464736e+05
6170    9.936798e+05
Length: 6171, dtype: float64

In [82]:
Togo.aart_poverty_index().summary()

0,1,2,3
Dep. Variable:,aart_poverty,R-squared:,-0.0
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:29:28,Log-Likelihood:,-26567.0
No. Observations:,27482,AIC:,53140.0
Df Residuals:,27481,BIC:,53140.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,1.3139,0.004,368.244,0.000,1.307,1.321

0,1,2,3
Omnibus:,17931.519,Durbin-Watson:,0.267
Prob(Omnibus):,0.0,Jarque-Bera (JB):,305569.345
Skew:,2.903,Prob(JB):,0.0
Kurtosis:,18.269,Cond. No.,1.0


In [90]:
Togo.estimation_procedure('health_expenditure_proportion',data=Togo.health_merged_data)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [92]:
wls_model = sm.WLS(Togo.health_merged_data['health_expenditure_proportion'],Togo.health_merged_data['constant'], weights=Togo.health_merged_data['hhweight'])

In [94]:
wls_model.fit().summary()

0,1,2,3
Dep. Variable:,health_expenditure_proportion,R-squared:,0.0
Model:,WLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:37:00,Log-Likelihood:,6582.3
No. Observations:,6171,AIC:,-13160.0
Df Residuals:,6170,BIC:,-13160.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,0.0448,0.001,45.425,0.000,0.043,0.047

0,1,2,3
Omnibus:,5733.536,Durbin-Watson:,2.005
Prob(Omnibus):,0.0,Jarque-Bera (JB):,292941.338
Skew:,4.397,Prob(JB):,0.0
Kurtosis:,35.588,Cond. No.,1.0


In [111]:
Togo.prevalence().summary()

0,1,2,3
Dep. Variable:,prevalence,R-squared:,-0.0
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:44:54,Log-Likelihood:,-21835.0
No. Observations:,27482,AIC:,43670.0
Df Residuals:,27481,BIC:,43680.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,0.4544,0.003,151.294,0.000,0.449,0.460

0,1,2,3
Omnibus:,124639.857,Durbin-Watson:,0.338
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3151.017
Skew:,-0.033,Prob(JB):,0.0
Kurtosis:,1.342,Cond. No.,1.0


In [84]:
Togo.severity().summary()

0,1,2,3
Dep. Variable:,severity,R-squared:,-0.0
Model:,WLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sat, 30 Mar 2024",Prob (F-statistic):,
Time:,15:29:45,Log-Likelihood:,16212.0
No. Observations:,27482,AIC:,-32420.0
Df Residuals:,27481,BIC:,-32410.0
Df Model:,0,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
constant,0.0687,0.001,91.321,0.000,0.067,0.070

0,1,2,3
Omnibus:,9466.116,Durbin-Watson:,0.265
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28233.215
Skew:,1.83,Prob(JB):,0.0
Kurtosis:,6.355,Cond. No.,1.0


0        246541.546875
1        246541.546875
2        246541.546875
3        246541.546875
4        246541.546875
             ...      
42338    246541.546875
42339    246541.546875
42340    246541.546875
42341    246541.546875
42342    246541.546875
Name: zref, Length: 42343, dtype: float32

In [21]:
Togo.menage_data

Unnamed: 0,country,hhid,logem,mur,toit,sol,eauboi_ss,eauboi_sp,elec_ac,elec_ur,...,petitrum,porc,lapin,volail,sh_id_demo,sh_co_natu,sh_co_eco,sh_id_eco,sh_co_vio,sh_co_oth
0,TGO,101.0,Proprietaire titre,Oui,Oui,Oui,Oui,Oui,Oui,Oui,...,,,,,Non,Non,Non,Non,Non,Non
1,TGO,103.0,Locataire,Oui,Oui,Oui,Oui,Oui,Oui,Oui,...,,,,,Non,Non,Non,Non,Non,Non
2,TGO,104.0,Locataire,Oui,Oui,Oui,Oui,Oui,Oui,Oui,...,,,,,Non,Non,Non,Non,Non,Non
3,TGO,105.0,Locataire,Oui,Oui,Oui,Oui,Oui,Non,Non,...,,,,,Non,Non,Non,Oui,Non,Non
4,TGO,106.0,Autre,Oui,Oui,Oui,Oui,Oui,Oui,Oui,...,,,,,Non,Non,Non,Non,Non,Non
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6166,TGO,54008.0,Proprietaire sans titre,Non,Non,Oui,Oui,Oui,Non,Non,...,7.0,0.0,0.0,10.0,Non,Non,Non,Non,Non,Non
6167,TGO,54009.0,Proprietaire sans titre,Non,Non,Oui,Oui,Oui,Non,Non,...,13.0,7.0,0.0,20.0,Non,Non,Non,Non,Non,Non
6168,TGO,54010.0,Proprietaire sans titre,Oui,Oui,Oui,Non,Non,Non,Non,...,5.0,11.0,0.0,25.0,Oui,Non,Non,Non,Non,Non
6169,TGO,54011.0,Proprietaire sans titre,Oui,Oui,Oui,Non,Non,Non,Non,...,4.0,0.0,0.0,1.0,Oui,Non,Non,Non,Non,Non


In [22]:
Togo.conso_data

Unnamed: 0,country,year,hhid,vague,grappe,menage,region,milieu,hhweight,codpr,modep,depan
0,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.235010,3,Don,74971.250000
1,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.235010,6,Don,5944.895020
2,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.235010,48,Don,171028.578125
3,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.235010,58,Don,12489.623047
4,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.235010,62,Don,22579.154297
...,...,...,...,...,...,...,...,...,...,...,...,...
302987,TGO,2018,54012.0,2,540,12,Savanes,Rural,147.037387,662,Achat,6150.000000
302988,TGO,2018,54012.0,2,540,12,Savanes,Rural,147.037387,685,Achat,76000.000000
302989,TGO,2018,54012.0,2,540,12,Savanes,Rural,147.037387,804,Valeur usage BD,1889.541748
302990,TGO,2018,54012.0,2,540,12,Savanes,Rural,147.037387,829,Valeur usage BD,65905.617188


In [34]:
Togo_sante = Togo.conso_data.loc[(Togo.conso_data['codpr']>=681) & (Togo.conso_data['codpr']<=692)]
Togo_sante.head()

Unnamed: 0,country,year,hhid,vague,grappe,menage,region,milieu,hhweight,codpr,modep,depan
39,TGO,2018,101.0,1,1,1,Grand Lomé,Urbain,598.23501,681,Achat,18000.0
150,TGO,2018,104.0,1,1,4,Grand Lomé,Urbain,598.23501,681,Achat,4400.0
151,TGO,2018,104.0,1,1,4,Grand Lomé,Urbain,598.23501,686,Achat,4600.0
231,TGO,2018,106.0,1,1,6,Grand Lomé,Urbain,598.23501,682,Achat,6000.0
232,TGO,2018,106.0,1,1,6,Grand Lomé,Urbain,598.23501,685,Achat,29600.0


In [63]:
Togo_depan = pd.DataFrame(Togo_sante.groupby(['hhid','vague','grappe','menage']).sum().drop(columns=['year','hhweight','codpr'])).reset_index()

  Togo_depan = pd.DataFrame(Togo_sante.groupby(['hhid','vague','grappe','menage']).sum().drop(columns=['year','hhweight','codpr'])).reset_index()


In [62]:
Togo_depan

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,depan
hhid,vague,grappe,menage,Unnamed: 4_level_1
101.0,1,1,1,18000.0
104.0,1,1,4,9000.0
106.0,1,1,6,47600.0
108.0,1,1,8,356000.0
110.0,1,1,10,32000.0
...,...,...,...,...
54008.0,2,540,8,8380.0
54009.0,2,540,9,10000.0
54010.0,2,540,10,16000.0
54011.0,2,540,11,91800.0


In [38]:
Togo.welfare_data

Unnamed: 0,country,year,hhid,grappe,menage,vague,zae,region,milieu,hhweight,...,hsectins,hcsp,dali,dnal,dtot,pcexp,zref,def_spa,def_temp,dtot_corrected
0,TGO,2018,101.0,1,1,1,Lome,Lomé commune,Urbain,598.235010,...,,,2.048111e+06,1.566349e+06,3.614460e+06,9.378899e+05,273618.5,1.284607,0.991413,2.813670e+06
1,TGO,2018,103.0,1,3,1,Lome,Lomé commune,Urbain,598.235010,...,Entreprise Privée,Travailleur pour compte propre,2.268928e+06,1.564914e+06,3.833842e+06,2.984447e+06,273618.5,1.284607,0.991413,2.984447e+06
2,TGO,2018,104.0,1,4,1,Lome,Lomé commune,Urbain,598.235010,...,Entreprise publique/ parapublique,Ouvrier ou employé non qualifié,1.344899e+06,1.572023e+06,2.916922e+06,7.568909e+05,273618.5,1.284607,0.991413,2.270673e+06
3,TGO,2018,105.0,1,5,1,Lome,Lomé commune,Urbain,598.235010,...,Entreprise Privée,Travailleur pour compte propre,7.348667e+05,6.122931e+05,1.347160e+06,5.243471e+05,273618.5,1.284607,0.991413,1.048694e+06
4,TGO,2018,106.0,1,6,1,Lome,Lomé commune,Urbain,598.235010,...,Entreprise publique/ parapublique,Cadre moyen/agent de maîtrise,9.933412e+05,1.589084e+06,2.582426e+06,2.010285e+06,273618.5,1.284607,0.991413,2.010285e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6166,TGO,2018,54008.0,540,8,2,Zone des plaines du nord,Savanes,Rural,147.037387,...,Entreprise Privée,Travailleur pour compte propre,4.322133e+05,2.896851e+05,7.218984e+05,1.805633e+05,273618.5,0.799607,1.000356,9.028163e+05
6167,TGO,2018,54009.0,540,9,2,Zone des plaines du nord,Savanes,Rural,147.037387,...,Entreprise Privée,Travailleur pour compte propre,3.927010e+05,3.492167e+05,7.419176e+05,1.030947e+05,273618.5,0.799607,1.000356,9.278526e+05
6168,TGO,2018,54010.0,540,10,2,Zone des plaines du nord,Savanes,Rural,147.037387,...,Entreprise Privée,Travailleur pour compte propre,8.242112e+05,3.688216e+05,1.193033e+06,1.865029e+05,273618.5,0.799607,1.000356,1.492024e+06
6169,TGO,2018,54011.0,540,11,2,Zone des plaines du nord,Savanes,Rural,147.037387,...,Entreprise Privée,Travailleur pour compte propre,3.134760e+05,3.247975e+05,6.382736e+05,1.596468e+05,273618.5,0.799607,1.000356,7.982339e+05


In [25]:
Togo.conso_data.dtypes

country       object
year           int16
hhid         float32
vague           int8
grappe         int16
menage          int8
region      category
milieu      category
hhweight     float64
codpr          int16
modep       category
depan        float64
dtype: object

In [65]:
Togo_sante = pd.merge(Togo.welfare_data,
                   Togo_depan,
                   on = 'hhid',
                   how='outer').replace(np.nan, 0)

In [67]:
Togo_prop = Togo_sante['depan']/Togo_sante['dtot']

In [70]:
(Togo_prop>=.25).mean()

0.028358450818343865

In [42]:
Togo.welfare_data['hhid'].unique().shape

(6171,)

In [60]:
Togo_depan['hhid'].unique().shape

(3772,)

In [58]:
pd.DataFrame(Togo_depan).reset_index()

Unnamed: 0,hhid,vague,grappe,menage,depan
0,101.0,1,1,1,18000.0
1,104.0,1,1,4,9000.0
2,106.0,1,1,6,47600.0
3,108.0,1,1,8,356000.0
4,110.0,1,1,10,32000.0
...,...,...,...,...,...
3767,54008.0,2,540,8,8380.0
3768,54009.0,2,540,9,10000.0
3769,54010.0,2,540,10,16000.0
3770,54011.0,2,540,11,91800.0
