In [None]:
import numpy as np
import pandas as pd
import seaborn as sn
import os
import sklearn as sk
import statsmodels.api as sm

# Load WAEMU_data.dta from the GitHub repository into a DataFrame (network access required).
WAEMU_ehcvm_data = pd.read_stata('https://github.com/mermozhk/Poverty/raw/main/WAEMU_data/WAEMU_data.dta')


class country:
    """Survey data and weighted indicator estimates for one country-year.

    On construction the welfare, consumption, household and individual
    ``.dta`` files of the requested country-year are downloaded from the
    GitHub repository and combined into two working frames:

    * ``health_merged_data``  - welfare data joined with summed health
      consumption (rows of ``conso_data`` whose ``codpr`` is in [681, 692]).
    * ``poverty_merged_data`` - individual data joined with the welfare
      columns needed for the poverty indicators.

    Every indicator method adds its column(s) to the frame it is given
    (the frame is modified in place) and returns the fitted statsmodels
    WLS result of that column on a constant, weighted by ``hhweight``.
    """

    # No trailing slash: path segments are joined with '/' in _read_table.
    _URL_BASE = 'https://github.com/mermozhk/Poverty/raw/main'

    def __init__(self,name,code_country,year,z_used='NA'):
        """Download and prepare the data of one country-year.

        Parameters
        ----------
        name : str
            Country directory name in the repository.
        code_country : str
            Country code used in the file names.
        year : str or int
            Survey year; converted with ``str`` when building URLs.
        z_used : str or number, default 'NA'
            ``'NA'`` keeps the ``zref`` column shipped with the welfare file;
            any other value is converted with ``float`` and used as ``zref``
            for every row of ``poverty_merged_data``.
        """
        self.country_name = name
        self.year = year
        self.code_country = code_country

        self.welfare_data = self._read_table('welfare')
        self.welfare_data['dtot_corrected'] = self.welfare_data['dtot']/self.welfare_data['def_spa']

        self.conso_data = self._read_table('conso')
        self.menage_data = self._read_table('menage')
        self.individu_data = self._read_table('individu')

        # Consumption rows this class treats as health spending (codpr 681..692, both inclusive).
        self.health_data = self.conso_data.loc[self.conso_data['codpr'].between(681, 692)]
        self.health_expenditure = (
            self.health_data
            .groupby(['hhid','vague','grappe','menage'])
            .sum()
            .drop(columns=['year','hhweight','codpr'])
            .reset_index()
        )

        # Outer join keeps households without any health row; their missing values become 0.
        self.health_merged_data = pd.merge(self.welfare_data,
                                           self.health_expenditure,
                                           on='hhid',
                                           how='outer').replace(np.nan, 0)
        self.health_merged_data['constant'] = 1
        self.health_merged_data['not_health'] = self.health_merged_data['dtot']-self.health_merged_data['depan']

        # Only pull 'zref' from the welfare file when no override was supplied.
        use_survey_line = (z_used == 'NA')
        welfare_columns = ['hhid','dtot_corrected','hhsize']
        if use_survey_line:
            welfare_columns.insert(1, 'zref')
        self.poverty_merged_data = pd.merge(self.individu_data,
                                            self.welfare_data[welfare_columns],
                                            on='hhid',
                                            how='outer')
        if not use_survey_line:
            self.poverty_merged_data['zref'] = float(z_used)

        self.poverty_merged_data['constant'] = 1

    def _read_table(self, table):
        """Read ``ehcvm_<table>_<code><year>.dta`` of this country-year from the repository."""
        year = str(self.year)
        file_name = 'ehcvm_' + table + '_' + self.code_country + year + '.dta'
        return pd.read_stata('/'.join([self._URL_BASE, self.country_name, year, file_name]))

    @staticmethod
    def _poverty_flag(data):
        """Return a 0/1 Series: 1 where ``dtot_corrected < zref * hhsize``."""
        below_line = data['dtot_corrected'] < (data['zref']*data['hhsize'])
        return below_line.map({False:0, True:1})

    def catastrophic_expenditure(self,proportion,data=None):
        """Estimate the share of rows with ``depan / dtot >= proportion``.

        The indicator is always computed on ``self.health_merged_data``;
        ``data`` is accepted for interface compatibility but is not used.
        """
        share = self.health_merged_data['depan']/self.health_merged_data['dtot']
        self.health_merged_data['health_expenditure_proportion'] = (share >= proportion).map({False:0, True:1})
        return self.estimation_procedure(index='health_expenditure_proportion',data=self.health_merged_data)

    def estimation_procedure(self,index,data):
        """Fit a WLS of ``data[index]`` on ``data['constant']`` weighted by ``data['hhweight']``.

        Returns the fitted statsmodels results object.
        """
        wls_model = sm.WLS(data[index],data['constant'], weights=data['hhweight'])
        return wls_model.fit()

    def prevalence(self,data):
        """Add the 0/1 column ``prevalence`` to ``data`` and estimate it."""
        data['prevalence'] = self._poverty_flag(data)
        return self.estimation_procedure('prevalence',data=data)

    def gap(self,data):
        """Add ``gap`` = (1 - dtot_corrected / (zref * hhsize)) * prevalence and estimate it.

        ``prevalence`` is computed first when the column is not present yet.
        """
        if 'prevalence' not in data.columns:
            data['prevalence'] = self._poverty_flag(data)
        data['gap'] = (1-data['dtot_corrected']/(data['zref']*data['hhsize']))*data['prevalence']
        return self.estimation_procedure('gap',data=data)

    def severity(self,data):
        """Add ``severity`` = (1 - dtot_corrected / (zref * hhsize))**2 * prevalence and estimate it.

        ``prevalence`` is computed first when the column is not present yet.
        """
        # The check must look at the frame being used, not at an attribute of self.
        if 'prevalence' not in data.columns:
            data['prevalence'] = self._poverty_flag(data)
        data['severity'] = ((1-data['dtot_corrected']/(data['zref']*data['hhsize']))**2)*data['prevalence']
        return self.estimation_procedure('severity',data=data)

    def aart_welfare_index(self,data):
        """Add ``aart_welfare`` = (zref * hhsize) / dtot_corrected and estimate it."""
        data['aart_welfare'] = (data['zref']*data['hhsize'])/data['dtot_corrected']
        return self.estimation_procedure('aart_welfare',data=data)

    def aart_poverty_index(self,data):
        """Add ``aart_poverty`` = max(aart_welfare, 1) and estimate it.

        ``aart_welfare`` is computed first when the column is not present yet.
        """
        if 'aart_welfare' not in data.columns:
            data['aart_welfare'] = (data['zref']*data['hhsize'])/data['dtot_corrected']
        # Vectorised floor at 1; missing values stay missing.
        data['aart_poverty'] = data['aart_welfare'].clip(lower=1)
        return self.estimation_procedure('aart_poverty',data=data)

In [23]:
import numpy as np
import pandas as pd
import seaborn as sn
import os
import sklearn as sk
import statsmodels.api as sm

# Load WAEMU_data.dta from the GitHub repository (network access required).
WAEMU_ehcvm_data = pd.read_stata('https://github.com/mermozhk/Poverty/raw/main/WAEMU_data/WAEMU_data.dta')

# One indicator column per distinct value of CY. dtype=int pins the 0/1 integer
# coding: recent pandas versions default to boolean columns, which is not what
# the regressions below expect as a design matrix.
dummies = pd.get_dummies(WAEMU_ehcvm_data['CY'], dtype=int)
Merged_WAEMU = pd.concat([WAEMU_ehcvm_data, dummies], axis='columns')
Merged_WAEMU

Unnamed: 0,country,year,hhid,grappe,menage,vague,zae,region,milieu,hhweight,...,GNB2018,GNB2021,MLI2018,MLI2021,NER2018,NER2021,SEN2018,SEN2021,TGO2018,TGO2021
0,BEN,2018,1005.0,1,5,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,0,0,0,0,0,0,0,0,0,0
1,BEN,2018,1019.0,1,19,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,0,0,0,0,0,0,0,0,0,0
2,BEN,2018,1026.0,1,26,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,0,0,0,0,0,0,0,0,0,0
3,BEN,2018,1032.0,1,32,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,0,0,0,0,0,0,0,0,0,0
4,BEN,2018,1046.0,1,46,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119184,TGO,2021,54008.0,540,8,2,,COLLINES,Rural,165.818072,...,0,0,0,0,0,0,0,0,0,1
119185,TGO,2021,54009.0,540,9,2,,COLLINES,Rural,165.818072,...,0,0,0,0,0,0,0,0,0,1
119186,TGO,2021,54010.0,540,10,2,,COLLINES,Rural,165.818072,...,0,0,0,0,0,0,0,0,0,1
119187,TGO,2021,54011.0,540,11,2,,COLLINES,Rural,165.818072,...,0,0,0,0,0,0,0,0,0,1


In [24]:
# Total expenditure divided by the def_spa column.
Merged_WAEMU['dtot_corrected'] = Merged_WAEMU['dtot'].div(Merged_WAEMU['def_spa'])

In [25]:
# 1 when corrected expenditure is below zref * hhsize, else 0.
Merged_WAEMU['prevalence'] = (
    Merged_WAEMU['dtot_corrected']
    .lt(Merged_WAEMU['zref'] * Merged_WAEMU['hhsize'])
    .map({True: 1, False: 0})
)

In [26]:
# Constant threshold applied to every row.
# NOTE(review): presumably a 2.15-per-day international poverty line in PPP
# dollars, annualised over 365 days - confirm the source and the PPP base year.
Merged_WAEMU['zref_intl'] = 2.15*365

In [27]:
# Total expenditure divided by the PPP column, then by household size.
Merged_WAEMU['dtot_PPA_ind'] = (
    Merged_WAEMU['dtot']
    .div(Merged_WAEMU['PPP'])
    .div(Merged_WAEMU['hhsize'])
)

In [28]:
# 1 when dtot_PPA_ind is below zref_intl, else 0.
Merged_WAEMU['prevalence_ind'] = (
    Merged_WAEMU['dtot_PPA_ind']
    .lt(Merged_WAEMU['zref_intl'])
    .map({True: 1, False: 0})
)

In [None]:
# Display the frame, now including the columns derived in the cells above.
Merged_WAEMU

In [29]:
# Display the indicator columns built from CY; they are the regressors of the models below.
dummies

Unnamed: 0,BEN2018,BEN2021,BFA2018,BFA2021,CIV2018,CIV2021,GNB2018,GNB2021,MLI2018,MLI2021,NER2018,NER2021,SEN2018,SEN2021,TGO2018,TGO2021
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
119185,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
119186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
119187,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [30]:
# No weights are passed here, so every observation counts equally in this fit.
wls_model = sm.WLS(
    endog=Merged_WAEMU['prevalence_ind'],
    exog=dummies,
)
wls_model.fit()

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fe8c81577f0>

In [31]:
# Fit the model defined above again and show its regression summary table.
wls_model.fit().summary()

0,1,2,3
Dep. Variable:,prevalence_ind,R-squared:,0.062
Model:,WLS,Adj. R-squared:,0.062
Method:,Least Squares,F-statistic:,528.2
Date:,"Sat, 29 Jun 2024",Prob (F-statistic):,0.0
Time:,17:46:46,Log-Likelihood:,-38251.0
No. Observations:,119189,AIC:,76530.0
Df Residuals:,119173,BIC:,76690.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
BEN2018,0.1385,0.004,37.178,0.000,0.131,0.146
BEN2021,0.0616,0.004,16.559,0.000,0.054,0.069
BFA2018,0.2163,0.004,54.284,0.000,0.208,0.224
BFA2021,0.1190,0.004,30.224,0.000,0.111,0.127
CIV2018,0.1040,0.003,35.534,0.000,0.098,0.110
CIV2021,0.0612,0.003,20.879,0.000,0.055,0.067
GNB2018,0.1654,0.005,36.271,0.000,0.156,0.174
GNB2021,0.1209,0.005,26.517,0.000,0.112,0.130
MLI2018,0.1121,0.004,27.304,0.000,0.104,0.120

0,1,2,3
Omnibus:,40172.346,Durbin-Watson:,1.541
Prob(Omnibus):,0.0,Jarque-Bera (JB):,100506.1
Skew:,1.931,Prob(JB):,0.0
Kurtosis:,5.307,Cond. No.,1.56


In [32]:
# Regression weight: hhweight multiplied by hhsize.
Merged_WAEMU['weight'] = Merged_WAEMU['hhweight'].mul(Merged_WAEMU['hhsize'])

In [33]:
# Same regression as before, now weighted by the 'weight' column.
wls_model = sm.WLS(
    endog=Merged_WAEMU['prevalence_ind'],
    exog=dummies,
    weights=Merged_WAEMU['weight'],
)
wls_model.fit().summary()

0,1,2,3
Dep. Variable:,prevalence_ind,R-squared:,0.131
Model:,WLS,Adj. R-squared:,0.131
Method:,Least Squares,F-statistic:,1203.0
Date:,"Sat, 29 Jun 2024",Prob (F-statistic):,0.0
Time:,17:56:46,Log-Likelihood:,-88250.0
No. Observations:,119189,AIC:,176500.0
Df Residuals:,119173,BIC:,176700.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
BEN2018,0.1874,0.005,36.234,0.000,0.177,0.198
BEN2021,0.0858,0.005,17.299,0.000,0.076,0.095
BFA2018,0.3302,0.004,83.966,0.000,0.322,0.338
BFA2021,0.1847,0.004,48.423,0.000,0.177,0.192
CIV2018,0.1224,0.004,34.932,0.000,0.116,0.129
CIV2021,0.0664,0.003,20.499,0.000,0.060,0.073
GNB2018,0.2060,0.014,15.091,0.000,0.179,0.233
GNB2021,0.1683,0.013,12.597,0.000,0.142,0.194
MLI2018,0.1675,0.004,42.023,0.000,0.160,0.175

0,1,2,3
Omnibus:,54759.421,Durbin-Watson:,1.564
Prob(Omnibus):,0.0,Jarque-Bera (JB):,379076.853
Skew:,2.102,Prob(JB):,0.0
Kurtosis:,10.659,Cond. No.,4.21


KeyError: 'groups'

In [43]:
# Identifier string: the CY code followed by the integer hhid.
hhid_as_text = Merged_WAEMU['hhid'].astype(int).map(str)
Merged_WAEMU['Mena'] = Merged_WAEMU['CY'] + hhid_as_text
del hhid_as_text

In [44]:
# Inspect the identifier column built from CY and hhid.
Merged_WAEMU['Mena']

0          BEN20181005
1          BEN20181019
2          BEN20181026
3          BEN20181032
4          BEN20181046
              ...     
119184    TGO202154008
119185    TGO202154009
119186    TGO202154010
119187    TGO202154011
119188    TGO202154012
Name: Mena, Length: 119189, dtype: object

In [47]:
# Weighted regression with cluster-robust standard errors, clusters given by 'Mena'.
# NOTE(review): 'Mena' is built from CY + hhid; if each row is one household,
# every cluster holds a single observation - confirm this is the intended
# clustering level (for example, versus the 'grappe' column).
sm.WLS(
    endog=Merged_WAEMU['prevalence_ind'],
    exog=dummies,
    weights=Merged_WAEMU['weight'],
).fit(
    cov_type='cluster',
    cov_kwds={'groups': Merged_WAEMU['Mena']},
).summary()

0,1,2,3
Dep. Variable:,prevalence_ind,R-squared:,0.131
Model:,WLS,Adj. R-squared:,0.131
Method:,Least Squares,F-statistic:,
Date:,"Sat, 29 Jun 2024",Prob (F-statistic):,
Time:,18:23:47,Log-Likelihood:,-88250.0
No. Observations:,119189,AIC:,176500.0
Df Residuals:,119173,BIC:,176700.0
Df Model:,15,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
BEN2018,0.1874,0.006,31.372,0.000,0.176,0.199
BEN2021,0.0858,0.004,19.180,0.000,0.077,0.095
BFA2018,0.3302,0.009,35.948,0.000,0.312,0.348
BFA2021,0.1847,0.008,24.502,0.000,0.170,0.200
CIV2018,0.1224,0.004,30.255,0.000,0.114,0.130
CIV2021,0.0664,0.003,21.798,0.000,0.060,0.072
GNB2018,0.2060,0.008,24.609,0.000,0.190,0.222
GNB2021,0.1683,0.007,22.692,0.000,0.154,0.183
MLI2018,0.1675,0.007,23.347,0.000,0.153,0.182

0,1,2,3
Omnibus:,54759.421,Durbin-Watson:,1.564
Prob(Omnibus):,0.0,Jarque-Bera (JB):,379076.853
Skew:,2.102,Prob(JB):,0.0
Kurtosis:,10.659,Cond. No.,4.21


In [49]:
# Distinct CY codes present in the data, in order of first appearance.
Merged_WAEMU['CY'].unique()

array(['BEN2018', 'BEN2021', 'BFA2018', 'BFA2021', 'CIV2018', 'CIV2021',
       'GNB2018', 'GNB2021', 'MLI2018', 'MLI2021', 'NER2018', 'NER2021',
       'SEN2018', 'SEN2021', 'TGO2018', 'TGO2021'], dtype=object)