In [204]:
import numpy as np
import pandas as pd
import seaborn as sn
import os
import sklearn as sk
import statsmodels.api as sm

# Three-letter country codes mapped to the country names displayed in result tables
# (looked up by `wls_object.to_table`).
code_map = dict(
    BEN="BENIN",
    BFA="BURKINA FASO",
    CIV="COTE D'IVOIRE",
    GNB="GUINEE BISSAU",
    MLI="MALI",
    NER="NIGER",
    SEN="SENEGAL",
    TGO="TOGO",
)


class WAEMU:
    """Household expenditure data for the WAEMU countries plus poverty estimators.

    The constructor loads the Stata file, derives the analysis columns and
    builds the dummy matrices. Every estimator method writes a household-level
    indicator into ``self.data`` and regresses it, without intercept, on the
    country-year dummies (``self.dummies``) by weighted least squares with
    weights ``hhweight * hhsize``. Because those dummies are mutually
    exclusive, each coefficient is the weighted mean of the indicator within
    one country-year.
    """

    # Default location of the survey file read by the constructor.
    DATA_URL = 'https://github.com/mermozhk/Poverty/raw/main/WAEMU_data/WAEMU_data.dta'
    # Daily poverty line; multiplied by 365 to obtain the yearly line `zref_intl`.
    # NOTE(review): presumably the international $2.15/day line in PPP terms - confirm.
    POVERTY_LINE_PER_DAY = 2.15

    def __init__(self, source=DATA_URL, poverty_line_per_day=POVERTY_LINE_PER_DAY):
        """Load the survey and derive the columns used by the estimators.

        Parameters
        ----------
        source : str or path-like or file-like, optional
            Anything accepted by ``pandas.read_stata``. Defaults to the
            project's hosted data file.
        poverty_line_per_day : float, optional
            Daily poverty line; ``zref_intl`` is this value times 365.
        """
        self.data = pd.read_stata(source)
        self.data['zref_intl'] = poverty_line_per_day * 365
        self.data['weight'] = self.data['hhweight'] * self.data['hhsize']

        self.dummies = self._dummies('CY')
        self.data = pd.concat([self.data, self.dummies], axis='columns')

        # One label per country/year/household, used as the clustering group.
        # Built column-wise instead of with a row-wise apply; only the
        # partition induced by the labels matters to the estimator.
        self.data['clusters'] = (
            self.data['country'].astype(str)
            + self.data['year'].astype(str)
            + self.data['hhid'].astype(str)
        )

        # Per-capita PPP expenditure with health spending removed.
        health_ppa_ind = self.data['dep_sante'] / (self.data['PPP'] * self.data['hhsize'])
        self.data['dtot_non_sante_ppa_ind'] = self.data['dtot_ppa_ind'] - health_ppa_ind

        self.dummies_country = self._dummies('country')
        self.data = pd.concat([self.data, self.dummies_country], axis='columns')
        self.dummies_year = self._dummies('year')
        self.data = pd.concat([self.data, self.dummies_year], axis='columns')

    def _dummies(self, column):
        """Return 0/1 indicator columns for every distinct value of ``column``.

        The dtype is pinned because recent pandas versions default to ``bool``,
        which turns a design matrix that mixes dummies with float regressors
        into an object array.
        """
        return pd.get_dummies(self.data[column], dtype=np.uint8)

    def _poverty_indicator(self, column):
        """Return a 0/1 Series flagging rows whose ``column`` is below ``zref_intl``."""
        return (self.data[column] < self.data['zref_intl']).map({False: 0, True: 1})

    def _shortfall(self, column):
        """Return ``1 - column / zref_intl`` (relative distance to the poverty line)."""
        return 1 - self.data[column] / self.data['zref_intl']

    @staticmethod
    def _column_name(prefix, name):
        """Build the ``<prefix>_<name>`` column label, rejecting a missing name."""
        if not isinstance(name, str):
            raise TypeError(
                "`name` must be a string when `data_index` is given; got %r" % (name,)
            )
        return '_'.join([prefix, name])

    def estimation_procedure(self, index, data=None, name=None):
        """Fit the cluster-robust WLS of an indicator on the country-year dummies.

        Parameters
        ----------
        index : str
            Column of ``self.data`` used as dependent variable when ``data``
            is None.
        data : array-like, optional
            Dependent variable supplied directly; ``index`` is then ignored.
        name : optional
            Accepted for backward compatibility; not used.

        Returns
        -------
        The raw statsmodels results object when ``data`` is None, otherwise
        the results wrapped in ``wls_object``.
        """
        cluster_options = {'groups': self.data['clusters']}
        if data is None:
            model = sm.WLS(self.data[index], self.dummies, weights=self.data['weight'])
            return model.fit(cov_type='cluster', cov_kwds=cluster_options)
        model = sm.WLS(data, self.dummies, weights=self.data['weight'])
        return wls_object(model.fit(cov_type='cluster', cov_kwds=cluster_options))

    def prevalence(self, data_index=None, name=None):
        """Estimate the share of observations below the poverty line.

        With no arguments the indicator is computed on ``dtot_ppa_ind`` and
        stored as ``prevalence_ind``. Otherwise it is computed on
        ``data_index`` and stored as ``prevalence_<name>``.

        Raises
        ------
        TypeError
            If ``data_index`` is given without a string ``name``.
        """
        if data_index is None:
            data_index, column_name = 'dtot_ppa_ind', 'prevalence_ind'
        else:
            column_name = self._column_name('prevalence', name)
        self.data[column_name] = self._poverty_indicator(data_index)
        return wls_object(self.estimation_procedure(column_name))

    def gap(self, data_index=None, name=None, data_prevalence=None):
        """Estimate the poverty gap: the relative shortfall, zero above the line.

        With no arguments the gap is computed on ``dtot_ppa_ind`` (reusing an
        existing ``prevalence_ind`` column if present) and stored as
        ``gap_ind``. Otherwise it is computed on ``data_index``, stored as
        ``gap_<name>``, and masked by ``data_prevalence``; when that mask is
        omitted it is derived from ``data_index`` itself.

        Raises
        ------
        TypeError
            If ``data_index`` is given without a string ``name``.
        """
        if data_index is None:
            if 'prevalence_ind' not in self.data.columns:
                self.data['prevalence_ind'] = self._poverty_indicator('dtot_ppa_ind')
            self.data['gap_ind'] = self._shortfall('dtot_ppa_ind') * self.data['prevalence_ind']
            return wls_object(self.estimation_procedure('gap_ind'))

        column_name = self._column_name('gap', name)
        if data_prevalence is None:
            # Previously this multiplied a Series by None and crashed.
            data_prevalence = self._poverty_indicator(data_index)
        self.data[column_name] = self._shortfall(data_index) * data_prevalence
        return wls_object(self.estimation_procedure(column_name))

    def severity(self, data_index=None, name=None, data_prevalence=None):
        """Estimate poverty severity: the squared relative shortfall, zero above the line.

        Arguments and column naming mirror :meth:`gap`, with ``severity`` as
        the prefix (``severity_ind`` / ``severity_<name>``).

        Raises
        ------
        TypeError
            If ``data_index`` is given without a string ``name``.
        """
        if data_index is None:
            if 'prevalence_ind' not in self.data.columns:
                self.data['prevalence_ind'] = self._poverty_indicator('dtot_ppa_ind')
            self.data['severity_ind'] = (
                (self._shortfall('dtot_ppa_ind') ** 2) * self.data['prevalence_ind']
            )
            return wls_object(self.estimation_procedure('severity_ind'))

        column_name = self._column_name('severity', name)
        if data_prevalence is None:
            # Previously this multiplied a Series by None and crashed.
            data_prevalence = self._poverty_indicator(data_index)
        self.data[column_name] = (self._shortfall(data_index) ** 2) * data_prevalence
        return wls_object(self.estimation_procedure(column_name))

    def catastrophic_expenditure(self, proportion):
        """Estimate the share of rows whose health spending is at least ``proportion`` of ``dtot``.

        The 0/1 indicator is stored in ``self.data['che']``.
        """
        health_share = self.data['dep_sante'] / self.data['dtot']
        self.data['che'] = (health_share >= proportion).map({False: 0, True: 1})
        return wls_object(self.estimation_procedure(index='che'))
        
        
class wls_object:
    """Thin wrapper around a fitted regression result, with display helpers."""

    def __init__(self, fitting_output):
        """Store the fitted results object (exposed as ``self.object``)."""
        self.object = fitting_output

    def print_result(self):
        """Print the regression summary; returns None."""
        print(self.object.summary())

    def to_table(self, index, columns, by_table=False, country_names=None):
        """Tabulate one coefficient per country-year label.

        Parameters
        ----------
        index : str
            Name given to the column holding the estimates.
        columns : iterable of str
            Regressor labels to report, each made of a three-letter country
            code followed by a four-digit year (e.g. ``'BEN2018'``).
        by_table : bool, optional
            If True, pivot the result to one row per year and one column per
            country name.
        country_names : mapping, optional
            Country code -> display name. Defaults to the module-level
            ``code_map``.

        Returns
        -------
        pandas.DataFrame
            Columns ``[index, 'year', 'Country Name']``, or the pivoted frame.

        Raises
        ------
        KeyError
            If a requested label is not among the fitted coefficients.
        """
        labels = list(columns)
        params = self.object.params
        missing = [label for label in labels if label not in params.index]
        if missing:
            raise KeyError('no fitted coefficient for: %s' % missing)

        # Read the coefficients by label. The previous implementation took
        # `fittedvalues.unique()`, which depends on row order and silently
        # merges two country-years that happen to share the same estimate.
        data = pd.DataFrame({'Country': labels, 'Index': params.reindex(labels).to_numpy()})
        data.columns = ['Country', index]
        data['year'] = data['Country'].str[-4:]
        data['Code'] = data['Country'].str[:3]
        names = code_map if country_names is None else country_names
        data['Country Name'] = data['Code'].map(names)
        df = data.drop(columns=['Country', 'Code'])
        if not by_table:
            return df
        return df.pivot(index='year', columns='Country Name')
        

In [205]:
wa = WAEMU() # Create a WAEMU object that holds the survey data set

In [150]:
# Example of a prevalence computation: the indicator is computed on `dtot_non_sante_ppa_ind`
# and written to the column `prevalence_after_dep_sante`.
resultat_non_sante = wa.prevalence(data_index='dtot_non_sante_ppa_ind',name='after_dep_sante')

In [151]:
# Plain display of the regression results
resultat_non_sante.print_result()

                                WLS Regression Results                                
Dep. Variable:     prevalence_after_dep_sante   R-squared:                       0.129
Model:                                    WLS   Adj. R-squared:                  0.129
Method:                         Least Squares   F-statistic:                       nan
Date:                        Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                                17:12:40   Log-Likelihood:                -92661.
No. Observations:                      119189   AIC:                         1.854e+05
Df Residuals:                          119173   BIC:                         1.855e+05
Df Model:                                  15                                         
Covariance Type:                      cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------

In [127]:
# Example of a poverty-gap computation (not a prevalence one, as the original comment said).
# Requires the column `prevalence_after_dep_sante`, which only exists after
# `wa.prevalence(data_index='dtot_non_sante_ppa_ind', name='after_dep_sante')` has been run.
resultat_non_gap = wa.gap(data_index='dtot_non_sante_ppa_ind',name='after_dep_sante',data_prevalence=wa.data['prevalence_after_dep_sante'])

In [128]:
resultat_non_gap.print_result()

                             WLS Regression Results                            
Dep. Variable:     gap_after_dep_sante   R-squared:                       0.124
Model:                             WLS   Adj. R-squared:                  0.124
Method:                  Least Squares   F-statistic:                       nan
Date:                 Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                         16:44:32   Log-Likelihood:                 37891.
No. Observations:               119189   AIC:                        -7.575e+04
Df Residuals:                   119173   BIC:                        -7.559e+04
Df Model:                           15                                         
Covariance Type:               cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.0534      0.002     27.20

In [152]:
result_gap = wa.gap()

In [153]:
result_gap.print_result()

                            WLS Regression Results                            
Dep. Variable:                gap_ind   R-squared:                       0.132
Model:                            WLS   Adj. R-squared:                  0.132
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                        17:12:58   Log-Likelihood:                 48505.
No. Observations:              119189   AIC:                        -9.698e+04
Df Residuals:                  119173   BIC:                        -9.682e+04
Df Model:                          15                                         
Covariance Type:              cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.0436      0.002     24.492      0.0

In [154]:
resultat_severity = wa.severity()

In [155]:
resultat_severity.print_result()

                            WLS Regression Results                            
Dep. Variable:           severity_ind   R-squared:                       0.101
Model:                            WLS   Adj. R-squared:                  0.101
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                        17:13:57   Log-Likelihood:             1.3357e+05
No. Observations:              119189   AIC:                        -2.671e+05
Df Residuals:                  119173   BIC:                        -2.670e+05
Df Model:                          15                                         
Covariance Type:              cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.0151      0.001     18.653      0.0

In [105]:
resultat_prevalence = wa.prevalence()

In [106]:
resultat_prevalence.print_result()

                            WLS Regression Results                            
Dep. Variable:         prevalence_ind   R-squared:                       0.131
Model:                            WLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                        16:23:57   Log-Likelihood:                -88250.
No. Observations:              119189   AIC:                         1.765e+05
Df Residuals:                  119173   BIC:                         1.767e+05
Df Model:                          15                                         
Covariance Type:              cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.1874      0.006     31.372      0.0

In [139]:
wa.data.columns

Index(['country', 'year', 'hhid', 'grappe', 'menage', 'vague', 'zae', 'region',
       'milieu', 'hhweight', 'hhsize', 'eqadu1', 'eqadu2', 'hgender', 'hage',
       'hmstat', 'hreligion', 'hnation', 'halfab', 'heduc', 'hdiploma',
       'hhandig', 'hactiv7j', 'hactiv12m', 'hbranch', 'hsectins', 'hcsp',
       'dali', 'dnal', 'dtot', 'pcexp', 'zref', 'def_spa', 'def_temp', 'halfa',
       'halfa2', 'milieu2', 'dtet', 'decile', 'zzae', 'dep_sante', 'PPP', 'CY',
       'Code_CY', 'dtot_ppa_ind', 'poids', 'Quintile', 'zref_intl', 'weight',
       'BEN2018', 'BEN2021', 'BFA2018', 'BFA2021', 'CIV2018', 'CIV2021',
       'GNB2018', 'GNB2021', 'MLI2018', 'MLI2021', 'NER2018', 'NER2021',
       'SEN2018', 'SEN2021', 'TGO2018', 'TGO2021', 'clusters',
       'dtot_non_sante_ppa_ind', 'prevalence_ind', 'gap_ind'],
      dtype='object')

In [140]:
wa.data['def_spa']

0         0.971703
1         0.971703
2         0.971703
3         0.971703
4         0.971703
            ...   
119184         NaN
119185         NaN
119186         NaN
119187         NaN
119188         NaN
Name: def_spa, Length: 119189, dtype: float32

In [108]:
# NOTE(review): `impact` is not assigned in any cell shown in this notebook export, so this call
# depends on leftover kernel state and fails on a fresh kernel. Recreate `impact` explicitly before re-running.
impact.print_result()

                            WLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.002
Model:                            WLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 18 Jul 2024   Prob (F-statistic):                nan
Time:                        16:24:07   Log-Likelihood:                 25697.
No. Observations:              119189   AIC:                        -5.136e+04
Df Residuals:                  119173   BIC:                        -5.121e+04
Df Model:                          15                                         
Covariance Type:              cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.0337      0.003     12.670      0.0

In [83]:
wa.data['prevalence_ind'].describe()

count    119189.000000
mean          0.137563
std           0.344442
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: prevalence_ind, dtype: float64

In [84]:
wa.data['prevalence_after_dep_sante'].describe()

count    119189.000000
mean          0.154419
std           0.361351
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: prevalence_after_dep_sante, dtype: float64

In [93]:
# NOTE(review): both columns are 0/1 indicators, so their difference lies in {-1, 0, 1} and is always
# below `zref_intl` (2.15*365). The mean is therefore 1.0 whatever the data, and this check is vacuous.
# If the intent was to verify that the indicator never decreases once health spending is removed,
# the difference should be compared with 0 instead - confirm.
np.mean((wa.data['prevalence_after_dep_sante']-wa.data['prevalence_ind'])<wa.data['zref_intl'])

1.0

In [129]:
# Tabulate the prevalence estimates, one row per country-year dummy label.
resultat_prevalence.to_table('Prevalence',wa.dummies.columns)

Unnamed: 0,Prevalence,year,Country Name
0,0.187436,2018,BENIN
1,0.085756,2021,BENIN
2,0.33019,2018,BURKINA FASO
3,0.184733,2021,BURKINA FASO
4,0.122367,2018,COTE D'IVOIRE
5,0.066431,2021,COTE D'IVOIRE
6,0.205967,2018,GUINEE BISSAU
7,0.1683,2021,GUINEE BISSAU
8,0.167538,2018,MALI
9,0.156681,2021,MALI


In [130]:
resultat_severite = wa.severity()
resultat_severite.to_table('Severity',wa.dummies.columns)

Unnamed: 0,Severity,year,Country Name
0,0.015143,2018,BENIN
1,0.006195,2021,BENIN
2,0.034855,2018,BURKINA FASO
3,0.01398,2021,BURKINA FASO
4,0.008477,2018,COTE D'IVOIRE
5,0.003507,2021,COTE D'IVOIRE
6,0.014033,2018,GUINEE BISSAU
7,0.009518,2021,GUINEE BISSAU
8,0.011078,2018,MALI
9,0.009002,2021,MALI


In [131]:
resultat_gap = wa.gap()
resultat_gap.to_table('Gap',wa.dummies.columns)

Unnamed: 0,Gap,year,Country Name
0,0.043599,2018,BENIN
1,0.018132,2021,BENIN
2,0.090497,2018,BURKINA FASO
3,0.041804,2021,BURKINA FASO
4,0.025837,2018,COTE D'IVOIRE
5,0.012056,2021,COTE D'IVOIRE
6,0.043921,2018,GUINEE BISSAU
7,0.032666,2021,GUINEE BISSAU
8,0.034772,2018,MALI
9,0.030806,2021,MALI


In [5]:
wa.data.columns

Index(['country', 'year', 'hhid', 'grappe', 'menage', 'vague', 'zae', 'region',
       'milieu', 'hhweight', 'hhsize', 'eqadu1', 'eqadu2', 'hgender', 'hage',
       'hmstat', 'hreligion', 'hnation', 'halfab', 'heduc', 'hdiploma',
       'hhandig', 'hactiv7j', 'hactiv12m', 'hbranch', 'hsectins', 'hcsp',
       'dali', 'dnal', 'dtot', 'pcexp', 'zref', 'def_spa', 'def_temp', 'halfa',
       'halfa2', 'milieu2', 'dtet', 'decile', 'zzae', 'dep_sante', 'PPP', 'CY',
       'Code_CY', 'dtot_ppa_ind', 'poids', 'Quintile', 'zref_intl', 'weight',
       'BEN2018', 'BEN2021', 'BFA2018', 'BFA2021', 'CIV2018', 'CIV2021',
       'GNB2018', 'GNB2021', 'MLI2018', 'MLI2021', 'NER2018', 'NER2021',
       'SEN2018', 'SEN2021', 'TGO2018', 'TGO2021'],
      dtype='object')

In [157]:
wa.data['milieu']

0         Urbain
1         Urbain
2         Urbain
3         Urbain
4         Urbain
           ...  
119184     Rural
119185     Rural
119186     Rural
119187     Rural
119188     Rural
Name: milieu, Length: 119189, dtype: category
Categories (2, object): ['Urbain' < 'Rural']

0          BEN20181005.0
1          BEN20181019.0
2          BEN20181026.0
3          BEN20181032.0
4          BEN20181046.0
               ...      
119184    TGO202154008.0
119185    TGO202154009.0
119186    TGO202154010.0
119187    TGO202154011.0
119188    TGO202154012.0
Length: 119189, dtype: object

In [43]:
wa.data['PPP']

0         213.103818
1         213.103818
2         213.103818
3         213.103818
4         213.103818
             ...    
119184    230.335638
119185    230.335638
119186    230.335638
119187    230.335638
119188    230.335638
Name: PPP, Length: 119189, dtype: float64

In [45]:
'_'.join(['s','i'])

's_i'

In [158]:
data_conflict = pd.read_stata('https://github.com/mermozhk/Poverty/raw/main/wgidataset.dta')

In [159]:
data_conflict.columns

Index(['code', 'countryname', 'year', 'vae', 'vas', 'van', 'var', 'val', 'vau',
       'pve', 'pvs', 'pvn', 'pvr', 'pvl', 'pvu', 'gee', 'ges', 'gen', 'ger',
       'gel', 'geu', 'rqe', 'rqs', 'rqn', 'rqr', 'rql', 'rqu', 'rle', 'rls',
       'rln', 'rlr', 'rll', 'rlu', 'cce', 'ccs', 'ccn', 'ccr', 'ccl', 'ccu'],
      dtype='object')

In [211]:
# Country codes and years used below to subset `data_conflict`.
code_countries = {'BEN','BFA','CIV','GNB','MLI','NER','SEN','TGO'}
years = [2018,2021]

In [212]:
WAEMU_conflicts=data_conflict[(data_conflict['code'].isin(code_countries))& (data_conflict['year'].isin(years))]

In [173]:
raw_data = pd.read_stata('https://github.com/mermozhk/Poverty/raw/main/WAEMU_data/WAEMU_data.dta')

In [175]:
raw_data.columns

Index(['country', 'year', 'hhid', 'grappe', 'menage', 'vague', 'zae', 'region',
       'milieu', 'hhweight', 'hhsize', 'eqadu1', 'eqadu2', 'hgender', 'hage',
       'hmstat', 'hreligion', 'hnation', 'halfab', 'heduc', 'hdiploma',
       'hhandig', 'hactiv7j', 'hactiv12m', 'hbranch', 'hsectins', 'hcsp',
       'dali', 'dnal', 'dtot', 'pcexp', 'zref', 'def_spa', 'def_temp', 'halfa',
       'halfa2', 'milieu2', 'dtet', 'decile', 'zzae', 'dep_sante', 'PPP', 'CY',
       'Code_CY', 'dtot_ppa_ind', 'poids', 'Quintile'],
      dtype='object')

In [213]:
WAEMU_conflicts=WAEMU_conflicts.rename(columns={'code':'country'})

In [181]:
WAEMU_conflicts.columns

Index(['country', 'countryname', 'year', 'vae', 'vas', 'van', 'var', 'val',
       'vau', 'pve', 'pvs', 'pvn', 'pvr', 'pvl', 'pvu', 'gee', 'ges', 'gen',
       'ger', 'gel', 'geu', 'rqe', 'rqs', 'rqn', 'rqr', 'rql', 'rqu', 'rle',
       'rls', 'rln', 'rlr', 'rll', 'rlu', 'cce', 'ccs', 'ccn', 'ccr', 'ccl',
       'ccu'],
      dtype='object')

In [183]:
data_merged = pd.merge(raw_data,WAEMU_conflicts,how='right',on=['country','year'])

In [184]:
data_merged.columns

Index(['country', 'year', 'hhid', 'grappe', 'menage', 'vague', 'zae', 'region',
       'milieu', 'hhweight', 'hhsize', 'eqadu1', 'eqadu2', 'hgender', 'hage',
       'hmstat', 'hreligion', 'hnation', 'halfab', 'heduc', 'hdiploma',
       'hhandig', 'hactiv7j', 'hactiv12m', 'hbranch', 'hsectins', 'hcsp',
       'dali', 'dnal', 'dtot', 'pcexp', 'zref', 'def_spa', 'def_temp', 'halfa',
       'halfa2', 'milieu2', 'dtet', 'decile', 'zzae', 'dep_sante', 'PPP', 'CY',
       'Code_CY', 'dtot_ppa_ind', 'poids', 'Quintile', 'countryname', 'vae',
       'vas', 'van', 'var', 'val', 'vau', 'pve', 'pvs', 'pvn', 'pvr', 'pvl',
       'pvu', 'gee', 'ges', 'gen', 'ger', 'gel', 'geu', 'rqe', 'rqs', 'rqn',
       'rqr', 'rql', 'rqu', 'rle', 'rls', 'rln', 'rlr', 'rll', 'rlu', 'cce',
       'ccs', 'ccn', 'ccr', 'ccl', 'ccu'],
      dtype='object')

In [185]:
data_merged[(data_merged['country']=='BEN')&(data_merged['year']==2018)]

Unnamed: 0,country,year,hhid,grappe,menage,vague,zae,region,milieu,hhweight,...,rln,rlr,rll,rlu,cce,ccs,ccn,ccr,ccl,ccu
0,BEN,2018,1005.0,1,5,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
1,BEN,2018,1019.0,1,19,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
2,BEN,2018,1026.0,1,26,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
3,BEN,2018,1032.0,1,32,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
4,BEN,2018,1046.0,1,46,2,"Soudano-sahélien, faibles précipitations, 1 sa...",ALIBORI,Urbain,181.727624,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8007,BEN,2018,670087.0,670,87,2,"Précipations moyennes, 1 saison des pluies, coton",ZOU,Urbain,377.871786,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
8008,BEN,2018,670105.0,670,105,2,"Précipations moyennes, 1 saison des pluies, coton",ZOU,Urbain,377.871786,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
8009,BEN,2018,670114.0,670,114,2,"Précipations moyennes, 1 saison des pluies, coton",ZOU,Urbain,377.871786,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381
8010,BEN,2018,670123.0,670,123,2,"Précipations moyennes, 1 saison des pluies, coton",ZOU,Urbain,377.871786,...,13.0,26.190475,17.142857,35.238094,-0.391388,0.150585,13.0,41.42857,29.047619,50.952381


In [190]:
PVE= data_merged['pve']

In [196]:
(WAEMU_conflicts.pivot(index='country',columns='year')['pve']).to_excel('Violence.xlsx')

In [199]:
wa.data['IND_BENIN'] = wa.data['country'].apply(lambda x:x=='BEN').map({False:0,True:1})

In [201]:
wa.data['year_2018'] = wa.data['year'].apply(lambda x:x==2018).map({False:0,True:1})

In [203]:
pd.get_dummies(wa.data.country)

Unnamed: 0,BEN,BFA,CIV,GNB,MLI,NER,SEN,TGO
0,1,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
119184,0,0,0,0,0,0,0,1
119185,0,0,0,0,0,0,0,1
119186,0,0,0,0,0,0,0,1
119187,0,0,0,0,0,0,0,1


In [206]:
wa.dummies_country

Unnamed: 0,BEN,BFA,CIV,GNB,MLI,NER,SEN,TGO
0,1,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
119184,0,0,0,0,0,0,0,1
119185,0,0,0,0,0,0,0,1
119186,0,0,0,0,0,0,0,1
119187,0,0,0,0,0,0,0,1


In [208]:
prev = wa.prevalence()

In [209]:
prev.print_result()

                            WLS Regression Results                            
Dep. Variable:         prevalence_ind   R-squared:                       0.131
Model:                            WLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                       nan
Date:                Fri, 19 Jul 2024   Prob (F-statistic):                nan
Time:                        14:26:23   Log-Likelihood:                -88250.
No. Observations:              119189   AIC:                         1.765e+05
Df Residuals:                  119173   BIC:                         1.767e+05
Df Model:                          15                                         
Covariance Type:              cluster                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
BEN2018        0.1874      0.006     31.372      0.0

In [210]:
wa.data.columns

Index([               'country',                   'year',
                         'hhid',                 'grappe',
                       'menage',                  'vague',
                          'zae',                 'region',
                       'milieu',               'hhweight',
                       'hhsize',                 'eqadu1',
                       'eqadu2',                'hgender',
                         'hage',                 'hmstat',
                    'hreligion',                'hnation',
                       'halfab',                  'heduc',
                     'hdiploma',                'hhandig',
                     'hactiv7j',              'hactiv12m',
                      'hbranch',               'hsectins',
                         'hcsp',                   'dali',
                         'dnal',                   'dtot',
                        'pcexp',                   'zref',
                      'def_spa',               'def_temp

In [214]:
# Attach the country-year indicators to every household row.
# The household table is the driving side: a left join keeps every household and its original order.
# The earlier `how='right'` would silently drop households without a matching country-year and add
# an all-NaN household row for any indicator row without households.
# `validate` makes the merge fail loudly if a (country, year) pair is duplicated in `WAEMU_conflicts`,
# which would otherwise duplicate households.
data_merged = pd.merge(
    wa.data,
    WAEMU_conflicts,
    how='left',
    on=['country', 'year'],
    validate='many_to_one',
)

In [221]:
data_merged['constant']=1

In [240]:
# Full specification: `prevalence_ind` on `pve`, a constant, six country dummies and the 2018 dummy,
# weighted by `weight`, with standard errors clustered on `clusters`.
# GNB and TGO have no dummy in the list, so together they form the reference group absorbed by `constant`.
res = sm.WLS(
    data_merged['prevalence_ind'],
    data_merged[['pve', 'constant', 'BEN', 'BFA', 'CIV', 'NER', 'MLI', 'SEN', 2018]],
    weights=data_merged['weight'],
).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_merged['clusters']},
)

In [238]:
# Bivariate specification (`pve` and a constant only), kept under its own name.
# This cell sits after the full specification in the notebook, so binding it to `res` would make a
# top-to-bottom run overwrite the full model, and `res.summary()` would no longer match the table shown.
res_pve_only = sm.WLS(
    data_merged['prevalence_ind'],
    data_merged[['pve', 'constant']],
    weights=data_merged['weight'],
).fit(
    cov_type='cluster',
    cov_kwds={'groups': data_merged['clusters']},
)

In [241]:
res.summary()

0,1,2,3
Dep. Variable:,prevalence_ind,R-squared:,0.129
Model:,WLS,Adj. R-squared:,0.129
Method:,Least Squares,F-statistic:,615.6
Date:,"Fri, 19 Jul 2024",Prob (F-statistic):,0.0
Time:,16:13:06,Log-Likelihood:,-88407.0
No. Observations:,119189,AIC:,176800.0
Df Residuals:,119180,BIC:,176900.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
pve,0.0802,0.013,5.948,0.000,0.054,0.107
constant,0.2334,0.011,21.861,0.000,0.212,0.254
BEN,-0.1067,0.008,-13.260,0.000,-0.122,-0.091
BFA,0.1007,0.012,8.430,0.000,0.077,0.124
CIV,-0.1059,0.005,-21.572,0.000,-0.116,-0.096
NER,0.3365,0.013,26.859,0.000,0.312,0.361
MLI,0.0761,0.021,3.605,0.000,0.035,0.118
SEN,-0.1808,0.009,-19.506,0.000,-0.199,-0.163
2018,0.0626,0.003,18.111,0.000,0.056,0.069

0,1,2,3
Omnibus:,55367.052,Durbin-Watson:,1.561
Prob(Omnibus):,0.0,Jarque-Bera (JB):,392130.085
Skew:,2.122,Prob(JB):,0.0
Kurtosis:,10.807,Cond. No.,29.4


In [237]:
data_merged['BEN'].describe()

count    119189.000000
mean          0.134610
std           0.341308
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: BEN, dtype: float64