# In [None]:
import numpy as np
import pandas as pd
import seaborn as sn
import os
import sklearn as sk
import statsmodels.api as sm

class country:
    """Survey data and welfare indicators for one country and survey year.

    The constructor downloads four Stata files (welfare, consumption,
    household and individual level) and derives two analysis tables:

    * ``health_merged_data`` -- household welfare data merged with the
      summed consumption rows whose ``codpr`` lies in 681..692.
    * ``poverty_merged_data`` -- individual-level data merged with each
      household's ``dtot_corrected``, ``hhsize`` and poverty line ``zref``.

    Every indicator method adds a column to the frame it is given and
    returns a fitted weighted least squares model of that column on a
    constant, i.e. the ``hhweight``-weighted mean of the indicator.
    """

    def __init__(self, name, code_country, year, z_used='NA'):
        """Download the survey files and build the merged tables.

        Args:
            name: Country folder name inside the data repository.
            code_country: Country code used in the file names.
            year: Survey year as a string (it is joined into the URL).
            z_used: ``'NA'`` to use the ``zref`` column of the welfare file
                as poverty line; any other value is converted with
                ``float`` and used as a constant ``zref`` for every row.
        """
        self.country_name = name
        self.year = year
        self.code_country = code_country
        url_base = 'https://github.com/mermozhk/Poverty/raw/main/'

        def read_table(kind):
            # All four files share the same naming scheme; only ``kind``
            # (welfare / conso / menage / individu) differs.
            stem = 'ehcvm_' + kind + '_' + ''.join([code_country, year])
            return pd.read_stata('/'.join([url_base, name, year, stem]) + '.dta')

        self.welfare_data = read_table('welfare')
        # Total expenditure divided by ``def_spa``
        # (presumably a spatial price deflator -- confirm against the codebook).
        self.welfare_data['dtot_corrected'] = (
            self.welfare_data['dtot'] / self.welfare_data['def_spa']
        )

        self.conso_data = read_table('conso')
        self.menage_data = read_table('menage')
        self.individu_data = read_table('individu')

        # Consumption rows with product code 681..692 (inclusive); treated
        # as the health items -- NOTE(review): confirm the code range.
        self.health_data = self.conso_data.loc[
            (self.conso_data['codpr'] >= 681) & (self.conso_data['codpr'] <= 692)
        ]
        # One row per household: sum of the remaining columns.
        self.health_expenditure = pd.DataFrame(
            self.health_data
            .groupby(['hhid', 'vague', 'grappe', 'menage'])
            .sum()
            .drop(columns=['year', 'hhweight', 'codpr'])
        ).reset_index()

        # Outer merge keeps households without any selected consumption
        # row; their missing values become 0.
        self.health_merged_data = pd.merge(
            self.welfare_data,
            self.health_expenditure,
            on='hhid',
            how='outer',
        ).replace(np.nan, 0)
        self.health_merged_data['constant'] = 1
        self.health_merged_data['not_health'] = (
            self.health_merged_data['dtot'] - self.health_merged_data['depan']
        )

        # Take ``zref`` from the welfare file only when no custom poverty
        # line was supplied.
        use_survey_line = z_used == 'NA'
        welfare_columns = ['hhid', 'dtot_corrected', 'hhsize']
        if use_survey_line:
            welfare_columns.insert(1, 'zref')
        self.poverty_merged_data = pd.merge(
            self.individu_data,
            self.welfare_data[welfare_columns],
            on='hhid',
            how='outer',
        )
        if not use_survey_line:
            self.poverty_merged_data['zref'] = float(z_used)

        self.poverty_merged_data['constant'] = 1

    @staticmethod
    def _poverty_line(data):
        """Return the household poverty line ``zref * hhsize``."""
        return data['zref'] * data['hhsize']

    @staticmethod
    def _add_prevalence(data):
        """Write the 0/1 ``prevalence`` column (1 when below the line)."""
        below_line = data['dtot_corrected'] < country._poverty_line(data)
        data['prevalence'] = below_line.astype(int)

    def catastrophic_expenditure(self, proportion, data=None):
        """Estimate the share of rows with ``depan / dtot >= proportion``.

        Args:
            proportion: Threshold on the ratio ``depan / dtot``.
            data: Frame with ``depan``, ``dtot``, ``constant`` and
                ``hhweight`` columns. When ``None`` the instance's
                ``health_merged_data`` is used.

        Returns:
            The fitted model from ``estimation_procedure``. The 0/1 column
            ``health_expenditure_proportion`` is added to the frame used.
        """
        if data is None:
            data = self.health_merged_data
        share = data['depan'] / data['dtot']
        data['health_expenditure_proportion'] = (share >= proportion).astype(int)
        return self.estimation_procedure(
            index='health_expenditure_proportion', data=data
        )

    def estimation_procedure(self, index, data):
        """Fit a WLS regression of ``data[index]`` on ``data['constant']``.

        Observations are weighted by ``data['hhweight']``; with a constant
        regressor the single coefficient is the weighted mean of the
        indicator.

        Returns:
            The fitted statsmodels results object.
        """
        wls_model = sm.WLS(data[index], data['constant'], weights=data['hhweight'])
        return wls_model.fit()

    def prevalence(self, data):
        """Estimate the poverty headcount; (re)writes ``data['prevalence']``."""
        self._add_prevalence(data)
        return self.estimation_procedure('prevalence', data=data)

    def gap(self, data):
        """Estimate the poverty gap ``(1 - y / z) * prevalence``.

        ``y`` is ``dtot_corrected`` and ``z`` is ``zref * hhsize``. The
        ``prevalence`` column is computed first when it is missing, so the
        method no longer requires ``prevalence`` to be called beforehand.
        """
        if 'prevalence' not in data.columns:
            self._add_prevalence(data)
        shortfall = 1 - data['dtot_corrected'] / self._poverty_line(data)
        data['gap'] = shortfall * data['prevalence']
        return self.estimation_procedure('gap', data=data)

    def severity(self, data):
        """Estimate the squared poverty gap ``(1 - y / z) ** 2 * prevalence``.

        The ``prevalence`` column is computed first when ``data`` does not
        have it yet.
        """
        # The check is made on the frame being analysed.
        if 'prevalence' not in data.columns:
            self._add_prevalence(data)
        shortfall = 1 - data['dtot_corrected'] / self._poverty_line(data)
        data['severity'] = (shortfall ** 2) * data['prevalence']
        return self.estimation_procedure('severity', data=data)

    def aart_welfare_index(self, data):
        """Estimate the mean of ``(zref * hhsize) / dtot_corrected``.

        The ratio is stored in ``data['aart_welfare']``.
        """
        data['aart_welfare'] = self._poverty_line(data) / data['dtot_corrected']
        return self.estimation_procedure('aart_welfare', data=data)

    def aart_poverty_index(self, data):
        """Estimate the mean of ``max(aart_welfare, 1)``.

        ``aart_welfare`` is computed first when ``data`` does not have it;
        the floored values are stored in ``data['aart_poverty']``.
        """
        if 'aart_welfare' not in data.columns:
            data['aart_welfare'] = self._poverty_line(data) / data['dtot_corrected']
        # Vectorised equivalent of a row-wise ``max(value, 1)``.
        data['aart_poverty'] = data['aart_welfare'].clip(lower=1)
        return self.estimation_procedure('aart_poverty', data=data)
        