### MICS IRAQ

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pyreadstat
import os

In [2]:
# Show every column when a frame is displayed (the MICS files are very wide).
pd.set_option('display.max_columns', None)

# NOTE(review): machine-specific absolute path; it has to be edited before the
# notebook can run on another machine.
os.chdir('C:/Users/511232/Desktop/MICS/microdata/iraq')

# List the SPSS files in the data folder.  Matching on the extension (rather
# than the substring 'sav') avoids picking up unrelated names such as
# 'save.txt'; sorting makes the displayed order independent of the OS.
sorted(f for f in os.listdir() if f.lower().endswith('.sav'))

['bh.sav',
 'ch.sav',
 'fg.sav',
 'fs.sav',
 'hh.sav',
 'hl.sav',
 'mm.sav',
 'wm.sav',
 'wm_including HC14 & HH48.sav']

In [4]:
'''data processing prior to generating crosstabs'''
class mics_iraq:
    """Derive analysis variables from the MICS Iraq microdata.

    ``__init__`` reads the household (``hh``), women (``wm``) and household
    listing (``hl``) SPSS files together with their metadata.
    ``process_data`` adds the derived columns, merges household and listing
    information onto the women file and finally replaces the codes of the
    columns in ``other_cols`` by their value labels.

    Most recodes are ports of the SPSS syntax quoted in the helper docstrings.
    In SPSS a later ``IF``/``RECODE`` overrides an earlier one, whereas
    ``np.select`` keeps the *first* matching condition, so condition lists are
    written in reverse SPSS order wherever the rules can overlap.
    """

    # Default locations; both can be overridden per call.
    # NOTE(review): machine-specific absolute paths.
    DATA_DIR = 'C:/Users/511232/Desktop/MICS/microdata/iraq'
    OUTPUT_DIR = 'C:/Users/511232/Desktop/MICS/microdata/iraq/Crosstabs'

    # column -> (ascending lower bounds, label per bound,
    #            label below the first bound, label for a missing age)
    # Bounds are compared with ``>=``; this reproduces the original inclusive
    # ranges for whole-number ages (assumes HL6 holds whole years - TODO confirm).
    _AGE_GROUPS = {
        'agegrp4_1': ([5, 10, 15, 20, 25, 30],
                      ['5-9', '10-14', '15-19', '20-24', '25-29', '30+'], '<5', 'Missing'),
        'agegrp4_2': ([5, 10, 15, 20, 25, 65],
                      ['5-9', '10-14', '15-19', '20-24', '25-64', '65+'], '<5', 'Missing'),
        # The original fell back to '<5' here, a copy-paste label that could
        # only ever apply to negative ages; such values are now 'Missing'.
        'age_ict': ([0, 15, 61], ['0-14', '15-60', '60+'], 'Missing', 'Missing'),
        'agegrp5': ([15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65],
                    ['15-19', '20-24', '25-29', '30-34', '35-39', '40-44',
                     '45-49', '50-54', '55-59', '60-64', '65+'], '<15', 'Missing'),
        'agegrp15': ([15, 30, 45, 65], ['15-29', '30-44', '45-64', '65+'], '<15', 'Missing'),
        'agegrp10': ([15, 25, 65], ['15-24', '25-64', '65+'], '<15', 'Missing'),
        'agegrp_9': ([18, 25, 65], ['18-24', '25-64', '65+'], '<18', 'Not stated'),
        'agegrp5p': ([5], ['5+'], '<5', 'Missing'),
        'agegrp15p': ([15], ['15+'], '<15', 'Missing'),
        'agegrp18p': ([18], ['18+'], '<18', 'Missing'),
        'agegrp25p': ([25], ['25+'], '<25', 'Missing'),
    }

    def __init__(self, data_dir=DATA_DIR):
        """Read the three .sav files and set up the lookup tables.

        Parameters
        ----------
        data_dir : str, optional
            Folder containing ``hh.sav``, ``wm.sav`` and ``hl.sav``.  As in the
            original code the working directory is changed to this folder.
        """
        os.chdir(data_dir)
        # apply_value_formats=False keeps the raw numeric codes; labels are
        # applied explicitly at the end of process_data().
        df_hh, meta_hh = pyreadstat.read_sav('hh.sav', apply_value_formats=False)
        df_wm, meta_wm = pyreadstat.read_sav('wm.sav', apply_value_formats=False)
        df_hl, meta_hl = pyreadstat.read_sav('hl.sav', apply_value_formats=False)

        # column name -> column label, and column name -> {code: value label}
        self.col_names_hh = meta_hh.column_names_to_labels
        self.col_vals_hh = meta_hh.variable_value_labels
        self.col_names_hl = meta_hl.column_names_to_labels
        self.col_vals_hl = meta_hl.variable_value_labels
        self.col_names_wm = meta_wm.column_names_to_labels
        self.col_vals_wm = meta_wm.variable_value_labels

        self.data_hh = df_hh
        self.data_wm = df_wm
        self.data_hl = df_hl

        self.disability_levels = {
            1: 'No difficulty',
            2: 'Some difficulty',
            3: 'A lot of difficulty',
            4: 'Cannot do at all',
            9: 'No response',
        }

        self.disability_cols = ['AF6', 'AF8', 'AF9', 'AF10', 'AF11', 'AF12']
        # Columns whose codes are replaced by value labels at the end of process_data().
        self.other_cols = ['WAGE', 'HH6', 'disability', 'windex5u', 'windex5r', 'windex5',
                           'MSTATUS', 'HC14', 'WB6A', 'WB5', 'WB14',
                           'TA3', 'CP2', 'CM17', 'MA6']

        self.dis_names = {
            'AF6': 'Difficulty seeing, even if wearing glasses or contact lenses',
            'AF8': 'Difficulty hearing, even if using a hearing aid',
            'AF9': 'Difficulty walking or climbing steps',
            'AF10': 'Difficulty remembering or concentrating',
            'AF11': 'Difficulty with self-care, such as washing all over or dressing',
            'AF12': 'Difficulty communicating',
        }

    def process_data(self, output_dir=OUTPUT_DIR):
        """Create all derived variables, merge the files and label the codes.

        The method mutates ``data_hl``, ``data_hh`` and ``data_wm`` in place and
        stores the listing file merged with household info in ``df_hl``.  It is
        not idempotent: calling it twice merges the same columns twice.

        Parameters
        ----------
        output_dir : str or None, optional
            Working directory to switch to before processing (the original
            behaviour).  Pass ``None`` to leave the working directory alone.
        """
        if output_dir is not None:
            os.chdir(output_dir)

        ###################### VARIABLE CREATION & MERGES #####################
        self._add_household_type()
        print('household type created')
        self._add_disability_combined()
        print('disability_combined created')
        self._add_marital_status()
        print('marital_status created')
        self._add_smoker()
        print('smoker created')
        self._add_literacy()
        print('literacy created')
        self._add_education_level()
        print('education level created')
        self._add_skilled_birth_attendance()
        print('Birth_skilled_per created')
        self._add_school_attendance()
        print('School attendence created')
        self._add_internet_use()
        print('internet_use created')
        self._add_age_groups()
        print('different categories for age groups created')
        self._add_household_size()
        self._add_modern_contraceptive()
        print('modern_contraceptive created')
        self._merge_household_info()

        ###################### UNMET FAMILY NEED ##############################
        self._recode_unmet_need()

        ###################### LABEL VALUES ###################################
        self._apply_value_labels()

    # ------------------------------------------------------------------ #
    # household listing / household file
    # ------------------------------------------------------------------ #
    def _add_household_type(self):
        """Add ``hh_type`` to ``data_hl`` from the HL3 codes of each household.

        A household (HH1, HH2) with a single listed member is 'One person';
        otherwise it gets the first of 'Nuclear', 'Extended', 'Composite' whose
        code list contains every member's HL3, else 'Unknown'.

        Implemented with ``groupby().transform`` instead of
        ``groupby().apply`` so the frame keeps its original index and columns
        (recent pandas versions prepend the group keys to the index of an
        ``apply`` result, which makes the later merges on HH1/HH2 ambiguous).
        Assumes HH1/HH2 are never missing.
        """
        nuclear = [1, 2, 3, 13]
        extended = nuclear + [4, 5, 6, 7, 8, 9, 10, 11, 12]
        composite = extended + [14, 96, 98]

        hl = self.data_hl
        household = [hl['HH1'], hl['HH2']]

        def all_members_in(codes):
            # True on every row of a household whose members all have HL3 in `codes`.
            return hl['HL3'].isin(codes).groupby(household).transform('all')

        members = hl['HL3'].groupby(household).transform('size')
        hl['hh_type'] = np.select(
            [members == 1, all_members_in(nuclear), all_members_in(extended),
             all_members_in(composite)],
            ['One person', 'Nuclear', 'Extended', 'Composite'],
            default='Unknown')

    def _add_age_groups(self):
        """Add every age-group column listed in ``_AGE_GROUPS`` to ``data_hl``."""
        age = self.data_hl['HL6']
        for column, (bounds, labels, below, missing) in self._AGE_GROUPS.items():
            # Missing first, then the highest bound first so the first match wins.
            conditions = [age.isna()] + [age >= bound for bound in reversed(bounds)]
            choices = [missing] + list(reversed(labels))
            self.data_hl[column] = np.select(conditions, choices, default=below)

    def _add_household_size(self):
        """Add ``hh_size`` ('1'..'7', '8+') and ``living_alone`` to ``data_hh``."""
        members = self.data_hh['HH48']
        # Build the labels explicitly: np.where over a str and a float array
        # either fails or yields labels such as '2.0' depending on NumPy.
        self.data_hh['hh_size'] = members.map(
            lambda n: n if pd.isna(n) else ('8+' if n >= 8 else str(int(n))))
        print('household size created')

        # An explicit string default keeps the column free of the integer 0
        # that np.select would otherwise use for sizes below 1.
        self.data_hh['living_alone'] = np.select(
            [members == 1, members.isna(), members > 1],
            ['alone', 'Missing', 'not alone'],
            default='Missing')
        print('living alone created')

    # ------------------------------------------------------------------ #
    # women file
    # ------------------------------------------------------------------ #
    def _add_disability_combined(self):
        """Add ``disability_combined``: the highest level over ``disability_cols``.

        Code 9 is set to 0 first so that it never outranks a real answer.  A
        row whose maximum is still 0 (nothing but code 9) is turned back into 9
        so that it is labelled 'No response' instead of becoming NaN.
        """
        highest = self.data_wm[self.disability_cols].replace(9, 0).max(axis=1)
        self.data_wm['disability_combined'] = highest.replace(0, 9).map(self.disability_levels)

    def _add_marital_status(self):
        """Add ``marital_status`` (numeric codes; 0 when no rule matches).

        SPSS source:
            RECODE MA1 (1=2) (9=9) INTO MaritalStatus.
            RECODE MA5 (3=1) (9=9) INTO MaritalStatus.
            RECODE MA6 (1=3) (2=4) (3=5) (9=9) INTO MaritalStatus
        """
        wm = self.data_wm
        any_no_response = (wm['MA1'] == 9) | (wm['MA5'] == 9) | (wm['MA6'] == 9)
        wm['marital_status'] = np.select(
            [wm['MA1'] == 1, wm['MA5'] == 3, wm['MA6'] == 1, wm['MA6'] == 2,
             wm['MA6'] == 3, any_no_response],
            [2, 1, 3, 4, 5, 9])

    def _add_smoker(self):
        """Add ``Smoker`` ('Smoker', 'Non-smoker', 'Missing' or NaN).

        SPSS source (a later IF overrides an earlier one):
            IF ((TA1 = 9|TA3 = 9|TA5 = 99) | (TA6 = 9|TA7 = 9|TA9=99)) TobaccoUse = 9.
            IF ((TA5 > 0 & TA5 <99) | (TA9 > 0 & TA9 <99)) TobaccoUse = 1.
            IF ((TA1 = 2|TA2 = 0|TA3 = 2|TA5 = 0) & (TA6 = 2|TA7 = 2|TA9=0)) TobaccoUse = 2.
        """
        wm = self.data_wm
        no_response = ((wm['TA1'] == 9) | (wm['TA3'] == 9) | (wm['TA5'] == 99)
                       | (wm['TA6'] == 9) | (wm['TA7'] == 9) | (wm['TA9'] == 99))
        smoker = (((wm['TA5'] > 0) & (wm['TA5'] < 99))
                  | ((wm['TA9'] > 0) & (wm['TA9'] < 99)))
        non_smoker = (((wm['TA1'] == 2) | (wm['TA2'] == 0) | (wm['TA3'] == 2) | (wm['TA5'] == 0))
                      & ((wm['TA6'] == 2) | (wm['TA7'] == 2) | (wm['TA9'] == 0)))

        # Numeric codes first, labels afterwards: mixing strings with a NaN
        # default inside np.select fails or yields the text 'nan'.
        codes = np.select([non_smoker, smoker, no_response], [2, 1, 9], default=np.nan)
        wm['Smoker'] = pd.Series(codes, index=wm.index).map(
            {1: 'Smoker', 2: 'Non-smoker', 9: 'Missing'})

    def _add_literacy(self):
        """Add ``Literacy`` ('Literate', 'Illiterate', 'Missing' or NaN).

        SPSS source (the WB6A recode comes last and therefore wins):
            RECODE WB14 (1=2) (2=2) (3=1) (4=9) (9=9) INTO Literacy.
            RECODE WB6A (2 thru 7 = 1) INTO Literacy.

        The original mixed strings with the integers 9 and 0 in one np.select,
        which does not give a usable column; code 9 is now labelled 'Missing'
        and rows matched by no rule stay NaN.
        """
        wm = self.data_wm
        codes = np.select(
            [wm['WB6A'].isin([2, 3, 4, 5, 6, 7]), wm['WB14'].isin([1, 2]),
             wm['WB14'] == 3, wm['WB14'].isin([4, 9])],
            [1, 2, 1, 9], default=np.nan)
        wm['Literacy'] = pd.Series(codes, index=wm.index).map(
            {1: 'Literate', 2: 'Illiterate', 9: 'Missing'})

    def _add_education_level(self):
        """Add ``edu_level`` (numeric codes, see ``edu_level_labels``).

        SPSS source:
            RECODE WB6A (0=2) (1=3) (2=4) (3=7) (4=5) (5=8) (6=8) (7=9) (8=12) (9=12)
                INTO EducationLevel.
            IF (WB5 = 2) EducationLevel = 1.
            IF (WB5 = 9) EducationLevel = 12.
        """
        # Kept for reference; the codes are deliberately left unlabelled, as in
        # the original (the mapping line was commented out there).
        self.edu_level_labels = {
            1: 'ISCED X', 2: 'ECD', 3: 'Primary', 4: 'Intermediate', 5: 'Secondary',
            6: 'ISCED 4', 7: 'Diploma (five years after intermediate)',
            8: 'Diploma/Bachelors degree', 9: 'Higher Education', 10: 'ISCED 8',
            11: 'Not classifiable', 12: 'DK'}

        wm = self.data_wm
        # One target per WB6A code 0..9.  The original list stopped after nine
        # entries (9 -> 12 was missing), which makes np.select raise.
        targets = [2, 3, 4, 7, 5, 8, 8, 9, 12, 12]
        from_level = np.select([wm['WB6A'] == code for code in range(10)], targets)
        # The WB5 rules come later in the SPSS syntax, so they take priority.
        wm['edu_level'] = np.select([wm['WB5'] == 2, wm['WB5'] == 9], [1, 12],
                                    default=from_level)

    def _add_skilled_birth_attendance(self):
        """Add ``Birth_Skilled_Per`` ('Yes', 'No', 'No response' or NaN).

        SPSS source (a later IF overrides an earlier one):
            IF (MN19NR = "?") BirthAttendedBySkilledPersonnels = 9.
            IF (MN19F = "F" | MN19G = "G" | MN19H = "H" | MN19X = "X" | MN19Y = "Y")
                BirthAttendedBySkilledPersonnels = 2.
            IF (MN19A = "A" | MN19B = "B" | MN19C = "C") BirthAttendedBySkilledPersonnels = 1.
        """
        wm = self.data_wm
        # The original tested MN19B == 'C' instead of MN19C == 'C'.
        skilled = (wm['MN19A'] == 'A') | (wm['MN19B'] == 'B') | (wm['MN19C'] == 'C')
        unskilled = ((wm['MN19F'] == 'F') | (wm['MN19G'] == 'G') | (wm['MN19H'] == 'H')
                     | (wm['MN19X'] == 'X') | (wm['MN19Y'] == 'Y'))
        codes = np.select([skilled, unskilled, wm['MN19NR'] == '?'], [1, 2, 9],
                          default=np.nan)
        wm['Birth_Skilled_Per'] = pd.Series(codes, index=wm.index).map(
            {1: 'Yes', 2: 'No', 9: 'No response'})

    def _add_school_attendance(self):
        """Add ``School_attendence`` ('Yes', 'No', 'No response' or NaN).

        SPSS source (a later statement overrides an earlier one):
            COMPUTE SchoolAttendance = WB9.
            IF (WB5 = 2 | WB6A = 0) SchoolAttendance = 2.
            IF (WB4>=25) SchoolAttendance = $SYSMIS.
        """
        wm = self.data_wm
        # Age rule first because it has the final say in the SPSS syntax; the
        # original also read 'WB6' where the syntax says WB6A.
        codes = np.select(
            [wm['WB4'] >= 25, (wm['WB5'] == 2) | (wm['WB6A'] == 0)],
            [np.nan, 2], default=wm['WB9'])
        # The recode above writes 2 for "not attending", so the labels must be
        # keyed 1/2 (the original {0: 'No', 1: 'Yes'} turned every 'No' into NaN).
        # NOTE(review): 9 is assumed to be WB9's no-response code - confirm.
        wm['School_attendence'] = pd.Series(codes, index=wm.index).map(
            {1: 'Yes', 2: 'No', 9: 'No response'})

    def _add_internet_use(self):
        """Add ``internet_use`` (numeric codes; 0 when no rule matches).

        SPSS source:
            RECODE MT9 (2=2) (9=9) INTO InternetUse.
            RECODE MT10 (0=2) (1 thru 3=1) (9=9) INTO InternetUse.
        """
        wm = self.data_wm
        # The original compared with np.nan (always False) where the syntax
        # recodes the value 9.
        wm['internet_use'] = np.select(
            [wm['MT9'] == 2, wm['MT9'] == 9, wm['MT10'] == 0,
             wm['MT10'].isin([1, 2, 3]), wm['MT10'] == 9],
            [2, 9, 2, 1, 9])

    def _add_modern_contraceptive(self):
        """Add ``modern_contraceptive`` ('Yes' if any of CP4A..CP4J holds a method letter)."""
        methods = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
        cols = ['CP4A', 'CP4B', 'CP4C', 'CP4D', 'CP4E', 'CP4F', 'CP4G', 'CP4H', 'CP4I', 'CP4J']
        self.modern_contraceptive_labels = {0: 'No', 1: 'Yes'}
        uses_any = self.data_wm[cols].isin(methods).any(axis=1).astype(int)
        self.data_wm['modern_contraceptive'] = uses_any.map(self.modern_contraceptive_labels)

    # ------------------------------------------------------------------ #
    # merges
    # ------------------------------------------------------------------ #
    def _merge_household_info(self):
        """Merge household and listing columns onto the women file.

        * HH -> WM on (HH1, HH2): adds HC14.
        * HH -> HL on (HH1, HH2): adds hh_size and living_alone; the result is
          stored in ``self.df_hl`` (``self.data_hl`` is left without them).
        * HL -> WM on (HH1, HH2, LN = HL1): adds HL3, HL6, hh_type, the age
          groups, hh_size and living_alone.
        Finally ``hh_rel`` is derived from HL3.
        """
        keys = ['HH1', 'HH2']
        self.data_wm = pd.merge(self.data_wm, self.data_hh[keys + ['HC14']],
                                how='left', on=keys)
        print('merge of HH with WM successful')

        self.df_hl = pd.merge(self.data_hl,
                              self.data_hh[keys + ['hh_size', 'living_alone']],
                              how='left', on=keys)
        print('merge of HH with HL successful')

        listing_cols = keys + ['HL1', 'hh_size', 'HL3', 'HL6', 'hh_type', 'living_alone',
                               'agegrp4_1', 'agegrp4_2', 'agegrp10', 'agegrp15', 'agegrp5p',
                               'agegrp15p', 'agegrp25p', 'agegrp5', 'agegrp_9', 'age_ict',
                               'agegrp18p']
        self.data_wm = pd.merge(self.data_wm, self.df_hl[listing_cols], how='left',
                                left_on=keys + ['LN'], right_on=keys + ['HL1'])
        print('merge of HL with WM successful')

        self.data_wm['hh_rel'] = np.where(self.data_wm['HL3'] == 1,
                                          'Head of household', 'Other')
        print('head of household relationship created')

    # ------------------------------------------------------------------ #
    # unmet need
    # ------------------------------------------------------------------ #
    def _recode_unmet_need(self):
        """Add unmet, tsinceb, tsincep, pregPPA, pregPPA24, wantedlast and infec.

        Port of the SPSS syntax quoted step by step below.  Everything is
        vectorised; where several rules can hit the same row the conditions are
        ordered so that the rule the SPSS code applies last is tested first.
        """
        wm = self.data_wm
        print('recoding unmet family needs')

        # recode MSTATUS (else=sysmis) into unmet.
        # do if (sysmis(unmet) & CP2=1).
        # + compute unmet=3.
        # + if (CP4A='A' or CP4B='B' or UN7=2 or UN7 = 3 or UN8N = 94) unmet=4.
        # end if.
        using = wm['CP2'] == 1
        code_4 = ((wm['CP4A'] == 'A') | (wm['CP4B'] == 'B') | (wm['UN7'] == 2)
                  | (wm['UN7'] == 3) | (wm['UN8N'] == 94))
        # The narrower rule must come first, otherwise 4 is never assigned.
        wm['unmet'] = np.select([using & code_4, using], [4, 3], default=np.nan)
        print('column unmet created')
        print(wm['unmet'].value_counts(), '\n')

        # compute tsinceb=wdoi-wdoblc.
        # do if (UN14N>=00 & UN14N<=90).
        # if (UN14U = 1) tsincep=trunc(UN14N/30).
        # if (UN14U = 2) tsincep=trunc(UN14N/4.3).
        # if (UN14U = 3) tsincep=UN14N.
        # if (UN14U = 4) tsincep=UN14N*12.
        # end if.
        wm['tsinceb'] = wm['WDOI'] - wm['WDOBLC']
        print('column tsinceb created')
        number, unit = wm['UN14N'], wm['UN14U']
        in_range = (number >= 0) & (number <= 90)
        # Rows outside the DO IF stay missing, as in SPSS; a default of 0 would
        # make the notna() checks below meaningless.
        wm['tsincep'] = np.select(
            [in_range & (unit == 1), in_range & (unit == 2),
             in_range & (unit == 3), in_range & (unit == 4)],
            [np.trunc(number / 30), np.trunc(number / 4.3), number, number * 12],
            default=np.nan)
        print('column tsincep created')
        print('for trincep')
        print(wm['tsincep'].value_counts(), '\n')
        print('for trinceb')
        print(wm['tsinceb'].value_counts(), '\n')

        # compute pregPPA=0.
        # if (CP1=1 | MN35=2) pregPPA=1.
        # do if (MN35=9).
        # + if (not sysmis(tsinceb) & not sysmis(tsincep) & tsincep>tsinceb & tsinceb<60) pregPPA=1.
        # + if (not sysmis(tsinceb) & UN14N=94 & tsinceb<60) pregPPA=1.
        # end if.
        tsinceb, tsincep = wm['tsinceb'], wm['tsincep']
        ppa = ((wm['CP1'] == 1) | (wm['MN35'] == 2)
               | ((wm['MN35'] == 9) & tsinceb.notna() & (tsinceb < 60)
                  & ((tsincep.notna() & (tsincep > tsinceb)) | (wm['UN14N'] == 94))))
        # The original filled this column with the tsincep formulas by mistake.
        wm['pregPPA'] = np.where(ppa, 1, 0)
        print('column pregPPA created')
        print(wm['pregPPA'].value_counts(), '\n')

        # compute pregPPA24=0.
        # if (CP1=1 | (pregPPA=1 & tsinceb<24)) pregPPA24=1.
        wm['pregPPA24'] = np.where(
            (wm['CP1'] == 1) | ((wm['pregPPA'] == 1) & (tsinceb < 24)), 1, 0)
        print('column pregPPA24 created')
        print(wm['pregPPA24'].value_counts(), '\n')

        # do if (CP1=1).
        # + compute wantedlast=UN2.
        # + if (UN2=2 and UN4<>9) wantedlast=UN4+1.
        # + if (UN2=2 and UN4=9) wantedlast=9.
        # else.
        # + compute wantedlast=DB2.
        # + if (DB2=2 and DB4<>9) wantedlast=DB4+1.
        # + if (DB2=2 and DB4=9) wantedlast=9.
        # end if.
        pregnant = wm['CP1'] == 1
        wanted = wm['UN2'].where(pregnant, wm['DB2'])
        timing = wm['UN4'].where(pregnant, wm['DB4'])
        # A missing timing value leaves wantedlast at 2 (an SPSS comparison
        # with a missing value is not true), hence the notna() guard.
        wm['wantedlast'] = np.select(
            [(wanted == 2) & (timing == 9), (wanted == 2) & timing.notna()],
            [9, timing + 1], default=wanted)
        print('column wantedlast created')
        print(wm['wantedlast'].value_counts(), '\n')

        # if (sysmis(unmet) & MSTATUS<>1) unmet=97 .
        wm['unmet'] = np.where(wm['unmet'].isna() & (wm['MSTATUS'] != 1), 97, wm['unmet'])

        # do if (sysmis(unmet) & CP1<>1 & pregPPA24<>1).
        # + compute infec=0.
        # + if (MSTATUS=1 & wdoi-wdom>=60 & (sysmis(tsinceb) | tsinceb>=60) & CP3<>1) infec=1 .
        # + if (UN7=3 or UN8N=94) infec=1.
        # + if (UN12B='B' or UN12D='D') infec=1.
        # + if (UN12C='C' and (sysmis(tsinceb) or tsinceb>=60)) infec=1.
        # + if (not sysmis(tsincep) & tsincep>=6 & pregPPA<>1) infec=1.
        # + if (UN14N=93) infec=1.
        # + if (UN14N=95 & (sysmis(tsinceb) | tsinceb>=60)) infec=1.
        # + if (UN14N=94 & not sysmis(tsinceb) & tsinceb>=60) infec=1.
        # + if (UN14N=94 & sysmis(tsinceb)) infec=1.
        eligible = wm['unmet'].isna() & (wm['CP1'] != 1) & (wm['pregPPA24'] != 1)
        tsinceb_missing_or_60 = tsinceb.isna() | (tsinceb >= 60)
        infec_rule = (
            ((wm['MSTATUS'] == 1) & (wm['WDOI'] - wm['WDOM'] >= 60)
             & tsinceb_missing_or_60 & (wm['CP3'] != 1))
            | (wm['UN7'] == 3) | (wm['UN8N'] == 94)
            | (wm['UN12B'] == 'B') | (wm['UN12D'] == 'D')
            | ((wm['UN12C'] == 'C') & tsinceb_missing_or_60)
            | (tsincep.notna() & (tsincep >= 6) & (wm['pregPPA'] != 1))
            | (wm['UN14N'] == 93)
            # the last two UN14N=94 rules together equal "missing or >= 60"
            | (wm['UN14N'].isin([94, 95]) & tsinceb_missing_or_60))
        # infec stays missing outside the DO IF.  The original applied the
        # wantedlast function here, so infec was a copy of wantedlast.
        wm['infec'] = np.select([eligible & infec_rule, eligible], [1, 0], default=np.nan)
        print('column infec created')
        print(wm['infec'].value_counts(), '\n')

        # (still inside the DO IF above)
        # + if (infec=1) unmet=9.
        # + do if sysmis(unmet).
        # +   if (UN7=1 & UN8U=1 & UN8N<24) unmet=7.
        # +   if (UN7=1 & UN8U=2 & UN8N<2) unmet=7.
        # +   if (UN7=1 & UN8N=93) unmet=7.
        # +   if (UN7=1 & sysmis(unmet) & UN8N<=90) unmet=1.
        # +   if (UN7=1 & UN8N=95) unmet=1.
        # +   if (UN7=1 & UN8N=98) unmet=1.
        # +   if (UN7=8) unmet=1.
        # +   if (UN7=2) unmet=2.
        # + end if.
        # These rules never overwrite one another, so first-match order is
        # equivalent here; `eligible` already implies that unmet is missing.
        un7_is_1 = eligible & (wm['UN7'] == 1)
        conditions = [
            eligible & (wm['infec'] == 1),
            un7_is_1 & (wm['UN8U'] == 1) & (wm['UN8N'] < 24),
            un7_is_1 & (wm['UN8U'] == 2) & (wm['UN8N'] < 2),
            un7_is_1 & (wm['UN8N'] == 93),
            un7_is_1 & (wm['UN8N'] <= 90),
            un7_is_1 & (wm['UN8N'] == 95),
            un7_is_1 & (wm['UN8N'] == 98),
            eligible & (wm['UN7'] == 8),
            eligible & (wm['UN7'] == 2),
        ]
        wm['unmet'] = np.select(conditions, [9, 7, 7, 7, 1, 1, 1, 1, 2],
                                default=wm['unmet'])
        print('column unmet recoded')

    # ------------------------------------------------------------------ #
    # labels
    # ------------------------------------------------------------------ #
    def _apply_value_labels(self):
        """Replace the codes of ``other_cols`` in ``data_wm`` by value labels.

        Labels are looked up in the hh, then wm, then hl metadata.  Columns
        that are absent from ``data_wm`` or have no value labels are reported
        and skipped instead of raising ``KeyError``.
        """
        sources = [(self.col_vals_hh, 'meta hh'),
                   (self.col_vals_wm, 'meta women'),
                   (self.col_vals_hl, 'meta hhl')]
        for col in self.other_cols:
            if col in self.data_wm.columns:
                for labels, origin in sources:
                    if col in labels:
                        self.data_wm[col] = self.data_wm[col].map(labels[col])
                        print(f'{col} codes are translated from {origin}')
                        break
                else:
                    print(f'!!! WARNING !!! {col} codes were not translated')
            else:
                print(f'!!! WARNING !!! {col} codes were not translated')
                

In [5]:
# Load the hh / wm / hl files; `p` is the shared instance that later cells use.
p = mics_iraq()

In [None]:
# Derive all variables and merges on the shared instance (mutates `p` in place).
p.process_data()

In [5]:
# Small scratch frame: three (a, b) groups of two rows each, c numbers the rows.
t = pd.DataFrame({
    'a': [key for key in (1, 2, 3) for _ in range(2)],
    'b': [key for key in (11, 22, 33) for _ in range(2)],
    'c': list(range(1, 7)),
})
t

Unnamed: 0,a,b,c
0,1,11,1
1,1,11,2
2,2,22,3
3,2,22,4
4,3,33,5
5,3,33,6


In [58]:
class crosstab(mics_iraq):

    def __init__(self, source=None, min_age=18):
        """Set up the crosstab helper from an already-loaded data object.

        Parameters
        ----------
        source : object, optional
            Object exposing ``data_wm``, ``data_hh`` and ``data_hl`` frames.
            When omitted, the notebook-level global ``p`` is used, which is
            what this constructor did unconditionally before this parameter
            existed.
        min_age : int, optional
            Inclusive lower bound applied to the ``HL6`` column when selecting
            rows of ``data_wm``. Defaults to 18, the value that used to be
            hard-coded here.

        Raises
        ------
        NameError
            If ``source`` is omitted and no global ``p`` has been defined.
        """
        # Resolve the donor object first so that a missing global fails
        # before the parent constructor re-reads the .sav files.
        if source is None:
            source=p

        # The parent constructor reads the .sav files and sets the metadata
        # attributes (col_names_*, col_vals_*, disability_cols, dis_names, ...)
        # that generate_xtabs relies on; its freshly read frames are replaced
        # below by the frames taken from `source`.
        super().__init__()

        # Independent copy of the age-filtered rows, so later edits do not
        # touch the frame held by `source`.
        keep=source.data_wm['HL6']>=min_age
        self.data_wm=source.data_wm[keep].copy()

        # These two are shared references, not copies.
        self.data_hh=source.data_hh
        self.data_hl=source.data_hl
        

    def generate_xtabs(self):
        os.chdir('C:/Users/511232/Desktop/MICS/Crosstabs/iraq_xtabs')
        
        try:
            '''Table 1:Total population, by sex, age, location and disability status
            'disability_combined' column is calculated by taking the max(code) among ['AF6','AF8','AF9','AF10','AF11','AF12']
            '''
            print('generating Table 1')
            df=self.data_wm.copy()
            df_total=[]
            for col in ['WAGE', 'agegrp18p']:
                xtab=pd.crosstab([df['HH6'],df['disability_combined']],df[col],
                rownames=['Area','Disability level'],colnames=[col], values=df['wmweight'], aggfunc='sum',dropna=False)
                df_total.append(xtab)
            T=pd.concat(df_total, axis=1)     
            T.to_excel('Table 1 xtab_all_dis_ByAge.xlsx')
            print('Table 1 generated and saved')
            ###########################################################

            '''Table 2:Persons with disabilities, by type of disability,  sex, age and location
            -generate separate xtabs for all disability_cols
            -stack() them to have a multiindex series and add them to a generator
            -concatenate the generator items
            -stack() and unstack() to get to the final result 
            '''
            print('generating Table 2')
            df=self.data_wm.copy()
            df_total=[]
            def xtab():
                """Fill the enclosing ``df_total`` list, one Series per disability column.

                For every column in ``self.disability_cols`` a weighted crosstab
                ('wmweight' summed) of area and mapped difficulty level against
                'WAGE' is built, stacked into a Series, named with the matching
                ``self.dis_names`` label and appended to ``df_total``.
                This is a plain function, not a generator: it returns None.
                """
                for dis_col in self.disability_cols:
                    print(f'processing column {dis_col}')
                    # numeric codes -> readable difficulty labels for the row index
                    level_labels=df[dis_col].map(self.disability_levels)
                    table=pd.crosstab(
                        [df['HH6'],level_labels],
                        df['WAGE'],
                        rownames=['Area','Level'],
                        colnames=['Age'],
                        values=df['wmweight'],
                        aggfunc='sum',
                        dropna=False,
                    )
                    # move the 'Age' columns into the index -> one Series per disability
                    stacked=table.stack()
                    stacked.name=self.dis_names[dis_col]
                    df_total.append(stacked)

            #concatenating the series in the resulting generator
            xtab()
            t=pd.concat(df_total, axis=1)
            t['All_disabilities']=t.sum(axis=1)
            #reshape the result
            T=t.stack().unstack([3,2]).sort_index(axis=1, level=0)
            T.to_excel('Table 2 separate disabilites.xlsx')
            print('Table 2 generated and saved')

            ###############################################################
            '''Table 3: Persons with disabilities, by cause of disability, sex and location
            rows: cause of disability ??????????????????????
            columns: disability, HH6 area
            '''
            print('WARNING !!! Table 3 cause of disability not found')
            df=self.data_wm.copy()

            ###############################################################
            '''Table 4 (VERIFIED): Persons with multi-dimensional disability, by number of functional disability domains, sex and location
            -calculate domain_num by summing the True over the array of disability_cols values
            if the array contains codes (3-a lot of difficulty) or (4-cannot at all) it will result as True
            '''
            print('generating Table 4')
            df=self.data_wm.copy()
            df['domain_num']=df[self.disability_cols].apply(lambda x: sum(x.isin([3,4])), axis=1)
            #generate xtab
            r=pd.crosstab([df['HH6'],df['disability_combined']],df['domain_num'],\
                rownames=['Area','Disability'],colnames=['Number of domains'], values=df['wmweight'], aggfunc='sum', dropna=False)
            
            r.to_excel('Table 4 Number_dis_domain.xlsx')
            print('Table 4 generated and saved')

            #################################################################
            '''Table 5 (VERIFIED) marital status: Population (15 years and older) marital status, by sex, age, location and disability status'''
            print('generating Table 5')
            df=self.data_wm.copy()
                       
            xtab=pd.crosstab([df['HH6'],df['marital_status'],df['MA6'],df['disability_combined']],df['WAGE'],
            rownames=['Area','Marital status','Current marital status','Disability level'],
            colnames=['Age'], values=df['wmweight'],aggfunc='sum',dropna=False)      

            xtab.to_excel('Table 5 MaritalStatus.xlsx')
            print('Table 5 generated and saved')

            ###############################################################
            # '''Table 6 head_HH: Head of household living below the national poverty line and by wealth quintile, 
            # sex of head of household, location and disability  status
            # 1-disability against head of household and othery type of relationship
            # -create head of household relationship (in the self_data_wm() )
            # df['hh_rel']=np.where(df['HL3']==1,1,2) where 1:HH 2:Other 
            # 2-disability by head of households by wealth quintiles
            # -will generate crosstab among disabled HH with wealth quintiles 
            # using windex and not specific windex5u for urban and windex5r for rural since
            # they differ from windex5 and will produce contradictory results between urban and rural xtabs
            # and the urban and rural disaggregation in the xtab for the total: camp/urban/rural
            # '''

            # #crosstab 1
            # print('generating Table 6.1')
            # df=self.data_wm.copy()
            # xtab=pd.crosstab([df['HH6'],df['disability_combined']],df['hh_rel'],
            # rownames=['Area','Disability level'],colnames=['HH relationship'], values=df['wmweight'],
            # aggfunc='sum',dropna=False)
            # xtab.to_excel('Table 6.1 head of HH.xlsx')
            # print('Table 6.1 generated and saved')
            
            # #crosstab 2
            # #filter out the HH
            # print('generating Table 6.2')
            # df_hh_only=df[df['hh_rel']=='Head of household']
            # xtab=pd.crosstab([df_hh_only['HH6'],df_hh_only['disability_combined']],df_hh_only['windex5'],
            # rownames=['Area','Disability level'],colnames=['wealth quintile'], values=df_hh_only['wmweight'],
            # aggfunc='sum',dropna=False)
            # xtab.to_excel('Table 6.2 head of HH_with wquintile.xlsx')
            # print('Table 6.2 generated and saved')

            ############################################################################
            '''Table 7 (VERIFIED) Poorest_type: Poorest persons with disabilities, by type of disability, sex and location
            -filter out the poorest quintile 'windex5' and crosstab with all disability types
            -loop over disability_cols and create crosstabs then stack to end up with multiindex series
            -put them in a generator and concatenate the generator items
            '''
            print('generating Table 7')
            df=self.data_wm.copy()
            #filter out the poorest
            df_poorest=df[df['windex5']=='Poorest'].copy()
            df_total=[]
            for col in self.disability_cols:
                print(f'selfing column {col}')
                r=pd.crosstab([df_poorest['HH6']],df_poorest[col].map(self.disability_levels),\
                    rownames=['Area'],colnames=['Disability level'], 
                    values=df_poorest['wmweight'], aggfunc='sum',dropna=False).stack()
                r.name=self.dis_names[col]
                df_total.append(r)

            #concatenating the series in the resulting generator
            t=pd.concat(df_total, axis=1)
            t['All_disabilities']=t.sum(axis=1)
            t.to_excel('Table 7 poorest_type.xlsx')
            print('Table 7 generated and saved')

            ###########################################################
            '''Table 8 HH_type&size:
            Households with one or more persons with disabilities (18 years and older), by location and type and size of household
            -data_wm will be filtered according to (age>=18 & disability_combined==3,4) 
            -get the 'HH1','HH2' of the resulting dataframe as a list by zipping both columns
            -filter data resulting from  self_data_wm() on the tuple ('HH1','HH2')

            steps for calculating type of household hh_type (in hl dataframe):
            -grouby hl by ['HH1','HH2']
            -if HL3 isin (1 head,2 spouse/partner,3 son/daughter,13 adopted son daughter)
            if ALL TRUE then code hh_type as 1 Nuclear
            -if HL3 isin (1 head,2 spouse/partner,3 son/daughter,13 adopted son daughter,
            4 son /daughter in law, 5 grnachild, 6 parent, 7 parentin law, 8 brother/sister,
            9, brother/sis in law, 10 uncle/aunt, 11 nephew/niece, 12 other)
            if ALL TRUE then code hh_type as 2 Extended
            if HL3 isin (1 head,2 spouse/partner,3 son/daughter,13 adopted son daughter,
            4 son /daughter in law, 5 grandchild, 6 parent, 7 parent in law, 8 brother/sister,
            9, brother/sis in law, 10 uncle/aunt, 11 nephew/niece, 12 other, 14 servant, 96 other, 98 dont know)
            if ALL TRUE then code hh_type as 3 composite
            WARNING: there is no way to distinguish two nuclear families in a single household from one
            since for example a HH might have 2 spouses or more 
            -data_wm with merge hl left_on=['HH1','HH2','LN'], right_on=['HH1','HH2','HL1'])
            to get the hh_type variable
            -perform corsstab
            '''
            # print('generating Table 8')
            # df=self.data_wm.copy()
            # #criteria 1 for being disabled, and criteria 2 for being >=18
            # criteria1=((df['disability_combined']=='Cannot do at all')|(df['disability_combined']=='A lot of difficulty'))
            # criteria2=(df['HL6']>=18)

            # #filter according to criteria1 & criteria2
            # df_filtered=df.loc[criteria1 & criteria2, ['HH1','HH2']].drop_duplicates()
            # # filter df_w according to resulting ['HH1','HH2']
            # hhd_filter=pd.Series(zip(df['HH1'],df['HH2'])).isin(list(zip(df_filtered['HH1'],df_filtered['HH2'])))
            # #filter according to tuple ('HH1','HH2')
            # df_wm_filtered=df[hhd_filter]
            
            # #generate the crosstab
            # xtab=pd.crosstab([df_wm_filtered['HH6'],df_wm_filtered['hh_type']],
            # [df_wm_filtered['disability_combined']],
            # rownames=['Area','Household type'],colnames=['Disability level'], values=df_wm_filtered['wmweight'],
            # aggfunc='sum',dropna=False)
            # xtab.to_excel('Table 8.1 hh_type_size1.xlsx')
            # print('Table 8.1 generated and saved')

            # xtab=pd.crosstab([df_wm_filtered['HH6'],df_wm_filtered['hh_type']],df_wm_filtered['hh_size'],
            # rownames=['Area','Household type'],colnames=['Household size'], values=df_wm_filtered['wmweight'],
            # aggfunc='sum',dropna=False)
            # xtab.to_excel('Table 8.2 hh_type_size2.xlsx')
            # print('Table 8.2 generated and saved')

            ########################################
            '''Table 9 living_type_age:
            Persons with disabilities living in household or in institution, by type of disability, sex, age and location
            rows: location(HH6), living_alone, hhd_inst (living alone, living with a family in hhd, living in institution)
            note: Palestine doesnt have a question for place of hh whether institution or not so in this case
            the hh_size is being used as alone versus not alone), disability (filter on disabled), disability combined
            columns: separate disabilities (that is on ['AF6','AF8','AF9','AF10','AF11','AF12']), agegroups (WAGE)'''
            
            df=self.data_wm.copy()
            #criteria 1 for being disabled, and criteria 2 for being >=18
            criteria=((df['disability_combined']=='Cannot do at all')|(df['disability_combined']=='A lot of difficulty'))

            #filter according to criteria1 & criteria2
            df_filtered=df[criteria].copy()
            df_total=[]

            for col in self.disability_cols:
                df_filtered[col]=df_filtered[col].map(self.disability_levels)
                print(f'processing column {col}')
                #generate the crosstab
                r=pd.crosstab([df_filtered['HH6'],df_filtered['living_alone'],
                df_filtered[col]],[df_filtered['agegrp_9']],
                rownames=['Area','Living alone','Disability level'],colnames=['Age group'], values=df_filtered['wmweight'],
                aggfunc='sum',dropna=False).stack()
                r.name=self.dis_names[col]
                df_total.append(r)

            #concatenating the series in the resulting generator
            t=pd.concat(df_total, axis=1)

            #reshape the result
            T=t.unstack([3]).sort_index(axis=1, level=0)
            T.to_excel('Table 9 living_type_age.xlsx')
            print('Table 9 generated and saved')

            ####################################################
            '''Table 10 (VERIFIED) house_ownership: 
            House ownership of population (18 years and older) living alone, by sex, location and disability status
            filter data_wm on 'living_alone' and HL6>=18
            Rows: location (HH6), disability (disability), disability combined
            columns: house ownership (HC14: {1.0: 'OWN', 2.0: 'RENT', 6.0: 'OTHER', 9.0: 'NO RESPONSE'})'''
            
            print('generating Table 10')
            df=self.data_wm.copy()

            criteria=((df['HL6']>=18)&(df['living_alone']=='alone'))
            #filter according to criteria1 & criteria2
            df_filtered=df[criteria].copy()

            #crosstab
            r=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
            [df_filtered['HC14']],
            rownames=['Area','Disability level'],colnames=['house ownership'], values=df_filtered['wmweight'],
            aggfunc='sum',dropna=False)
            r['Total 18+']=r.sum(axis=1)

            r.to_excel('Table 10 house_ownership.xlsx')
            print('Table 10 generated and saved')

            #######################################################
            '''Table 11: Education attainment of population (5 years and older), by sex, age, location and disability status 
            recode HL6 to age_5, to age_15, to age_25
            recode HL6 into age_bounded 15-29, 30-44, 45-64, 65+
            filter data_wm by HL6>=5
            rows: location (HH6), disability (disability), disability combined
            columns: Edu attainment (WB6A), agegroups
            '''
            print('generating Table 11')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=5].copy()

            def xtab():
                """Lazily produce one stacked Series per age-group column.

                Each Series is the weighted crosstab ('wmweight' summed) of area
                and combined disability level against education level and the
                age-group column, with both column levels stacked into the index.
                It is named after the age-group column it was built from.
                """
                age_columns=['agegrp5p','agegrp15p','agegrp25p','agegrp15']
                for age_col in age_columns:
                    row_keys=[df_filtered['HH6'],df_filtered['disability_combined']]
                    col_keys=[df_filtered['edu_level'],df_filtered[age_col]]
                    table=pd.crosstab(
                        row_keys,
                        col_keys,
                        rownames=['Area','Disability level'],
                        colnames=['Edu attainment','Age'],
                        values=df_filtered['wmweight'],
                        aggfunc='sum',
                        dropna=False,
                    )
                    # first stack() moves 'Age' into the index, the second moves 'Edu attainment'
                    stacked=table.stack().stack()
                    stacked.name=age_col
                    yield stacked

            #concatenating the series in the resulting generator
            s=xtab()
            t=pd.concat(s, axis=1)

            #reshape the result
            T=t.unstack([3,2]).sort_index(axis=1, level=0)
            T.to_excel('Table 11 living_type_age.xlsx')
            print('Table 11 generated and saved')
        
            #######################################################
            '''Table 12: Education attainment of persons with disabilities (5 years and older), by type of disability, sex, age and location
            filter on age 'HL6']>=5
            rows: ['Area' (HH6),'Disability','Disability level']
            cols: ['Edu attainment' (WB6A),'Age']
            create a new column = disability_cols values
            loop over ['agegrp5p','agegrp15p','agegrp25p'] and yield the result
            loop over disability_cols=['AF6','AF8','AF9','AF10','AF11','AF12'] and yield the result in a generator
            concat all over axis=0
            '''
            print('generating Table 12')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=5].copy()

            df_total=[]
            for dis in self.disability_cols:
                #map the lables to the values
                df_filtered[dis]=df_filtered[dis].map(self.disability_levels)

                df_ages=[]
                for age in ['agegrp5p','agegrp15p','agegrp25p']:

                    r=pd.crosstab([df_filtered['HH6'],df_filtered[dis]],
                    [df_filtered['edu_level'],df_filtered[age]],
                    rownames=['Area','Disability level'],colnames=['Edu attainment','Age'], values=df_filtered['wmweight'],
                    aggfunc='sum',dropna=True)
                    #add the disability type as a column
                    r['disability type']=self.dis_names[dis]
                    df_ages.append(r)

                #concatenate on axis=0   
                t=pd.concat(df_ages, axis=0)
                df_total.append(t)

            T=pd.concat(df_total, axis=0)
            #bring the disability type column to the front
            newcols_list=list(T.columns)
            newcols=[newcols_list[-1]]+newcols_list[:-1]
            T=T[newcols]
            T.to_excel('Table 12 EducationAttainment_type.xlsx')

            ################################################################
            '''Table 13: School attendance of population (5 years and older), by sex, age, location and disability status
            filter on age HL6>=5
            rows:['Area' (HH6),School_attendence,'Disability','disability_combined']
            columns:agegrp4_1[5-9,10-14,15-19,20-24,25-29,30+]
            '''
            print('generating Table 13')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=5].copy()

            r=pd.crosstab([df_filtered['HH6'],df_filtered['School_attendence'],df_filtered['disability_combined']],
            [df_filtered['agegrp4_1']],
            rownames=['Area','school attendance','disability_combined'],
            colnames=['Age'], values=df_filtered['wmweight'],
            aggfunc='sum',dropna=False)

            r.to_excel('Table 13 school attendance.xlsx')

            ##############################################################
            '''Table 14 School attendance of persons with disabilities (5 years and older), by type of disability, sex, age and location 
            filter on age HL6>=5
            rows:['Area' (HH6),School_attendence,'Disability','disability_combined']
            columns: disability types, by agegrp4_2 [5-9,10-14,15-19,20-24,25-64]
            loop over disability types ['AF6','AF8','AF9','AF10','AF11','AF12'] to produce xtab with that specific disability type
            then concatenate into a single dataframe
            '''

            print('generating Table 14')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=5].copy()

            df_total=[]
            for dis in self.disability_cols:

                r=pd.crosstab([df_filtered['HH6'],df_filtered['School_attendence'],df_filtered['disability_combined']],
                [df_filtered['agegrp4_2']],
                rownames=['Area','school attendance','Disability level'],colnames=['age'], values=df_filtered['wmweight'],
                aggfunc='sum',dropna=False)
                #add the disability type as a column
                r['disability type']=self.dis_names[dis]
                df_total.append(r)

            #concatenate on axis=0   
            T=pd.concat(df_total, axis=0)
            #append 'disability type' to the row index
            T.set_index('disability type', append=True, inplace=True)
            #make 'disbaility type' a column on top of age
            T=T.unstack(3)
            #swapping the order of column levels between age and disability type
            T.columns=T.columns.swaplevel(0,1)
            #sorting the disability type index
            T=T.sort_index(axis=1, level=0)
            T.to_excel('Table 14 SchoolAttendance_type.xlsx')

            ######################################################################
            '''Table 15: 
            Reasons for not going to/ drop out school for population (5 years and older), by sex, location and disability status
            filter on age HL6>=5 
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: Reasons for not going to school ???????????????????????????
            '''
            print('WARNING !!! Table 15 Reasons for not going to school not found')

            ######################################################################
            
            '''Table 16: Literacy status for population (15 years and older), by sex, age, location and disability status
            filter on age HL6>=15
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: WB14 (can read a part of a sentence), agegrp10 [15-24, 25-64, 65+] and another with 15+ to be concatenated
            '''
            print('generating Table 16')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=15].copy()

            df_total=[]
            for age in ['agegrp10','agegrp15p']:
                r=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
                [df_filtered['Literacy'],df_filtered[age]],
                rownames=['Area','Disability level'],colnames=['Can read a part of a sentence','age'], 
                values=df_filtered['wmweight'],aggfunc='sum',dropna=False)
                df_total.append(r)
            #concatenate the dataframes
            T=pd.concat(df_total, axis=1)
            T.to_excel('Table 16 Literacy.xlsx')

            ########################################################################

            '''Table 17: Literacy status for persons with disabilities (15 years and older), by type of disability, sex, age and location
            filter on age HL6>=15
            rows: ['Area' (HH6),'Disability type','disability_combined'] loop over disability types and concatenate on axis=0
            columns: WB14 (can read a part of a sentence), agegrp10 [15-24, 25-64, 65+] and another with 15+ to be concatenated 
            '''
            print('generating Table 17')
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=15].copy()
            df_total=[]

            for dis in self.disability_cols:
                #map the lables to the values
                df_filtered[dis]=df_filtered[dis].map(self.disability_levels)

                df_ages=[]
                for age in ['agegrp10','agegrp15p']:

                    r=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
                    [df_filtered['WB14'],df_filtered[age]],
                    rownames=['Area','Disability level'],colnames=['Can read a part of a sentence','Age'],
                    values=df_filtered['wmweight'],aggfunc='sum',dropna=True)
                    #add the disability type as a column
                    r['disability type']=self.dis_names[dis]
                    df_ages.append(r)

                #concatenate on axis=0   
                t=pd.concat(df_ages, axis=0)
                df_total.append(t)

            T=pd.concat(df_total, axis=0)
            #bring the disability type column to the front
            newcols_list=list(T.columns)
            newcols=[newcols_list[-1]]+newcols_list[:-1]
            T=T[newcols]
            T.to_excel('Table 17 Literacy_type_age.xlsx')

            #######################################################################

            '''Table 18: Household education expenditures, by location and disability status
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: household education expenditure ????????
            '''
            print('generating Table 18')
            df=self.data_wm.copy()
            print('WARNING !!! Table 18 household education expenditure not found')

            #######################################################################

            '''Table 19: Activity status of population (15 years and older), by sex, age, location and disability status
            filter on age HL6>=15
            rows: ['Area' (HH6),'Current activity status' ????????????? ,'Disability','disability_combined']
            columns: agegrp5
            '''

            ########################################################################
            
            '''Table 32: (VERIFIED) Population with mobile phones and use internet, by sex, age, location and disability status
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: Own mobile phone ['MT11', age_ict] & ['MT10': 'Internet usage in the last 3 months' , age_ict]
            ['MT12': 'mobile usage in the last 3 months' , age_ict] concatenate both tables
            '''

            print('generating Table 32')
            df=self.data_wm.copy()
            df_total=[]

            multiindex_cols=[]

            for col in ['MT11','internet_use','MT12']:
                #add labels to values
                if col!='internet_use':
                    df[col]=df[col].map(self.col_vals_wm[col])
                else:
                    pass
                
                r=pd.crosstab([df['HH6'],df['disability_combined']],
                [df[col],df['age_ict']],
                rownames=['Area','Disability level'],colnames=[self.col_names_wm[col],'age'], 
                values=df['wmweight'],aggfunc='sum',dropna=False)
                df_total.append(r)
                #create the multiindex col and append to multiindex_cols
                idx=[]
                for i in r.columns:
                    l=list(i)
                    l.insert(0,self.col_names_wm[col])
                    multiindex_cols.append(tuple(l))
                
            #concatenate the dataframes
            T=pd.concat(df_total, axis=1)
            cols=pd.MultiIndex.from_tuples(multiindex_cols)
            #restructure the columns
            T.columns=cols
                
            #concatenate the dataframes
            T=pd.concat(df_total, axis=1)
            T.to_excel('Table 32 ICT.xlsx')
            #############################################################################

            '''Table 33:Persons with disabilities with mobile phones, by type of disability, sex, age and location
            filter on 'MT12': 'usage of mobile during last 3 months' {1.0: 'rarely yes', 2:'at least once a week','3':almost everyday'}
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: loop over disability_cols=['AF6','AF8','AF9','AF10','AF11','AF12']
            '''

            print('generating Table 33')
            df=self.data_wm.copy()
            df_filtered=df[(df['MT12']==1) | (df['MT12']==2) | (df['MT12']==3)].copy()
            df_filtered['MT12']=df_filtered['MT12'].map(self.col_vals_wm['MT12'])
            df_total=[]

            for col in self.disability_cols:
                #map the lables to the values
                df_filtered[col]=df_filtered[col].map(self.disability_levels)

                r=pd.crosstab([df_filtered['HH6']],
                [df_filtered[col]],
                rownames=['Area'],colnames=['Disability type'], 
                values=df_filtered['wmweight'],aggfunc='sum',dropna=False).stack()
                r.name=self.dis_names[col]
                df_total.append(r)

            #concatenate the dataframes
            T=pd.concat(df_total, axis=1)
            T.to_excel('Table 33 mobilephine_type.xlsx')

            ###############################################################

            '''Table 34:Population receiving social grants/ benefits/ health insurance, by sex, location and disability status
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: Disability benefits??????????????, Social assistance cash benefits?????????????
            '''

            print('generating Table 34')
            df=self.data_wm.copy()
            print('WARNING !!! Table 34 Disability benefits, Social assistance cash benefits, not found')

            ###############################################################
            
            '''Table 35:(VERIFIED) Population (15 years and older) who currently use any tobacco product on a 
            daily or non-daily basis, by sex, age, location and disability status
            filter on age HL6>=15 & 'TA3': 'Currently smoking cigarettes'{1.0: 'YES', 2.0: 'NO', 9.0: 'NO RESPONSE'}
            * Tobacco use.
            IF ((TA1 = 9 | TA3 = 9 | TA5 = 99) | (TA6 = 9 | TA7 = 9|TA9=99) | (TA10 = 9 | TA11 = 9 | TA13 = 99)) TobaccoUse = missing.
            IF ((TA1 = 2 | TA2 = 0 | TA3 = 2 | TA5 = 0) & (TA6 = 2 | TA7 = 2 | TA9=0) & (TA10 = 2|TA11 = 2 | TA13 = 0)) TobaccoUse = non-smoker.
            IF ((TA5 > 0 & TA5 <99) | (TA9 > 0 & TA9 <99) | (TA13 > 0 & TA13 <99)) TobaccoUse = smoker.
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: 'TA3': 'Currently smoking cigarettes' with agegrp15p, agegrp10
            '''
            print('generating Table 35')
            df=self.data_wm.copy()
            df=self.data_wm.copy()
            df_filtered=df[df['HL6']>=15].copy()
            df_total=[]

            for age in ['agegrp15p','agegrp10']:
                #for the xtab with smoking status
                r1=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
                [df_filtered['Smoker'],df_filtered[age]],
                rownames=['Area','Disability level'],colnames=['Use of any tobacco product','Disability type'], 
                values=df_filtered['wmweight'],aggfunc='sum',dropna=False)
                df_total.append(r1)

            # concatenate the dataframes
            T=pd.concat(df_total, axis=1)
            T.sort_index(axis=1, level=1)
            T.to_excel('Table 35 Tobacco_use.xlsx')

            #############################################################
            
            '''Table 36:Women of reproductive age (15-49 years) who have their need for family 
            planning satisfied with modern methods, by location and disability status
            filter on age HL6>=15 & HL6<=49
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: 'CP2': 'Currently using a method to avoid pregnancy'

            modern methods
            'CP4A': {'?': 'NO RESPONSE', 'A': 'FEMALE STERILIZATION'},
            'CP4B': {'?': 'NO RESPONSE', 'B': 'MALE STERILIZATION'},
            'CP4C': {'?': 'NO RESPONSE', 'C': 'IUD'},
            'CP4D': {'?': 'NO RESPONSE', 'D': 'INJECTABLES'},
            'CP4E': {'?': 'NO RESPONSE', 'E': 'IMPLANTS'},
            'CP4F': {'?': 'NO RESPONSE', 'F': 'PILL'},
            'CP4G': {'?': 'NO RESPONSE', 'G': 'MALE CONDOM'},
            'CP4H': {'?': 'NO RESPONSE', 'H': 'FEMALE CONDOM'},
            'CP4I': {'?': 'NO RESPONSE', 'I': 'DIAPHRAGM'},
            'CP4J': {'?': 'NO RESPONSE', 'J': 'FOAM / JELLY'}
            
            traditional methods:
            'CP4K': {'?': 'NO RESPONSE', 'K': 'LACTATIONAL AMENORRHOEA METHOD (LAM)'},
            'CP4L': {'?': 'NO RESPONSE', 'L': 'PERIODIC ABSTINENCE / RHYTHM'},
            'CP4M': {'?': 'NO RESPONSE', 'M': 'WITHDRAWAL'},
            'CP4X': {'?': 'NO RESPONSE', 'X': 'OTHER'}
            '''

            print('generating Table 36')
            df=self.data_wm.copy()
            cond=(df['HL6']>=15) & (df['HL6']<=49)
            df_filtered=df[cond].copy()

            df_total=[]
            r1=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
            [df_filtered['CP2']],
            rownames=['Area','Disability level'],colnames=[self.col_names_wm['CP2']], 
            values=df_filtered['wmweight'],aggfunc='sum',dropna=False)
            #create a multiindex column
            c1=[self.col_names_wm['CP2']]
            c2=list(r1.columns)
            idx=pd.MultiIndex.from_product([c1,c2])
            r1.columns=idx
            df_total.append(r1)

            #filter the currently married and not pregnant or dont know
            cond=((df['MA1']==1) & ((df['CP1']==2) | (df['CP1']==8)))
            df_filtered=df[cond].copy()

            r2=pd.crosstab([df_filtered['HH6'],df_filtered['disability_combined']],
            [df_filtered['modern_contraceptive']],
            rownames=['Area','Disability level'],colnames=['Modern contraceptive method'], 
            values=df_filtered['wmweight'],aggfunc='sum',dropna=False)
            c1=['modern_contraceptive']
            c2=list(r2.columns)
            idx=pd.MultiIndex.from_product([c1,c2])
            r2.columns=idx
            df_total.append(r2)

            T=pd.concat(df_total, axis=1)
            T.to_excel('Table 36 family plan.xlsx')

            ###############################################################

            '''Table 37:Births attended by skilled health personnel, by sex, location and disability status
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: 'CM17': 'Live births in last two years'
            'MN19A': 'Assistance at delivery: Doctor',
            'MN19B': 'Assistance at delivery: Nurse / Midwife',
            'MN19H': 'Assistance at delivery: Relative / Friend',
            'MN19X': 'Assistance at delivery: Other',
            'MN19Y': 'Assistance at delivery: No one',
            'MN19NR': 'Assistance at delivery: No response'
            '''

            print('generating Table 37')
            df=self.data_wm.copy()
            
            r=pd.crosstab([df['HH6'],df['disability_combined']],
            [df['Birth_Skilled_Per']],
            rownames=['Area','Disability level'],colnames=['Brith attended by skilled pers'],
            values=df['wmweight'],aggfunc='sum',dropna=False)

            r.to_excel('Table 37 skilled health personnel.xlsx')

            ################################################################

            '''Table 38: Women (15-49 years) who make their own informed decisions regarding sexual relations,
            contraceptive use and reproductive health care, by location and disability status
            
            xtab 1
            filter on age HL6>=15 & HL6<=49
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: 'MA1': 'Currently married or living with a man'

            xtab 2
            filter on age (HL6>=15 & HL6<=49) and 'MA1': {1.0: 'YES, CURRENTLY MARRIED'}
            rows: ['Area' (HH6),'Disability','disability_combined']
            columns: could not find the below
            "for whom decision on health care for themselves is not usually made by the husband/partner or someone else
            " for whom the decision on contraception is not mainly made by the husband/partner
            "who can say no to sex
            '''

            print('generating Table 38')
            df=self.data_wm.copy()
            cond=(df['HL6']>=15) & (df['HL6']<=49)
            df_filtered=df[cond].copy()
            print('WARNING !!! Table 38 related columns, not found')

            ##################################################################

            '''Table 39: Population with large household expenditures on health, by sex, location and disability status
            rows: ['Area' (HH6),'Disability','disability_combined']
            '''

            print('generating Table 39')
            df=self.data_wm.copy()
            cond=(df['HL6']>=15) & (df['HL6']<=49)
            df_filtered=df[cond].copy()
            print('WARNING !!! Table 39 household expenditure or income, not found')


        except Exception as e:
            raise(e)

In [59]:
# Instantiate the crosstab class and run its table generation. generate_xtabs
# prints a "generating Table N" progress line for each table and writes the
# results to Excel workbooks using relative file names.
xtab=crosstab()
xtab.generate_xtabs()

generating Table 1
Table 1 generated and saved
generating Table 2
processing column AF6
processing column AF8
processing column AF9
processing column AF10
processing column AF11
processing column AF12
Table 2 generated and saved
generating Table 4
Table 4 generated and saved
generating Table 5
Table 5 generated and saved
generating Table 7
selfing column AF6
selfing column AF8
selfing column AF9
selfing column AF10
selfing column AF11
selfing column AF12
Table 7 generated and saved
processing column AF6
processing column AF8
processing column AF9
processing column AF10
processing column AF11
processing column AF12
Table 9 generated and saved
generating Table 10
Table 10 generated and saved
generating Table 11
Table 11 generated and saved
generating Table 12
generating Table 13
generating Table 14
generating Table 16
generating Table 17
generating Table 18
generating Table 32
generating Table 33
generating Table 34
generating Table 35
generating Table 36
generating Table 37
generating T

In [56]:
# Weighted crosstabs of MT11, MT10 and MT12 (each split by 'age_ict') against
# area (HH6) and combined disability level, joined side by side with the
# question label added as an extra top column level.
# NOTE(review): `p` is not defined in this cell; presumably the mics_iraq
# instance created in an earlier cell -- confirm before Restart & Run All.
df=p.data_wm.copy()
df_total=[]

multiindex_cols=[]

for col in ['MT11','MT10','MT12']:
    question_label=p.col_names_wm[col]
    #add labels to values
    df[col]=df[col].map(p.col_vals_wm[col])

    r=pd.crosstab([df['HH6'],df['disability_combined']],
    [df[col],df['age_ict']],
    rownames=['Area','Disability level'],colnames=[question_label,'age'],
    values=df['wmweight'],aggfunc='sum',dropna=False)
    df_total.append(r)
    #prefix every (answer, age) column key with the question label so the
    #concatenated table gets a three-level column header
    multiindex_cols.extend((question_label,*key) for key in r.columns)

#concatenate the dataframes
T=pd.concat(df_total, axis=1)
cols=pd.MultiIndex.from_tuples(multiindex_cols)
#restructure the columns
#(the original cell concatenated a second time after this assignment, which
#silently threw the relabelled columns away; that duplicate step is removed)
T.columns=cols