In [42]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pyreadstat
import os

In [43]:
# Show every column when a wide DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Work from the microdata folder so the relative .sav names below resolve.
os.chdir('C:/Users/511232/Desktop/MICS/microdata')
# List the SPSS files in that folder. Match on the file extension: the previous
# substring test ('sav' in f) would also list names such as 'saved_notes.txt'.
[f for f in os.listdir() if f.lower().endswith('.sav')]

['bh.sav', 'ch.sav', 'fs.sav', 'hh.sav', 'hl.sav', 'wm.sav']

In [44]:
# Read the household (hh), women (wm) and household-listing (hl) files with raw
# numeric codes (apply_value_formats=False), in the same order as before.
os.chdir('C:/Users/511232/Desktop/MICS/microdata')
(df_hh,meta_hh),(df_wm,meta_wm),(df_hl,meta_hl)=(
    pyreadstat.read_sav(sav_name, apply_value_formats=False)
    for sav_name in ('hh.sav','wm.sav','hl.sav')
)

# For each file keep two lookups from its metadata:
#   col_names_* : column name -> column label
#   col_vals_*  : column name -> {code: value label}
col_names_hh,col_vals_hh=meta_hh.column_names_to_labels,meta_hh.variable_value_labels
col_names_hl,col_vals_hl=meta_hl.column_names_to_labels,meta_hl.variable_value_labels
col_names_wm,col_vals_wm=meta_wm.column_names_to_labels,meta_wm.variable_value_labels

In [None]:
col_vals_wm

In [101]:
'''data processing prior to generating crosstabs'''

class process_data:
    """Load the hh/wm/hl .sav files and prepare the women's data for crosstabs.

    Parameters
    ----------
    data_dir : str
        Folder containing 'hh.sav', 'wm.sav' and 'hl.sav'. Defaults to the
        location the notebook was written against.
    output_dir : str
        Folder that process_data_wm() switches into; the table functions write
        their Excel files with relative names, so this is where they end up.

    Attributes
    ----------
    data_hh, data_wm, data_hl : DataFrame
        Raw (numerically coded) contents of the three files.
    col_names_hh/hl/wm : dict
        Column name -> column label, from the file metadata.
    col_vals_hh/hl/wm : dict
        Column name -> {code: value label}, from the file metadata.
    disability_levels : dict
        Code -> text for the four difficulty levels.
    disability_cols : list
        The six functional-difficulty columns (AF6, AF8-AF12).
    other_cols : list
        Columns whose codes are replaced by their value labels.
    dis_names : dict
        Disability column -> descriptive title.
    """

    def __init__(self,
                 data_dir='C:/Users/511232/Desktop/MICS/microdata',
                 output_dir='C:/Users/511232/Desktop/MICS/Crosstabs'):
        self.data_dir=data_dir
        self.output_dir=output_dir

        #reading in the .sav files and their metadata files
        #(the working directory is changed, as before, so relative names resolve)
        os.chdir(self.data_dir)
        df_hh,meta_hh=pyreadstat.read_sav('hh.sav', apply_value_formats=False)
        df_wm,meta_wm=pyreadstat.read_sav('wm.sav', apply_value_formats=False)
        df_hl,meta_hl=pyreadstat.read_sav('hl.sav', apply_value_formats=False)

        self.col_names_hh=meta_hh.column_names_to_labels
        self.col_vals_hh=meta_hh.variable_value_labels
        self.col_names_hl=meta_hl.column_names_to_labels
        self.col_vals_hl=meta_hl.variable_value_labels
        self.col_names_wm=meta_wm.column_names_to_labels
        self.col_vals_wm=meta_wm.variable_value_labels

        self.data_hh=df_hh.copy()
        self.data_wm=df_wm.copy()
        self.data_hl=df_hl.copy()

        self.disability_levels={1:'No difficulty',
        2:'Some difficulty',
        3:'A lot of difficulty',
        4:'Cannot do at all'}

        self.disability_cols=['AF6','AF8','AF9','AF10','AF11','AF12']
        self.other_cols=['WAGE','HH6','disability','windex5u','windex5r','windex5']

        self.dis_names={'AF6': 'Difficulty seeing, even if wearing glasses or contact lenses',
        'AF8': 'Difficulty hearing, even if using a hearing aid',
        'AF9': 'Difficulty walking or climbing steps',
        'AF10': 'Difficulty remembering or concentrating',
        'AF11': 'Difficulty with self-care, such as washing all over or dressing',
        'AF12': 'Difficulty communicating'}

    def process_data_wm(self):
        """Return the women's table enriched for the crosstab functions.

        Side effect: changes the working directory to ``self.output_dir``.

        The returned frame is a new object (``self.data_wm`` is not modified)
        holding the women's rows plus:
          * HL1, HL3, HL6 from the household listing (left join on
            HH1, HH2 and LN == HL1),
          * 'disability_combined': label of the highest code found across
            ``disability_cols``; a maximum that is not a key of
            ``disability_levels`` becomes NaN,
          * 'hh_rel': 'Head of household' when HL3 == 1, otherwise 'Other'
            (this includes rows with no match in the listing),
          * ``other_cols`` translated from codes to value labels.
        """
        os.chdir(self.output_dir)

        #merge with data_hl to get the HL3(household head relation) and HL6(age)
        listing=self.data_hl[['HH1','HH2','HL1','HL3','HL6']]
        df_wm=pd.merge(self.data_wm,listing, how='left',
        left_on=['HH1','HH2','LN'], right_on=['HH1','HH2','HL1'])

        #'disability_combined' takes the max(code) among the disability columns;
        #max(axis=1) is the vectorised equivalent of a row-wise apply
        worst_code=df_wm[self.disability_cols].max(axis=1)
        df_wm['disability_combined']=worst_code.map(self.disability_levels)

        #head of household relationship: HL3 == 1 is the head, anything else 'Other'
        df_wm['hh_rel']=df_wm['HL3'].eq(1).map({True:'Head of household', False:'Other'})

        #translate codes to labels, preferring the women's metadata over the listing's
        for col in self.other_cols:
            if col in self.col_vals_wm:
                labels,source=self.col_vals_wm[col],'women'
            elif col in self.col_vals_hl:
                labels,source=self.col_vals_hl[col],'hhl'
            else:
                print(f'!!! WARNING !!! {col} codes were not translated')
                continue
            df_wm[col]=df_wm[col].map(labels)
            print(f'{col} codes are translated from meta {source}')

        return(df_wm)

            

In [102]:
# Read the .sav files once, then derive the women's analysis table.
# `process` and `data` are the globals the table functions below read.
process=process_data()
data=process.process_data_wm()

WAGE codes are translated from meta women
HH6 codes are translated from meta women
disability codes are translated from meta women
windex5u codes are translated from meta women
windex5r codes are translated from meta women
windex5 codes are translated from meta women


In [103]:
'''Table 1
steps:
'disability_combined' column is calculated by taking the max(code) among ['AF6','AF8','AF9','AF10','AF11','AF12']
'''

def combined_disabilities(age_disaggregated=1):
    """Export Table 1: weighted counts by disability status and combined level.

    Reads the global `data`; sums 'wmweight' in every cell.

    age_disaggregated truthy -> rows Area/Disability/Disability level, columns
        Age; written to 'xtab_all_dis_ByAge.xlsx'.
    age_disaggregated falsy  -> rows Disability/Disability level, columns Area;
        written to 'xtab_all_dis_ByTotalAge.xlsx'.

    Returns None; the result is only written to disk (current directory).
    """
    frame=data.copy()
    weights=frame['wmweight']

    if not age_disaggregated:
        # all ages together, broken down by area
        table=pd.crosstab(
            [frame['disability'],frame['disability_combined']],
            frame['HH6'],
            rownames=['Disability','Disability level'],
            colnames=['Area'],
            values=weights,
            aggfunc='sum',
            dropna=False,
        )
        table.to_excel('xtab_all_dis_ByTotalAge.xlsx')
        return

    # one column per age group, area moved to the row index
    table=pd.crosstab(
        [frame['HH6'],frame['disability'],frame['disability_combined']],
        frame['WAGE'],
        rownames=['Area','Disability','Disability level'],
        colnames=['Age'],
        values=weights,
        aggfunc='sum',
        dropna=False,
    )
    table.to_excel('xtab_all_dis_ByAge.xlsx')

In [104]:
combined_disabilities(age_disaggregated=1)
combined_disabilities(age_disaggregated=0)

In [105]:
'''Table 2
steps:
-generate separate xtabs for all disability_cols
-stack() them to have a multiindex series and add them to a generator
-concatenate the generator items
-stack() and unstack() to get to the final result 
'''

def separate_disabilities():
    """Export Table 2: weighted counts per disability domain, level, area and age.

    Reads the globals `data` and `process`. For every column in
    process.disability_cols a weighted ('wmweight', summed) crosstab of
    Area/Disability/Level by Age is built and stacked into a Series named after
    the domain's descriptive title. The Series are placed side by side, an
    'All_disabilities' row-sum column is added, and the result is reshaped so
    the domain and the age group form the column levels.

    Writes 'separate disabilites.xlsx' to the current directory; returns None.
    """
    df=data.copy()

    def xtab():
        #yields one stacked (multiindex) series per disability column
        for col in process.disability_cols:
            print(f'processing column {col}')
            #translate the codes
            df[col]=df[col].map(process.disability_levels)
            r=pd.crosstab([df['HH6'],df['disability'],df[col]],df['WAGE'],
                rownames=['Area','Disability','Level'],colnames=['Age'], values=df['wmweight'], aggfunc='sum').stack()
            #the titles live on the loader object; a bare `dis_names` only
            #exists if a leftover global happens to be in the kernel
            r.name=process.dis_names[col]
            yield(r)

    #concatenating the series; materialised as a list before handing to concat
    t=pd.concat(list(xtab()), axis=1)
    t['All_disabilities']=t.sum(axis=1)

    #reshape the result: after stack() level 4 is the disability title and
    #level 3 is Age; both are moved to the columns
    T=t.stack().unstack([4,3]).sort_index(axis=1, level=0)
    T.to_excel('separate disabilites.xlsx')

In [106]:
separate_disabilities()

processing column AF6
processing column AF8
processing column AF9
processing column AF10
processing column AF11
processing column AF12


In [107]:
'''Table 4
steps:
-calculate domain_num by counting, for each row, how many of the disability_cols values
are code 3 (a lot of difficulty) or 4 (cannot do at all); each such column counts as one True
'''
def num_dis_domain():
    """Export Table 4: weighted counts by number of affected disability domains.

    Reads the globals `data` and `process`. 'domain_num' is, per row, the
    number of columns in process.disability_cols holding code 3 or 4. The
    crosstab has Area/Disability as rows and the number of domains as columns,
    with 'wmweight' summed in each cell.

    Writes 'Number_dis_domain.xlsx' to the current directory; returns None.
    """
    df=data.copy()
    #vectorised count of codes 3/4 across the disability columns; gives the same
    #numbers as a row-wise apply without running Python code once per row
    df['domain_num']=df[process.disability_cols].isin([3,4]).sum(axis=1)
    #generate xtab
    r=pd.crosstab([df['HH6'],df['disability']],df['domain_num'],
        rownames=['Area','Disability'],colnames=['Number of domains'], values=df['wmweight'], aggfunc='sum', dropna=False)

    r.to_excel('Number_dis_domain.xlsx')

In [108]:
num_dis_domain()

In [109]:
'''Table 5 marital status'''

def marital_status():
    """Export Table 5: weighted counts by marital status, disability and age.

    Reads the global `data`, replaces the MSTATUS codes with text and
    cross-tabulates Area/Marital status/Disability/Disability level (rows)
    against Age (columns), summing 'wmweight'.

    Writes 'MaritalStatus.xlsx' to the current directory; returns None.
    """
    #code -> text for MSTATUS
    status_labels={1.0: 'Currently married/in union',
                   2.0: 'Formerly married/in union',
                   3.0: 'Never married/in union',
                   9.0: 'No response'}

    frame=data.copy()
    frame['MSTATUS']=frame['MSTATUS'].map(status_labels)

    #row level name -> source column, in display order
    row_fields={'Area':'HH6',
                'Marital status':'MSTATUS',
                'Disability':'disability',
                'Disability level':'disability_combined'}

    table=pd.crosstab(
        [frame[source] for source in row_fields.values()],
        frame['WAGE'],
        rownames=list(row_fields),
        colnames=['Age'],
        values=frame['wmweight'],
        aggfunc='sum',
        dropna=False,
    )
    table.to_excel('MaritalStatus.xlsx')

In [110]:
marital_status()

In [111]:
'''Table 6: head_HH 2 crosstabs 
1-disability against head of household and other types of relationship
steps:
-create head of household relationship (in the process_data_wm() )
df['hh_rel']=np.where(df['HL3']==1,1,2) where 1:HH 2:Other 
2-disability by head of households by wealth quintiles
steps
-will generate crosstab among disabled HH with wealth quintiles
using windex5 rather than the area-specific windex5u (urban) and windex5r (rural), since
those differ from windex5 and would produce contradictory results between the separate urban and rural xtabs
and the urban and rural disaggregation in the xtab for the total: camp/urban/rural
'''
def head_HH(quintile=0):
    """Export Table 6: disability by household-head relationship or wealth quintile.

    Reads the global `data`; rows are always Area/Disability/Disability level
    and cells hold the sum of 'wmweight'.

    quintile falsy  -> columns are 'hh_rel' for all rows;
        written to 'head of HH.xlsx'.
    quintile truthy -> only rows with hh_rel == 'Head of household' are kept and
        columns are 'windex5'; written to 'head of HH_with wquintile.xlsx'.

    Returns None.
    """
    frame=data.copy()

    if quintile:
        #heads of household only, split by wealth quintile
        frame=frame[frame['hh_rel']=='Head of household']
        column_source,column_title,out_file='windex5','wealth quintile','head of HH_with wquintile.xlsx'
    else:
        #everyone, split by relationship to the head
        column_source,column_title,out_file='hh_rel','HH relationship','head of HH.xlsx'

    row_sources=['HH6','disability','disability_combined']
    table=pd.crosstab(
        [frame[source] for source in row_sources],
        frame[column_source],
        rownames=['Area','Disability','Disability level'],
        colnames=[column_title],
        values=frame['wmweight'],
        aggfunc='sum',
        dropna=False,
    )
    table.to_excel(out_file)


In [112]:
head_HH(quintile=0)
head_HH(quintile=1)

In [113]:
'''Table 7: Poorest_type
steps
-filter out the poorest quintile 'windex5' and crosstab with all disability types
-loop over disability_cols and create crosstabs then stack to end up with multiindex series
-put them in a generator and concatenate the generator items
'''

def poorest_type():
    """Export Table 7: disability levels per domain among the poorest quintile.

    Reads the globals `data` and `process`. Rows where windex5 == 'Poorest' are
    kept; for each column in process.disability_cols a weighted ('wmweight',
    summed) crosstab of Area/Disability by Disability level is stacked into a
    Series named after the domain. The Series are joined column-wise and an
    'All_disabilities' row-sum column is appended.

    Writes 'poorest_type.xlsx' to the current directory; returns None.
    """
    everyone=data.copy()
    poorest=everyone[everyone['windex5']=='Poorest'].copy()

    def level_series(domain):
        #one stacked series for a single disability column
        print(f'processing column {domain}')
        #translate the codes
        poorest[domain]=poorest[domain].map(process.disability_levels)
        stacked=pd.crosstab(
            [poorest['HH6'],poorest['disability']],
            poorest[domain],
            rownames=['Area','Disability'],
            colnames=['Disability level'],
            values=poorest['wmweight'],
            aggfunc='sum',
        ).stack()
        stacked.name=process.dis_names[domain]
        return stacked

    #build every series first, then place them side by side
    pieces=[level_series(domain) for domain in process.disability_cols]
    combined=pd.concat(pieces, axis=1)
    combined['All_disabilities']=combined.sum(axis=1)
    combined.to_excel('poorest_type.xlsx')

In [None]:
poorest_type()

In [35]:
# Re-derive the women's table from the loader created earlier.
# NOTE(review): this cell's execution count is lower than the cells around it,
# so it was run out of order; confirm the notebook still works on Run All.
data=process.process_data_wm()

WAGE codes are translated from meta women
HH6 codes are translated from meta women
disability codes are translated from meta women
windex5u codes are translated from meta women
windex5r codes are translated from meta women
windex5 codes are translated from meta women


In [13]:
'''Table 8: HH_type&size
Households with one or more persons with disabilities (18 years and older), by location and type and size of household
steps
-data will be filtered according to (age>=18 & disability_combined==3,4) 
-get the 'HH1','HH2' of the resulting dataframe as a list by zipping both columns
-filter data resulting from  process_data_wm() on the tuple ('HH1','HH2')

steps for calculating type of household hh_type (in hl dataframe):
-groupby hl by ['HH1','HH2']
-if HL3 isin (1 head, 2 spouse/partner, 3 son/daughter, 13 adopted son/daughter)
if ALL TRUE then code hh_type as 1 Nuclear
-if HL3 isin (1 head, 2 spouse/partner, 3 son/daughter, 13 adopted son/daughter,
4 son/daughter in law, 5 grandchild, 6 parent, 7 parent in law, 8 brother/sister,
9 brother/sister in law, 10 uncle/aunt, 11 nephew/niece, 12 other)
if ALL TRUE then code hh_type as 2 Extended
-if HL3 isin (1 head, 2 spouse/partner, 3 son/daughter, 13 adopted son/daughter,
4 son/daughter in law, 5 grandchild, 6 parent, 7 parent in law, 8 brother/sister,
9 brother/sister in law, 10 uncle/aunt, 11 nephew/niece, 12 other, 14 servant, 96 other, 98 do not know)
if ALL TRUE then code hh_type as 3 Composite
WARNING: a household holding two or more nuclear families cannot be told apart from one
holding a single nuclear family, since for example a HH might list 2 or more spouses
'''

#criteria 1 for being disabled
#criteria 1: the combined disability level is one of the two most severe labels
criteria1=data['disability_combined'].isin(['Cannot do at all','A lot of difficulty'])
#criteria 2 for being >=18
criteria2=(data['HL6']>=18)

#households (HH1, HH2) that have at least one row meeting both criteria
df1=data.loc[criteria1 & criteria2, ['HH1','HH2']].drop_duplicates()
#flag every row of data whose (HH1, HH2) pair is one of those households.
#The mask carries data's own index: boolean indexing aligns on index labels,
#so a mask on a fresh 0..n-1 index is only right while data has a default index.
hhd_filter=pd.Series(list(zip(data['HH1'],data['HH2'])), index=data.index).isin(list(zip(df1['HH1'],df1['HH2'])))
#keep all rows of the selected households
data_filtered=data.loc[hhd_filter]

#calculate family_type variable from HL3
#function to categorize family type
def family_type(df):
    """Classify one household by the HL3 relationship codes of its rows.

    Parameters
    ----------
    df : DataFrame
        Rows of a single household; must contain an 'HL3' column.

    Returns
    -------
    DataFrame
        A copy of `df` with a constant 'res' column:
        1 if every HL3 is in the nuclear set, 2 if every HL3 is in the extended
        set, 3 if every HL3 is in the composite set, otherwise 4 (for example
        when an HL3 value is missing or not listed).

    The input frame is left untouched: it is usually a group handed over by
    groupby(...).apply, and modifying such a group in place is not supported
    by pandas.
    """
    nuclear=[1,2,3,13]
    extended=nuclear+[4,5,6,7,8,9,10,11,12]
    composite=extended+[14,96,98]

    relations=df['HL3']
    #the sets are nested, so the first one that covers every row wins
    if relations.isin(nuclear).all():
        code=1
    elif relations.isin(extended).all():
        code=2
    elif relations.isin(composite).all():
        code=3
    else:
        code=4
    return df.assign(res=code)



In [12]:
t=pd.read_excel('C:/Users/511232/Desktop/Book1.xlsx')
t.head()

Unnamed: 0,HH1,HH2,HL3,type
0,1,1,1,nuc
1,1,1,2,nuc
2,1,1,3,nuc
3,2,1,1,ext
4,2,1,2,ext


In [20]:
def family_type(df=t):
    """Classify one household by the HL3 relationship codes of its rows.

    Parameters
    ----------
    df : DataFrame, default the global test sheet `t`
        Rows of a single household; must contain an 'HL3' column.

    Returns
    -------
    DataFrame
        A copy of `df` with a constant 'res' column:
        1 if every HL3 is in the nuclear set, 2 if every HL3 is in the extended
        set, 3 if every HL3 is in the composite set, otherwise 4.

    The input is not modified. Writing 'res' into `df` directly would alter the
    shared default `t` on a no-argument call, and would alter the group object
    when used through groupby(...).apply, which pandas does not support.
    """
    nuclear=[1,2,3,13]
    extended=nuclear+[4,5,6,7,8,9,10,11,12]
    composite=extended+[14,96,98]

    relations=df['HL3']
    #the sets are nested, so the first one that covers every row wins
    if relations.isin(nuclear).all():
        code=1
    elif relations.isin(extended).all():
        code=2
    elif relations.isin(composite).all():
        code=3
    else:
        code=4
    return df.assign(res=code)

# Spot-check family_type on one household of t (HH1 == 4, HH2 == 2).
# A copy is taken so the check works on its own frame rather than a slice of t.
t1=t[(t['HH1']==4) & (t['HH2']==2)].copy()
family_type(t1)


Unnamed: 0,HH1,HH2,HL3,type,res
15,4,2,1,com,3
16,4,2,2,com,3
17,4,2,3,com,3
18,4,2,13,com,3
19,4,2,5,com,3
20,4,2,6,com,3
21,4,2,9,com,3
22,4,2,14,com,3
23,4,2,96,com,3


In [21]:
# Classify every household in t: family_type is called once per (HH1, HH2) group.
t.groupby(['HH1','HH2']).apply(family_type)

Unnamed: 0,HH1,HH2,HL3,type,res
0,1,1,1,nuc,1
1,1,1,2,nuc,1
2,1,1,3,nuc,1
3,2,1,1,ext,2
4,2,1,2,ext,2
5,2,1,4,ext,2
6,2,2,1,nuc,1
7,2,2,2,nuc,1
8,3,1,1,ext,2
9,3,1,2,ext,2


In [36]:
data['HL6'].unique()

array([49., 23., 21., 37., 24., 29., 28., 19., 40., 36., 27., 35., 34.,
       46., 17., 44., 39., 48., 16., 30., 47., 18., 45., 20., 25., 43.,
       22., 38., 33., 32., 41., 31., 15., 26., 42.])

In [97]:
tst=pd.read_excel('C:/Users/511232/Desktop/Book1.xlsx')
l=list(tst.loc[(tst['age']>=15)&(tst['dis']==1),'hh'].unique())
tst

Unnamed: 0,hh,age,dis
0,1,15,1
1,1,6,1
2,1,18,1
3,1,20,1
4,1,5,0
5,2,8,1
6,2,9,0
7,2,12,0
8,3,18,1
9,3,2,0


In [98]:
tst[tst['hh'].isin(l)]

Unnamed: 0,hh,age,dis
0,1,15,1
1,1,6,1
2,1,18,1
3,1,20,1
4,1,5,0
8,3,18,1
9,3,2,0
10,3,5,1


In [49]:
# NOTE(review): this calls a module-level process_data_wm() that returns four
# values. No such function is defined in the visible cells (only the
# process_data.process_data_wm method, which returns a single DataFrame), so
# this looks like a leftover from an earlier version and would presumably fail
# on a fresh kernel -- confirm and remove or update.
df,disability_levels,disability_cols,dis_names=process_data_wm()


WAGE codes are translated from meta women
HH6 codes are translated from meta women
disability codes are translated from meta women
windex5u codes are translated from meta women
windex5r codes are translated from meta women
windex5 codes are translated from meta women


In [60]:
df.head()


Unnamed: 0,HH1,HH2,LN,WM1,WM2,WM3,WMINT,WM4,WM5,WM6D,WM6M,WM6Y,WM8,WM9,WM17,WM7H,WM7M,WM10H,WM10M,WM11,WMHINT,WMFIN,WB3M,WB3Y,WB4,WB5,WB6A,WB6B,WB7,WB9,WB10A,WB10B,WB11,WB12A,WB12B,WB14,WB15,WB16,WB17,WB18,WB19E,WB19F,WB19G,WB19H,WB19X,WB19NR,MT1,MT2,MT3,MT4,MT5,MT6A,MT6B,MT6C,MT6D,MT6E,MT6F,MT6G,MT6H,MT6I,MT9,MT10,MT11,MT12,CM1,CM2,CM3,CM4,CM5,CM6,CM7,CM8,CM9,CM10,CM11,CM12,CM15,CM17,BH11,DB2,DB4,MN2,MN3A,MN3B,MN3X,MN3NR,MN4AU,MN4AN,MN5,MN6A,MN6B,MN6C,MN19A,MN19B,MN19H,MN19X,MN19Y,MN19NR,MN20,MN21,MN22,MN23,MN24,MN25,MN26U,MN26N,MN32,MN33,MN34A,MN34,MN35,MN36,MN37U,MN37N,MN38,MN39A,MN39B,MN39C,MN39D,MN39E,MN39F,MN39G,MN39H,MN39I,MN39J,MN39X,MN39Y,MN39NR,PN3U,PN3N,PN4,PN5,PN6,PN8,PN9,PN10,PN11,PN12,PN13U,PN13N,PN14A,PN14B,PN14H,PN14X,PN14NR,PN15,PN17,PN19,PN20,PN21,PN22U,PN22N,PN23A,PN23B,PN23H,PN23X,PN23NR,PN24,PN25A,PN25B,PN25C,PN27,PN29,PN30,CP1,CP2,CP2A,CP3,CP4A,CP4B,CP4C,CP4D,CP4E,CP4F,CP4G,CP4H,CP4I,CP4J,CP4K,CP4L,CP4M,CP4X,CP4NR,UN2,UN4,UN5,UN7,UN8U,UN8N,UN11,UN12A,UN12B,UN12C,UN12D,UN12E,UN12F,UN12G,UN12H,UN12I,UN12X,UN12Z,UN12NR,UN14U,UN14N,UN16,UN17,UN18,UN19,DV1A,DV1B,DV1C,DV1D,DV1E,VT1,VT2,VT3,VT5,VT6,VT7A,VT7B,VT7X,VT7NR,VT8,VT9,VT10,VT11,VT12,VT13,VT14,VT17,VT18A,VT18B,VT18X,VT18NR,VT19,VT20,VT21,VT22A,VT22B,VT22D,VT22E,VT22F,VT22G,VT22H,VT22I,VT22X,MA1,MA2,MA3,MA4,MA5,MA6,MA7,MA8M,MA8Y,MA11,AF2,AF3,AF6,AF8,AF9,AF10,AF11,AF12,HA1,HA2,HA3,HA4,HA5,HA6,HA7,HA8A,HA8B,HA8C,HA10,HA13A,HA13B,HA13C,HA13D,HA30,HA31,HA32,HA33,HA34,HA35,HA36,TA1,TA2,TA3,TA4,TA5,TA6,TA7,TA8A,TA8B,TA8D,TA8X,TA8NR,TA9,TA10,TA11,TA12A,TA12B,TA12C,TA12D,TA12NR,TA12X,TA13,LS1,LS2,LS3,LS4,HH4,HH6,HH7,REGION,WDOI,WAGE,WDOB,WDOM,WAGEM,WDOBFC,WDOBLC,MSTATUS,CEB,CSURV,CDEAD,BH3_FIRST,BH4M_FIRST,BH4Y_FIRST,BH6_FIRST,BH3_LAST,BH4M_LAST,BH4Y_LAST,BH6_LAST,welevel,insurance,disability,Refugee,wmweight,wscore,windex5,windex10,wscoreu,windex5u,windex10u,wscorer,windex5r,windex10r,wscorec,windex5c,windex10c,J1,PSU,stratum,nat_reg_lvl,HL1,HL3,disability_combined,hh_rel
0,1.0,2.0,2.0,1.0,2.0,2.0,41.0,900.0,41.0,31.0,12.0,2019.0,1.0,1.0,1.0,11.0,33.0,11.0,42.0,2.0,41.0,3.0,8.0,1970.0,49.0,1.0,1.0,5.0,1.0,,,,,,,,20.0,2.0,1.0,2.0,,,,,,,0.0,0.0,3.0,2.0,,,,,,,,,,,2.0,,1.0,2.0,1.0,2.0,,,1.0,1.0,2.0,2.0,,,3.0,1.0,1.0,0.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,,,,,2.0,,,,,,,,,,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,,,,1.0,2.0,1.0,98.0,1998.0,,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,2.0,,,,,2.0,,,,,,,,2.0,,,,,,,,,2.0,10.0,2.0,1.0,900.0,RURAL,1.0,1.0,1440.0,45-49,848.0,1178.0,27.0,1125.0,1166.0,2.0,3.0,3.0,0.0,2.0,9.0,1993.0,26.0,1.0,2.0,1997.0,22.0,1.0,2.0,Has no functional difficulty,3.0,1.094056,-0.35608,Second,3.0,,,,-0.658433,Poorest,1.0,,,,2.0,1.0,2.0,1.0,2.0,3.0,No difficulty,Other
1,1.0,5.0,2.0,1.0,5.0,2.0,43.0,900.0,43.0,31.0,12.0,2019.0,1.0,1.0,1.0,11.0,50.0,12.0,12.0,1.0,43.0,3.0,12.0,1995.0,23.0,1.0,3.0,4.0,1.0,2.0,,,2.0,,,,2.0,2.0,1.0,1.0,E,,,,,,1.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,2.0,1.0,3.0,1.0,3.0,1.0,1.0,0.0,1.0,2.0,,,2.0,,,1.0,1.0,1.0,1.0,2.0,1.0,,1.0,A,B,,,2.0,1.0,18.0,1.0,1.0,1.0,A,B,,,,,21.0,1.0,2.0,8.0,,1.0,2.0,1.0,3.0,1.0,1.0,3.6,1.0,1.0,1.0,2.0,1.0,,,,,,,G,,,,,,,1.0,1.0,1.0,1.0,1.0,,,,,2.0,2.0,2.0,A,,,,,32.0,1.0,,,1.0,2.0,2.0,A,,,,,11.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,,,,,,,,,,,,,,,M,,,,,,1.0,2.0,3.0,,,,,,,,,,,,,,2.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,,,,,2.0,,,,,,,,,,,,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,29.0,2.0,,,,1.0,5.0,2017.0,,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,8.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,8.0,1.0,2.0,,,,,1.0,2.0,,,,,,,2.0,,,,,,,,,2.0,7.0,1.0,1.0,900.0,RURAL,1.0,1.0,1440.0,20-24,1152.0,1409.0,21.0,1422.0,1422.0,1.0,1.0,1.0,0.0,2.0,6.0,2018.0,1.0,2.0,6.0,2018.0,1.0,3.0,1.0,Has no functional difficulty,3.0,1.094056,0.579102,Fourth,7.0,,,,0.403067,Middle,6.0,,,,2.0,1.0,2.0,1.0,2.0,2.0,No difficulty,Other
2,1.0,6.0,2.0,1.0,6.0,2.0,45.0,900.0,45.0,19.0,1.0,2020.0,1.0,1.0,1.0,13.0,20.0,13.0,29.0,1.0,45.0,3.0,2.0,1998.0,21.0,1.0,2.0,2.0,1.0,2.0,,,2.0,,,,2.0,2.0,1.0,2.0,,,,,,,0.0,3.0,3.0,1.0,0.0,,,,,,,,,,1.0,3.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,,,2.0,,,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,A,,,,2.0,1.0,6.0,1.0,1.0,1.0,A,B,,,,,31.0,2.0,,1.0,1.0,1.0,1.0,12.0,3.0,1.0,2.0,2.0,2.0,1.0,0.0,0.0,2.0,,,,,,,,,,,,,,1.0,6.0,1.0,1.0,2.0,,,,,,,,,,,,,,1.0,,,1.0,3.0,1.0,A,,,,,31.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,24.0,2.0,,,,,,,,,,,,,,,,,,,1.0,2.0,6.0,1.0,,,,,,,,,,,,,4.0,1.0,,,,,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,,,,,2.0,,,,,,,,,,,,2.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,27.0,2.0,,,,1.0,11.0,2017.0,,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,8.0,1.0,1.0,1.0,8.0,1.0,2.0,,,,,2.0,,,,,,,,2.0,,,,,,,,,1.0,10.0,1.0,1.0,900.0,RURAL,1.0,1.0,1441.0,20-24,1178.0,1415.0,19.0,1423.0,1437.0,1.0,2.0,2.0,0.0,1.0,7.0,2018.0,1.0,2.0,9.0,2019.0,0.0,2.0,2.0,Has no functional difficulty,3.0,1.094056,0.612506,Fourth,8.0,,,,0.440983,Middle,6.0,,,,2.0,1.0,2.0,1.0,2.0,2.0,No difficulty,Other
3,1.0,7.0,2.0,1.0,7.0,2.0,42.0,900.0,42.0,2.0,1.0,2020.0,1.0,1.0,1.0,12.0,52.0,13.0,9.0,1.0,42.0,3.0,5.0,1982.0,37.0,1.0,3.0,4.0,1.0,,,,,,,,95.0,,,1.0,E,,,,,,0.0,0.0,3.0,1.0,0.0,,,,,,,,,,1.0,2.0,1.0,3.0,1.0,1.0,1.0,0.0,2.0,,,2.0,,,1.0,1.0,1.0,0.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,1.0,,,,,,,,,,,,,,,M,,,,,,1.0,1.0,0.0,,,,,,,,,,,,,,1.0,5.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,,,,,2.0,,,,,,,,,,,,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,53.0,1.0,1.0,,,1.0,3.0,2017.0,,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,8.0,1.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,,,,,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,,,,1.0,2.0,,,,,,,2.0,,,,,,,,,2.0,7.0,1.0,1.0,900.0,RURAL,1.0,1.0,1441.0,35-39,989.0,1407.0,34.0,1416.0,1416.0,1.0,1.0,1.0,0.0,1.0,12.0,2017.0,2.0,1.0,12.0,2017.0,2.0,3.0,1.0,Has no functional difficulty,3.0,1.094056,0.60454,Fourth,8.0,,,,0.431941,Middle,6.0,,,,2.0,1.0,2.0,1.0,2.0,2.0,No difficulty,Other
4,1.0,8.0,2.0,1.0,8.0,2.0,45.0,900.0,45.0,2.0,1.0,2020.0,1.0,1.0,1.0,13.0,16.0,13.0,25.0,1.0,45.0,3.0,1.0,1995.0,24.0,1.0,2.0,2.0,1.0,2.0,,,2.0,,,,5.0,2.0,1.0,1.0,E,,,,,,0.0,0.0,0.0,1.0,0.0,,,,,,,,,,1.0,3.0,1.0,3.0,1.0,1.0,2.0,0.0,2.0,,,2.0,,,2.0,1.0,1.0,0.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,1.0,,,,,C,,,,,,,,,,,,,,,,1.0,2.0,3.0,,,,,,,,,,,,,,2.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,8.0,2.0,,,,,,,,,,2.0,,,,,,,,,,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,33.0,2.0,,,,1.0,10.0,2014.0,,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,2.0,2.0,1.0,8.0,8.0,,,,,2.0,2.0,2.0,2.0,2.0,2.0,1.0,2.0,,,,,2.0,,,,,,,,2.0,,,,,,,,,1.0,9.0,1.0,1.0,900.0,RURAL,1.0,1.0,1441.0,20-24,1141.0,1378.0,19.0,1390.0,1402.0,1.0,2.0,2.0,0.0,1.0,10.0,2015.0,4.0,1.0,10.0,2016.0,3.0,2.0,1.0,Has no functional difficulty,3.0,1.094056,0.757477,Richest,9.0,,,,0.605536,Fourth,8.0,,,,2.0,1.0,2.0,1.0,2.0,2.0,No difficulty,Other


In [58]:
df['HH6'].unique()

array(['RURAL', 'URBAN', 'CAMP', nan], dtype=object)