In [1]:
import geopandas as gpd
import pandas as pd

# Household data

In [2]:
# Load the 2018 household data file (Stata .DTA) into a pandas DataFrame.
hr_2018 = pd.read_stata("2018/SNHR81DT/SNHR81FL.DTA")

In [3]:
# Preview the raw household table (pandas truncates rows and columns in the display).
hr_2018

Unnamed: 0,hhid,hv000,hv001,hv002,hv003,hv004,hv005,hv006,hv007,hv008,...,hml36_42,hml36_43,hml36_44,hml36_45,hml36_46,hml36_47,hml36_48,hml36_49,hml36_50,hml36_51
0,1 1,SN7,1,1,2,1,3568159,8,2018,1424,...,,,,,,,,,,
1,1 2,SN7,1,2,2,1,3568159,8,2018,1424,...,,,,,,,,,,
2,1 3,SN7,1,3,1,1,3568159,8,2018,1424,...,,,,,,,,,,
3,1 4,SN7,1,4,1,1,3568159,8,2018,1424,...,,,,,,,,,,
4,1 5,SN7,1,5,1,1,3568159,8,2018,1424,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4587,214 17,SN7,214,17,1,214,416458,10,2018,1426,...,,,,,,,,,,
4588,214 18,SN7,214,18,1,214,416458,10,2018,1426,...,,,,,,,,,,
4589,214 19,SN7,214,19,2,214,416458,10,2018,1426,...,,,,,,,,,,
4590,214 20,SN7,214,20,1,214,416458,10,2018,1426,...,,,,,,,,,,


In [4]:
# Household variables kept for the export, in output column order.
# Each entry is followed by its codebook: the value codes and their labels.
col_list = ['hhid',# Case Identification
            'hv001',# Cluster number (unique ID)
            'hv002',# Household number (unique ID)
            'hv009',# Number of household members
                    # 0:90 Count of individuals
            'hv014',# Number of children 5 and under
                    # 0:25 Count of individuals
            'hv015',# Result of household interview
                    # 1  Completed
                    # 2  No Household member/no competent member at home
                    # 3  Entire Household absent for extended period of time
                    # 4  Postponed
                    # 5  Refused
                    # 6  Dwelling vacant or address not a dwelling
                    # 7  Dwelling destroyed
                    # 8  Dwelling not found
                    # 9  Other
            'hv024',# Region (note: different coding than 2005)
                    # 1  Dakar
                    # 2  Ziguinchor
                    # 3  Diourbel
                    # 4  Saint-Louis
                    # 5  Tambacounda
                    # 6  Kaolack
                    # 7  Thiès
                    # 8  Louga
                    # 9  Fatick
                    # 10 Kolda
                    # 11 Matam
                    # 12 Kaffrine
                    # 13 Kedougou
                    # 14 Sedhiou
            'hv025',# Type of place of residence
                    # 1  Urban
                    # 2  Rural
            'hv106_01',# Highest educational level attained by the head of the household
                    # 0  No education, preschool
                    # 1  Primary
                    # 2  Secondary
                    # 3  Higher
                    # 6  Others
                    # 8  Don't know
                    # 9  (m) Missing
                    #    (na) NotAppl
            'hv106_02',# Highest educational level attained by the spouse/partner of the head of the household
                    # 0  No education, preschool
                    # 1  Primary
                    # 2  Secondary
                    # 3  Higher
                    # 6  Others
                    # 8  Don't know
                    # 9  (m) Missing
                    #    (na) NotAppl (if no spouse/partner)
            'hv201',# Source of drinking water (note: different coding than 2005)
                    ## 10  PIPED WATER
                    #  11  Piped into dwelling
                    #  12  Piped into yard/plot
                    #  13  Piped to neighbor
                    #  14  Public tap/standpipe
                    ## 20  TUBE WELL WATER
                    #  21  Tube well or borehole
                    ## 30  DUG WELL (OPEN/PROTECTED)
                    #  31  Protected well
                    #  32  Unprotected well
                    ## 40  SURFACE FROM SPRING
                    #  41  Protected spring
                    #  42  Unprotected spring
                    #  43  River/dam/lake/ponds/stream/canal/irrigation channel
                    ## OTHER SOURCES
                    #  51  Rainwater
                    #  61  Tanker truck
                    #  62  Cart with small tank
                    #  71  Bottled water
                    #  96  Other
                    #  99  (m) Missing
                    #      (na) NotAppl
            'hv201a',# Water not available for at least a day in the last two weeks (note: different variable than 2005)
                    # 0  No, not interrupted for a full day
                    # 1  Yes, interrupted for a full day or more
                    # 8  Don't know
                    # 9  (m) Missing
                    #    (na) NotAppl
            'hv204',# Time to get to water source (roundtrip)
                    # 1:900  Time in minutes
                    # 996    On premises
                    # 998    Don't know
                    # 999    (m) Missing
                    #        (na) NotAppl
            'hv205',# Type of toilet facility (note: different coding than 2005)
                    ## 10  FLUSH TOILET
                    #  11  Flush to piped sewer system
                    #  12  Flush to septic tank
                    #  13  Flush to pit latrine
                    #  14  Flush to somewhere else
                    #  15  Flush, don't know where
                    ## 20  PIT TOILET LATRINE
                    #  21  Ventilated Improved Pit latrine
                    #  22  Pit latrine with slab
                    #  23  Pit latrine without slab/open pit
                    ## 30  NO FACILITY
                    #  31  No facility, bush, field
                    #  41  Composting toilet
                    #  42  Bucket toilet
                    #  43  Hanging toilet/latrine
                    #  96  OTHER
                    #  99  (m) Missing
            'hv206',# Household has electricity
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv207',# Household has radio
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv208',# Household has television
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv209',# Household has refrigerator
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv210',# Household has bicycle
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv211',# Household has motorcycle/scooter
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv212',# Household has car/truck
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv213',# Main floor material
                    ## 10  NATURAL
                    #  11  Earth, sand
                    #  12  Dung
                    ## 20  RUDIMENTARY
                    #  21  Wood planks
                    #  22  Palm, bamboo
                    ## 30  FINISHED
                    #  31  Parquet, polished wood
                    #  32  Vinyl, asphalt strips
                    #  33  Ceramic tiles
                    #  34  Cement
                    #  35  Carpet
                    #  96  OTHER
                    #  99  (m) Missing
                    #      (na) NotAppl
            'hv214',# Main wall material
                    # NOTE(review): the codes below mirror the hv213 (floor) list and look
                    # copy-pasted; the loaded data also shows code 13, which is not listed
                    # here. Verify against the survey's recode documentation.
                    ## 10  NATURAL
                    #  11  Earth, sand
                    #  12  Dung
                    ## 20  RUDIMENTARY
                    #  21  Wood planks
                    #  22  Palm, bamboo
                    ## 30  FINISHED
                    #  31  Parquet, polished wood
                    #  32  Vinyl, linoleum
                    #  33  Ceramic tiles
                    #  34  Cement
                    #  35  Carpet
                    #  96  OTHER
                    #  99  (m) Missing
                    #      (na) NotAppl
            'hv215',# Main roof material
                    # NOTE(review): same list as hv214; flooring labels (parquet, carpet, ...)
                    # look copy-pasted and are unlikely to describe roofs. Verify against the
                    # survey's recode documentation.
                    ## 10  NATURAL
                    #  11  Earth, sand
                    #  12  Dung
                    ## 20  RUDIMENTARY
                    #  21  Wood planks
                    #  22  Palm, bamboo
                    ## 30  FINISHED
                    #  31  Parquet, polished wood
                    #  32  Vinyl, linoleum
                    #  33  Ceramic tiles
                    #  34  Cement
                    #  35  Carpet
                    #  96  OTHER
                    #  99  (m) Missing
                    #      (na) NotAppl
            'hv225',# Share toilet with other households
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv226',# Type of cooking fuel (note: different coding than 2005)
                    # 1   Electricity
                    # 2   LPG
                    # 3   Natural gas
                    # 4   Biogas
                    # 5   Kerosene
                    # 6   Coal, lignite
                    # 7   Charcoal
                    # 8   Wood
                    # 9   Straw/shrubs/grass
                    # 11  Agricultural crop
                    # 12  Animal Dung
                    # 95  No food cooked in house
                    # 96  Other
                    # 99  (m) Missing
            'hv230a',# Place where household members wash their hands (note: different coding than 2005)
                    # 1  Observed, fixed place
                    # 2  Observed, mobile place
                    # 3  Not observed: not in dwelling
                    # 4  Not observed: no permission to see
                    # 5  Not observed: other reason
                    # 9  (m) Missing
            'hv230b',# Presence of water at hand washing place
                    # (the loaded labels read "water is available" / "water not available")
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
            'hv232',# Items present: Soap or detergent (note: different variable than 2005, which looked at any cleansing item)
                    # 0  No
                    # 1  Yes
                    # 9  (m) Missing
                    #    (na) NotAppl
            'hv232y',# Items present: None
                    # 0  No
                    # 1  Yes: no cleansing agent observed
                    # 9  (m) Missing
                    #    (na) NotAppl
            'hv270',# Wealth index
                    # 1  Poorest
                    # 2  Poorer
                    # 3  Middle
                    # 4  Richer
                    # 5  Richest
           ]

In [5]:
# Keep only the variables listed in col_list (all rows, columns in list order).
hr_2018_sel = hr_2018.loc[:, col_list]

In [6]:
# Preview the selected household variables.
hr_2018_sel

Unnamed: 0,hhid,hv001,hv002,hv009,hv014,hv015,hv024,hv025,hv106_01,hv106_02,...,hv213,hv214,hv215,hv225,hv226,hv230a,hv230b,hv232,hv232y,hv270
0,1 1,1,1,5,1,completed,dakar,urban,"no education, preschool","no education, preschool",...,"vinyl, asphalt strips",31,33,yes,lpg,"observed, mobile place",water is available,yes,no,richer
1,1 2,1,2,5,2,completed,dakar,urban,primary,"no education, preschool",...,ceramic tiles,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest
2,1 3,1,3,1,0,completed,dakar,urban,higher,,...,ceramic tiles,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest
3,1 4,1,4,2,0,completed,dakar,urban,primary,higher,...,ceramic tiles,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest
4,1 5,1,5,3,1,completed,dakar,urban,higher,higher,...,ceramic tiles,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4587,214 17,214,17,7,1,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,cement,13,33,yes,"coal, lignite","observed, mobile place",water is available,yes,no,poorer
4588,214 18,214,18,13,3,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,cement,12,33,yes,"coal, lignite","observed, fixed place",water not available,no,yes: no cleansing agent observed,poorer
4589,214 19,214,19,14,3,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,cement,12,33,no,"coal, lignite","observed, mobile place",water not available,no,yes: no cleansing agent observed,poorer
4590,214 20,214,20,7,2,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,cement,12,33,no,"coal, lignite","observed, mobile place",water not available,no,yes: no cleansing agent observed,poorer


In [7]:
# Export the selected household variables as CSV.
# With pandas' defaults the row index is written as the first, unnamed column.
hr_2018_sel.to_csv("2018_SNHR81FL_selected-vars_group-B.csv")

# Geodata

In [8]:
# Load the 2018 GPS shapefile (cluster IDs in DHSCLUST, point geometries) into a GeoDataFrame.
gps_2018 = gpd.read_file("2018/SNGE81FL/SNGE81FL.shp")

In [9]:
# Preview the GPS layer.
gps_2018

Unnamed: 0,DHSID,DHSCC,DHSYEAR,DHSCLUST,CCFIPS,ADM1FIPS,ADM1FIPSNA,ADM1SALBNA,ADM1SALBCO,ADM1DHS,...,DHSREGCO,DHSREGNA,SOURCE,URBAN_RURA,LATNUM,LONGNUM,ALT_GPS,ALT_DEM,DATUM,geometry
0,SN201800000001,SN,2018.0,1.0,SG,SG01,Dakar,,,1.0,...,2.0,Ouest,GPS,U,14.676428,-17.429576,9999.0,6.0,WGS84,POINT (-17.42958 14.67643)
1,SN201800000002,SN,2018.0,2.0,SG,SG01,Dakar,,,1.0,...,2.0,Ouest,GPS,U,14.703503,-17.453035,9999.0,10.0,WGS84,POINT (-17.45303 14.70350)
2,SN201800000003,SN,2018.0,3.0,SG,SG01,Dakar,,,1.0,...,2.0,Ouest,GPS,U,14.700487,-17.450062,9999.0,18.0,WGS84,POINT (-17.45006 14.70049)
3,SN201800000004,SN,2018.0,4.0,SG,SG01,Dakar,,,1.0,...,2.0,Ouest,GPS,U,14.738873,-17.465598,9999.0,32.0,WGS84,POINT (-17.46560 14.73887)
4,SN201800000005,SN,2018.0,5.0,SG,SG01,Dakar,,,1.0,...,2.0,Ouest,GPS,U,14.730071,-17.471727,9999.0,30.0,WGS84,POINT (-17.47173 14.73007)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,SN201800000210,SN,2018.0,210.0,SG,SG18,Sedhiou,,,14.0,...,4.0,Sud,GPS,U,12.495434,-15.634519,9999.0,0.0,WGS84,POINT (-15.63452 12.49543)
210,SN201800000211,SN,2018.0,211.0,SG,SG18,Sedhiou,,,14.0,...,4.0,Sud,GPS,R,12.559112,-15.911520,9999.0,0.0,WGS84,POINT (-15.91152 12.55911)
211,SN201800000212,SN,2018.0,212.0,SG,SG18,Sedhiou,,,14.0,...,4.0,Sud,GPS,R,12.538636,-15.714331,9999.0,0.0,WGS84,POINT (-15.71433 12.53864)
212,SN201800000213,SN,2018.0,213.0,SG,SG18,Sedhiou,,,14.0,...,4.0,Sud,GPS,R,12.790922,-15.327992,9999.0,0.0,WGS84,POINT (-15.32799 12.79092)


In [10]:
# Attach a location to every household: the GPS layer is keyed by cluster
# (DHSCLUST), which is matched to the household cluster number hv001.
df_1 = gps_2018[['DHSCLUST','geometry']].rename(columns={"DHSCLUST": "hv001"})
# Left join keeps every household row. validate='many_to_one' makes the merge
# fail loudly if a cluster ID appears more than once in the GPS layer, instead
# of silently duplicating household rows.
df_2 = hr_2018_sel.merge(df_1,
                         on='hv001',
                         how='left',
                         validate='many_to_one')
# Pass the CRS object itself rather than gps_2018.crs.to_epsg(): to_epsg()
# returns None when the CRS has no matching EPSG code, which would leave the
# resulting GeoDataFrame without any CRS.
gdf = gpd.GeoDataFrame(df_2,
                       geometry='geometry',
                       crs=gps_2018.crs)
# NOTE(review): households whose cluster is missing from the GPS layer end up
# with an empty geometry after the left join; also confirm whether clusters
# without a valid GPS fix are stored as (0, 0) points that should be excluded.
gdf

Unnamed: 0,hhid,hv001,hv002,hv009,hv014,hv015,hv024,hv025,hv106_01,hv106_02,...,hv214,hv215,hv225,hv226,hv230a,hv230b,hv232,hv232y,hv270,geometry
0,1 1,1,1,5,1,completed,dakar,urban,"no education, preschool","no education, preschool",...,31,33,yes,lpg,"observed, mobile place",water is available,yes,no,richer,POINT (-17.42958 14.67643)
1,1 2,1,2,5,2,completed,dakar,urban,primary,"no education, preschool",...,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest,POINT (-17.42958 14.67643)
2,1 3,1,3,1,0,completed,dakar,urban,higher,,...,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest,POINT (-17.42958 14.67643)
3,1 4,1,4,2,0,completed,dakar,urban,primary,higher,...,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest,POINT (-17.42958 14.67643)
4,1 5,1,5,3,1,completed,dakar,urban,higher,higher,...,31,35,no,lpg,"observed, fixed place",water is available,yes,no,richest,POINT (-17.42958 14.67643)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4587,214 17,214,17,7,1,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,13,33,yes,"coal, lignite","observed, mobile place",water is available,yes,no,poorer,POINT (-15.52263 12.60835)
4588,214 18,214,18,13,3,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,12,33,yes,"coal, lignite","observed, fixed place",water not available,no,yes: no cleansing agent observed,poorer,POINT (-15.52263 12.60835)
4589,214 19,214,19,14,3,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,12,33,no,"coal, lignite","observed, mobile place",water not available,no,yes: no cleansing agent observed,poorer,POINT (-15.52263 12.60835)
4590,214 20,214,20,7,2,completed,sedhiou,rural,"no education, preschool","no education, preschool",...,12,33,no,"coal, lignite","observed, mobile place",water not available,no,yes: no cleansing agent observed,poorer,POINT (-15.52263 12.60835)


In [11]:
# Convert categorical columns to string (otherwise not possible to export GPKG)
categorical_columns = gdf.select_dtypes(['category']).columns
for var in categorical_columns:
    gdf[var] = gdf[var].astype(str)

In [12]:
# Write the merged household GeoDataFrame to a GeoPackage.
# gdf already carries its CRS (set when it was constructed), so the CRS is
# taken from the frame itself instead of being passed again as an EPSG code.
gdf.to_file("Export_students/dhs_household-data_2018_group-B.gpkg",
            driver="GPKG")