In [1]:
import geopandas as gpd
import pandas as pd

# Household data

In [2]:
# Load the 2005 household data file (Stata .DTA) into a DataFrame.
hr_2005 = pd.read_stata(
    "2005/SNHR4ADT/SNHR4AFL.DTA",
)

In [3]:
# Show the raw household table; pandas renders a truncated preview
# (first/last rows and columns, with "..." for the rest).
hr_2005

Unnamed: 0,hhid,hv000,hv001,hv002,hv003,hv004,hv005,hv006,hv007,hv008,...,hb69_11,hb69_12,hb69_13,hb69_14,hb69_15,hb69_16,hb69_17,hb69_18,hb69_19,hb69_20
0,1 3411,SN4,1,11,1,1,1755369,5,2005,1265,...,,,,,,,,,,
1,1 8355,SN4,1,55,2,1,1755369,5,2005,1265,...,,,,,,,,,,
2,112462,SN4,1,62,1,1,1755369,5,2005,1265,...,,,,,,,,,,
3,112724,SN4,1,24,1,1,1755369,5,2005,1265,...,,,,,,,,,,
4,117231,SN4,1,31,1,1,1755369,5,2005,1265,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7407,377518 3,SN4,377,3,1,377,665893,4,2005,1264,...,,,,,,,,,,
7408,37751828,SN4,377,28,1,377,665893,5,2005,1265,...,,,,,,,,,,
7409,37751843,SN4,377,43,1,377,665893,5,2005,1265,...,,,,,,,,,,
7410,37753424,SN4,377,24,2,377,665893,5,2005,1265,...,,,,,,,,,,


In [4]:
# Household variables kept for the export, listed in output column order.
# The comment block above each name gives the variable label and its value codes;
# "(m)" marks the missing code and "(na)" means not applicable.
col_list = [
    # Case identification
    "hhid",

    # Cluster number (unique ID)
    "hv001",

    # Household number (unique ID)
    "hv002",

    # Number of household members
    #   0:90  Count of individuals
    "hv009",

    # Number of children 5 and under
    #   0:20  Count of individuals
    "hv014",

    # Result of household interview
    #   1  Completed
    #   2  HH present, no resp
    #   3  HH absent
    #   4  Postponed
    #   5  Refused
    #   6  Dwelling vacant
    #   7  Dwelling destroyed
    #   8  Dwelling not found
    #   9  Other
    "hv015",

    # Region
    #   1   Dakar
    #   2   Diourbel
    #   3   Fatick
    #   4   Kaolack
    #   5   Kolda
    #   6   Louga
    #   7   Matam
    #   8   Saint-Louis
    #   9   Tambacounda
    #   10  Thiès
    #   11  Ziguinchor (the label stored in the data is spelled "zuguinchor")
    "hv024",

    # Type of place of residence
    #   1  Urban
    #   2  Rural
    "hv025",

    # Highest educational level attained by the head of the household
    #   0  No education, preschool
    #   1  Primary
    #   2  Secondary
    #   3  Higher
    #   8  (DK) Don't know
    #   9  (m) Missing
    #      (na) NotAppl
    "hv106_01",

    # Source of drinking water
    #   10  PIPED WATER
    #       11  Piped into dwelling
    #       12  Piped into yard/plot
    #       13  Public tap
    #   20  OPEN WELL WATER
    #       21  Open well in dwelling
    #       22  Open well in yard/plot
    #       23  Open public well
    #   30  COVERED WELL/BOREHOLE
    #       31  Protected well in dwelling
    #       32  Protected well in yard/plot
    #       33  Protected public well
    #   40  SURFACE WATER
    #       41  Spring
    #       42  River, stream
    #       43  Pond, lake
    #       44  Dam
    #   OTHER SOURCES
    #       51  Rainwater
    #       61  Tanker truck
    #       71  Bottled water
    #       96  (m) Other
    #           (na) NotAppl
    "hv201",

    # Time to get to water source (roundtrip)
    #   1:500  Time in minutes
    #   996    On premises
    #   999    (m) Missing
    #          (na) NotAppl
    "hv204",

    # Type of toilet facility
    #   10  FLUSH TOILET
    #       11  Flush toilet
    #       12  Flush toilet with drainage
    #   20  PIT TOILET LATRINE
    #       21  Traditional pit toilet
    #       22  Ventilated improved pit latrine
    #   30  NO FACILITY
    #       31  No facility, bush, field
    #   96  OTHER
    #   99  (m) Missing
    #       (na) NotAppl
    "hv205",

    # Household has electricity
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv206",

    # Household has radio
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv207",

    # Household has television
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv208",

    # Household has refrigerator
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv209",

    # Household has bicycle
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv210",

    # Household has motorcycle/scooter
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv211",

    # Household has car/truck
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv212",

    # Main floor material
    #   10  NATURAL
    #       11  Earth, sand
    #       12  Dung
    #   20  RUDIMENTARY
    #       21  Wood planks
    #       22  Palm, bamboo
    #   30  FINISHED
    #       31  Parquet, polished wood
    #       32  Vinyl, linoleum
    #       33  Ceramic tiles
    #       34  Cement
    #       35  Carpet
    #   96  OTHER
    #   99  (m) Missing
    #       (na) NotAppl
    "hv213",

    # Main wall material
    # NOTE(review): this code list is identical to the floor list (hv213) and looks
    # copy-pasted; the previewed rows show no values for this column. Verify against
    # the survey codebook.
    #   10  NATURAL
    #       11  Earth, sand
    #       12  Dung
    #   20  RUDIMENTARY
    #       21  Wood planks
    #       22  Palm, bamboo
    #   30  FINISHED
    #       31  Parquet, polished wood
    #       32  Vinyl, linoleum
    #       33  Ceramic tiles
    #       34  Cement
    #       35  Carpet
    #   96  OTHER
    #   99  (m) Missing
    #       (na) NotAppl
    "hv214",

    # Main roof material
    # NOTE(review): this code list is identical to the floor list (hv213) and looks
    # copy-pasted; the previewed rows show no values for this column. Verify against
    # the survey codebook.
    #   10  NATURAL
    #       11  Earth, sand
    #       12  Dung
    #   20  RUDIMENTARY
    #       21  Wood planks
    #       22  Palm, bamboo
    #   30  FINISHED
    #       31  Parquet, polished wood
    #       32  Vinyl, linoleum
    #       33  Ceramic tiles
    #       34  Cement
    #       35  Carpet
    #   96  OTHER
    #   99  (m) Missing
    #       (na) NotAppl
    "hv215",

    # Share toilet with other households
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv225",

    # Type of cooking fuel
    #   1   Electricity
    #   2   LPG, natural gas
    #   3   Biogas
    #   4   Kerosene
    #   5   Coal, lignite
    #   6   Charcoal
    #   7   Firewood, straw
    #   8   Dung
    #   96  Other
    #   99  (m) Missing
    "hv226",

    # Place for hand washing
    #   0  Nowhere
    #   1  In dwelling/yard/plot
    #   2  Somewhere else
    #   9  (m) Missing
    "hv230",

    # Items present: Water, tap
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv231",

    # Items present: Soap/other cleansing agent
    #   0  No
    #   1  Yes
    #   9  (m) Missing
    #      (na) NotAppl
    "hv232",

    # Frequency of water outages
    #   0  Water always available (no outages)
    #   1  Every day
    #   2  Most days of the week
    #   3  Some days of the week
    #   4  Occasionally
    #   9  (m) Missing
    #      (na) Not applicable
    "sh22c",

    # Main means of sewage disposal in household
    #   1  Truck
    #   2  Wagon
    #   3  Dépôt autorisé {FR}
    #   4  Dépot sauvage {FR}
    #   5  Enfouissement {FR}
    #   6  Incineration
    #   7  Other
    #   9  (m) Missing
    #      (na) Not applicable
    "sh24c",

    # Wealth index
    #   1  Poorest
    #   2  Poorer
    #   3  Middle
    #   4  Richer
    #   5  Richest
    "hv270",
]

In [5]:
# Keep every row, but only the variables named in col_list (in that order).
hr_2005_sel = hr_2005.loc[:, col_list]

In [6]:
# Show the selected household variables. Coded variables appear as their labels
# (e.g. "completed", "rural"); codes without a label (e.g. 99, 9) appear as raw numbers.
hr_2005_sel

Unnamed: 0,hhid,hv001,hv002,hv009,hv014,hv015,hv024,hv025,hv106_01,hv201,...,hv214,hv215,hv225,hv226,hv230,hv231,hv232,sh22c,sh24c,hv270
0,1 3411,1,11,7,0,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,,,"firewood, straw",in dwelling/yard/plot,yes,yes,occasionally,dépot sauvage {fr},middle
1,1 8355,1,55,8,2,completed,diourbel,rural,"no education, preschool",public tap,...,,,no,"lpg, natural gas",in dwelling/yard/plot,yes,yes,water always available (no outages),wagon,middle
2,112462,1,62,4,0,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,,no,99,in dwelling/yard/plot,yes,yes,water always available (no outages),wagon,middle
3,112724,1,24,9,1,completed,diourbel,rural,"no education, preschool",piped into yard/plot,...,,,no,charcoal,somewhere else,,,every day,dépot sauvage {fr},middle
4,117231,1,31,10,2,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,,no,charcoal,in dwelling/yard/plot,yes,yes,every day,incineration,richer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7407,377518 3,377,3,4,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,wagon,richer
7408,37751828,377,28,3,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,9,richer
7409,37751843,377,43,3,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,wagon,richer
7410,37753424,377,24,12,2,completed,saint-louis,urban,"no education, preschool",public tap,...,,,yes,charcoal,in dwelling/yard/plot,yes,yes,water always available (no outages),other,richer


# Geodata

In [7]:
# Load the 2005 cluster GPS shapefile into a GeoDataFrame (one point per cluster).
gps_2005 = gpd.read_file(
    "2005/SNGE4BFL/SNGE4BFL.shp",
)

In [8]:
# Show the cluster GPS table. Note the trailing rows with SOURCE "MIS": their
# coordinates are 0/0 and ALT_GPS/ALT_DEM are 9999, i.e. placeholders, not real locations.
gps_2005

Unnamed: 0,DHSID,DHSCC,DHSYEAR,DHSCLUST,CCFIPS,ADM1FIPS,ADM1FIPSNA,ADM1SALBNA,ADM1SALBCO,ADM1DHS,...,DHSREGCO,DHSREGNA,SOURCE,URBAN_RURA,LATNUM,LONGNUM,ALT_GPS,ALT_DEM,DATUM,geometry
0,SN200500000001,SN,2005.0,1.0,SG,SG03,Diourbel,,,2.0,...,2.0,diourbel,GPS,R,14.862779,-15.871947,17.9102,38.0,WGS84,POINT (-15.87195 14.86278)
1,SN200500000002,SN,2005.0,2.0,SG,SG15,Matam,,,7.0,...,7.0,matam,GPS,R,16.015481,-13.724759,15.9875,22.0,WGS84,POINT (-13.72476 16.01548)
2,SN200500000003,SN,2005.0,3.0,SG,SG09,Fatick,,,3.0,...,3.0,fatick,GPS,U,14.271731,-15.943239,29.2056,24.0,WGS84,POINT (-15.94324 14.27173)
3,SN200500000004,SN,2005.0,4.0,SG,SG01,Dakar,,,1.0,...,1.0,dakar,GPS,R,14.770231,-17.158437,26.5619,23.0,WGS84,POINT (-17.15844 14.77023)
4,SN200500000006,SN,2005.0,6.0,SG,SG14,Saint-Louis,,,8.0,...,8.0,saint-louis,GPS,U,16.016467,-16.494718,-8.7662,10.0,WGS84,POINT (-16.49472 16.01647)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371,SN200500000070,SN,2005.0,70.0,,SG01,Dakar,,,1.0,...,1.0,dakar,MIS,R,0.000000,0.000000,9999.0000,9999.0,WGS84,POINT (0.00000 0.00000)
372,SN200500000123,SN,2005.0,123.0,,SG14,Saint-Louis,,,8.0,...,8.0,saint-louis,MIS,R,0.000000,0.000000,9999.0000,9999.0,WGS84,POINT (0.00000 0.00000)
373,SN200500000145,SN,2005.0,145.0,,SG12,Ziguinchor,,,11.0,...,11.0,zuguinchor,MIS,R,0.000000,0.000000,9999.0000,9999.0,WGS84,POINT (0.00000 0.00000)
374,SN200500000168,SN,2005.0,168.0,,SG03,Diourbel,,,2.0,...,2.0,diourbel,MIS,R,0.000000,0.000000,9999.0000,9999.0,WGS84,POINT (0.00000 0.00000)


In [9]:
# Attach each household's cluster location: households join to GPS clusters
# through hv001 (household file) == DHSCLUST (GPS file).
#
# Clusters whose SOURCE is "MIS" have no GPS fix and only carry the placeholder
# POINT (0 0). They are left out of the lookup so their households end up with a
# missing geometry instead of being mapped at 0°N 0°E.
has_location = gps_2005['SOURCE'] != 'MIS'
df_1 = (gps_2005.loc[has_location, ['DHSCLUST', 'geometry']]
        .rename(columns={"DHSCLUST": "hv001"}))

# Left join keeps every household, located or not. `validate` guarantees that a
# cluster id occurs at most once in the lookup, so households are never duplicated.
df_2 = hr_2005_sel.merge(df_1,
                         on='hv001',
                         how='left',
                         validate='many_to_one')

# Pass the CRS object itself rather than its EPSG code: to_epsg() may yield None
# when no EPSG code matches, which would leave the result without a CRS.
gdf = gpd.GeoDataFrame(df_2,
                       geometry='geometry',
                       crs=gps_2005.crs)
gdf

Unnamed: 0,hhid,hv001,hv002,hv009,hv014,hv015,hv024,hv025,hv106_01,hv201,...,hv215,hv225,hv226,hv230,hv231,hv232,sh22c,sh24c,hv270,geometry
0,1 3411,1,11,7,0,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,,"firewood, straw",in dwelling/yard/plot,yes,yes,occasionally,dépot sauvage {fr},middle,POINT (-15.87195 14.86278)
1,1 8355,1,55,8,2,completed,diourbel,rural,"no education, preschool",public tap,...,,no,"lpg, natural gas",in dwelling/yard/plot,yes,yes,water always available (no outages),wagon,middle,POINT (-15.87195 14.86278)
2,112462,1,62,4,0,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,no,99,in dwelling/yard/plot,yes,yes,water always available (no outages),wagon,middle,POINT (-15.87195 14.86278)
3,112724,1,24,9,1,completed,diourbel,rural,"no education, preschool",piped into yard/plot,...,,no,charcoal,somewhere else,,,every day,dépot sauvage {fr},middle,POINT (-15.87195 14.86278)
4,117231,1,31,10,2,completed,diourbel,rural,"no education, preschool",piped into dwelling,...,,no,charcoal,in dwelling/yard/plot,yes,yes,every day,incineration,richer,POINT (-15.87195 14.86278)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7407,377518 3,377,3,4,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,wagon,richer,POINT (-16.51125 16.04090)
7408,37751828,377,28,3,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,9,richer,POINT (-16.51125 16.04090)
7409,37751843,377,43,3,0,completed,saint-louis,urban,secondary,piped into yard/plot,...,,yes,"lpg, natural gas",in dwelling/yard/plot,yes,yes,occasionally,wagon,richer,POINT (-16.51125 16.04090)
7410,37753424,377,24,12,2,completed,saint-louis,urban,"no education, preschool",public tap,...,,yes,charcoal,in dwelling/yard/plot,yes,yes,water always available (no outages),other,richer,POINT (-16.51125 16.04090)


In [10]:
# Convert categorical columns to string (otherwise not possible to export GPKG).
# A plain astype(str) would also turn missing values into the literal text "nan",
# which would then be exported as a real answer; masking with notna() keeps them missing.
categorical_columns = gdf.select_dtypes(['category']).columns
for var in categorical_columns:
    is_present = gdf[var].notna()
    gdf[var] = gdf[var].astype(str).where(is_present)

In [11]:
# Write the households with their cluster points to a GeoPackage.
# No `crs` argument is passed: the CRS stored on `gdf` is written automatically,
# and the pyogrio engine (GeoPandas' default writer since 1.0) raises a ValueError
# when an explicit `crs` is given.
gdf.to_file("Export_students/dhs_household-data_2005_group-B.gpkg",
            driver="GPKG")