In [1]:
import pandas as pd

## Files

In [2]:
# Directory (relative to this notebook) that holds the raw ENAHO Stata (.dta) files.
path = '../data/raw/ENAHO/'

# Dwelling files, one per survey year 2014-2018.
enaho_dwelling_2014_file = f'{path}enaho01-2014-100.dta'
enaho_dwelling_2015_file = f'{path}enaho01-2015-100.dta'
enaho_dwelling_2016_file = f'{path}enaho01-2016-100.dta'
enaho_dwelling_2017_file = f'{path}enaho01-2017-100.dta'
enaho_dwelling_2018_file = f'{path}enaho01-2018-100.dta'

# Summary ("sumaria") files, one per survey year 2014-2018.
enaho_summary_2014_file = f'{path}sumaria-2014.dta'
enaho_summary_2015_file = f'{path}sumaria-2015.dta'
enaho_summary_2016_file = f'{path}sumaria-2016.dta'
enaho_summary_2017_file = f'{path}sumaria-2017.dta'
enaho_summary_2018_file = f'{path}sumaria-2018.dta'

# Labor file; only the 2018 edition is referenced.
enaho_labor_2018_file = f'{path}enaho01a-2018-500.dta'

## Dwelling

In [3]:
# Columns read from every yearly dwelling file; all other columns are skipped.
cols_dwelling = [
    'aÑo', 'conglome', 'vivienda', 'hogar',
    'ubigeo', 'result', 'p110', 'factor07',
]

# Read the five yearly dwelling files in chronological order, one DataFrame per year.
(enaho_dwelling_2014,
 enaho_dwelling_2015,
 enaho_dwelling_2016,
 enaho_dwelling_2017,
 enaho_dwelling_2018) = [
    pd.read_stata(dwelling_file, columns=cols_dwelling)
    for dwelling_file in (enaho_dwelling_2014_file,
                          enaho_dwelling_2015_file,
                          enaho_dwelling_2016_file,
                          enaho_dwelling_2017_file,
                          enaho_dwelling_2018_file)
]

In [4]:
# Stack the yearly dwelling frames (2014 first, 2018 last) into a single table.
# ignore_index=True discards the per-file row labels and numbers the rows 0..n-1.
enaho_dwelling = pd.concat(
    [
        enaho_dwelling_2014,
        enaho_dwelling_2015,
        enaho_dwelling_2016,
        enaho_dwelling_2017,
        enaho_dwelling_2018,
    ],
    ignore_index=True,
)

In [5]:
# Raw dwelling column name -> descriptive name used from here on.
# 'result' is not listed, so it keeps its original name.
dwelling_names = {
    'aÑo': 'year',
    'conglome': 'conglomerate',
    'vivienda': 'house',
    'hogar': 'household',
    'ubigeo': 'IDDIST',
    'p110': 'access to water',
    'factor07': 'household weight',
}
enaho_dwelling = enaho_dwelling.rename(dwelling_names, axis='columns')

In [6]:
# Keep only rows whose district code (IDDIST) begins with '1501' or '07'.
# NOTE(review): presumably '1501' is Lima province and '07' is Callao — confirm.
# The vectorised .str.startswith replaces a per-row lambda; na=False makes a missing
# code count as "no match" instead of raising TypeError when NaN is sliced.
district_code = enaho_dwelling['IDDIST']
in_selected_districts = (
    district_code.str.startswith('1501', na=False)
    | district_code.str.startswith('07', na=False)
)
enaho_dwelling = enaho_dwelling[in_selected_districts]

# Drop rows that have no recorded water-access answer.
enaho_dwelling = enaho_dwelling[enaho_dwelling['access to water'].notna()]
enaho_dwelling.head()

Unnamed: 0,year,conglomerate,house,household,IDDIST,result,access to water,household weight
4179,2014,1650,18,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004
4180,2014,1650,31,11,70101,completa,"red pública, dentro de la vivienda",313.410004
4181,2014,1650,44,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004
4183,2014,1650,81,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004
4185,2014,1652,3,11,70101,completa,"red pública, dentro de la vivienda",290.570007


## Summary

In [7]:
# Columns read from every yearly summary file; all other columns are skipped.
cols_summary = [
    'aÑo', 'conglome', 'vivienda', 'hogar',
    'ubigeo', 'mieperho',
]

# Read the five yearly summary files in chronological order, one DataFrame per year.
(enaho_summary_2014,
 enaho_summary_2015,
 enaho_summary_2016,
 enaho_summary_2017,
 enaho_summary_2018) = [
    pd.read_stata(summary_file, columns=cols_summary)
    for summary_file in (enaho_summary_2014_file,
                         enaho_summary_2015_file,
                         enaho_summary_2016_file,
                         enaho_summary_2017_file,
                         enaho_summary_2018_file)
]

In [8]:
# Stack the yearly summary frames (2014 first, 2018 last) into a single table.
# ignore_index=True discards the per-file row labels and numbers the rows 0..n-1.
enaho_summary = pd.concat(
    [
        enaho_summary_2014,
        enaho_summary_2015,
        enaho_summary_2016,
        enaho_summary_2017,
        enaho_summary_2018,
    ],
    ignore_index=True,
)

In [9]:
# Raw summary column name -> descriptive name used from here on.
# The key columns get the same names as in the dwelling table.
summary_names = {
    'aÑo': 'year',
    'conglome': 'conglomerate',
    'vivienda': 'house',
    'hogar': 'household',
    'ubigeo': 'IDDIST',
    'mieperho': 'num_hh_members',
}
enaho_summary = enaho_summary.rename(summary_names, axis='columns')

In [10]:
# Keep only rows whose district code (IDDIST) begins with '1501' or '07'.
# NOTE(review): presumably '1501' is Lima province and '07' is Callao — confirm.
# The vectorised .str.startswith replaces a per-row lambda; na=False makes a missing
# code count as "no match" instead of raising TypeError when NaN is sliced.
summary_district_code = enaho_summary['IDDIST']
summary_in_selected_districts = (
    summary_district_code.str.startswith('1501', na=False)
    | summary_district_code.str.startswith('07', na=False)
)
enaho_summary = enaho_summary[summary_in_selected_districts]
enaho_summary.head()

Unnamed: 0,year,conglomerate,house,household,IDDIST,num_hh_members
3243,2014,1650,18,11,70101,3
3244,2014,1650,31,11,70101,4
3245,2014,1650,44,11,70101,4
3246,2014,1650,81,11,70101,5
3247,2014,1652,3,11,70101,2


## Merging

In [11]:
# Attach the household size from the summary table to each dwelling row.
# A left join keeps every dwelling row; rows without a matching summary record
# end up with a missing num_hh_members.
enaho = enaho_dwelling.merge(
    enaho_summary,
    how='left',
    on=['year', 'conglomerate', 'house', 'household', 'IDDIST'],
)
enaho.head()

Unnamed: 0,year,conglomerate,house,household,IDDIST,result,access to water,household weight,num_hh_members
0,2014,1650,18,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004,3
1,2014,1650,31,11,70101,completa,"red pública, dentro de la vivienda",313.410004,4
2,2014,1650,44,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004,4
3,2014,1650,81,11,70101,incompleta,"red pública, dentro de la vivienda",313.410004,5
4,2014,1652,3,11,70101,completa,"red pública, dentro de la vivienda",290.570007,2


In [21]:
# Household weight scaled by the number of household members.
enaho['hh members weight'] = enaho['household weight'].mul(enaho['num_hh_members'])

In [22]:
# Indicator column: 1 where the answer is 'red pública, dentro de la vivienda', 0 for every other row.
has_piped_inside = enaho['access to water'] == 'red pública, dentro de la vivienda'
enaho.loc[has_piped_inside, 'piped water inside the house'] = 1
enaho.loc[~has_piped_inside, 'piped water inside the house'] = 0

In [23]:
# Indicator column: 1 where the answer is the "outside the dwelling but inside the building"
# public-network label, 0 for every other row.
has_piped_in_building = (
    enaho['access to water'] == 'red pública, fuera de la vivienda pero dentro del edificio'
)
enaho.loc[has_piped_in_building, 'piped water outside the house, inside the property'] = 1
enaho.loc[~has_piped_in_building, 'piped water outside the house, inside the property'] = 0

In [24]:
# Indicator column: 1 where the answer is 'camión - cisterna u otro similar', 0 for every other row.
uses_tanker_truck = enaho['access to water'] == 'camión - cisterna u otro similar'
enaho.loc[uses_tanker_truck, 'tanker truck'] = 1
enaho.loc[~uses_tanker_truck, 'tanker truck'] = 0

In [25]:
# Indicator column: 1 where the answer is either of the two public-standpipe labels, 0 otherwise.
# NOTE(review): the two spellings presumably come from different survey years — confirm.
uses_public_standpipe = enaho['access to water'].isin(
    ['pilón de uso público', 'pilón o pileta de uso público']
)
enaho.loc[uses_public_standpipe, 'public water reservoir'] = 1
enaho.loc[~uses_public_standpipe, 'public water reservoir'] = 0

In [26]:
# Indicator column: 1 where the answer is either of the two well labels, 0 otherwise.
# NOTE(review): the two spellings presumably come from different survey years — confirm.
uses_well = enaho['access to water'].isin(['pozo', 'pozo (agua subterranea)'])
enaho.loc[uses_well, 'water well'] = 1
enaho.loc[~uses_well, 'water well'] = 0

In [27]:
# Indicator column: 1 where the answer is 'río, acequia, manantial o similar', 0 for every other row.
uses_surface_water = enaho['access to water'] == 'río, acequia, manantial o similar'
enaho.loc[uses_surface_water, 'river or water stream'] = 1
enaho.loc[~uses_surface_water, 'river or water stream'] = 0

In [28]:
# Indicator column: 1 where the answer is 'otra', 0 for every other row.
uses_other_source = enaho['access to water'] == 'otra'
enaho.loc[uses_other_source, 'others'] = 1
enaho.loc[~uses_other_source, 'others'] = 0