## Libraries

In [1]:
import pandas as pd

## Dwelling data

In [2]:
path = '../../data/raw/ENAHO/'
enaho_dwelling_2016_file = path + 'enaho01-2016-100.dta'
enaho_dwelling_2017_file = path + 'enaho01-2017-100.dta'
enaho_dwelling_2018_file = path + 'enaho01-2018-100.dta'

In [3]:
cols_dwelling = ['aÑo', 'conglome', 'vivienda', 'hogar', 'ubigeo', 'nbi1', 'nbi2', 'nbi3', 'factor07']
enaho_dwelling_2016 = pd.read_stata(enaho_dwelling_2016_file, columns=cols_dwelling)
enaho_dwelling_2017 = pd.read_stata(enaho_dwelling_2017_file, columns=cols_dwelling)
enaho_dwelling_2018 = pd.read_stata(enaho_dwelling_2018_file, columns=cols_dwelling)

In [4]:
enaho_dwelling = pd.concat([enaho_dwelling_2016,
                            enaho_dwelling_2017,
                            enaho_dwelling_2018]).reset_index(drop = True)

In [5]:
dwelling_names = {'aÑo': 'year',
                  'conglome': 'conglomerate',
                  'vivienda': 'house',
                  'hogar': 'household',
                  'ubigeo': 'IDDIST',
                  'nbi1': 'inadequate dwelling',
                  'nbi2': 'overcrowded dwelling',
                  'nbi3': 'no water/sewage connection',
                  'factor07': 'hh weight'}
enaho_dwelling = enaho_dwelling.rename(columns = dwelling_names)

In [6]:
enaho_dwelling = enaho_dwelling[enaho_dwelling['IDDIST'].apply(lambda x: x[:4] == '1501' or x[:2] == '07')]
enaho_dwelling = enaho_dwelling[(enaho_dwelling['inadequate dwelling'].notna()) & \
                                (enaho_dwelling['overcrowded dwelling'].notna()) & \
                                (enaho_dwelling['no water/sewage connection'].notna())]

In [7]:
enaho_dwelling.loc[(enaho_dwelling['inadequate dwelling'] == 'vivienda inadecuada') | \
                   (enaho_dwelling['overcrowded dwelling'] == 'vivienda con hacinamiento') | \
                   (enaho_dwelling['overcrowded dwelling'] == 'vivienda hacinada') | \
                   (enaho_dwelling['no water/sewage connection'] == 'hogares con vivienda sin servicios hogienicos') | \
                   (enaho_dwelling['no water/sewage connection'] == 'vivienda sin servicios higienicos'),
                   'bad dwelling'] = 1
enaho_dwelling.loc[(enaho_dwelling['bad dwelling'].isna(), 'bad dwelling')] = 0

In [8]:
enaho_dwelling.head()

Unnamed: 0,year,conglomerate,house,household,IDDIST,inadequate dwelling,overcrowded dwelling,no water/sewage connection,hh weight,bad dwelling
579,2016,1652,3,11,70101,vivienda adecuada,vivienda sin hacinamiento,hogares con vivienda con servicios higienicos,330.041718,0.0
581,2016,1652,42,11,70101,vivienda adecuada,vivienda sin hacinamiento,hogares con vivienda con servicios higienicos,330.041718,0.0
582,2016,1652,64,11,70101,vivienda adecuada,vivienda sin hacinamiento,hogares con vivienda con servicios higienicos,330.041718,0.0
584,2016,1652,103,11,70101,vivienda adecuada,vivienda sin hacinamiento,hogares con vivienda con servicios higienicos,330.041718,0.0
585,2016,1661,11,11,70101,vivienda adecuada,vivienda sin hacinamiento,hogares con vivienda con servicios higienicos,330.041718,0.0


In [9]:
enaho_dwelling['bad dwelling'].value_counts()

0.0    11612
1.0      634
Name: bad dwelling, dtype: int64

## Collapsing by district

In [10]:
def weighted_mean_function_generator(col, weights):

    def weighted_mean(df):

        col_times_weight = df[col] * df[weights]
        weight = df[weights]

        return col_times_weight.sum() / weight.sum()

    return weighted_mean

In [11]:
weighted_mean_bad_dwelling = weighted_mean_function_generator('bad dwelling', 'hh weight')

In [12]:
groupby_cols = ['IDDIST']

In [13]:
district_bad_dwelling = enaho_dwelling.groupby(groupby_cols).apply(weighted_mean_bad_dwelling).reset_index().rename(columns={0: 'bad dwelling rate'})

In [14]:
district_bad_dwelling.head()

Unnamed: 0,IDDIST,bad dwelling rate
0,70101,0.046902
1,70102,0.024276
2,70103,0.024731
3,70104,0.009583
4,70105,0.0


In [15]:
district_bad_dwelling.to_csv('../../data/clean/bad dwelling by district_2016-2018 average.csv', index=False)