# clean_hh_2005

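This notebook cleans household-level data. Note: despite the 2005 in the title, the paths and the output file used below refer to the 2002 data (`raw/2002/`, `interim/2002/`, `hh_2002.dta`).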

## Inputs
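1. ii_in.dta : household non-labor income
2. ii_inr.dta : household rural income
3. ii_portad.dta : household location
4. ii_crh.dta : base household table that the other files are merged onto
5. ii_ah.dta : household file merged on folio (its `ls` column is dropped on load)
6. hh02w_b2.dta : household weights (`factor_b2`, renamed to `weight`)
7. i_cs.dta, i_cs1.dta, c_cv.dta : additional household files merged on folio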

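## Outputs
1. folio : household id
2. hh_no_savings : hh has no savings
3. hh_has_savings : hh has savings
4. hh_no_debts_12mth : hh has no debts
5. hh_has_debts_12mth : hh has debts
6. hh_has_liquid : hh has savings or debts

The exported file also contains the weight, property and asset columns collected in the `keep` list.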

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os 

import sys; sys.path.append('/home/mitch/util/python')

In [8]:
# Folder holding the JSON lookup dictionaries, and the folder for figures.
dicts = "/home/mitch/school/mfl2/notebooks/dicts/"
figs = "/home/mitch/school/mfl2/fig/"

# 2002 data folders: cleaned output is written to `interim`, survey files are
# read from `raw`.
interim = "/home/mitch/Dropbox/data/mexico_fls/interim/2002/"
raw = "/home/mitch/Dropbox/data/mexico_fls/raw/2002/"

In [9]:
# Running list of the columns to export; later cells add to it as they create
# new variables.  The household id is always exported.
keep = ["folio"]

In [10]:
# Load the Stata files, one survey book at a time.  Each os.chdir() moves the
# working directory into that book's data folder so the files can be read by
# bare file name.

# Book II: `data` (ii_crh) is the base table the other files are merged onto.
os.chdir(raw + 'book II/data/')
data, ii_in, ii_inr, ii_portad = (
    pd.read_stata(file_name)
    for file_name in ('ii_crh.dta', 'ii_in.dta', 'ii_inr.dta', 'ii_portad.dta')
)
# The `ls` column is dropped right after loading -- presumably to avoid a
# column-name clash in the merge; confirm.
ii_ah = pd.read_stata('ii_ah.dta')
ii_ah = ii_ah.drop(columns=['ls'])
weights = pd.read_stata('hh02w_b2.dta')

# Book I
os.chdir(raw + 'book I/data/')
i_cs = pd.read_stata('i_cs.dta')
i_cs1 = pd.read_stata('i_cs1.dta')

# Book C
os.chdir(raw + 'book C/data/')
c_cv = pd.read_stata('c_cv.dta')


In [11]:
# Rename the weighting factor to `weight` and mark it for export.
weights = weights.rename(columns={'factor_b2': 'weight'})
keep.append('weight')

In [12]:
# Bare expression: the notebook displays the renamed weights table as this
# cell's output, for a quick visual check.
weights

Unnamed: 0,folio,weight
0,00001000,2385
1,00002000,2385
2,00003000,2385
3,00004000,2385
4,00006000,2385
...,...,...
8435,10754000,484
8436,10756000,484
8437,10757000,1373
8438,10758000,1373


In [13]:
# Cast the household id (folio) to float64 in every table so that the merge
# keys share one numeric dtype.
tables_with_folio = (data, weights, ii_in, ii_inr, ii_portad, i_cs, i_cs1, ii_ah, c_cv)
for table in tables_with_folio:
    table['folio'] = table['folio'].astype(np.float64)

In [14]:
# Join every table onto the base table by folio, one after another.  The joins
# are inner joins, so only households present in every table remain.
for other_table in (weights, ii_in, ii_inr, ii_portad, i_cs, i_cs1, ii_ah, c_cv):
    data = data.merge(other_table, on=['folio'], how='inner')

In [15]:
# Give cs27e_2 a readable name and mark it for export.
data = data.rename(columns={'cs27e_2': 'property_income_tax'})
keep.append('property_income_tax')

In [16]:
# Move into the dictionaries folder before importing the helper and loading the
# JSON files by bare name (presumably load_json resolves names relative to the
# working directory -- confirm).
os.chdir(dicts)
import json_utils

# Asset lookups: the lists of asset names, and the asset -> raw column maps.
(
    illiquid_assets,
    productive_illiquid_assets,
    has_asset,
    knows_asset_value,
    asset_value,
) = map(
    json_utils.load_json,
    [
        'illiquid_assets.json',
        'productive_illiquid_assets.json',
        'has_asset.json',
        'knows_asset_value.json',
        'asset_value.json',
    ],
)

# Labels for the cv02_1 answer codes.
cv02_1_keys = json_utils.load_json('cv02_1_keys.json')

In [17]:
# Recode cv02_1 into a labelled `property_ownership` column plus two boolean
# indicator columns.
data = data.rename(columns={'cv02_1': 'property_ownership'})

# Missing answers get the code -1.0 so that they can be looked up in
# cv02_1_keys like any other code.
ownership_missing = data['property_ownership'].isna()
data.loc[ownership_missing, 'property_ownership'] = -1.0

# Look the codes up by their string form; a code absent from cv02_1_keys
# raises KeyError.
ownership_codes = data['property_ownership'].astype(str)
data['property_ownership'] = ownership_codes.apply(lambda code: cv02_1_keys[str(code)])

# NOTE(review): the two labels compared below follow different naming patterns
# ('property_own_outright' vs 'property_community_ejido').  Check both against
# cv02_1_keys.json -- a label that is not in the file makes its flag always False.
ownership_label = data['property_ownership']
data['property_own_outright'] = ownership_label == 'property_own_outright'
data['property_own_community_ejido'] = ownership_label == 'property_community_ejido'

keep.extend(['property_ownership', 'property_own_outright', 'property_own_community_ejido'])

In [18]:
# Bare expression: the notebook displays the asset -> "knows value" column
# mapping as this cell's output.
knows_asset_value

{'house': 'ah04a_1',
 'otherhouse': 'ah04b_1',
 'bicycles': 'ah04c_1',
 'vehicle': 'ah04d_1',
 'electronics': 'ah04e_1',
 'washmachinestove': 'ah04f_1',
 'domesticappliance': 'ah04g_1',
 'financialassets': 'ah04h_1',
 'machinary': 'ah04i_1',
 'bullcow': 'ah04j_1',
 'horsesmules': 'ah04k_1',
 'pigsgoats': 'ah04l_1',
 'poultry': 'ah04m_1',
 'otherassets': 'ah04n_1'}

In [19]:
# Build household asset values and ownership flags.
#
# For each asset name, the lookup dictionaries give the raw column holding the
# "has this asset" answer (has_asset), the "knows its value" answer
# (knows_asset_value) and the reported value (asset_value).
data = data.copy()

# Answer codes of the yes/no questions used in this cell.
yes = 1.0
no = 3.0  # not used below; kept for reference


def _owns_any(asset_names):
    """Return a boolean Series, True where any of `asset_names` is answered yes."""
    answered_yes = [data[has_asset[name]] == yes for name in asset_names]
    return pd.concat(answered_yes, axis=1).any(axis=1)


# One value column per asset: the reported value where the household says it
# knows the value, NaN otherwise.
for asset in illiquid_assets:
    data[asset] = np.nan
    knows_value = data[knows_asset_value[asset]] == yes
    data.loc[knows_value, asset] = data.loc[knows_value, asset_value[asset]]

# Row sums skip NaN, so a household with no known values gets a total of 0.
data['illiquid_assets'] = data[illiquid_assets].sum(axis=1)
data['has_illiquid_assets'] = data['illiquid_assets'] > 0

data['productive_illiquid_assets'] = data[productive_illiquid_assets].sum(axis=1)
data['has_productive_illiquid_assets'] = data['productive_illiquid_assets'] > 0

# Housing value = value of the house plus value of other houses.  This sums the
# value columns built in the loop, the same way `animal` and `durable` are
# built; the "has asset" answer codes are not values and are only used for the
# has_housing flag.
housing = ['house', 'otherhouse']
data['housing'] = data[housing].sum(axis=1)
data['has_housing'] = _owns_any(housing)

animal = ['bullcow', 'horsesmules', 'pigsgoats', 'poultry']
data['animal'] = data[animal].sum(axis=1)
data['has_animal'] = _owns_any(animal)

data['has_machinary'] = data[has_asset['machinary']] == yes

durable = ['bicycles', 'vehicle', 'electronics', 'washmachinestove', 'domesticappliance', 'machinary']
data['durable'] = data[durable].sum(axis=1)
# NOTE(review): the `durable` value includes machinery, but the has_durable
# flag does not look at machinery ownership (it has its own has_machinary
# flag).  Confirm which of the two definitions is intended.
data['has_durable'] = _owns_any(
    ['bicycles', 'vehicle', 'electronics', 'washmachinestove', 'domesticappliance']
)

data['has_financialassets'] = data[has_asset['financialassets']] == yes

# Columns to export, in the order they appear in the output file.
keep = keep + [
    'house', 'otherhouse', 'housing', 'financialassets', 'otherassets',
    'illiquid_assets', 'productive_illiquid_assets',
    'has_housing', 'has_durable', 'has_animal', 'has_machinary',
    'has_financialassets', 'has_illiquid_assets', 'has_productive_illiquid_assets',
    'animal', 'durable', 'machinary',
]



In [20]:
# Household savings and debt indicators.
data = data.copy()

# crh01_1a == 1.0 is taken as "household has no savings"; every other row is
# flagged as having savings.
# NOTE(review): a missing crh01_1a therefore counts as "has savings" -- confirm
# that this is intended.
no_savings = data['crh01_1a'] == 1.0
data['hh_no_savings'] = no_savings
data['hh_has_savings'] = ~no_savings

# NOTE(review): "no debts" is matched on code 2.0 here, while the asset cell
# uses 3.0 as its "no" code -- check the coding of crh02_1.
has_debts = data['crh02_1'] == 1.0
data['hh_has_debts_12mth'] = has_debts
data['hh_no_debts_12mth'] = data['crh02_1'] == 2.0

# Debt amount (crh02_2), filled in only for households flagged as having debts.
data['hh_debts'] = np.nan
data.loc[has_debts, 'hh_debts'] = data.loc[has_debts, 'crh02_2']

data['hh_has_liquid'] = data['hh_has_savings'] | data['hh_has_debts_12mth']

keep = keep + [
    'hh_no_savings',
    'hh_has_savings',
    'hh_has_debts_12mth',
    'hh_no_debts_12mth',
    'hh_debts',
    'hh_has_liquid',
]

In [21]:
# Write the selected columns to the interim folder as a Stata file, without
# the row index.
os.chdir(interim)
export_table = data[keep]
export_table.to_stata('hh_2002.dta', write_index=False)