In [None]:
# Nepal Gorkha 2015 Earthquake Magnitude, Damage, and Impact data
# https://www.kaggle.com/datasets/arashnic/earthquake-magnitude-damage-and-impact
import pandas as pd
import geopandas as gpd
from data_manipulations import *

# Load the three input tables from the working directory. The files are read
# in the order listed and each resulting frame is bound to its own name.
buildings, csv_household_demographics, mapping = (
    pd.read_csv(csv_path)
    for csv_path in (
        './csv_building_structure.csv',
        './csv_household_demographics.csv',
        './mapping.csv',
    )
)

In [None]:
# Perturb the data
from randomized_response import *

# gamma is the parameter handed to every perturb() call below. Both candidate
# assignments in this cell were commented out, so on a fresh kernel the cell
# raised NameError unless gamma was supplied from outside (for example by the
# star import above or by an external runner). The guard keeps any such
# externally supplied value and only falls back when none exists.
# Candidate values tried so far: 0.48, 0.45.
# NOTE(review): 0.45 is used as the fallback because it was the last of the
# two commented-out candidates -- confirm this is the intended default.
if 'gamma' not in globals():
    gamma = 0.45
print(f"Using gamma = {gamma}")

# Later cells read '<column>_p' columns (e.g. 'age_building_p',
# 'damage_grade_p'), so perturb() presumably returns the frame with a
# perturbed copy of the named column added under that suffix -- TODO confirm
# against randomized_response.perturb.
# The two extra numeric arguments look like lower/upper bounds for numeric
# columns (building age 0..200, household size 1..40) -- TODO confirm.
# The call order is kept exactly as before, in case perturb() draws from a
# shared random stream.
buildings = perturb(buildings, 'damage_grade', gamma)
buildings = perturb(buildings, 'age_building', gamma, 0, 200)
buildings = perturb(buildings, 'condition_post_eq', gamma)
buildings = perturb(buildings, 'technical_solution_proposed', gamma)
csv_household_demographics = perturb(csv_household_demographics, 'income_level_household', gamma)
csv_household_demographics = perturb(csv_household_demographics, 'size_household', gamma, 1, 40)
csv_household_demographics = perturb(csv_household_demographics, 'gender_household_head', gamma)


In [None]:
# Flag new constructions: a building counts as new when its age is at most 10.
# The flag is derived twice, from the original age column and from its
# perturbed ('_p') counterpart.
buildings['new_construction'], buildings['new_construction_p'] = (
    buildings['age_building'].le(10),
    buildings['age_building_p'].le(10),
)

# Household poverty flag, based on the upper bound of the household's monthly
# income range: annualise it (x 12), divide by the household size, and treat
# the household as poor when the result is at or below 20K rupees / person /
# year. That cut-off ends up being slightly more generous than the UN
# definition of $1.90 / person / day.
# https://databankfiles.worldbank.org/data/download/poverty/987B9C90-CB9F-4D93-AE8C-750588BF00QA/SM2020/Global_POVEQ_NPL.pdf
# Every derived column is produced twice: once from the original columns and
# once from their perturbed ('_p') counterparts.
csv_household_demographics['income_level_household_numeric'] = (
    csv_household_demographics['income_level_household'].apply(numeric_income)
)
csv_household_demographics['income_level_household_numeric_p'] = (
    csv_household_demographics['income_level_household_p'].apply(numeric_income)
)
csv_household_demographics['poverty'] = (
    csv_household_demographics['income_level_household_numeric']
    .mul(12.0)
    .div(csv_household_demographics['size_household'])
    .le(20000.0)
)
csv_household_demographics['poverty_p'] = (
    csv_household_demographics['income_level_household_numeric_p']
    .mul(12.0)
    .div(csv_household_demographics['size_household_p'])
    .le(20000.0)
)

# Households whose head is recorded as 'Female' (original and perturbed).
csv_household_demographics['female_headed'] = (
    csv_household_demographics['gender_household_head'].eq('Female')
)
csv_household_demographics['female_headed_p'] = (
    csv_household_demographics['gender_household_head_p'].eq('Female')
)

# Convert the damage grade to its numeric form via numeric_damage_grade
# (expected to yield integer values -- the helper is defined outside this
# notebook), for both the original column and its perturbed ('_p') counterpart.
buildings['damage_grade_numeric'], buildings['damage_grade_numeric_p'] = (
    buildings['damage_grade'].apply(numeric_damage_grade),
    buildings['damage_grade_p'].apply(numeric_damage_grade),
)

# Link every household to its building: first attach the building_id through
# the de-duplicated (household_id, building_id) pairs of the mapping table,
# then pull in the building attributes. Both joins use merge's default
# join type.
households = (
    csv_household_demographics
    .merge(
        mapping[['household_id', 'building_id']].drop_duplicates(),
        on='household_id',
    )
    .merge(buildings, on='building_id')
)