In [None]:
# Nepal Gorkha 2015 Earthquake Magnitude, Damage, and Impact data
# https://www.kaggle.com/datasets/arashnic/earthquake-magnitude-damage-and-impact
import pandas as pd
import geopandas as gpd
from data_manipulations import *

# Load the data
# Three raw tables are read from the working directory: per-building structure
# records, per-household demographics, and the table that links the two.
buildings, csv_household_demographics, mapping = (
    pd.read_csv(csv_path)
    for csv_path in (
        './csv_building_structure.csv',
        './csv_household_demographics.csv',
        './mapping.csv',
    )
)

In [None]:
# Perturb the data
from randomized_response import *

# Single perturbation parameter shared by every perturb() call below.
# NOTE(review): its exact meaning is defined in randomized_response.perturb,
# which is not visible here.
gamma = 0.48

# Each entry is (column, extra positional arguments). Later cells read the
# perturbed values from '<column>_p' columns on the returned frame.
# The two numeric columns pass two extra numbers - presumably the allowed value
# range; confirm against perturb().
# NOTE(review): no random seed is set in this cell; if perturb() draws random
# numbers, results will differ from run to run - confirm.
for column, extra_args in (
    ('damage_grade', ()),
    ('age_building', (0, 200)),
    ('condition_post_eq', ()),
    ('technical_solution_proposed', ()),
):
    buildings = perturb(buildings, column, gamma, *extra_args)

for column, extra_args in (
    ('income_level_household', ()),
    ('size_household', (1, 40)),
    ('gender_household_head', ()),
):
    csv_household_demographics = perturb(csv_household_demographics, column, gamma, *extra_args)


In [None]:
# Identify new constructions (age <= 10)
# Boolean flag computed once from the original age and once from the perturbed age.
buildings['new_construction'] = buildings['age_building'].le(10)
buildings['new_construction_p'] = buildings['age_building_p'].le(10)

# Calculate household poverty using upper bound on household monthly income range.
# A household is flagged as in poverty when (monthly income * 12) / household size
# is at or below 20K rupees / person / year. This ends up being slightly more
# generous than the UN definition of $1.90 / person / day
# https://databankfiles.worldbank.org/data/download/poverty/987B9C90-CB9F-4D93-AE8C-750588BF00QA/SM2020/Global_POVEQ_NPL.pdf
# numeric_income is a project helper that turns the income bracket into a number.
csv_household_demographics['income_level_household_numeric'] = (
    csv_household_demographics['income_level_household'].apply(numeric_income)
)
csv_household_demographics['income_level_household_numeric_p'] = (
    csv_household_demographics['income_level_household_p'].apply(numeric_income)
)

# Original data: annual income per household member compared with the threshold.
csv_household_demographics['poverty'] = (
    csv_household_demographics['income_level_household_numeric']
    .mul(12.0)
    .div(csv_household_demographics['size_household'])
    .le(20000.0)
)
# Perturbed data: same formula, using the perturbed income and household size.
csv_household_demographics['poverty_p'] = (
    csv_household_demographics['income_level_household_numeric_p']
    .mul(12.0)
    .div(csv_household_demographics['size_household_p'])
    .le(20000.0)
)

# Integer values for damage grade
# numeric_damage_grade is a project helper applied to every value of the
# original and the perturbed damage grade columns.
for grade_column, numeric_column in (
    ('damage_grade', 'damage_grade_numeric'),
    ('damage_grade_p', 'damage_grade_numeric_p'),
):
    buildings[numeric_column] = buildings[grade_column].apply(numeric_damage_grade)

# Correlate households and buildings
# household -> building link (deduplicated), then the building attributes.
# Both merges use pandas' default inner join. Columns that exist in both the
# household and building frames receive the default _x / _y suffixes
# (later cells read 'vdcmun_id_x').
households = (
    csv_household_demographics
    .merge(mapping[['household_id', 'building_id']].drop_duplicates(), on='household_id')
    .merge(buildings, on='building_id')
)

In [None]:
# Damage grade for new vs old construction
# Built from the original (unperturbed) columns. The table is plotted and then
# shown as the cell output.
damage_grade_new_construction = histogram(
    households,
    'damage_grade',
    'new_construction',
)
histogram_plot(
    damage_grade_new_construction,
    'new_construction',
)
damage_grade_new_construction

In [None]:
# Damage grade for new vs old construction - perturbed
# Same view as the unperturbed cell, but every input column is the '_p' variant.
damage_grade_new_construction_p = histogram(
    households,
    'damage_grade_p',
    'new_construction_p',
)
histogram_plot(
    damage_grade_new_construction_p,
    'new_construction_p',
)
damage_grade_new_construction_p

In [None]:
# Intensity of building damage by household, poverty
# Damage grade split by the household poverty flag, original data.
damage_grade = histogram(
    households,
    'damage_grade',
    'poverty',
)
histogram_plot(
    damage_grade,
    'poverty',
)
damage_grade

In [None]:
# Intensity of building damage by household, poverty - perturbed
# Damage grade split by the household poverty flag, perturbed data.
damage_grade_p = histogram(
    households,
    'damage_grade_p',
    'poverty_p',
)
histogram_plot(
    damage_grade_p,
    'poverty_p',
)
damage_grade_p

In [None]:
# Condition of building after earthquake by household, poverty
# Post-earthquake condition split by the poverty flag, original data.
# This table is reused by the later all-households vs female-headed comparison.
condition_post_eq = histogram(
    households,
    'condition_post_eq',
    'poverty',
)
histogram_plot(
    condition_post_eq,
    'poverty',
)
condition_post_eq

In [None]:
# Condition of building after earthquake by household, poverty - perturbed
# Post-earthquake condition split by the poverty flag, perturbed data.
# This table is reused by the later perturbed comparison cell.
condition_post_eq_p = histogram(
    households,
    'condition_post_eq_p',
    'poverty_p',
)
histogram_plot(
    condition_post_eq_p,
    'poverty_p',
)
condition_post_eq_p

In [None]:
# Condition of building after earthquake by household, poverty (female head of household)
# Only rows whose recorded head of household is 'Female' are passed on.
condition_post_eq_f = histogram(
    households.loc[households['gender_household_head'].eq('Female')],
    'condition_post_eq',
    'poverty',
)
histogram_plot(
    condition_post_eq_f,
    'poverty',
)
condition_post_eq_f

In [None]:
# Condition of building after earthquake by household, poverty (female head of household) - perturbed
# The row filter also uses the perturbed gender column, so membership in this
# subset can differ from the unperturbed cell.
condition_post_eq_f_p = histogram(
    households.loc[households['gender_household_head_p'].eq('Female')],
    'condition_post_eq_p',
    'poverty_p',
)
histogram_plot(
    condition_post_eq_f_p,
    'poverty_p',
)
condition_post_eq_f_p

In [None]:
# Proposed repairs by household, poverty
# Proposed technical solution split by the poverty flag, original data.
technical_solution = histogram(
    households,
    'technical_solution_proposed',
    'poverty',
)
histogram_plot(
    technical_solution,
    'poverty',
)
technical_solution

In [None]:
# Proposed repairs by household, poverty - perturbed
# Proposed technical solution split by the poverty flag, perturbed data.
technical_solution_p = histogram(
    households,
    'technical_solution_proposed_p',
    'poverty_p',
)
histogram_plot(
    technical_solution_p,
    'poverty_p',
)
technical_solution_p

In [None]:
# Proposed repairs by household, poverty (female head of household)
# Only rows whose recorded head of household is 'Female' are passed on.
technical_solution_f = histogram(
    households.loc[households['gender_household_head'].eq('Female')],
    'technical_solution_proposed',
    'poverty',
)
histogram_plot(
    technical_solution_f,
    'poverty',
)
technical_solution_f

In [None]:
# Proposed repairs by household, poverty (female head of household) - perturbed
# The row filter and both histogram columns all use the perturbed variants.
technical_solution_f_p = histogram(
    households.loc[households['gender_household_head_p'].eq('Female')],
    'technical_solution_proposed_p',
    'poverty_p',
)
histogram_plot(
    technical_solution_f_p,
    'poverty_p',
)
technical_solution_f_p

In [None]:
# Condition after the earthquake by poverty status: all households next to
# female-headed households only.
# The female-only table is joined on the index; its clashing column names get
# the ' Female HOH' suffix so all four series can share one bar chart.
condition_post_eq_with_f = condition_post_eq.join(
    condition_post_eq_f,
    rsuffix=' Female HOH',
)
condition_post_eq_with_f.plot(
    kind='bar',
    y=['is_poverty', 'isnt_poverty', 'is_poverty Female HOH', 'isnt_poverty Female HOH'],
)


In [None]:
# Condition after the earthquake by poverty status: all households next to
# female-headed households only - perturbed
# Same join as the unperturbed comparison, using the tables built from the
# perturbed columns.
condition_post_eq_with_f_p = condition_post_eq_p.join(
    condition_post_eq_f_p,
    rsuffix=' Female HOH',
)
condition_post_eq_with_f_p.plot(
    kind='bar',
    y=['is_poverty_p', 'isnt_poverty_p', 'is_poverty_p Female HOH', 'isnt_poverty_p Female HOH'],
)


In [None]:
# Load and clean geo data
# Municipality shapes, plus the table of ward / municipality / district names.
geojson = gpd.read_file('./municipality.geojson')
ward_vdcmun_district_name_mapping = pd.read_csv('./ward_vdcmun_district_name_mapping.csv')

# Build a bare place name in 'vdc' by removing the administrative-type wording
# from 'vdcmun_name' and trimming surrounding whitespace.
# 'Rural Municipality' must be removed before 'Municipality', otherwise a stray
# 'Rural' would be left behind.
# NOTE(review): a plain 'Metropolitan City' label is not removed - confirm none
# occur in this table.
ward_vdcmun_district_name_mapping['vdc'] = (
    ward_vdcmun_district_name_mapping['vdcmun_name']
    .str.replace('Rural Municipality', '')
    .str.replace('Municipality', '')
    .str.replace('Sub-Metropolitan City', '')
    .str.strip()
)

In [None]:
# One row per distinct (municipality id, cleaned name, district id, district name).
vdcs = (
    ward_vdcmun_district_name_mapping
    .loc[:, ['vdcmun_id', 'vdc', 'district_id', 'district_name']]
    .drop_duplicates()
)
# Attach the ids to each shape by matching the shape's 'GaPa_NaPa' name to the
# cleaned 'vdc' name. Shapes without a match keep NaN ids (left join).
# NOTE(review): the key is the name alone; if a name occurs in more than one
# district the shape rows will be duplicated - confirm.
geojson = geojson.merge(
    vdcs,
    how='left',
    left_on='GaPa_NaPa',
    right_on='vdc',
)

In [None]:
# Choropleth of new construction
# Count of buildings flagged as new construction in each municipality.
new_construction_for_vdc = (
    buildings.loc[buildings['new_construction']]
    .groupby('vdcmun_id')
    .size()
    .reset_index(name='new_construction')
)

geojson = geojson.merge(
    new_construction_for_vdc[['vdcmun_id', 'new_construction']].drop_duplicates(),
    how='left',
    on="vdcmun_id",
)
# The left merge leaves NaN where a shape has no count; draw those as 0.
geojson['new_construction'] = geojson['new_construction'].fillna(0)
geojson.plot(column='new_construction')


In [None]:
# Choropleth of new construction - perturbed
# Count of buildings whose perturbed age flags them as new construction,
# per municipality.
new_construction_for_vdc_p = (
    buildings.loc[buildings['new_construction_p']]
    .groupby('vdcmun_id')
    .size()
    .reset_index(name='new_construction_p')
)

geojson = geojson.merge(
    new_construction_for_vdc_p[['vdcmun_id', 'new_construction_p']].drop_duplicates(),
    how='left',
    on="vdcmun_id",
)
# The left merge leaves NaN where a shape has no count; draw those as 0.
geojson['new_construction_p'] = geojson['new_construction_p'].fillna(0)
geojson.plot(column='new_construction_p')


In [None]:
# Choropleth of intensity of earthquake damage
# Mean numeric damage grade of the buildings in each municipality.
damage_for_vdc = (
    buildings
    .groupby(['vdcmun_id'])['damage_grade_numeric']
    .mean()
    .reset_index()
)

geojson = geojson.merge(
    damage_for_vdc[['vdcmun_id', 'damage_grade_numeric']].drop_duplicates(),
    how='left',
    on="vdcmun_id",
)
# Shapes with no matching municipality are drawn as 0, i.e. they look the same
# as "no damage" on the map.
geojson['damage_grade_numeric'] = geojson['damage_grade_numeric'].fillna(0)
geojson.plot(column='damage_grade_numeric')

In [None]:
# Choropleth of intensity of earthquake damage - perturbed
# Mean numeric damage grade per municipality, from the perturbed grades.
damage_for_vdc_p = (
    buildings
    .groupby(['vdcmun_id'])['damage_grade_numeric_p']
    .mean()
    .reset_index()
)

geojson = geojson.merge(
    damage_for_vdc_p[['vdcmun_id', 'damage_grade_numeric_p']].drop_duplicates(),
    how='left',
    on="vdcmun_id",
)
# Shapes with no matching municipality are drawn as 0, i.e. they look the same
# as "no damage" on the map.
geojson['damage_grade_numeric_p'] = geojson['damage_grade_numeric_p'].fillna(0)
geojson.plot(column='damage_grade_numeric_p')

In [None]:
# Choropleth of prevalence of poverty
# Share of households in each municipality that are flagged as in poverty.
# 'poverty' is a boolean column, so its group mean is that share. Use mean(),
# not size(): size() counts rows per group regardless of the flag, which would
# map household counts instead of poverty.
# 'vdcmun_id_x' is the municipality id from the household side of the
# household/building merge; it is renamed so the join key matches geojson and
# no extra id column is added to it.
poverty_by_vdc = (
    households
    .groupby('vdcmun_id_x')['poverty']
    .mean()
    .reset_index()
    .rename(columns={'vdcmun_id_x': 'vdcmun_id'})
)
# Remove a 'poverty' column left by an earlier run of this cell so that
# re-running it does not create suffixed duplicates and break the lookup below.
geojson = (
    geojson
    .drop(columns=['poverty'], errors='ignore')
    .merge(poverty_by_vdc, how='left', on='vdcmun_id')
)
# Shapes with no matching households are drawn as 0.
geojson['poverty'] = geojson['poverty'].fillna(0)
geojson.plot(column='poverty')


In [None]:
# Choropleth of prevalence of poverty - perturbed
# Share of households in each municipality whose perturbed data flags them as
# in poverty. 'poverty_p' is a boolean column, so its group mean is that share.
# Use mean(), not size(): size() counts rows per group regardless of the flag,
# which would make this map identical to the unperturbed one.
# 'vdcmun_id_x' is the municipality id from the household side of the
# household/building merge; it is renamed so the join key matches geojson and
# no extra id column is added to it.
poverty_by_vdc_p = (
    households
    .groupby('vdcmun_id_x')['poverty_p']
    .mean()
    .reset_index()
    .rename(columns={'vdcmun_id_x': 'vdcmun_id'})
)
# Remove a 'poverty_p' column left by an earlier run of this cell so that
# re-running it does not create suffixed duplicates and break the lookup below.
geojson = (
    geojson
    .drop(columns=['poverty_p'], errors='ignore')
    .merge(poverty_by_vdc_p, how='left', on='vdcmun_id')
)
# Shapes with no matching households are drawn as 0.
geojson['poverty_p'] = geojson['poverty_p'].fillna(0)
geojson.plot(column='poverty_p')
