In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as p9
import os

from lib.lib import Import_data

# Resolve the dataset directory via the project helper and record its contents.
# NOTE(review): Import_data() is defined in lib.lib, outside this notebook;
# it is assumed to return a directory path -- confirm against the helper.
path = Import_data()
files = os.listdir(path)

# Read the two building-level tables used by the analysis below
structure_csv = os.path.join(path, "csv_building_structure.csv")
assessment_csv = os.path.join(path, "csv_building_damage_assessment.csv")
building_structure = pd.read_csv(structure_csv)
damage_assessment = pd.read_csv(assessment_csv)

# Preview the first rows of each table, structure first, then assessment
for preview_frame in (building_structure, damage_assessment):
    display(preview_frame.head())

# Merge the two dataframes on building_id. Any non-key column that exists in
# both tables is renamed with the '_structure' / '_assessment' suffix.
merged_data = pd.merge(building_structure, damage_assessment, how='left',
                        on="building_id", suffixes=('_structure', '_assessment'))
# show the first 5 rows of the merged data
display(merged_data.head())

# Locate the damage grade column. If both input tables carry 'damage_grade',
# the merge above renames it, and a plain merged_data['damage_grade'] lookup
# raises KeyError. In that case use the copy from the building structure table.
if 'damage_grade' in merged_data.columns:
    damage_grade_source = 'damage_grade'
elif 'damage_grade_structure' in merged_data.columns:
    damage_grade_source = 'damage_grade_structure'
else:
    raise KeyError(
        "No 'damage_grade' or 'damage_grade_structure' column found in merged_data"
    )

# Convert 'damage_grade' to numerical values; labels not in the mapping become NaN
damage_grade_mapping = {'Grade 1': 1, 'Grade 2': 2, 'Grade 3': 3, 'Grade 4': 4, 'Grade 5': 5}
merged_data['damage_grade'] = merged_data[damage_grade_source].map(damage_grade_mapping)

# Fill missing values in 'damage_grade' with the mean
# NOTE(review): the mean is generally not an integer, so imputed rows get a
# fractional grade -- confirm this is acceptable for the analysis below.
merged_data['damage_grade'] = merged_data['damage_grade'].fillna(merged_data['damage_grade'].mean())

# Correlation matrix between building age and the numeric damage grade
age_damage_columns = ['age_building', 'damage_grade']
correlation = merged_data[age_damage_columns].corr()
print("Correlation between building age and damage grade:")
print(correlation)

# Scatter plot of building age (x) against damage grade (y); points are
# semi-transparent so that overlapping observations remain visible
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=merged_data, x='age_building', y='damage_grade', alpha=0.5, ax=ax)
ax.set_title('Building Age vs. Damage Grade')
ax.set_xlabel('Building Age')
ax.set_ylabel('Damage Grade')
plt.show()

# Create a box plot of damage grade vs. building age groups
#
# Bin edges are right-inclusive so that each label matches its interval:
#   [0, 20] -> '0-20', (20, 50] -> '21-50', (50, 100] -> '51-100', (100, inf) -> '100+'
# The open-ended upper edge (np.inf) keeps the oldest building in the last
# group instead of turning it into NaN. It also keeps the edges strictly
# increasing when no building has an age above 100, where pd.cut would
# otherwise raise.
age_bins = [0, 20, 50, 100, np.inf]
age_labels = ['0-20', '21-50', '51-100', '100+']
merged_data['age_group'] = pd.cut(
    merged_data['age_building'],
    bins=age_bins,
    labels=age_labels,
    right=True,
    include_lowest=True,  # age 0 belongs to the first group
)

plt.figure(figsize=(12, 6))
sns.boxplot(x='age_group', y='damage_grade', data=merged_data)
plt.title('Damage Grade vs. Building Age Group')
plt.xlabel('Building Age Group')
plt.ylabel('Damage Grade')
plt.show()

Path to dataset files: /home/chaotic/.cache/kagglehub/datasets/arashnic/earthquake-magnitude-damage-and-impact/versions/6




Unnamed: 0,building_id,district_id,vdcmun_id,ward_id,count_floors_pre_eq,count_floors_post_eq,age_building,plinth_area_sq_ft,height_ft_pre_eq,height_ft_post_eq,...,has_superstructure_mud_mortar_brick,has_superstructure_cement_mortar_brick,has_superstructure_timber,has_superstructure_bamboo,has_superstructure_rc_non_engineered,has_superstructure_rc_engineered,has_superstructure_other,condition_post_eq,damage_grade,technical_solution_proposed
0,120101000011,12,1207,120703,1,1,9,288,9,9,...,0,0,0,1,0,0,0,Damaged-Used in risk,Grade 3,Major repair
1,120101000021,12,1207,120703,1,1,15,364,9,9,...,0,0,0,1,0,0,0,Damaged-Repaired and used,Grade 5,Reconstruction
2,120101000031,12,1207,120703,1,1,20,384,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 2,Minor repair
3,120101000041,12,1207,120703,1,1,20,312,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 2,Minor repair
4,120101000051,12,1207,120703,1,1,30,308,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 1,Minor repair


Unnamed: 0,building_id,district_id,vdcmun_id,ward_id,damage_overall_collapse,damage_overall_leaning,damage_overall_adjacent_building_risk,damage_foundation_severe,damage_foundation_moderate,damage_foundation_insignificant,...,has_damage_parapet,has_damage_cladding_glazing,has_geotechnical_risk,has_geotechnical_risk_land_settlement,has_geotechnical_risk_fault_crack,has_geotechnical_risk_liquefaction,has_geotechnical_risk_landslide,has_geotechnical_risk_rock_fall,has_geotechnical_risk_flood,has_geotechnical_risk_other
0,120101000011,12,1207,120703,Moderate-Heavy,Insignificant/light,,,Moderate-Heavy-(<1/3),Insignificant/light-(<1/3),...,0.0,0.0,0.0,0,0,0,0,0,0,0
1,120101000021,12,1207,120703,Severe-Extreme,Severe-Extreme,Insignificant/light,Severe-Extreme-(>2/3),,,...,0.0,0.0,0.0,0,0,0,0,0,0,0
2,120101000031,12,1207,120703,Moderate-Heavy,Moderate-Heavy,Moderate-Heavy,,Moderate-Heavy-(>2/3),,...,0.0,0.0,0.0,0,0,0,0,0,0,0
3,120101000041,12,1207,120703,Moderate-Heavy,Moderate-Heavy,Moderate-Heavy,,Moderate-Heavy-(>2/3),,...,0.0,0.0,0.0,0,0,0,0,0,0,0
4,120101000051,12,1207,120703,Insignificant/light,,,,,Insignificant/light-(<1/3),...,0.0,0.0,0.0,0,0,0,0,0,0,0


Unnamed: 0,building_id,district_id_structure,vdcmun_id_structure,ward_id_structure,count_floors_pre_eq,count_floors_post_eq,age_building,plinth_area_sq_ft,height_ft_pre_eq,height_ft_post_eq,...,has_damage_parapet,has_damage_cladding_glazing,has_geotechnical_risk,has_geotechnical_risk_land_settlement,has_geotechnical_risk_fault_crack,has_geotechnical_risk_liquefaction,has_geotechnical_risk_landslide,has_geotechnical_risk_rock_fall,has_geotechnical_risk_flood,has_geotechnical_risk_other
0,120101000011,12,1207,120703,1,1,9,288,9,9,...,0.0,0.0,0.0,0,0,0,0,0,0,0
1,120101000021,12,1207,120703,1,1,15,364,9,9,...,0.0,0.0,0.0,0,0,0,0,0,0,0
2,120101000031,12,1207,120703,1,1,20,384,9,9,...,0.0,0.0,0.0,0,0,0,0,0,0,0
3,120101000041,12,1207,120703,1,1,20,312,9,9,...,0.0,0.0,0.0,0,0,0,0,0,0,0
4,120101000051,12,1207,120703,1,1,30,308,9,9,...,0.0,0.0,0.0,0,0,0,0,0,0,0


KeyError: 'damage_grade'

In [None]:
# Print the first 5 rows of the data for each file in the dataset directory.
# os.listdir() returns entries in arbitrary order, so iterate in sorted order
# to keep the notebook output stable between runs.
for file in sorted(files):
    # Display the filename as a header formatted in bold (ANSI escape codes)
    print("\033[1m" + file + "\033[0m")
    # Build the path with os.path.join, consistent with the loading cell above
    data = pd.read_csv(os.path.join(path, file))
    display(data.head())


[1mcsv_building_damage_assessment.csv[0m




Unnamed: 0,building_id,district_id,vdcmun_id,ward_id,damage_overall_collapse,damage_overall_leaning,damage_overall_adjacent_building_risk,damage_foundation_severe,damage_foundation_moderate,damage_foundation_insignificant,...,has_damage_parapet,has_damage_cladding_glazing,has_geotechnical_risk,has_geotechnical_risk_land_settlement,has_geotechnical_risk_fault_crack,has_geotechnical_risk_liquefaction,has_geotechnical_risk_landslide,has_geotechnical_risk_rock_fall,has_geotechnical_risk_flood,has_geotechnical_risk_other
0,120101000011,12,1207,120703,Moderate-Heavy,Insignificant/light,,,Moderate-Heavy-(<1/3),Insignificant/light-(<1/3),...,0.0,0.0,0.0,0,0,0,0,0,0,0
1,120101000021,12,1207,120703,Severe-Extreme,Severe-Extreme,Insignificant/light,Severe-Extreme-(>2/3),,,...,0.0,0.0,0.0,0,0,0,0,0,0,0
2,120101000031,12,1207,120703,Moderate-Heavy,Moderate-Heavy,Moderate-Heavy,,Moderate-Heavy-(>2/3),,...,0.0,0.0,0.0,0,0,0,0,0,0,0
3,120101000041,12,1207,120703,Moderate-Heavy,Moderate-Heavy,Moderate-Heavy,,Moderate-Heavy-(>2/3),,...,0.0,0.0,0.0,0,0,0,0,0,0,0
4,120101000051,12,1207,120703,Insignificant/light,,,,,Insignificant/light-(<1/3),...,0.0,0.0,0.0,0,0,0,0,0,0,0


[1mcsv_building_ownership_and_use.csv[0m


Unnamed: 0,building_id,district_id,vdcmun_id,ward_id,legal_ownership_status,count_families,has_secondary_use,has_secondary_use_agriculture,has_secondary_use_hotel,has_secondary_use_rental,has_secondary_use_institution,has_secondary_use_school,has_secondary_use_industry,has_secondary_use_health_post,has_secondary_use_gov_office,has_secondary_use_use_police,has_secondary_use_other
0,120101000011,12,1207,120703,Private,1.0,0.0,0,0,0,0,0,0,0,0,0,0
1,120101000021,12,1207,120703,Private,1.0,0.0,0,0,0,0,0,0,0,0,0,0
2,120101000031,12,1207,120703,Private,1.0,0.0,0,0,0,0,0,0,0,0,0,0
3,120101000041,12,1207,120703,Private,1.0,0.0,0,0,0,0,0,0,0,0,0,0
4,120101000051,12,1207,120703,Private,1.0,0.0,0,0,0,0,0,0,0,0,0,0


[1mcsv_building_structure.csv[0m


Unnamed: 0,building_id,district_id,vdcmun_id,ward_id,count_floors_pre_eq,count_floors_post_eq,age_building,plinth_area_sq_ft,height_ft_pre_eq,height_ft_post_eq,...,has_superstructure_mud_mortar_brick,has_superstructure_cement_mortar_brick,has_superstructure_timber,has_superstructure_bamboo,has_superstructure_rc_non_engineered,has_superstructure_rc_engineered,has_superstructure_other,condition_post_eq,damage_grade,technical_solution_proposed
0,120101000011,12,1207,120703,1,1,9,288,9,9,...,0,0,0,1,0,0,0,Damaged-Used in risk,Grade 3,Major repair
1,120101000021,12,1207,120703,1,1,15,364,9,9,...,0,0,0,1,0,0,0,Damaged-Repaired and used,Grade 5,Reconstruction
2,120101000031,12,1207,120703,1,1,20,384,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 2,Minor repair
3,120101000041,12,1207,120703,1,1,20,312,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 2,Minor repair
4,120101000051,12,1207,120703,1,1,30,308,9,9,...,0,0,0,0,0,0,0,Damaged-Repaired and used,Grade 1,Minor repair


[1mcsv_household_demographics.csv[0m


Unnamed: 0,household_id,district_id,vdcmun_id,ward_id,gender_household_head,age_household_head,caste_household,education_level_household_head,income_level_household,size_household,is_bank_account_present_in_household
0,12010100001101,12,1207,120703,Male,31.0,Rai,Illiterate,Rs. 10 thousand,3.0,0.0
1,12010100002101,12,1207,120703,Female,62.0,Rai,Illiterate,Rs. 10 thousand,6.0,0.0
2,12010100003101,12,1207,120703,Male,51.0,Gharti/Bhujel,Illiterate,Rs. 10 thousand,13.0,0.0
3,12010100004101,12,1207,120703,Male,48.0,Gharti/Bhujel,Illiterate,Rs. 10 thousand,5.0,0.0
4,12010100005101,12,1207,120703,Male,70.0,Gharti/Bhujel,Illiterate,Rs. 10 thousand,8.0,0.0


[1mcsv_household_earthquake_impact.csv[0m


Unnamed: 0,household_id,district_id,vdcmun_id,ward_id,shelter_condition_household_post_eq,residence_household_pre_eq,residence_household_post_eq,household_eq_id_type,has_death_occurred_last_12_months,count_death_last_12_months,...,has_occupation_change_occurred_last_12_months,count_occupation_change_last_12_months,residence_district_household_head_pre_eq,residence_district_household_head_post_eq,is_recipient_rahat_15k,is_recipient_rahat_10k,is_recipient_rahat_200k,is_recipient_rahat_social_security_3k,is_recipient_rahat_none,is_ineligible_rahat
0,12010100001101,12,1207,120703,Staying in their own house,This VDC/Municipality,This VDC/Municipality,ID Card for Total Destruction,0.0,,...,0.0,,,,1,1,0,0,0,0
1,12010100002101,12,1207,120703,Staying in their own house,This VDC/Municipality,This VDC/Municipality,Have not received ID Card,0.0,,...,0.0,,,,0,0,0,0,1,0
2,12010100003101,12,1207,120703,Staying in their own house,This VDC/Municipality,This VDC/Municipality,Have not received ID Card,0.0,,...,0.0,,,,0,0,0,0,1,0
3,12010100004101,12,1207,120703,Staying in their own house,This VDC/Municipality,This VDC/Municipality,Have not received ID Card,0.0,,...,0.0,,,,0,0,0,0,1,0
4,12010100005101,12,1207,120703,Staying in their own house,This VDC/Municipality,This VDC/Municipality,Have not received ID Card,0.0,,...,0.0,,,,0,0,0,0,1,0


[1mcsv_household_resources.csv[0m


Unnamed: 0,household_id,district_id,vdcmun_id,ward_id,source_water_pre_eq,source_water_post_eq,source_cooking_fuel_pre_eq,source_cooking_fuel_post_eq,source_light_pre_eq,source_light_post_eq,...,has_asset_cable_post_eq,has_asset_computer_post_eq,has_asset_internet_post_eq,has_asset_telephone_post_eq,has_asset_mobile_phone_post_eq,has_asset_fridge_post_eq,has_asset_motorcycle_post_eq,has_asset_four_wheeler_family_use_post_eq,has_asset_four_wheeler_commercial_post_eq,has_asset_none_post_eq
0,12010100001101,12,1207,120703,Pipeline,Pipeline,Wood,Wood,Electricity,Electricity,...,0,0,0,0,1,0,0,0,0,0
1,12010100002101,12,1207,120703,Pipeline,Pipeline,Wood,Wood,Electricity,Electricity,...,0,0,0,0,1,0,0,0,0,0
2,12010100003101,12,1207,120703,Pipeline,Pipeline,Wood,Wood,Electricity,Electricity,...,0,0,0,0,1,0,0,0,0,0
3,12010100004101,12,1207,120703,Pipeline,Pipeline,Wood,Wood,Electricity,Electricity,...,0,0,0,0,1,0,0,0,0,0
4,12010100005101,12,1207,120703,Pipeline,Pipeline,Wood,Wood,Electricity,Electricity,...,0,0,0,0,1,0,0,0,0,0


[1mcsv_individual_demographics.csv[0m


Unnamed: 0,individual_id,district_id,vdcmun_id,ward_id,gender_individual,age_individual,presence_in_household,disability_individual,has_registered_birth,education_level_individual,marital_status_individual
0,12010100001101001,12,1207,120703,Male,31,Mostly Present,No,,Illiterate,Married
1,12010100001101002,12,1207,120703,Female,58,Mostly Present,No,,Illiterate,Married
2,12010100001101003,12,1207,120703,Male,15,Mostly Present,No,,Class 7,Not Married
3,12010100002101001,12,1207,120703,Female,62,Mostly Present,No,,Illiterate,Widow/Widower
4,12010100002101002,12,1207,120703,Male,23,Not present-Abroad,No,,Class 6,Married


[1mmapping.csv[0m


Unnamed: 0,individual_id,household_id,building_id
0,12010200004101001,12010200004101,120102000041
1,12010200004101002,12010200004101,120102000041
2,12010200004101003,12010200004101,120102000041
3,12010200004101004,12010200004101,120102000041
4,12010200004101005,12010200004101,120102000041


[1mward_vdcmun_district_name_mapping.csv[0m


Unnamed: 0,ward_id,vdcmun_id,vdcmun_name,district_id,district_name
0,120101,1201,Champadevi Rural Municipality,12,Okhaldhunga
1,120102,1201,Champadevi Rural Municipality,12,Okhaldhunga
2,120103,1201,Champadevi Rural Municipality,12,Okhaldhunga
3,120104,1201,Champadevi Rural Municipality,12,Okhaldhunga
4,120105,1201,Champadevi Rural Municipality,12,Okhaldhunga
