In [123]:
import pandas as pd

In [124]:
# Load the EM-DAT export. The first 6 lines of the file are skipped and only
# the columns listed below are read ("No Homeless" is listed once).
raw = pd.read_csv(
    "inputs/disasters_emdat.csv",
    encoding="latin1",
    skiprows=6,
    usecols=[
        "Year",
        "Disaster Type",
        "Country",
        "Total Deaths",
        "No Injured",
        "No Affected",
        "No Homeless",
        "Total Affected",
        "Reconstruction Costs ('000 US$)",
        "Insured Damages ('000 US$)",
        "Total Damages ('000 US$)",
    ],
)

In [125]:
# Map the EM-DAT column headers to short snake_case names.
# "Year" and "Country" are not in the mapping and keep their names.
column_aliases = {
    "Disaster Type": "disaster_type",
    "Total Deaths": "deaths",
    "No Injured": "injured",
    "No Affected": "affected",
    "No Homeless": "homeless",
    "Total Affected": "total_affected",
    "Reconstruction Costs ('000 US$)": "reconstruction_costs",
    "Insured Damages ('000 US$)": "insured_damages",
    "Total Damages ('000 US$)": "total_damages",
}
raw = raw.rename(columns=column_aliases)

In [126]:
# Sum each metric per (Year, disaster_type, Country). The grouping is built
# once and reused; every result is a flat frame of the three key columns plus
# the summed metric.
by_year_type_country = raw.groupby(["Year", "disaster_type", "Country"])

deaths = by_year_type_country["deaths"].sum().reset_index()
injured = by_year_type_country["injured"].sum().reset_index()
affected = by_year_type_country["affected"].sum().reset_index()
homeless = by_year_type_country["homeless"].sum().reset_index()
total_affected = by_year_type_country["total_affected"].sum().reset_index()
reconstruction_costs = by_year_type_country["reconstruction_costs"].sum().reset_index()
insured_damages = by_year_type_country["insured_damages"].sum().reset_index()
total_damages = by_year_type_country["total_damages"].sum().reset_index()

In [127]:
# Outer-merge the per-metric frames one after another, starting from deaths.
# No `on=` is given, so pandas joins on the columns the two frames share.
combined = deaths
for metric_frame in (
    injured,
    affected,
    homeless,
    total_affected,
    reconstruction_costs,
    insured_damages,
    total_damages,
):
    combined = pd.merge(combined, metric_frame, how="outer")

In [128]:
# Build a "World" aggregate: sum every metric over all countries for each
# (Year, disaster_type) pair.
# The metric columns are selected with a list (double brackets). Selecting
# several groupby columns with a bare tuple of names is deprecated and raises
# an error in pandas >= 2.0.
world = (
    combined
    .groupby(["Year", "disaster_type"])[
        [
            "deaths",
            "injured",
            "affected",
            "homeless",
            "total_affected",
            "reconstruction_costs",
            "insured_damages",
            "total_damages",
        ]
    ]
    .sum()
    .reset_index()
)
world["Country"] = "World"

  """Entry point for launching an IPython kernel.


In [129]:
# Stack the "World" rows underneath the per-country rows.
# Note: re-running this cell on its own appends the World rows again.
combined = pd.concat(objs=[combined, world])

In [130]:
# Build an "All disasters" aggregate: sum every metric over all disaster types
# for each (Year, Country) pair.
# The metric columns are selected with a list (double brackets). Selecting
# several groupby columns with a bare tuple of names is deprecated and raises
# an error in pandas >= 2.0.
totals = (
    combined
    .groupby(["Year", "Country"])[
        [
            "deaths",
            "injured",
            "affected",
            "homeless",
            "total_affected",
            "reconstruction_costs",
            "insured_damages",
            "total_damages",
        ]
    ]
    .sum()
    .reset_index()
)
totals["disaster_type"] = "All disasters"

  """Entry point for launching an IPython kernel.


In [131]:
# Stack the "All disasters" rows underneath the per-type rows.
# Note: re-running this cell on its own appends the totals rows again.
combined = pd.concat(objs=[combined, totals])

In [132]:
# Outer-merge with the country lookup table on the columns the two frames
# share, then drop the original "Country" column.
# NOTE(review): presumably countries.csv maps EM-DAT "Country" names to a
# standardised "Entity" name — confirm against the file.
countries = pd.read_csv("inputs/countries.csv", encoding="latin1")
combined = (
    pd.merge(combined, countries, how="outer")
    .drop(columns=["Country"])
)

In [133]:
# Left-merge the population table on the columns it shares with `combined`,
# so every existing row is kept whether or not a population figure is found.
population = pd.read_csv("inputs/population.csv", encoding="latin1")
combined = pd.merge(left=combined, right=population, how="left")

In [134]:
# Need to normalise country names
# Need to merge population data and calculate rates
# Need to do count of number of disasters

In [135]:
# Express each human-impact count as a rate per 100,000 people.
# Keys are the new rate columns (added in this order); values are the count
# columns they are derived from.
rate_sources = {
    "death_rate_per_100k": "deaths",
    "injury_rate_per_100k": "injured",
    "affected_rate_per_100k": "affected",
    "homeless_rate_per_100k": "homeless",
    "total_affected_per_100k": "total_affected",
}
for rate_column, count_column in rate_sources.items():
    combined[rate_column] = combined[count_column] / combined["Population"] * 100000

In [136]:
# Population was only needed to compute the per-100k rates.
combined = combined.drop("Population", axis="columns")

In [137]:
# Display the combined frame (counts plus per-100k rates) for a visual check.
combined

Unnamed: 0,Year,disaster_type,deaths,injured,affected,homeless,total_affected,reconstruction_costs,insured_damages,total_damages,Entity,death_rate_per_100k,injury_rate_per_100k,affected_rate_per_100k,homeless_rate_per_100k,total_affected_per_100k
0,1900,Drought,11000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cape Verde,9685.656423,0.000000,0.000000,0.000000,0.000000
1,1910,Drought,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cape Verde,0.000000,0.000000,0.000000,0.000000,0.000000
2,1920,Drought,24000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cape Verde,17699.115044,0.000000,0.000000,0.000000,0.000000
3,1940,Drought,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cape Verde,12335.019119,0.000000,0.000000,0.000000,0.000000
4,1946,Drought,30000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Cape Verde,17536.475870,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15303,2017,All disasters,9766.0,25430.0,103239897.0,590432.0,103855759.0,1241400.0,127084560.0,326082769.0,World,0.129388,0.336917,1367.803741,7.822510,1375.963167
15304,2018,All disasters,11791.0,74126.0,84970128.0,417216.0,85461470.0,1600000.0,45827300.0,133614166.0,World,0.154513,0.971368,1113.472841,5.467318,1119.911527
15305,2019,All disasters,11795.0,51885.0,107311365.0,389397.0,107752647.0,0.0,27233000.0,100940456.0,World,0.152914,0.672655,1391.220737,5.048274,1396.941665
15306,2020,All disasters,15071.0,7646.0,99255670.0,703769.0,99967085.0,9450000.0,39334000.0,172402491.0,World,0.193347,0.098091,1273.357702,9.028700,1282.484494


In [138]:
# Rows whose disaster_type contains "Drought", with every column suffixed
# "_drought" except the Year/Entity join keys, which get their plain names back.
drought = (
    combined[combined["disaster_type"].str.contains("Drought")]
    .add_suffix("_drought")
    .rename(columns={"Year_drought": "Year", "Entity_drought": "Entity"})
)

In [139]:
# Rows whose disaster_type contains "Earthquake", with every column suffixed
# "_earthquake" except the Year/Entity join keys, which get their plain names back.
earthquake = (
    combined[combined["disaster_type"].str.contains("Earthquake")]
    .add_suffix("_earthquake")
    .rename(columns={"Year_earthquake": "Year", "Entity_earthquake": "Entity"})
)

In [140]:
# Rows whose disaster_type contains "All disasters", with every column suffixed
# "_all_disasters" except the Year/Entity join keys, which get their plain
# names back.
all_disasters = (
    combined[combined["disaster_type"].str.contains("All disasters")]
    .add_suffix("_all_disasters")
    .rename(columns={"Year_all_disasters": "Year", "Entity_all_disasters": "Entity"})
)

In [141]:
# Rows whose disaster_type contains "Volcanic activity", with every column
# suffixed "_volcanic" except the Year/Entity join keys, which get their plain
# names back.
volcanic = (
    combined[combined["disaster_type"].str.contains("Volcanic activity")]
    .add_suffix("_volcanic")
    .rename(columns={"Year_volcanic": "Year", "Entity_volcanic": "Entity"})
)

In [142]:
# Rows whose disaster_type contains "Flood", with every column suffixed
# "_flood" except the Year/Entity join keys, which get their plain names back.
flood = (
    combined[combined["disaster_type"].str.contains("Flood")]
    .add_suffix("_flood")
    .rename(columns={"Year_flood": "Year", "Entity_flood": "Entity"})
)

In [143]:
# Rows whose disaster_type contains the literal label "Mass movement (dry)",
# with every column suffixed "_mass_movement" except the Year/Entity join keys.
# regex=False is required here: str.contains treats its pattern as a regular
# expression by default, so the parentheses would form a capture group and the
# pattern would look for "Mass movement dry". That never matches the real
# label, leaving the frame empty (pandas also warns about the match group).
mass_movement = (
    combined[combined["disaster_type"].str.contains("Mass movement (dry)", regex=False)]
    .add_suffix("_mass_movement")
    .rename(columns={"Year_mass_movement": "Year", "Entity_mass_movement": "Entity"})
)

  return func(self, *args, **kwargs)


In [144]:
# Rows whose disaster_type contains "Storm", with every column suffixed
# "_storm" except the Year/Entity join keys, which get their plain names back.
storm = (
    combined[combined["disaster_type"].str.contains("Storm")]
    .add_suffix("_storm")
    .rename(columns={"Year_storm": "Year", "Entity_storm": "Entity"})
)

In [145]:
# Rows whose disaster_type contains "Landslide", with every column suffixed
# "_landslide" except the Year/Entity join keys, which get their plain names back.
landslide = (
    combined[combined["disaster_type"].str.contains("Landslide")]
    .add_suffix("_landslide")
    .rename(columns={"Year_landslide": "Year", "Entity_landslide": "Entity"})
)

In [146]:
# Rows whose disaster_type contains "Fog", with every column suffixed
# "_fog" except the Year/Entity join keys, which get their plain names back.
fog = (
    combined[combined["disaster_type"].str.contains("Fog")]
    .add_suffix("_fog")
    .rename(columns={"Year_fog": "Year", "Entity_fog": "Entity"})
)

In [147]:
# Rows whose disaster_type contains "Wildfire", with every column suffixed
# "_wildfire" except the Year/Entity join keys, which get their plain names back.
wildfire = (
    combined[combined["disaster_type"].str.contains("Wildfire")]
    .add_suffix("_wildfire")
    .rename(columns={"Year_wildfire": "Year", "Entity_wildfire": "Entity"})
)

In [148]:
# Rows whose disaster_type contains "Extreme temperature", with every column
# suffixed "_temperature" except the Year/Entity join keys, which get their
# plain names back.
temperature = (
    combined[combined["disaster_type"].str.contains("Extreme temperature")]
    .add_suffix("_temperature")
    .rename(columns={"Year_temperature": "Year", "Entity_temperature": "Entity"})
)

In [149]:
# Rows whose disaster_type contains "Glacial lake outburst", with every column
# suffixed "_glacial_lake" except the Year/Entity join keys, which get their
# plain names back.
glacial_lake = (
    combined[combined["disaster_type"].str.contains("Glacial lake outburst")]
    .add_suffix("_glacial_lake")
    .rename(columns={"Year_glacial_lake": "Year", "Entity_glacial_lake": "Entity"})
)

In [150]:
# Outer-merge the per-type frames one after another into a single wide frame,
# starting from drought. No `on=` is given, so pandas joins on the columns the
# frames share.
disasters = drought
for type_frame in (
    earthquake,
    all_disasters,
    volcanic,
    flood,
    mass_movement,
    storm,
    landslide,
    fog,
    wildfire,
    temperature,
    glacial_lake,
):
    disasters = pd.merge(disasters, type_frame, how="outer")

In [151]:
# Drop the suffixed disaster_type_* label columns, one per disaster type.
type_suffixes = (
    "drought",
    "earthquake",
    "all_disasters",
    "volcanic",
    "flood",
    "mass_movement",
    "storm",
    "landslide",
    "fog",
    "wildfire",
    "temperature",
    "glacial_lake",
)
disasters = disasters.drop(
    columns=["disaster_type_" + suffix for suffix in type_suffixes]
)

In [152]:
# Put "Entity" first and keep the remaining columns in their existing order.
other_columns = [name for name in disasters.columns if name != "Entity"]
disasters = disasters[["Entity"] + other_columns]

# Keep rows that have an Entity and are not for the year 2021.
# NOTE(review): presumably 2021 is excluded because the year is incomplete
# in the source data — confirm.
keep_row = disasters["Entity"].notna() & (disasters["Year"] != 2021)
disasters = disasters[keep_row]

In [153]:
# Write the wide table to CSV without the DataFrame index.
disasters.to_csv(
    path_or_buf="output/Natural disasters (EMDAT).csv",
    index=False,
)