In [1]:
# import the necessary packages
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os

In [2]:
# relative location of the EM-DAT CSV file, assembled from its path components
emdat_dataset_path = os.path.join(
    *["natural-disasters-data", "em-dat", "EMDAT_1900-2021_NatDis.csv"]
)

# read the EM-DAT natural disasters table into a dataframe and preview its
# last five rows
df = pd.read_csv(filepath_or_buffer=emdat_dataset_path)
df.tail(5)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
15822,2020-0031-ZMB,2020,31,Natural,Hydrological,Flood,,,,Affected,...,,,,1500.0,,1500.0,,,,
15823,2020-0110-ZMB,2020,110,Natural,Hydrological,Flood,,,,Affected,...,26.0,,,700000.0,,700000.0,,,,
15824,2021-0036-ZWE,2021,36,Natural,Meteorological,Storm,Tropical cyclone,,Tropical cyclone 'Eloise',Kill,...,23.0,3.0,,1745.0,,1745.0,,,,
15825,2020-0131-TLS,2020,131,Natural,Hydrological,Flood,Riverine flood,,,Affected,...,13.0,3.0,7.0,9124.0,,9131.0,,,20000.0,
15826,2020-0362-SSD,2020,362,Natural,Hydrological,Flood,,,,Affected,...,,,,1042000.0,,1042000.0,,,,


In [3]:
# list every distinct value found in the "Disaster Group" column
df.loc[:, "Disaster Group"].unique()

array(['Natural'], dtype=object)

In [4]:
# list every distinct natural disaster subgroup present in the dataset
df.loc[:, "Disaster Subgroup"].unique()

array(['Climatological', 'Geophysical', 'Meteorological', 'Hydrological',
       'Biological', 'Extra-terrestrial'], dtype=object)

In [5]:
# keep only the rows whose disaster subgroup is "Meteorological", then
# preview the last five of them
df_meteo = df.loc[df["Disaster Subgroup"].eq("Meteorological")]
df_meteo.tail(5)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
15807,2020-0425-VNM,2020,425,Natural,Meteorological,Storm,Tropical cyclone,,Tropical storm 'Nangka' (Nika),Waiting,...,14.0,2.0,,67855.0,2925.0,70780.0,,,,
15808,2020-0462-VNM,2020,462,Natural,Meteorological,Storm,Tropical cyclone,,Tropical storm 'Noul' (Leon),Kill,...,21.0,6.0,,125000.0,,125000.0,,,33000.0,
15809,2020-0558-VNM,2020,558,Natural,Meteorological,Storm,Tropical cyclone,,Tropical depression 'Vicky' (Krovanh),Affected,...,21.0,1.0,4.0,,,4.0,,,,
15810,2020-0132-VUT,2020,132,Natural,Meteorological,Storm,Tropical cyclone,,Cyclone 'Harold',--,...,5.0,5.0,,83837.0,,83837.0,,,,
15824,2021-0036-ZWE,2021,36,Natural,Meteorological,Storm,Tropical cyclone,,Tropical cyclone 'Eloise',Kill,...,23.0,3.0,,1745.0,,1745.0,,,,


In [6]:
# list the distinct disaster types recorded for "meteorological" events
df_meteo.loc[:, "Disaster Type"].unique()

array(['Storm', 'Extreme temperature', 'Fog'], dtype=object)

In [7]:
# narrow the meteorological events down to those whose disaster type is
# "Storm", then preview the last five rows
df_storm = df_meteo.loc[df_meteo["Disaster Type"].eq("Storm")]
df_storm.tail(5)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
15807,2020-0425-VNM,2020,425,Natural,Meteorological,Storm,Tropical cyclone,,Tropical storm 'Nangka' (Nika),Waiting,...,14.0,2.0,,67855.0,2925.0,70780.0,,,,
15808,2020-0462-VNM,2020,462,Natural,Meteorological,Storm,Tropical cyclone,,Tropical storm 'Noul' (Leon),Kill,...,21.0,6.0,,125000.0,,125000.0,,,33000.0,
15809,2020-0558-VNM,2020,558,Natural,Meteorological,Storm,Tropical cyclone,,Tropical depression 'Vicky' (Krovanh),Affected,...,21.0,1.0,4.0,,,4.0,,,,
15810,2020-0132-VUT,2020,132,Natural,Meteorological,Storm,Tropical cyclone,,Cyclone 'Harold',--,...,5.0,5.0,,83837.0,,83837.0,,,,
15824,2021-0036-ZWE,2021,36,Natural,Meteorological,Storm,Tropical cyclone,,Tropical cyclone 'Eloise',Kill,...,23.0,3.0,,1745.0,,1745.0,,,,


In [8]:
# list the distinct disaster subtypes recorded for "storm" events
df_storm.loc[:, "Disaster Subtype"].unique()

array(['Tropical cyclone', 'Convective storm', nan,
       'Extra-tropical storm'], dtype=object)

In [9]:
# narrow the storm events down to the "Convective storm" disaster subtype,
# then preview the last five rows
df_convective = df_storm.loc[df_storm["Disaster Subtype"].eq("Convective storm")]
df_convective.tail(5)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
15786,2020-0167-USA,2020,167,Natural,Meteorological,Storm,Convective storm,Tornado,,Waiting,...,24.0,3.0,31.0,,,31.0,,,1400000.0,
15787,2020-0011-USA,2020,11,Natural,Meteorological,Storm,Convective storm,Severe storm,,Kill,...,12.0,10.0,,,,,,,1200000.0,
15791,2020-0165-VNM,2020,165,Natural,Meteorological,Storm,Convective storm,Lightning/Thunderstorms,,Affected,...,27.0,3.0,13.0,30000.0,,30013.0,,,,
15798,2020-0082-USA,2020,82,Natural,Meteorological,Storm,Convective storm,Tornado,,Waiting,...,5.0,25.0,300.0,12000.0,,12300.0,,,2500000.0,
15799,2020-0582-USA,2020,582,Natural,Meteorological,Storm,Convective storm,Severe storm,,SigDam,...,28.0,,,,,,,2200000.0,2900000.0,


In [10]:
# list the distinct disaster sub-subtypes recorded for "convective storm" events
df_convective.loc[:, "Disaster Subsubtype"].unique()

array(['Tornado', 'Hail', 'Severe storm', 'Winter storm/Blizzard',
       'Lightning/Thunderstorms', nan, 'Sand/Dust storm', 'Rain',
       'Storm/Surge', 'Derecho'], dtype=object)

In [11]:
# narrow the convective storms down to the "Tornado" disaster sub-subtype,
# then preview the last five rows
df_tornado = df_convective.loc[df_convective["Disaster Subsubtype"].eq("Tornado")]
df_tornado.tail(5)

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
15298,2019-0081-USA,2019,81,Natural,Meteorological,Storm,Convective storm,Tornado,,Kill,...,4.0,28.0,90.0,,,90.0,,140000.0,190000.0,100.0
15780,2020-0190-USA,2020,190,Natural,Meteorological,Storm,Convective storm,Tornado,,SigDam,...,9.0,,,,,,,2200000.0,2900000.0,
15785,2020-0148-USA,2020,148,Natural,Meteorological,Storm,Convective storm,Tornado,,Kill,...,14.0,38.0,200.0,,,200.0,,2600000.0,3500000.0,
15786,2020-0167-USA,2020,167,Natural,Meteorological,Storm,Convective storm,Tornado,,Waiting,...,24.0,3.0,31.0,,,31.0,,,1400000.0,
15798,2020-0082-USA,2020,82,Natural,Meteorological,Storm,Convective storm,Tornado,,Waiting,...,5.0,25.0,300.0,12000.0,,12300.0,,,2500000.0,


In [12]:
# find all avalanches in the EM-DAT dataset by filtering *directly* on the
# Disaster Subtype of the original dataframe (no need to walk down the
# group -> subgroup -> type hierarchy first)
df_avalanche = df[df["Disaster Subtype"] == "Avalanche"]

# NOTE: this frame used to be stored under the misleading name `df_hurricane`
# even though it holds avalanche rows; the old name is kept as an alias so any
# cell that still refers to it keeps working
df_hurricane = df_avalanche

df_avalanche.tail()

Unnamed: 0,Dis No,Year,Seq,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Entry Criteria,...,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI
14580,2017-0466-MNG,2017,466,Natural,Hydrological,Landslide,Avalanche,,,Kill,...,22.0,17.0,,,,,,,,95.878166
14625,2017-0034-TJK,2017,34,Natural,Hydrological,Landslide,Avalanche,,,Waiting,...,28.0,13.0,,,,,,,,95.878166
15517,2020-0063-AFG,2020,63,Natural,Hydrological,Landslide,Avalanche,,,Kill,...,14.0,22.0,10.0,,250.0,260.0,,,,
15625,2020-0574-IRN,2020,574,Natural,Hydrological,Landslide,Avalanche,,,Kill,...,25.0,12.0,,,,,,,,
15736,2020-0044-TUR,2020,44,Natural,Hydrological,Landslide,Avalanche,,,Kill,...,5.0,41.0,84.0,,,84.0,,,,
