# Natural Disasters: Revision
This is a revision of the original notebook "Natural Disasters" by Hannah.

In [39]:
import pandas as pd
import numpy as np

## Read

In [40]:
# Read
raw = pd.read_csv(
    "inputs/disasters_emdat.csv",
    encoding="latin1",
    skiprows=6,
    usecols=[
        "Year",
        "Disaster Type",
        "Country",
        "No Homeless",
        "Total Deaths",
        "No Injured",
        "No Affected",
        "No Homeless",
        "Total Affected",
        "Reconstruction Costs ('000 US$)",
        "Insured Damages ('000 US$)",
        "Total Damages ('000 US$)"
    ],
)

## Harmonize names

In [41]:
# Column renaming
raw = raw.rename(
    columns={
        "Disaster Type": "disaster_type",
        "Total Deaths": "deaths",
        "No Injured": "injured",
        "No Affected": "affected",
        "No Homeless": "homeless",
        "Total Affected": "total_affected",
        "Reconstruction Costs ('000 US$)": "reconstruction_costs",
        "Insured Damages ('000 US$)": "insured_damages",
        "Total Damages ('000 US$)": "total_damages",
    }
)

In [42]:
# Get metrics
combined = raw.groupby(["Year", "disaster_type", "Country"], as_index=False)[[
    "deaths", 
    "injured", 
    "affected", 
    "homeless", 
    "total_affected", 
    "reconstruction_costs", 
    "insured_damages", 
    "total_damages"
]].sum()

In [43]:
# Standardize country names
countries = pd.read_csv("inputs/countries.csv", encoding="latin1")
combined = combined.merge(countries, on="Country", how="left")
print("Countries without standardisation:", combined.loc[combined.Entity.isna(), "Country"].unique())
combined = combined.drop(columns=["Country"])

Countries without standardisation: ['Germany Fed Rep' 'Yemen Arab Rep' 'Yemen P Dem Rep' 'Germany Dem Rep']


## Processing
### Subnational to national

In [44]:
# Add subnational regions'data to their parent countries' data
subnational = pd.read_csv("inputs/subnational.csv")
map_sub = dict(zip(subnational["region"], subnational["country"]))
combined["Entity"] = combined["Entity"].replace(map_sub)
combined = combined.groupby(["Entity", "Year", "disaster_type"], as_index=False).sum()

### All disasters value

In [45]:
# Obtain all disasters figures
totals = combined.groupby(["Year", "Entity"], as_index=False).sum()
totals = totals.assign(disaster_type="All disasters")
combined = pd.concat([combined, totals])

### Population

In [46]:
# Add population
population = pd.read_csv("inputs/population.csv", encoding="latin1")
combined = combined.merge(population, on=["Entity", "Year"], how="left")

### GDP

In [47]:
# Add gdp
gdp = pd.read_csv("inputs/gdp.csv", encoding="latin1")
combined = combined.merge(gdp, on=["Entity", "Year"], how="left")

### Supra-national entities

In [48]:
# Add supra-national entities (continents, income groups, world)
# Load mappings
continents_map = pd.read_csv("inputs/continent.csv")
continents_map = dict(zip(continents_map.Entity, continents_map.continent))
income_map = pd.read_csv("inputs/income.csv")
income_map = dict(zip(income_map.Entity, income_map.income_group))

In [49]:
# Create and add new entities
continents = combined.replace(continents_map)
continents = continents.groupby(["Entity", "Year"], as_index=False).sum()
income = combined.replace(income_map)
income = income.groupby(["Entity", "Year"], as_index=False).sum()
world = combined.groupby(["Year", "disaster_type"], as_index=False).sum()
world = world.assign(Entity="World")
combined = pd.concat([combined, continents, income, world])

#### Sanity checks

In [50]:
# Only informative
countries_continents_miss = set(continents.Entity).difference(set(continents_map.values()))
countries_income_miss = set(income.Entity).difference(set(income_map.values()))
print("* No income group:", " / ".join(sorted(countries_income_miss)))
print("* No continent group:", " / ".join(sorted(countries_continents_miss)))

* No income group: Czechoslovakia / Serbia-Montenegro / Soviet Union / Yugoslavia
* No continent group: Serbia-Montenegro / Soviet Union


### Rates

In [51]:
# Add rates (population)
columns = ["deaths", "injured", "affected", "homeless", "total_affected"]
combined = combined.assign(
    **{f"{col}_rate_per_100k": combined[col]/combined["Population"]*100000 for col in columns}
)
#combined = combined.drop(columns=["Population"])

In [52]:
# Add rates (gdp)
columns = ["total_damages"]
combined = combined.assign(
    **{f"{col}_pct_gdp": combined[col]/(combined["gdp"]/1000) * 100 for col in columns}
)
# combined = combined.drop(columns=["gdp"])

In [53]:
# Drop population and gdp columns
combined = combined.drop(columns=["Population", "gdp"])

### Reshape dataset

In [54]:
disasters = pd.pivot_table(combined, columns='disaster_type', index=["Year", "Entity"])
mapping = {
    "All disasters": "all_disasters",
    "Drought": "drought",
    "Earthquake": "earthquake",
    "Extreme temperature": "temperature",
    "Flood": "flood",
    "Fog": "fog",
    "Glacial lake outburst": "glacial_lake",
    "Landslide": "landslide",
    "Mass movement (dry)": "mass_movement",
    "Storm": "storm",
    "Volcanic activity": "volcanic",
    "Wildfire": "wildfire",
}
disasters.columns = [f"{colname[0]}_{mapping[colname[1]]}" for colname in disasters.columns]
disasters = disasters.reset_index()

In [55]:
disasters.head()

Unnamed: 0,Year,Entity,affected_all_disasters,affected_drought,affected_earthquake,affected_temperature,affected_flood,affected_fog,affected_glacial_lake,affected_landslide,...,total_damages_pct_gdp_drought,total_damages_pct_gdp_earthquake,total_damages_pct_gdp_temperature,total_damages_pct_gdp_flood,total_damages_pct_gdp_glacial_lake,total_damages_pct_gdp_landslide,total_damages_pct_gdp_mass_movement,total_damages_pct_gdp_storm,total_damages_pct_gdp_volcanic,total_damages_pct_gdp_wildfire
0,1900,Cape Verde,0.0,0.0,,,,,,,...,,,,,,,,,,
1,1900,India,0.0,0.0,,,,,,,...,,,,,,,,,,
2,1900,Jamaica,0.0,,,,0.0,,,,...,,,,,,,,,,
3,1900,Japan,0.0,,,,,,,,...,,,,,,,,,,
4,1900,Turkey,0.0,,0.0,,,,,,...,,,,,,,,,,


In [56]:
# Final processing
disasters = disasters[["Entity"] + [col for col in disasters.columns if col != "Entity"]]
disasters = disasters[disasters["Entity"].notna()]
disasters = disasters[(disasters.Year != 2021)]

### Decade dataset

In [57]:
# Decade
disasters_dec = disasters.assign(Year=disasters.Year//10*10)
disasters_dec = disasters_dec.groupby(["Entity", "Year"], as_index=False).sum()

In [58]:
# Add missing decades, fill nulls with zeroes
COLS_FILLNA = [x for x in disasters_dec.columns if x not in ["Entity", "Year"]]


def complete_country_decades(x):
    # Build year dataframe
    years = np.arange(x.Year.min(), x.Year.max()+1, 10)
    df_year = pd.DataFrame({"Year": years})
    # Add years to main df
    x = x.merge(df_year, how="right")
    x["Entity"] = x["Entity"].fillna(method="ffill")
    # Fill NaNs
    x[COLS_FILLNA] = x[COLS_FILLNA].fillna(0)
    return x


disasters_dec = disasters_dec.groupby("Entity", as_index=False).apply(complete_country_decades)
disasters_dec = disasters_dec.reset_index(drop=True)
disasters_dec = disasters_dec[(disasters_dec.Year != 2020)]

## Export

In [59]:
# Export
disasters.sort_values(["Entity", "Year"]).to_csv("output/Natural disasters (EMDAT).csv", index=False)
disasters_dec.sort_values(["Entity", "Year"]).to_csv("output/Natural disasters (EMDAT – decadal).csv", index=False)