In [1]:
import pandas as pd
import altair as alt

In [2]:
# Altair encoding type names, kept as constants so chart cells do not repeat bare strings.
ORDINAL = 'ordinal'    # discrete, ordered values (used for the Year axis)
TEMPORAL = 'temporal'  # date/time values

In [3]:
# Labels of the disasters-table columns that the cells below refer to by name.
YEAR = 'Year'
HAZARD_CATEGORY = 'Hazard Category'
HAZARD_TYPE = 'Hazard Type'
NEW_DISPLACEMENTS = 'New Displacements'

In [4]:
# Load the raw disasters table.
# header=[0, 1] makes pandas read the first two rows as a two-level column header.
# NOTE(review): with a two-level header the parsed column labels are tuples; confirm
# that these single-level dtype keys are actually matched by the installed pandas.
disasters = pd.read_csv(
    'disasters/disasters.csv',
    encoding='latin-1',
    header=[0, 1],
    dtype={
        HAZARD_CATEGORY: str,
        HAZARD_TYPE: str,
    },
)

In [5]:
# Keep only the top row of the two-row header as the column labels.
# get_level_values(0) is used instead of droplevel(1) so that re-running this cell
# after the columns are already flat is a no-op rather than an error.
disasters.columns = disasters.columns.get_level_values(0)

In [6]:
# Lower-case every hazard type so that labels differing only in capitalization merge.
disasters = disasters.assign(**{HAZARD_TYPE: disasters[HAZARD_TYPE].str.lower()})

In [7]:
# Preview the frame after the header and capitalization fixes.
# The rendered output elides the middle rows of the table.
disasters

Unnamed: 0,ISO3,Name,Year,Start Date,Event Name,Hazard Category,Hazard Type,New Displacements
0,AB9,Abyei Area,2018,2018-07-01,Abyie: Flood - 01/07/2018,Weather related,flood,2.0
1,AB9,Abyei Area,2019,2019-06-01,Abyei: Flood - southern parts - 01/06/2019,Weather related,flood,40000.0
2,AFG,Afghanistan,2008,2008-04-17,,Geophysical,earthquake,3250.0
3,AFG,Afghanistan,2008,2008-08-01,,Weather related,flood,180.0
4,AFG,Afghanistan,2008,2008-01-01,,Weather related,extreme temperature,
...,...,...,...,...,...,...,...,...
10180,ZWE,Zimbabwe,2019,2019-02-13,Zimbabwe: Floods- Chiredzi (Masvingo)- 13/02/2019,Weather related,flood,200.0
10181,ZWE,Zimbabwe,2019,2019-10-07,Zimbabwe: Hailstorm- Masvingo (Zaka)- 07/10/2019,Weather related,storm,400.0
10182,ZWE,Zimbabwe,2020,2020-02-11,Zimbabwe: Flash flooding- Matabeleland North (...,Weather related,flood,148.0
10183,ZWE,Zimbabwe,2020,2020-01-17,Zimbabwe: Flash flooding- Matabeleland North (...,Weather related,flood,35.0


In [8]:
# List the distinct hazard-category labels present in the data (missing values included).
disasters.loc[:, HAZARD_CATEGORY].unique()

array(['Weather related', 'Geophysical', nan], dtype=object)

In [9]:
# Events with no hazard category: (count, fraction of all events).
m, n = disasters[HAZARD_CATEGORY].isna().sum(), disasters.shape[0]
m, m / n

(3, 0.0002945508100147275)

In [10]:
# Show the rows whose hazard category is missing.
disasters.loc[disasters[HAZARD_CATEGORY].isna()]

Unnamed: 0,ISO3,Name,Year,Start Date,Event Name,Hazard Category,Hazard Type,New Displacements
776,BGD,Bangladesh,2017,2017-04-12,"Bangladesh : Riverbanck erosion - Sirajganj , ...",,,630.0
2424,COL,Colombia,2018,2018-03-02,Colombia: oil spill - Santander - March 2018,,,178.0
9120,USA,United States,2017,2017-08-11,United States : Utah Uintah Fire - Utah - 05/0...,,,309.0


In [11]:
# New displacements recorded for the uncategorized events:
# (total, fraction of all recorded new displacements).
m = disasters.loc[disasters[HAZARD_CATEGORY].isna(), NEW_DISPLACEMENTS].sum()
n = disasters[NEW_DISPLACEMENTS].sum()
m, m / n

(1117.0, 3.5053208935342254e-06)

In [12]:
# The uncategorized rows are negligible, so keep only rows that have a hazard category.
disasters = disasters[disasters[HAZARD_CATEGORY].notna()]

In [13]:
# Map each hazard category to the distinct hazard types recorded under it.
{
    category: events[HAZARD_TYPE].unique()
    for category, events in disasters.groupby(HAZARD_CATEGORY)
}

{'Geophysical': array(['earthquake', 'dry mass movement', 'volcanic eruption',
        'volcanic activity', 'mass movement', 'wet mass movement'],
       dtype=object),
 'Weather related': array(['flood', 'extreme temperature', 'wet mass movement', 'storm',
        'drought', 'wildfire', 'mass movement', 'severe winter condition'],
       dtype=object)}

In [14]:
# Number of recorded events per year, with bars coloured by hazard category.
# groupby(...).size() with as_index=False yields one row per (year, category)
# and a 'size' column holding the event count.
source = (
    disasters
    .groupby([YEAR, HAZARD_CATEGORY], as_index=False)
    .size()
)
alt.Chart(source).mark_bar().encode(
    x=alt.X(YEAR, type=ORDINAL),
    y='size',
    color=HAZARD_CATEGORY,
)

In [15]:
# Total new displacements per year, restricted to weather-related events.
is_weather = disasters[HAZARD_CATEGORY] == 'Weather related'
source = disasters[is_weather].groupby(YEAR)[NEW_DISPLACEMENTS].sum()
# Altair needs a DataFrame, so turn the year index back into a regular column.
alt.Chart(source.reset_index()).mark_bar().encode(
    x=alt.X(YEAR, type=ORDINAL),
    y=NEW_DISPLACEMENTS,
)

In [18]:
# Total new displacements per (year, hazard category).
# The value column is selected explicitly: summing the whole frame would also try to
# aggregate the text columns (ISO3, Name, Start Date, Event Name, Hazard Type), which
# pandas 2.0+ no longer drops automatically.
disasters.groupby([YEAR, HAZARD_CATEGORY], as_index=False)[NEW_DISPLACEMENTS].sum()

Unnamed: 0,Year,Hazard Category,New Displacements
0,2008,Geophysical,15769370.0
1,2008,Weather related,22471458.0
2,2009,Geophysical,1478323.0
3,2009,Weather related,15254184.0
4,2010,Geophysical,4049861.0
5,2010,Weather related,38300305.0
6,2011,Geophysical,1143625.0
7,2011,Weather related,13880529.0
8,2012,Geophysical,677548.0
9,2012,Weather related,29468412.0
