In [1]:
from nose.tools import assert_equal, assert_almost_equal
import numpy as np
import pandas as pd

For this set of problems we will work with a historical archive of large [flood events](https://data.amerigeoss.org/dataset/types-of-large-flood-events-1985-2016). If the code below cannot find the data, download the CSV file with the flood events and save it as `FloodArchive.csv` in the notebook's working directory.

In [4]:
# Flood archive CSV, resolved relative to the notebook's working directory.
FLOOD_ARCHIVE_CSV = "FloodArchive.csv"

# One row per flood event; every problem below starts from this frame.
events = pd.read_csv(FLOOD_ARCHIVE_CSV)

# Preview the first rows to check that the file parsed as expected.
events.head()

Unnamed: 0,ID,GlideNumber,Country,OtherCountry,long,lat,Area,Began,Ended,Validation,Dead,Displaced,MainCause,Severity
0,1,0,Algeria,0,5.23026,35.8142,92615.67,1/1/85,1/5/85,News,26,3000,Heavy rain,1.0
1,2,0,Brazil,0,-45.3489,-18.7111,678498.82,1/15/85,2/2/85,News,229,80000,Heavy rain,2.0
2,3,0,Phillipines,0,122.974,10.0207,12846.03,1/20/85,1/21/85,News,43,444,Torrential rain,1.0
3,4,0,Indonesia,0,124.606,1.01489,16542.12,2/4/85,2/18/85,News,21,300,Torrential rain,1.0
4,5,0,Mozambique,0,32.3491,-25.8693,20082.21,2/9/85,2/11/85,News,19,0,Heavy rain,2.0


## Problem 1

Which flood cause is responsible for the highest total number of fatalities, summed over all flood events? Save the result as a string into variable `fatal_cause`.

*Hint: Make sure you correctly account for duplicate names in flood causes!*

In [5]:
# BEGIN SOLUTION
# Normalise the cause labels to lower case so that spellings differing only
# in capitalisation fall into the same group. This overwrites the column in
# `events`, exactly as the rest of the notebook sees it.
events["MainCause"] = events["MainCause"].str.lower()

# Total death toll per (normalised) cause, then the label of the largest one.
deaths_by_cause = events.groupby("MainCause")["Dead"].sum()
fatal_cause = deaths_by_cause.idxmax()
# END SOLUTION

In [6]:
# The expected label is lower case because the solution lower-cases
# `MainCause` before summing the deaths per cause.
assert fatal_cause == 'tropical cyclone'

## Problem 2

Create a `Series` that has the geographic coordinates (`lat` and `long`) as the index and the area affected by the flood (`Area`) as the values, and save it with `gflood` as the variable name.

In [7]:
# BEGIN SOLUTION
# Use the coordinate pair as a two-level index, then keep only the affected
# area so the result is a Series named after that column.
coordinate_columns = ['lat', 'long']
events_by_location = events.set_index(coordinate_columns)
gflood = events_by_location['Area']
# END SOLUTION

In [11]:
# The solution re-indexes `events` without filtering any rows, so `gflood`
# must have one entry per flood event in the archive.
assert len(gflood) == 5130

In [12]:
# The Series must carry the name of the column it was taken from.
assert gflood.name == 'Area'

## Problem 3

How many flood events lasted longer than one week (more than 7 days between `Began` and `Ended`)? Save the result into a variable named `event_7days`.

In [13]:
# BEGIN SOLUTION
# Parse both date columns; entries that cannot be parsed become NaT and
# therefore never count as "longer than 7 days" below.
began = pd.to_datetime(events['Began'], errors='coerce')
ended = pd.to_datetime(events['Ended'], errors='coerce')

# Duration of every event as a timedelta.
dt = ended - began

# Number of events whose duration exceeds 7 whole days.
event_7days = dt.dt.days.gt(7).sum()
# END SOLUTION

In [16]:
# Expected number of events whose `Ended` date lies more than 7 days after
# their `Began` date.
assert_equal(event_7days, 1837)

## Problem 4

Using the `resample` functionality of `pandas`, can you create a `Series` (named `monthly`) with the number of flood events per month?

In [18]:
# BEGIN SOLUTION
# Index the events by their parsed start date so they can be resampled in time.
began_dates = pd.to_datetime(events['Began'])
events_by_start = events.set_index(began_dates)

# 'MS' buckets are labelled with the first day of each month; counting the
# (non-null) IDs in each bucket gives the number of events that began that month.
monthly = events_by_start['ID'].resample('MS').count()
# END SOLUTION

In [19]:
# Selecting with the partial date string '1990-1' picks the January 1990
# bucket; `.values` is then an array (presumably with a single element, since
# the solution resamples to one bucket per month) that is compared against 8.
assert_equal(monthly['1990-1'].values, 8)

## Problem 5

Download the GDP data from the [World Bank site](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD). Can you create a `DataFrame` (named `df`) that has the country as the index, and the 2015 GDP and the total number of people displaced by floods as the two columns? The two columns should be named `GDP` and `Displaced`.

In [20]:
# BEGIN SOLUTION
# GDP per country for 2015. The first column of the World Bank export becomes
# the index, and the leading metadata lines of the file are skipped.
gdp = pd.read_csv("API_NY.GDP.MKTP.CD_DS2_en_csv_v2_3840536.csv", skiprows=3, index_col=0)['2015']

# Total number of displaced people per country. The column is selected
# *before* summing so that only `Displaced` is aggregated; summing the whole
# frame would also try to add up the text and date columns, which is wasteful
# and can raise a TypeError on recent pandas versions.
events_by_country = events.groupby('Country')['Displaced'].sum()

# Inner join on the country index: only countries present under the same name
# in both sources are kept. The GDP column is renamed from its year label;
# `Displaced` keeps its name, which the check cell below relies on.
df = pd.merge(gdp, events_by_country, left_index=True, right_index=True).rename(columns={'2015': 'GDP'})
# END SOLUTION

In [21]:
# Armenia's displaced total summed over all of its flood events; this also
# checks that the column is labelled 'Displaced'.
assert_equal(df.loc['Armenia', 'Displaced'], 400)