In [5]:
import numpy as np
import pandas as pd
import scipy as sp
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols

In [6]:
# Import plotting routines
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

In [7]:
# Load the EM-DAT storm records; the path is relative to the notebook's working directory.
emdat = pd.read_csv('EM-DAT/Storms_all_countries.csv')
# Show a bounded preview rather than echoing the whole frame.
emdat.head(10)

Unnamed: 0,year,disaster type,iso,country_name,occurrence,Total deaths,Affected,Injured,Homeless,Total affected,Total damage
0,1900,Storm,USA,United States of America (the),1,6000.0,,,,,30000.0
1,1902,Storm,MMR,Myanmar,1,600.0,,,,,
2,1903,Storm,JAM,Jamaica,1,65.0,,,,,
3,1903,Storm,USA,United States of America (the),1,98.0,,,,,
4,1904,Storm,BGD,Bangladesh,1,,,,,,
5,1905,Storm,PHL,Philippines (the),1,240.0,,,,,
6,1906,Storm,HKG,Hong Kong,1,10000.0,,,,,20000.0
7,1906,Storm,USA,United States of America (the),2,298.0,,,,,
8,1909,Storm,BGD,Bangladesh,2,172.0,,,,,
9,1909,Storm,HTI,Haiti,1,150.0,,,,,


In [8]:
# Rows in which all three impact measures are reported (no missing value in any of them).
impact_cols = ['Total affected', 'Total damage', 'Total deaths']
emdat.loc[:, impact_cols].dropna(axis='index', how='any')

Unnamed: 0,Total affected,Total damage,Total deaths
14,2700.0,5000.0,28.0
63,12000.0,40000.0,2000.0
114,1881672.0,400000.0,4197.0
127,300.0,18000.0,14.0
145,20200.0,56000.0,154.0
152,800.0,2000.0,23.0
155,452.0,50000.0,1014.0
207,1500000.0,600000.0,5264.0
216,1250.0,35000.0,5.0
225,425000.0,32000.0,631.0


Notes: EM-DAT summarizes events per year, so a single data point can contain the combined outcome of more than one storm. It is therefore not possible to attribute effects or deaths to an individual storm unless only one storm occurred in that entry.
<br>

The current selection contains 1976 entries, but only 174 of these are complete in all columns. <br>

When looking at total affected and total damage only, the dataset contains 719 complete entries. <br>

When looking at total affected, total damage, and total deaths together, the dataset contains 640 complete entries. <br>

In [9]:
# Shorten the column names so they can be used as identifiers in model formulas.
# rename() returns a new frame, which is re-bound here instead of mutating the
# original object in place.
# NOTE(review): the ' country_name' key carries a leading space — presumably it
# matches the raw CSV header (the following head() output shows 'country'); confirm.
emdat = emdat.rename(columns={
    ' country_name': 'country',
    'Total affected': 'tot_aff',
    'Total damage': 'tot_dam',
    'Total deaths': 'tot_death',
})

In [10]:
# Check the renamed columns on the first two rows.
emdat.iloc[:2]

Unnamed: 0,year,disaster type,iso,country,occurrence,tot_death,Affected,Injured,Homeless,tot_aff,tot_dam
0,1900,Storm,USA,United States of America (the),1,6000.0,,,,,30000.0
1,1902,Storm,MMR,Myanmar,1,600.0,,,,,


In [11]:
# Keep only the rows whose disaster type is 'Storm'.
is_storm = emdat['disaster type'].eq('Storm')
storms = emdat.loc[is_storm]

In [12]:
# First five storm rows, all columns.
storms.head(n=5)

Unnamed: 0,year,disaster type,iso,country,occurrence,tot_death,Affected,Injured,Homeless,tot_aff,tot_dam
0,1900,Storm,USA,United States of America (the),1,6000.0,,,,,30000.0
1,1902,Storm,MMR,Myanmar,1,600.0,,,,,
2,1903,Storm,JAM,Jamaica,1,65.0,,,,,
3,1903,Storm,USA,United States of America (the),1,98.0,,,,,
4,1904,Storm,BGD,Bangladesh,1,,,,,,


In [14]:
# Narrow the storm table to the identifier columns and the three impact measures.
keep_cols = ['year', 'iso', 'country', 'occurrence', 'tot_aff', 'tot_death', 'tot_dam']
storms = storms.loc[:, keep_cols]

In [15]:
# First five rows of the reduced storm table.
storms.iloc[:5]

Unnamed: 0,year,iso,country,occurrence,tot_aff,tot_death,tot_dam
0,1900,USA,United States of America (the),1,,6000.0,30000.0
1,1902,MMR,Myanmar,1,,600.0,
2,1903,JAM,Jamaica,1,,65.0,
3,1903,USA,United States of America (the),1,,98.0,
4,1904,BGD,Bangladesh,1,,,


In [16]:
# Multiple OLS regression of total affected on total damage and total deaths.
# The trailing "-1" in the formula removes the intercept, so the fit is forced
# through the origin and only the two slope coefficients are estimated.
regress_multiple_p = smf.ols(formula='tot_aff ~ tot_dam + tot_death-1', data=storms).fit()
# The summary table already lists the coefficients; it is the cell's single output.
regress_multiple_p.summary()

0,1,2,3
Dep. Variable:,tot_aff,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,5.091
Date:,"Mon, 02 May 2016",Prob (F-statistic):,0.0064
Time:,15:51:35,Log-Likelihood:,-10911.0
No. Observations:,640,AIC:,21830.0
Df Residuals:,638,BIC:,21840.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
tot_dam,0.0603,0.030,2.023,0.044,0.002 0.119
tot_death,40.9474,16.873,2.427,0.016,7.814 74.081

0,1,2,3
Omnibus:,1118.989,Durbin-Watson:,1.967
Prob(Omnibus):,0.0,Jarque-Bera (JB):,750435.165
Skew:,11.07,Prob(JB):,0.0
Kurtosis:,169.286,Cond. No.,566.0


In [17]:
# Disabled scratch code for a fitted-curve overlay; nothing in this cell executes.
# NOTE(review): the fitted model reports only two coefficients (tot_dam and
# tot_death, no intercept), so the params[2] lookup below would fail if this
# were re-enabled as written.
#x_grid=np.linspace(0,storms['tot_aff'].max(), 100)
#y_pol_intercept = regress_multiple_p.params[0] + regress_multiple_p.params[1]*x_grid + regress_multiple_p.params[2]*x_grid
#y_pol= regress_multiple_p.params[0]*x_grid + regress_multiple_p.params[1]*x_grid

In [18]:
# Select the inline backend (the plotting-import cell near the top already does this).
%matplotlib inline

In [19]:
# Switch matplotlib to the Qt backend for the figures that follow.
# NOTE(review): presumably this opens the figures in separate windows instead of
# rendering them inline, and requires a Qt binding plus a display — confirm this
# is intended, since it overrides the inline selection made just before.
%matplotlib qt

In [20]:
# Observed total damage against total affected (black dots), overlaid with the
# fitted values of the through-origin OLS model `regress_multiple_p` (red crosses).
fig, ax = plt.subplots(figsize=[12, 9])
ax.scatter(storms['tot_aff'], storms['tot_dam'], color='k', clip_on=False,
           label='Observed total damage')

# Fitted tot_aff = b_dam * tot_dam + b_death * tot_death.
# Coefficients are looked up by name so that each one multiplies its own
# regressor; the dependent variable tot_aff must not appear on the right-hand side.
coefs = regress_multiple_p.params
fitted_aff = coefs['tot_dam'] * storms['tot_dam'] + coefs['tot_death'] * storms['tot_death']
# NOTE(review): fitted_aff is in units of total affected, so the red crosses form
# an observed-vs-fitted comparison drawn on the same axes as the damage scatter —
# confirm this overlay is the intended view.
ax.plot(storms['tot_aff'], fitted_aff, 'rx', label='OLS fitted total affected')

ax.set_xlabel('Total affected')
ax.set_ylabel('Total damage')
ax.set_title('Storms')
ax.set_xlim([0, 2e7])
ax.set_ylim([0, 1e7])
# Both artists carry labels, so the legend has entries to show; the trailing
# semicolon suppresses the Legend repr in the cell output.
ax.legend();

<matplotlib.legend.Legend at 0x1220ad668>

In [21]:
# Observed total deaths against total affected (black dots), overlaid with the
# fitted values of the through-origin OLS model `regress_multiple_p` (red crosses).
fig, ax = plt.subplots(figsize=[12, 9])
ax.scatter(storms['tot_aff'], storms['tot_death'], color='k', clip_on=False,
           label='Observed total deaths')

# Fitted tot_aff = b_dam * tot_dam + b_death * tot_death.
# Coefficients are looked up by name so that each one multiplies its own
# regressor; the dependent variable tot_aff must not appear on the right-hand side.
coefs = regress_multiple_p.params
fitted_aff = coefs['tot_dam'] * storms['tot_dam'] + coefs['tot_death'] * storms['tot_death']
# NOTE(review): fitted_aff is in units of total affected, so with the y-limit of
# 1e4 used for deaths many fitted points may fall outside the visible range —
# confirm this overlay is the intended view.
ax.plot(storms['tot_aff'], fitted_aff, 'rx', label='OLS fitted total affected')

ax.set_xlabel('Total affected')
ax.set_ylabel('Total deaths')
ax.set_title('Storms')
ax.set_xlim([0, 2e7])
ax.set_ylim([0, 1e4])
# Both artists carry labels, so the legend has entries to show; the trailing
# semicolon suppresses the Legend repr in the cell output.
ax.legend();

<matplotlib.legend.Legend at 0x1221ec438>