In [None]:
from shapely.geometry import Point
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster, HeatMap, HeatMapWithTime
import ipywidgets as widgets
from IPython.display import HTML, display
from collections import defaultdict

In [None]:
# Load the cleaned deadly-earthquake table. index_col=False tells pandas not to
# use the first CSV column as the row index.
# NOTE(review): this path is spelled '../Data' while the UN files further down
# are read from '../data'; on a case-sensitive filesystem only one spelling can
# exist — confirm which one is right.
eq_table = pd.read_csv('../Data/04_deadly_eq_mag_cleaned.csv', index_col=False)

In [None]:
# Show the freshly loaded table for a quick visual sanity check.
eq_table

In [None]:
# Parse the origin column into datetimes so the .dt accessor can be used on it.
origin_raw = eq_table['origin (utc)']
eq_table['origin (utc)'] = pd.to_datetime(origin_raw)

In [None]:
# Calendar year of each event; 'year' is one of the two keys used when this
# table is merged with the UN population and GDP tables.
origin_times = eq_table['origin (utc)']
eq_table['year'] = origin_times.dt.year

In [None]:
# Peek at the raw country field before it is reduced to the text that precedes
# the first ' ('.
eq_table['present-day country and link to wikipedia article']

In [None]:
def _country_name(raw_label):
    """Return the part of ``raw_label`` that precedes the first ' ('."""
    return raw_label.split(' (', 1)[0]


# Keep only the leading country name from the combined country/link field.
eq_table['country'] = eq_table['present-day country and link to wikipedia article'].apply(_country_name)

In [None]:
# Inspect the parsed country at row position 1033.
# NOTE(review): the manual fix that follows addresses *label* 1033; that is the
# same row only while the index is the default 0..n-1 — confirm.
eq_table['country'].iloc[1033]

In [None]:
# Hand-correct the country of the row labelled 1033.
# NOTE(review): hard-coded row label — presumably the parsed value was wrong for
# this entry; re-check if the input CSV is regenerated or re-ordered.
eq_table.loc[1033, 'country'] = 'Venezuela'

In [None]:
# Number of non-null origin timestamps per country, shown as a plain dict.
origin_by_country = eq_table.groupby('country')['origin (utc)']
origin_by_country.count().to_dict()

In [None]:
# Pairwise correlations (pandas' default Pearson method) between the numeric
# columns only. eq_table also carries text columns such as 'country'; from
# pandas 2.0 on, DataFrame.corr() defaults to numeric_only=False and raises on
# them instead of silently dropping them. Selecting the numeric dtypes first
# works on both old and new pandas versions.
corr = eq_table.select_dtypes(include='number').corr()

In [None]:
# Boolean matrix shaped like `corr` that is True on and above the diagonal;
# it is handed to the heatmap as `mask` so those cells are not drawn.
mask = np.triu(np.ones(corr.shape, dtype=bool))

In [None]:
# Masked correlation heatmap on a diverging palette centred at 0.
f, ax = plt.subplots(figsize=(11, 9))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
sns.heatmap(
    corr,
    ax=ax,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    alpha=.5,
);

In [None]:
# Deaths against original magnitude, with marker size scaled by deaths.
# NOTE(review): no `hue` is given, so `palette` probably has no visible effect — confirm.
sns.relplot(
    data=eq_table,
    x='deaths',
    y='original_magnitude',
    size='deaths',
    sizes=(40, 400),
    alpha=.5,
    palette="muted",
);

# There doesn't appear to be an overall correlation between magnitude and deaths

- Download Population data as CSV data from http://data.un.org/Data.aspx?q=population&d=PopDiv&f=variableID%3a12
- Rename the file to un_pop_data.csv and save it in the ../data/ folder (the path the next cell reads from)

In [None]:
# Load the UN population export described in the notes above.
# NOTE(review): the GDP export is read with skipfooter=2 to drop trailing
# lines; check whether this file also ends with footer/footnote rows.
un_pop = pd.read_csv(r"../data/un_pop_data.csv")

In [None]:
# List the column names of the population table.
un_pop.columns

In [None]:
# Keep only the rows whose 'Variant' is 'Medium'.
is_medium_variant = un_pop['Variant'] == 'Medium'
un_pop_medium = un_pop[is_medium_variant]

In [None]:
# Rename to the column names used for the merge ('year') and the analysis
# ('population').
un_pop_medium = un_pop_medium.rename(
    columns={
        "Value": "population",
        "Year(s)": "year",
    }
)

In [None]:
# Lower-cased country name used as a merge key. Matching is by exact string,
# so a country spelled differently in the two sources will not join.
pop_country_names = un_pop_medium['Country or Area']
un_pop_medium['country_join'] = pop_country_names.str.lower()

In [None]:
# Same lower-cased key on the earthquake side of the merge.
eq_country_names = eq_table['country']
eq_table['country_join'] = eq_country_names.str.lower()

In [None]:
# Eyeball the lower-cased join key.
eq_table['country_join']

In [None]:
# Inner join on country and year: events with no matching population row are
# dropped from the result.
eq_table_pop = pd.merge(
    eq_table,
    un_pop_medium,
    how='inner',
    on=['country_join', 'year'],
)

In [None]:
# Number of rows and columns left after the inner join.
eq_table_pop.shape

In [None]:
# Correlation matrix of deaths and population on the merged rows.
deaths_and_population = eq_table_pop[['deaths', 'population']]
deaths_and_population.corr()

# In the matchable subset (events that could be joined to a UN population row), deaths are negatively correlated with population

- Download GDP data as CSV data from http://data.un.org/Data.aspx?q=gdp&d=SNAAMA&f=grID%3a101%3bcurrID%3aUSD%3bpcFlag%3a1
- Rename the file to un_per_capita_gdp.csv and save it in the ../data/ folder (the path the next cell reads from)

In [None]:
# Load the UN per-capita GDP export, dropping its last two lines.
# skipfooter is not supported by pandas' default C parser: without an explicit
# engine pandas falls back to the Python parser and emits a ParserWarning.
# Requesting engine='python' up front gives the same result without the warning.
un_gdp = pd.read_csv(r"../data/un_per_capita_gdp.csv", skipfooter=2, engine='python')

In [None]:
# Rename to the column names used for the merge ('year') and the analysis
# ('gdp', 'country').
un_gdp = un_gdp.rename(
    columns={
        "Value": "gdp",
        "Country or Area": "country",
        "Year": "year",
    }
)

In [None]:
# Keep only the rows whose 'Item' is the GDP series.
is_gdp_item = un_gdp['Item'] == 'Gross Domestic Product (GDP)'
un_gdp = un_gdp[is_gdp_item]

In [None]:
# Lower-cased country name used as a merge key (exact string match).
gdp_country_names = un_gdp['country']
un_gdp['country_join'] = gdp_country_names.str.lower()

In [None]:
# Print dtypes and non-null counts of the prepared GDP table.
un_gdp.info()

In [None]:
# Inner join on country and year: events with no matching GDP row are dropped.
# Both inputs have a 'country' column, so the result carries 'country_x'
# (earthquake side) and 'country_y' (GDP side).
eq_table_gdp = pd.merge(
    eq_table,
    un_gdp,
    how='inner',
    on=['country_join', 'year'],
)

In [None]:
# List the merged columns (overlapping names get _x / _y suffixes).
eq_table_gdp.columns

In [None]:
# Events whose country never appears in the GDP merge result. Events of a
# country that matched in at least one year are not listed here, even if
# their own year had no GDP row.
matched_countries = eq_table_gdp['country_x'].to_list()
has_gdp_match = eq_table['country'].isin(matched_countries)
missing_countries = eq_table[~has_gdp_match]

In [None]:
# Spot check: unmatched events whose country contains 'Japan'.
japan_unmatched = missing_countries['country'].str.contains('Japan')
missing_countries[japan_unmatched]

In [None]:
# Spot check: GDP rows whose country contains 'Japan'.
japan_in_gdp = un_gdp['country'].str.contains('Japan')
un_gdp[japan_in_gdp]

In [None]:
# Correlation matrix of deaths and per-capita GDP on the merged rows.
deaths_and_gdp = eq_table_gdp[['deaths', 'gdp']]
deaths_and_gdp.corr()

In [None]:
# Scatter of per-capita GDP (y) against deaths (x) for the merged rows.
sns.scatterplot(
    data=eq_table_gdp[['deaths', 'gdp']],
    x='deaths',
    y='gdp',
)

In [None]:
# Natural log of the death toll, stored as a new column.
# NOTE(review): a death count of 0 would give -inf here — confirm the data has none.
death_counts = eq_table_gdp['deaths']
eq_table_gdp['log_deaths'] = np.log(death_counts)

In [None]:
# Show the merged table, now including the log_deaths column.
eq_table_gdp

In [None]:
# Same scatter as before, with deaths on a natural-log scale.
sns.scatterplot(
    data=eq_table_gdp[['log_deaths', 'gdp']],
    x='log_deaths',
    y='gdp',
)

In [None]:
# Correlation matrix of log-deaths and per-capita GDP.
log_deaths_and_gdp = eq_table_gdp[['log_deaths', 'gdp']]
log_deaths_and_gdp.corr()

# FACT: Earthquakes don’t kill people; buildings and their contents do.

- Distance from the epicentre
- Level of development
- Population density
- Time of day influences whether people are in their homes, at work or travelling. 
- The time of year and climate will influence survival rates and the rate at which disease can spread.