In [14]:
import urllib.request
from bs4 import BeautifulSoup as BS
import pandas as pd
import numpy as np
import re
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
%matplotlib inline
import json
import requests
import geopandas as gpd
from geopandas import GeoDataFrame
import fiona
from shapely.geometry import Point
from IPython import display
from IPython.display import display, clear_output
from mpl_toolkits.axes_grid1 import make_axes_locatable

> Scraping data from Wikipedia using BeautifulSoup

In [15]:
## Build the request for the Wikipedia list of deadly earthquakes since 1900
request = urllib.request.Request('https://en.wikipedia.org/wiki/List_of_deadly_earthquakes_since_1900')

## Read the raw HTML inside a `with` block so the HTTP response (and its socket)
## is always closed, even if reading the body fails part-way through
with urllib.request.urlopen(request) as result:
    resulttext = result.read()

In [16]:
## Parse the downloaded HTML bytes with Python's built-in 'html.parser' backend
soup = BS(markup=resulttext, features='html.parser')

In [17]:
## Find the earthquake table using its unique class
table = soup.find('table', class_ = 'sortable wikitable')

## soup.find returns None when nothing matches; fail with a clear message instead
## of an AttributeError on the next line if the page layout has changed
if table is None:
    raise ValueError("Could not find a 'sortable wikitable' table on the Wikipedia page")

## Find all rows within the table
table = table.find_all('tr')

## For every row, collect the stripped text of each <td> cell.
## The header row holds no <td> cells, so it yields an empty list; it is kept here
## on purpose because a later cell drops that empty first row by its index label.
data = [[cell.text.strip() for cell in row.find_all('td')] for row in table]

## Create a DataFrame from our stripped data
eq_df = pd.DataFrame(data)

In [18]:
## Give the scraped columns descriptive names (one name per table column, in order)
eq_df.columns = [
    'origin',
    'country',
    'lat',
    'long',
    'depth_km',
    'magnitude',
    'sec_effects',
    'shaking_death',
    'pde_total',
    'utsu_total',
    'em_total',
    'other_deaths',
]

In [19]:
## Drop the empty first row (index label 0).
## `axis` is passed by keyword: newer pandas releases no longer accept it positionally.
eq_df = eq_df.drop(0, axis=0)

## Keep the list of column names available for later use
eq_columns = ['origin','country','lat','long','depth_km','magnitude','sec_effects','shaking_death','pde_total','utsu_total','em_total','other_deaths']

In [20]:
## Regex function
def eq_reg(eq_col, reg_phrase):
    """Delete every match of a regular expression from one column of `eq_df`.

    Parameters
    ----------
    eq_col : str
        Name of the (string-valued) column in the module-level `eq_df` to clean.
    reg_phrase : str
        Regular-expression pattern; each match is replaced by the empty string.

    Returns
    -------
    None
        The column is overwritten in place on the global `eq_df`.
    """
    # regex=True is explicit because the default of Series.str.replace differs
    # between pandas versions; without it the pattern may be taken as literal text.
    eq_df[eq_col] = eq_df[eq_col].str.replace(reg_phrase, '', regex=True)

## Cleaning columns
## Patterns are raw strings so that backslash escapes reach the regex engine
## unchanged and Python does not warn about invalid string escape sequences.
eq_reg('magnitude', r'([a-zA-Z])')      # letters
eq_reg('magnitude', r'\[..\]')          # bracketed two-character markers
eq_reg('other_deaths', r'\[.*\]')       # any bracketed marker
eq_reg('em_total', r'\[7\].')           # the literal "[7]" marker plus the next character
eq_reg('country', r'\([^\)]*\)*')       # parenthesised notes
eq_reg('other_deaths', r'\([^\)]*\)*')  # parenthesised notes
eq_reg('other_deaths', r'\+')           # plus signs
eq_reg('other_deaths', r'26271 26000')  # a cell that lists two figures
## The asterisks are escaped so this matches the literal text "231000* 283000* 227898*";
## unescaped, each '*' is a quantifier and the pattern can never match that text.
eq_reg('other_deaths', r'231000\* 283000\* 227898\*')
eq_reg('other_deaths', r'\*')           # any remaining asterisks


In [21]:
## Numeric conversion helper
def float_convert(df,col):
    """Overwrite `df[col]` with its numeric form, in place.

    Values that cannot be parsed as numbers become NaN (errors='coerce').
    Nothing is returned; the DataFrame passed in is modified.
    """
    as_numbers = pd.to_numeric(df[col], errors='coerce')
    df[col] = as_numbers

## Convert the death-toll columns and the magnitude column to numbers
for numeric_col in ('pde_total', 'utsu_total', 'em_total', 'other_deaths', 'magnitude'):
    float_convert(eq_df, numeric_col)

In [22]:
## For each earthquake, keep the largest figure reported across the four death-toll columns
death_sources = eq_df[['pde_total', 'utsu_total', 'em_total', 'other_deaths']]
eq_df['deaths'] = death_sources.max(axis=1)

In [23]:
## Convert the coordinates to numbers first. float_convert coerces anything that is
## not a number (including empty strings) to NaN, and it only touches the lat/long
## cells rather than blanking out every column of the affected rows.
float_convert(eq_df, 'lat')
float_convert(eq_df, 'long')

## Drop rows with no usable location or no death toll. Doing this after the
## conversion also removes rows whose coordinates could not be parsed, so no
## NaN coordinates reach the geometry step. `.copy()` makes the result an
## independent frame so later column assignments do not act on a slice.
## ('deaths' needs no conversion: it is the max of already-numeric columns.)
eq_df = eq_df.dropna(subset=['lat', 'long', 'deaths']).copy()

In [24]:
## Converting wikipedia data to a Point geometry column for GeoPandas
## (Point takes x = longitude, y = latitude)
geometry = [Point(lon, lat) for lon, lat in zip(eq_df['long'], eq_df['lat'])]

## NOTE(review): `df` is not used by the constructor below, which is built from
## eq_df and therefore keeps the 'lat'/'long' columns; the name is kept only so
## that any later reference to `df` still resolves.
df = eq_df.drop(['long', 'lat'], axis=1)

## WGS84 latitude/longitude. The authority string replaces the older
## {'init': 'epsg:4326'} dict form, which newer pyproj/GeoPandas flag as deprecated.
crs = 'EPSG:4326'
eq_df = GeoDataFrame(eq_df, crs=crs, geometry=geometry)

> Bringing in live earthquake data from the USGS API

In [25]:
## API site: https://earthquake.usgs.gov/fdsnws/event/1/#format-geojson

url = 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&minmagnitude=2.0&orderby=time&limit=5000'

## The timeout stops the notebook hanging forever on a stalled connection, and
## raise_for_status() stops on an HTTP error instead of handing an error page
## to the GeoJSON reader below.
request = requests.get(url, timeout=60)
request.raise_for_status()

## Response.content is already a bytes object, so it is used directly
b = request.content
with fiona.BytesCollection(b) as f:
    crs = f.crs
    gdf = gpd.GeoDataFrame.from_features(f, crs=crs)

live_df = gdf

> Create a function that uses a slider to filter the mapped earthquakes by magnitude

In [26]:
## Load the low-resolution Natural Earth country outlines bundled with GeoPandas
## NOTE(review): the bundled `gpd.datasets` module may be unavailable in newer
## GeoPandas releases - confirm against the installed version.
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(naturalearth_path)

In [27]:
## Filter both datasets by the slider value before drawing anything.
## If no earthquake in either dataset exceeds the value, print a message instead of a map.
## NOTE - colorbar starts with slider value as a minimum

def mag_slider(x):
    """Draw a world map of all earthquakes with magnitude greater than `x`.

    Parameters
    ----------
    x : float
        Minimum magnitude (exclusive), normally supplied by the slider widget.

    Returns
    -------
    None
        The figure is rendered with plt.show(). If neither dataset has an
        earthquake above `x`, a message is printed and no map is drawn.

    Notes
    -----
    Reads the module-level `world`, `eq_df` (circles, 'magnitude' column) and
    `live_df` (diamonds, 'mag' column).
    """
    clear_output(wait=True)

    # Filter each dataset exactly once. Looping over a DataFrame iterates its
    # column names, which would redraw the same points once per column.
    eq_table = eq_df[eq_df['magnitude'] > x]
    live_table = live_df[live_df['mag'] > x]

    # Decide before creating a figure so no empty map is left behind
    if eq_table.empty and live_table.empty:
        print('Magnitude too high - select lower magnitude')
        return None

    pylab.rcParams['figure.figsize'] = 20, 20
    base = world.plot(color='lightgray', edgecolor='gray')
    fig = base.get_figure()
    base.axis('off')

    # Colorbar in a narrow axes to the right of the map, spanning x..10
    space = make_axes_locatable(base)
    loc = space.append_axes('right', size='3%', pad=0.01)
    sm = plt.cm.ScalarMappable(cmap='jet', norm=plt.Normalize(vmin=x, vmax=10.0))
    sm.set_array([])
    fig.colorbar(sm, cax=loc)

    # vmin/vmax match the colorbar so point colours mean the same in both layers.
    # Empty layers are skipped so an empty frame is never plotted.
    if not eq_table.empty:
        eq_table.plot(ax=base, marker='o', alpha=0.3, column='magnitude', cmap='jet',
                      markersize=10, vmin=x, vmax=10.0)
    if not live_table.empty:
        live_table.plot(ax=base, marker='D', alpha=0.3, column='mag', cmap='jet',
                        markersize=10, vmin=x, vmax=10.0)
    return plt.show()

In [28]:
## Magnitude slider from 1.0 to 10.0 in steps of 0.1, starting at 4.5
mag = widgets.FloatSlider(
    description='Magnitude',
    value=4.5, min=1.0, max=10.0, step=0.1,
    orientation='horizontal',
    readout=True, readout_format='.2f',
    continuous_update=False,
    disabled=False,
)

## Bind the slider to mag_slider's `x` argument

interact(mag_slider, x=mag);

A Jupyter Widget

In [31]:
## Function looks for country name to match the country name in the wikipedia data
## Live data stores the country name in the title column, along with other data, so str.contains is used

def country_drop(x):
    """Draw one country's outline with its historical and live earthquakes.

    Parameters
    ----------
    x : str
        Country name, normally supplied by the dropdown widget. It is compared
        with `world.name` and `eq_df['country']`, and searched for
        (case-insensitively, as plain text) inside `live_df['title']`.

    Returns
    -------
    None
        The figure is rendered with plt.show(). If `world` has no outline with
        that name, a message is printed and nothing is drawn.
    """
    clear_output(wait=True)
    pylab.rcParams['figure.figsize'] = 20, 20

    country = world.loc[world.name == x]
    if country.empty:
        print('No map outline found for ' + str(x))
        return None

    base = country.plot(color='lightgray', edgecolor='gray')
    fig = base.get_figure()
    base.axis('off')

    # One colorbar for the whole figure, created once (not once per DataFrame column)
    space = make_axes_locatable(base)
    loc = space.append_axes('right', size='3%', pad=0.01)
    sm = plt.cm.ScalarMappable(cmap='jet', norm=plt.Normalize(vmin=1.0, vmax=10.0))
    sm.set_array([])
    fig.colorbar(sm, cax=loc)

    # Historical data: exact country match. Whitespace is stripped because removing
    # a parenthesised note from the country text can leave a trailing space.
    eq_table = eq_df[eq_df['country'].str.strip() == x]
    if not eq_table.empty:
        eq_table.plot(ax=base, marker='o', alpha=0.6, column='magnitude', cmap='jet',
                      markersize=40, vmin=1.0, vmax=10.0)

    # Live data: build a boolean mask from the title text. regex=False treats the
    # name literally and na=False makes missing titles count as "no match".
    live_bool = live_df['title'].str.contains(x, case=False, na=False, regex=False)
    live_table = live_df[live_bool]
    if not live_table.empty:
        live_table.plot(ax=base, marker='D', alpha=0.6, column='mag', cmap='jet',
                        markersize=40, vmin=1.0, vmax=10.0)
    return plt.show()

In [32]:
## Country dropdown. `readout` is a slider option rather than a Dropdown option,
## so it is not passed here.
eq_country = widgets.Dropdown(
    options=[
        'China',
        'Japan',
        'Pakistan',
        'Peru',
        'Turkey',],
    description='Country:',
    disabled=False,
)

## Redraw the country map whenever a new country is selected
widgets.interact(country_drop, x= eq_country);

A Jupyter Widget