In [1]:
import pandas as pd
import numpy as np

In [2]:
#!pip install bokeh --upgrade

In [3]:
from bokeh.io import output_file, show, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import (HoverTool, ColumnDataSource, NumeralTickFormatter, DatetimeTickFormatter, RangeTool)
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel
from bokeh.transform import jitter

In [4]:
LINK = "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.csv"

In [5]:
quakes = pd.read_csv(LINK, parse_dates=['time', 'updated'])
quakes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9122 entries, 0 to 9121
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 0   time             9122 non-null   datetime64[ns, UTC]
 1   latitude         9122 non-null   float64            
 2   longitude        9122 non-null   float64            
 3   depth            9122 non-null   float64            
 4   mag              9120 non-null   float64            
 5   magType          9120 non-null   object             
 6   nst              6356 non-null   float64            
 7   gap              7681 non-null   float64            
 8   dmin             6495 non-null   float64            
 9   rms              9121 non-null   float64            
 10  net              9122 non-null   object             
 11  id               9122 non-null   object             
 12  updated          9122 non-null   datetime64[ns, UTC]
 13  place            9

In [6]:
quakes.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2021-12-24 16:42:28.580000+00:00,34.199,-118.138167,0.88,1.38,ml,8.0,140.0,0.3454,0.38,...,2021-12-24 16:46:21.753000+00:00,"1km NNW of Altadena, CA",earthquake,1.29,31.61,0.3,10.0,automatic,ci,ci
1,2021-12-24 16:35:36.254000+00:00,61.0155,-147.0071,23.5,1.4,ml,,,,0.3,...,2021-12-24 16:39:22.381000+00:00,"24 km NW of Tatitlek, Alaska",earthquake,,0.2,,,automatic,ak,ak
2,2021-12-24 16:09:17.750000+00:00,36.021333,-117.8475,2.59,0.81,ml,13.0,108.0,0.03733,0.1,...,2021-12-24 16:38:12.390000+00:00,"9km ESE of Coso Junction, CA",earthquake,0.21,0.24,0.108,9.0,reviewed,ci,ci
3,2021-12-24 16:07:55.610000+00:00,38.766998,-122.751167,-0.71,1.4,md,7.0,174.0,0.05351,0.18,...,2021-12-24 16:40:10.538000+00:00,"1km SSE of The Geysers, CA",earthquake,1.0,2.82,0.09,7.0,automatic,nc,nc
4,2021-12-24 16:05:50.940000+00:00,19.173334,-155.489166,35.529999,1.76,ml,39.0,90.0,,0.13,...,2021-12-24 16:11:21.250000+00:00,"3 km SSW of Pāhala, Hawaii",earthquake,0.51,0.61,5.04,3.0,automatic,hv,hv


In [7]:
quakes.shape

(9122, 22)

In [8]:
quakes['timeframe'] = quakes['time'].apply(lambda x: str(x)[0:10])
quakes['starttime'] = quakes['time'].apply(lambda x: str(x)[0:19])
quakes['updatetime'] = quakes['updated'].apply(lambda x: str(x)[0:19])

quakes.filter(regex="time")

Unnamed: 0,time,timeframe,starttime,updatetime
0,2021-12-24 16:42:28.580000+00:00,2021-12-24,2021-12-24 16:42:28,2021-12-24 16:46:21
1,2021-12-24 16:35:36.254000+00:00,2021-12-24,2021-12-24 16:35:36,2021-12-24 16:39:22
2,2021-12-24 16:09:17.750000+00:00,2021-12-24,2021-12-24 16:09:17,2021-12-24 16:38:12
3,2021-12-24 16:07:55.610000+00:00,2021-12-24,2021-12-24 16:07:55,2021-12-24 16:40:10
4,2021-12-24 16:05:50.940000+00:00,2021-12-24,2021-12-24 16:05:50,2021-12-24 16:11:21
...,...,...,...,...
9117,2021-11-24 17:45:03.130000+00:00,2021-11-24,2021-11-24 17:45:03,2021-12-03 15:23:02
9118,2021-11-24 17:30:00.260000+00:00,2021-11-24,2021-11-24 17:30:00,2021-11-24 20:10:24
9119,2021-11-24 17:28:19.270000+00:00,2021-11-24,2021-11-24 17:28:19,2021-12-03 15:21:11
9120,2021-11-24 17:21:42.183000+00:00,2021-11-24,2021-11-24 17:21:42,2021-11-24 18:06:24


In [9]:
quakes = quakes[quakes['mag'] > 0]

In [10]:
quakes.isna().sum()

time                  0
latitude              0
longitude             0
depth                 0
mag                   0
magType               0
nst                2761
gap                1436
dmin               2471
rms                   1
net                   0
id                    0
updated               0
place                 0
type                  0
horizontalError    2155
depthError            0
magError           1877
magNst             1440
status                0
locationSource        0
magSource             0
timeframe             0
starttime             0
updatetime            0
dtype: int64

In [11]:
#completed cases only
quakes = quakes.dropna(how='any')
quakes.isna().sum()

time               0
latitude           0
longitude          0
depth              0
mag                0
magType            0
nst                0
gap                0
dmin               0
rms                0
net                0
id                 0
updated            0
place              0
type               0
horizontalError    0
depthError         0
magError           0
magNst             0
status             0
locationSource     0
magSource          0
timeframe          0
starttime          0
updatetime         0
dtype: int64

In [12]:
quakes.shape

(3915, 25)

In [13]:
quakes = quakes.sort_values('time')
quakes.reset_index(inplace=True, drop=True)

In [14]:
quakes.nunique()

time               3915
latitude           3457
longitude          3549
depth              1740
mag                 368
magType               3
nst                 100
gap                 300
dmin               3309
rms                  55
net                   9
id                 3915
updated            3915
place              1992
type                  4
horizontalError     444
depthError          618
magError            600
magNst               78
status                2
locationSource        9
magSource             9
timeframe            31
starttime          3913
updatetime         3901
dtype: int64

In [15]:
quakes.describe()

Unnamed: 0,latitude,longitude,depth,mag,nst,gap,dmin,rms,horizontalError,depthError,magError,magNst
count,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0,3915.0
mean,36.589773,-114.555341,7.622522,1.326289,20.331034,108.287356,0.096758,0.123522,0.570543,2.643284,0.172784,11.359642
std,6.059759,13.149054,9.393737,0.779983,14.131662,60.970544,0.159278,0.084105,1.245832,6.923825,0.090148,12.031062
min,17.726,-125.918833,-3.46,0.01,2.0,13.0,0.000305,0.0,0.09,0.1,0.0,1.0
25%,33.756,-122.303,2.46,0.75,11.0,63.0,0.02146,0.06,0.23,0.41,0.111,4.0
50%,37.03,-117.647167,5.94,1.16,17.0,90.0,0.05356,0.11,0.34,0.62,0.16,8.0
75%,38.832166,-115.211167,10.225,1.81,25.0,140.0,0.09792,0.17,0.59,1.2,0.213,14.0
max,49.452833,-63.6623,184.0,4.84,174.0,359.0,2.1901,0.79,54.5,31.61,0.774,269.0


In [16]:
quakes.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,horizontalError,depthError,magError,magNst,status,locationSource,magSource,timeframe,starttime,updatetime
0,2021-11-24 17:30:00.260000+00:00,33.274333,-115.9995,2.97,1.0,ml,18.0,109.0,0.0128,0.21,...,0.45,0.4,0.159,19.0,reviewed,ci,ci,2021-11-24,2021-11-24 17:30:00,2021-11-24 20:10:24
1,2021-11-24 18:25:07.200000+00:00,40.466167,-121.542833,3.73,1.58,md,6.0,134.0,0.01901,0.07,...,0.61,0.94,0.057,5.0,reviewed,nc,nc,2021-11-24,2021-11-24 18:25:07,2021-11-24 23:19:11
2,2021-11-24 18:33:28.150000+00:00,36.012833,-118.869333,9.23,1.82,ml,20.0,59.0,0.1415,0.19,...,0.26,0.87,0.1,10.0,reviewed,ci,ci,2021-11-24,2021-11-24 18:33:28,2021-11-24 20:00:44
3,2021-11-24 18:49:52.420000+00:00,33.192,-115.574333,3.29,1.33,ml,20.0,83.0,0.03755,0.19,...,0.28,0.33,0.265,12.0,reviewed,ci,ci,2021-11-24,2021-11-24 18:49:52,2021-11-24 19:12:39
4,2021-11-24 19:12:49.420000+00:00,39.4235,-110.294833,-3.43,2.14,md,10.0,200.0,0.02345,0.1,...,0.69,1.88,0.382,8.0,reviewed,uu,uu,2021-11-24,2021-11-24 19:12:49,2021-11-24 19:43:40


In [17]:
colormap = ['red','blue','green','orange','goldenrod','gray','purple']
print(colormap)

colors_dict = dict(zip(quakes.magType, colormap))
print(colors_dict)

quakes['colors'] = quakes['magType'].map(colors_dict)

print(quakes['colors'].unique())

['red', 'blue', 'green', 'orange', 'goldenrod', 'gray', 'purple']
{'ml': 'purple', 'md': 'gray'}
['purple' 'gray' nan]


In [18]:
source = ColumnDataSource(quakes)

figures = []

cols = ['depth', 'mag', 'nst', 'gap']

for i, g in enumerate(quakes[cols]):

    p = figure(        
        background_fill_color='white',
        x_axis_type='datetime',
        width=850,
        height=250,        
    )

    p.line(source=source, x='time', y=g, legend_label=g, color=colormap[i], line_width=.5)
    #p.square(source=source, x='time', y=g, legend_label=g, color=colormap[i], size=5)
    p.legend.location = "top_right"   
    
    figures.append(p)
    

gp = gridplot(figures, ncols=1)

p.yaxis[0].formatter = NumeralTickFormatter(format=",")
#p.xaxis[0].formatter.days = ['%m/%d/%Y']

output_notebook()

show(gp)

In [19]:
figures = []

cols = ['depth', 'mag', 'nst', 'gap']

for g in quakes[cols]:

    p = figure(title=g, background_fill_color='white', width=400, height=250)

    measures = quakes[g]
    hist, edges = np.histogram(measures, density=True, bins=10)

    p.quad(
        top=hist, bottom=0,
        left=edges[:-1], right=edges[1:],
        fill_color='blue', 
        line_color='darkblue',
        alpha=.5,
    )
    
    figures.append(p)

gp = gridplot(figures, ncols=2)


output_notebook()

show(gp)

In [20]:
source = ColumnDataSource(quakes)

p = figure(
    title = "Quakes",
    x_axis_type="linear",
    tools="hover, reset",
    plot_height=400,
    plot_width=800
)

p.xaxis.axis_label = 'Magnitude'
p.yaxis.axis_label = 'Depth'

p.circle(
    source=source,
    x='mag',
    y='depth',
    fill_alpha=0.5,
    size=10,
    fill_color='colors',
    line_color='colors',
    legend_field='magType',
)

p.xaxis[0].formatter = NumeralTickFormatter(format=",")
p.yaxis[0].formatter = NumeralTickFormatter(format=",")

p.select_one(HoverTool).tooltips = [
    ("Place", "@place"),
    ("Magnitude", "@mag{,.1f}"),
    ("Depth", "@depth{,.0f}")
]

output_notebook()

show(p)

In [21]:
q = quakes.groupby([pd.Grouper(key='time', freq='D')])[['gap','nst','mag','depth']].mean().reset_index()
q.head()

Unnamed: 0,time,gap,nst,mag,depth
0,2021-11-24 00:00:00+00:00,119.142857,15.392857,1.309286,8.9125
1,2021-11-25 00:00:00+00:00,110.763158,20.570175,1.390965,7.562894
2,2021-11-26 00:00:00+00:00,108.190751,18.67052,1.313988,8.814951
3,2021-11-27 00:00:00+00:00,110.104712,20.230366,1.117068,7.752996
4,2021-11-28 00:00:00+00:00,109.54321,20.888889,1.423642,7.811743


In [22]:
source = ColumnDataSource(q)

p = figure(
    title = "Earthquakes",
    background_fill_color="white",
    width=800,
    height=400,
    x_axis_type="datetime",
    tools="hover"
)

hover_list = []

for i, g in enumerate(q.columns[1:]):
    p.line(source=source, x='time', y=g, legend_label=g, color=colormap[i], line_width=2)
    p.square(source=source, x='time', y=g, legend_label=g, color=colormap[i], size=5)
    hover_list.append((g, f"@{g}"+"{,.2f}"))

p.yaxis[0].formatter = NumeralTickFormatter(format=",")
p.xaxis[0].formatter.days = ['%m/%d/%Y']

#p.legend.location = "top_left"   
p.legend[0]=None
p.add_layout(p.legend[0], 'right')

p.select_one(HoverTool).tooltips = hover_list

output_notebook()
show(p)

In [23]:
#q = quakes.groupby(pd.Grouper(key='time', freq='h'), as_index=False).size()
q = quakes.groupby(pd.Grouper(key='time', freq='h')).size().reset_index().rename(columns={0:'size'})
q.head()

Unnamed: 0,time,size
0,2021-11-24 17:00:00+00:00,1
1,2021-11-24 18:00:00+00:00,3
2,2021-11-24 19:00:00+00:00,3
3,2021-11-24 20:00:00+00:00,6
4,2021-11-24 21:00:00+00:00,6


In [24]:
from datetime import datetime

In [25]:
#dates = np.array(q['time'], dtype=np.datetime64)
dates = np.array(q['time'], dtype=datetime)
source = ColumnDataSource(data=dict(date=dates, total=q['size']))

p = figure(
    plot_height=400,
    plot_width=800,
    tools="xpan, zoom_in, zoom_out, reset", 
    x_axis_type="datetime",
    x_axis_location="above",
    x_range=(dates.min(), dates.max())
)

p.line('date', 'total', source=source, line_width=.5)

select = figure(
    title="Drag the middle and edges of the selection box to change the range above",
    plot_height=200, plot_width=800, y_range=p.y_range,
    x_axis_type="datetime",
    y_axis_type=None,
    tools="", 
    toolbar_location=None
)

range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('date', 'total', source=source)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

show(column(p, select))

In [26]:
dimension='locationSource'
measure='depth'

q = quakes.groupby([dimension])[measure].mean().reset_index()
q.head()

Unnamed: 0,locationSource,depth
0,ci,8.220711
1,mb,5.959186
2,nc,5.655517
3,nm,7.33619
4,pr,22.495536


In [27]:
source = ColumnDataSource(q)

p = figure(title = "Earthquakes", x_range=q[dimension], plot_width=800, plot_height=300, tools='',)

p.xaxis.axis_label = dimension.upper()
p.yaxis.axis_label = measure.upper()

p.vbar(source=source, x=dimension, top=measure, width=0.8, color='green')

p.yaxis[0].formatter = NumeralTickFormatter(format=",")


output_notebook()
show(p)

In [28]:
from bokeh.palettes import brewer

for k, v in brewer.items():
    print(k, len(v))

YlGn 7
YlGnBu 7
GnBu 7
BuGn 7
PuBuGn 7
PuBu 7
BuPu 7
RdPu 7
PuRd 7
OrRd 7
YlOrRd 7
YlOrBr 7
Purples 8
Blues 8
Greens 8
Oranges 8
Reds 8
Greys 8
PuOr 9
BrBG 9
PRGn 9
PiYG 9
RdBu 9
RdGy 9
RdYlBu 9
Spectral 9
RdYlGn 9
Accent 6
Dark2 6
Paired 10
Pastel1 7
Pastel2 6
Set1 7
Set2 6
Set3 10


In [29]:
q = quakes.copy()
q['timeframe'] = q['time'].dt.date

q = q.pivot_table(
    index='timeframe',
    columns='magSource',
    values='mag',
    fill_value=0,
    aggfunc=np.mean,
).reset_index()

q

magSource,timeframe,ci,mb,nc,nm,pr,se,tx,uu,uw
0,2021-11-24,0.984615,1.45,1.325714,0.0,3.06,0.0,0.0,2.14,1.14
1,2021-11-25,1.115926,1.814286,1.039216,0.0,2.708571,1.49,2.2375,0.71,1.295
2,2021-11-26,1.095366,1.628333,1.081967,2.086667,2.984286,0.0,2.6125,0.842059,1.511667
3,2021-11-27,0.906727,1.38,1.0432,1.31,2.88875,1.88,2.628571,0.721765,1.9325
4,2021-11-28,0.963529,1.546667,0.925116,1.72,2.666818,0.0,2.833333,1.546154,1.543333
5,2021-11-29,1.062766,1.8175,1.045577,1.97,2.751111,0.0,2.3,1.35125,1.294286
6,2021-11-30,0.920364,1.7,1.120185,2.39,2.832,0.0,2.366667,1.27,0.67
7,2021-12-01,1.00027,1.676667,1.059,2.49,2.778,1.61,2.333333,1.322,1.217143
8,2021-12-02,0.895814,1.54125,0.768696,1.665,2.383333,0.0,2.216667,0.973333,1.484
9,2021-12-03,0.944528,1.457273,1.047105,1.97,2.738182,2.1,3.1,0.855,1.12375


In [30]:
p = figure(x_axis_type="datetime", plot_width=800)

p.grid.minor_grid_line_color = '#eeeeee'

names = q.columns[1:].tolist()
N = len(names)

p.varea_stack(stackers=names, x='timeframe', color=brewer['Blues'][N], source=q, legend_label=names)

p.legend.location = "top_left"

output_notebook()

show(p)