In [1]:
import csv
import requests
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd

from sklearn.cluster import dbscan
from sklearn.preprocessing import StandardScaler

import holoviews as hv
from holoviews import dim
hv.extension('bokeh')

import hvplot.pandas 
import hvplot.dask

import param as pm
import panel as pn
pn.extension()

In [2]:
# USGS summary feed (CSV) of earthquakes; the "all_month" file name indicates
# every event of the past month. Fetched over HTTPS so the download cannot be
# tampered with in transit.
csvurl = 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.csv'

In [3]:
# Country outlines (GeoJSON) loaded over the network; they are drawn as the
# background layer of the earthquake map.
url = "https://raw.githubusercontent.com/madhurapg/countries/master/countries.geojson"
countries_gpd = gpd.read_file(url)

In [4]:
# Download the earthquake feed into a DataFrame (requires network access)
df1 = pd.read_csv(csvurl)

In [5]:
# Trim the raw feed in three passes. Each pass yields a new frame, so the
# intermediate results (df2, df3) remain available alongside the final df4.
df2 = df1.drop(
    columns=[
        "time", "magType", "updated", "place", "type",
        "status", "locationSource", "magSource", "net", 'id',
    ]
)
df3 = df2.drop(
    columns=["horizontalError", "depthError", "magError", "magNst"]
)
df4 = df3.drop(columns=["nst", "gap", "dmin"])

In [6]:
# Palette used for the clusters on the map: the i-th plotted cluster gets the
# i-th colour.
colors = [
    'fuchsia',
    'lime',
    'blue',
    'red',
    'gold',
    'aqua',
    'pink',
    'blueviolet',
    'orange',
]

In [7]:
# Re-attach the event time that was dropped above, parsed once into real
# timezone-aware (UTC) timestamps instead of raw strings.
# The previous `.dropna()` had no effect: column assignment realigns on the
# index, so any dropped row simply came back as a missing value.
df4['datetime'] = pd.to_datetime(df1["time"], utc=True)

In [8]:
# Earliest and latest values found in the 'datetime' column of the data set
min_date, max_date = df4["datetime"].min(), df4["datetime"].max()

# Default (start, end) bounds covering the whole data set
DEFAULT_BOUNDS = min_date, max_date

In [9]:
class EarthquakesApp(pm.Parameterized):
    """
    Interactive dashboard of recent earthquakes.

    The single ``days`` parameter selects how many of the most recent days of
    the module-level ``df4`` table are shown. Every view method below is
    declared as depending on ``days``; each one re-filters ``df4`` and builds
    a fresh plot/pane from the result.
    """

    # the number of days to get data for
    days = pm.Integer(default=30, bounds=(0, 30))

    def filter_by_days1(self, days):
        """
        Return the subset of the full data set ('df4') that occurred
        less than 'days' days ago.

        Parameters
        ----------
        days : int
            Size of the look-back window, in days.

        Returns
        -------
        pandas.DataFrame
            A new frame holding only the longitude, latitude, mag, depth and
            datetime columns of the matching rows.
        """
        # Compare instants in UTC on both sides. Mixing the local wall-clock
        # "today" with timezone-stripped event times shifts the window by the
        # machine's UTC offset.
        now_utc = pd.Timestamp.now(tz="UTC")

        # Accepts both raw timestamp strings and already-parsed datetimes
        event_times = pd.to_datetime(df4["datetime"], utc=True)

        # Age of every earthquake relative to now
        age = now_utc - event_times

        # Valid selection: less than X days ago (missing times never match)
        selection = age < pd.Timedelta(days=days)

        # only return the rows and columns we need
        COLS = ["longitude", "latitude", "mag", "depth", "datetime"]
        return df4.loc[selection, COLS]

    @pm.depends("days", watch=True)
    def _reset_timeFilter(self):
        """
        Internal function that resets the time filter to the default limits.

        This function will run anytime that the "days" parameter changes.
        The class itself never creates a ``timeFilter`` attribute, so the
        reset is skipped unless one has been attached; accessing it
        unconditionally raised AttributeError on every change of ``days``.
        """
        time_filter = getattr(self, "timeFilter", None)
        if time_filter is not None:
            time_filter.update(boundsx=DEFAULT_BOUNDS)

    @pm.depends("days")
    def daily_earthquakes2(self):
        """
        Return a scatter plot of depth against latitude, coloured by
        magnitude, for the earthquakes of the last ``self.days`` days.

        Rows with any missing value are left out of the plot.
        """
        quakes = self.filter_by_days1(self.days).dropna()

        # colour by the 'mag' column (referenced by name rather than by
        # passing the Series itself)
        return quakes.hvplot(
            x='latitude', y='depth', kind='scatter', c='mag', cmap="plasma"
        )

    @pm.depends("days")
    def magnitude_hist(self):
        """
        Return a histogram of the magnitudes of the earthquakes of the
        last ``self.days`` days.
        """
        # get the earthquakes (filtered by days)
        quakes = self.filter_by_days1(self.days)

        return quakes.hvplot.hist(y="mag", bins=100, grid=True, fill_color="navy")

    @pm.depends("days")
    def bivar(self):
        """
        Return a bivariate chart of depth vs magnitude for the earthquakes
        of the last ``self.days`` days.
        """
        # get the earthquakes (filtered by days)
        quakes = self.filter_by_days1(self.days)

        return quakes.hvplot.bivariate(x='mag', y='depth', cmap="plasma")

    @pm.depends("days")
    def summary_text(self):
        """
        Get a summary of the number of earthquakes in the selected window.

        Returns a Panel pane wrapping an HTML <p> tag.
        """
        # only filter this by days
        quakes = self.filter_by_days1(self.days)

        # count the earthquakes
        earthquakes = len(quakes)
        t = f"<p><b>There have been {earthquakes:,} earthquakes in the last {self.days} days.</b></p>"

        # pn.panel is the documented factory; pn.Pane is its legacy spelling
        return pn.panel(t, width=600)

    @pm.depends("days")
    def map_earthquake2(self):
        """
        Return a matplotlib figure mapping the earthquakes of the last
        ``self.days`` days, coloured by DBSCAN cluster.

        Clustering runs on the standardised longitude, latitude, mag and
        depth columns. Points labelled as noise (-1) are drawn in light cyan;
        every cluster gets a colour from the module-level ``colors`` list,
        which is cycled when there are more clusters than colours. Rows with
        missing values are skipped, and an empty selection yields a map with
        only the country outlines.
        """
        quakes = (
            self.filter_by_days1(self.days)
            .drop(columns=["datetime"])
            .dropna()  # DBSCAN rejects NaN input
        )

        if quakes.empty:
            # nothing to cluster (e.g. days == 0); the scaler needs >= 1 row
            labels = []
        else:
            scaled_features = StandardScaler().fit_transform(quakes)
            _, labels = dbscan(scaled_features, eps=0.25, min_samples=40)

        # Add the labels back to the original (unscaled) dataset
        quakes = quakes.assign(label=labels)

        # Real clusters are every label except the noise label. Filtering on
        # the value keeps all clusters even when DBSCAN reports no noise.
        cluster_ids = sorted(
            label for label in quakes['label'].unique() if label != -1
        )

        # Setup figure and axis
        fig, ax = plt.subplots(figsize=(20, 10))

        countries_gpd.plot(ax=ax, facecolor='black', edgecolor='grey')

        # Plot noise in light cyan
        noise = quakes.loc[quakes['label'] == -1]
        ax.scatter(noise['longitude'], noise['latitude'], c='lightcyan', s=20, linewidth=0)

        # loop over the clusters
        for i, cluster_id in enumerate(cluster_ids):
            # select all the samples belonging to this cluster
            members = quakes.loc[quakes['label'] == cluster_id]

            # wrap around the palette instead of running off its end
            ax.scatter(
                members['longitude'], members['latitude'],
                linewidth=0, color=colors[i % len(colors)], s=20, alpha=1,
            )

        ax.set_facecolor("black")

        # Release pyplot's handle on the figure; the Figure object itself is
        # what gets returned for display.
        plt.close(fig)
        return fig

In [10]:
# Create the dashboard instance.
# NOTE(review): the empty name presumably hides the default heading Panel
# shows above the parameter widgets — confirm.
app = EarthquakesApp(name="")

In [11]:
# Dashboard heading (pn.panel is the documented factory; pn.Pane is its
# legacy spelling)
title = pn.panel("<h2>Earthquakes around the globe</h2>", width=500)

# Usage note. The font size is set through a `style` attribute (a bare
# `font-size=...` attribute is not valid HTML), and the text describes the
# control this dashboard actually has: the "Days" parameter widget.
instructions = pn.panel(
    """
<div style="font-size: 28px"><b>Note:</b> Use the "Days" control above to choose how many of the most recent days of earthquake data are displayed.</div>""",
    width=1200,
)


In [12]:
# Assemble the dashboard: one row per section, stacked top to bottom
panel = pn.Column(
    *[
        pn.Row(title, app.param),                                  # heading + parameter widgets
        pn.Row(instructions),                                      # usage note
        pn.Row(app.summary_text),                                  # earthquake count
        pn.Row(app.map_earthquake2, align="center"),               # cluster map
        pn.Row(app.daily_earthquakes2, align="center"),            # depth vs latitude scatter
        pn.Row(app.magnitude_hist, app.bivar, align="center"),     # histogram + bivariate chart
    ]
)

In [13]:
# Mark the layout as servable so it can be deployed with `panel serve`;
# as the last expression of the cell it is also displayed inline.
panel.servable()