In [None]:
## Package imports ##
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as ss
from sklearn import mixture
import pandas as pd
import geopandas as gpd
from obspy.clients.fdsn import Client
from obspy import UTCDateTime
from dateutil.relativedelta import relativedelta
import datetime as dt
from scipy.optimize import minimize
from scipy.special import gamma as gamma_func, gammaln, gammaincc, exp1

import json
import os
import pprint
import fiona

from functools import partial
import pyproj
from shapely.geometry import Polygon, Point
import shapely.ops as ops


# Check shapely speedups are enabled
# NOTE: a bare expression in the middle of a cell is evaluated but not displayed
from shapely import speedups
speedups.enabled

# Set geopandas settings: register the KML driver in the fiona driver table reached through
# geopandas ('rw' presumably meaning read + write), so that the KML file can be loaded with
# gpd.read_file(..., driver='KML') further down
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
# Last expression of the cell: shows the resulting driver table
gpd.io.file.fiona.drvsupport.supported_drivers

## Geographic zonation methods
Set of routines to sort earthquakes into subregions.

In [None]:
# Load KML polygon file for the whole study area - execute cell if you have a KML file of rupture areas from Google Earth
def load_polys_kml(fpath):
    """
    Read EQ rupture-zone polygons from a KML file into a gdf and attach their centroids.

    Centroids are computed in a '+proj=cea' projection and converted back to EPSG:4326.
    The centroid lon/lat and the polygon 'Name' are also written to a text file called
    '<first path component>/<third "_"-separated token of the file name>_large_rupture_labels.txt'.

    # fpath: filepath of the KML file, expected to look like 'dir/aaa_bbb_AREA.kml'
    # returns: GeoDataFrame in EPSG:4326 with added 'centroid_geog', 'centroid_lon' and
    #          'centroid_lat' columns (the 'Description' column is dropped)
    """
    kml_gdf = gpd.read_file(fpath, driver='KML', crs="EPSG:4326")

    # Work in projected coordinates for the centroids, then express them geographically
    projected = kml_gdf.to_crs('+proj=cea')
    projected['centroid'] = projected['geometry'].centroid
    projected['centroid_geog'] = projected['centroid'].to_crs(4326)
    projected['centroid_lon'] = projected['centroid_geog'].x
    projected['centroid_lat'] = projected['centroid_geog'].y
    polys = projected.drop(columns=['centroid', 'Description']).to_crs(4326)

    # Derive the label file name from the pieces of the input path
    path_parts = fpath.split(sep='/')
    area_tag = path_parts[1].split(sep='_')[2].split(sep='.')[0]
    outfile = path_parts[0] + '/' + area_tag + '_large_rupture_labels.txt'
    print(outfile)

    label_columns = ['centroid_lon', 'centroid_lat', 'Name']
    with open(outfile, 'w') as f:
        f.write(polys.to_string(columns=label_columns, header=False, index=False))
    return polys

In [None]:
# Load the SAM rupture zones from the KML file and write the rupture label file
load_polys_kml('figs/large_ruptures_SAM.kml')

In [None]:
# Load polygons - execute cell if you have lists of coordinates defining target zones
# Read in .dat file as df
def load_polys(target_area):
    """
    Build the target-zone polygons of a study area from lists of coordinates.

    Zone names are read from 'sbams/<target_area>/<target_area>_trench_points.txt': the first
    line is skipped and the sixth whitespace-separated field of every following non-blank
    line is used as a zone name. For each zone, 'sbams/<target_area>/<zone>_CropPolygon.dat'
    is read as whitespace-separated (lon, lat) pairs and turned into a polygon.

    # target_area: name of the study area (type str, e.g. 'SAM')
    # returns: (polys, regions)
    #          polys: GeoDataFrame in EPSG:4326 with columns 'name', 'geometry' and
    #                 'centroid_geog' (centroid computed in a '+proj=cea' projection and
    #                 converted back to EPSG:4326)
    #          regions: list of zone names (str), in file order
    """
    fpath = 'sbams/' + target_area + '/'
    regions_path = fpath + target_area + '_trench_points.txt'
    with open(regions_path, 'r') as reader:
        next(reader)  # skip the first line (presumably a header)
        # Blank lines (e.g. a trailing newline) are ignored instead of raising IndexError
        regions = [line.split()[5] for line in reader if line.strip()]

    polygon_coords = []
    for region in regions:
        # Raw string: '\s' is not a valid escape sequence in a normal string literal
        coords_df = pd.read_table(fpath + region + '_CropPolygon.dat', sep=r"\s+",
                                  header=None, names=['lon', 'lat'])
        polygon_coords.append(Polygon(zip(coords_df['lon'], coords_df['lat'])))

    polys = gpd.GeoDataFrame({'name': regions, 'geometry': polygon_coords}, crs="EPSG:4326")
    # Add polygon centroids as well (computed in projected coordinates)
    polys = polys.to_crs('+proj=cea')
    polys['centroid'] = polys['geometry'].centroid
    polys['centroid_geog'] = polys['centroid'].to_crs(4326)
    polys = polys.drop(columns=['centroid'])
    polys = polys.to_crs(4326)
    return polys, regions

In [None]:
# Write one "centroid_lon centroid_lat name" line per SAM zone
polys, regions = load_polys('SAM')
polys = polys.assign(centroid_lon=polys['centroid_geog'].x,
                     centroid_lat=polys['centroid_geog'].y)

outfile = 'sbams/SAM/SAM_zone_centroids.txt'
print(outfile)
centroid_columns = ['centroid_lon', 'centroid_lat', 'name']
with open(outfile, 'w') as f:
    f.write(polys.to_string(columns=centroid_columns, header=False, index=False))

In [None]:
# define a function to extract the earthquakes relevant to each polygon

def assign_events_and_save(target_area):
    """
    Assigns events to the geographic regions returned by load_polys and saves them per region.

    For each of the four catalogs under '<target_area>_EQ_data/' (Mizrahi ETAS declustered /
    rejected, Zaliapin declustered / rejected), the events located within each zone polygon
    are written to '<target_area>_EQ_data/zonal_cat/<zone>/<catalog file name>'.

    # target_area is the study area (type str: Japan, SAM)
    # returns: list of zone names (str) as returned by load_polys
    """
    # Initialise filepaths
    cat_dir = target_area + '_EQ_data/'
    catalog_paths = [
        cat_dir + 'Mizrahi_ETAS_decluster/ETAS_declustered_cat.csv',
        cat_dir + 'Mizrahi_ETAS_decluster/ETAS_rejected_evs.csv',
        cat_dir + 'Zaliapin_decluster/declustered_catalog_data.csv',
        cat_dir + 'Zaliapin_decluster/rejected_ev.csv',
    ]
    out_columns = ['year', 'month', 'day', 'hour', 'minute', 'second', 'lat', 'lon',
                   'depth_km', 'mag', 'time']

    # Get the target regions
    polys, regions = load_polys(target_area)
    # Sort the earthquakes into subregions, one catalog and one subregion at a time
    for fpath in catalog_paths:
        # Load CSV catalog and turn the epicentres into point geometries
        cat = pd.read_csv(fpath, index_col=0)
        gdf = gpd.GeoDataFrame(cat, geometry=gpd.points_from_xy(cat.lon, cat.lat))
        out_name = os.path.basename(fpath)
        for region_name in regions:
            outdir = cat_dir + 'zonal_cat/' + region_name
            outfile = outdir + '/' + out_name
            print('Now processing ', outfile)
            # Location filtering against the (first) polygon carrying this zone name
            zone_geom = polys.loc[polys['name'] == region_name, 'geometry'].iloc[0]
            pip_data = gdf.loc[gdf.within(zone_geom)]
            # makedirs also creates the intermediate 'zonal_cat' directory on a first run
            os.makedirs(outdir, exist_ok=True)
            # Write the earthquakes for the chosen region to a csv file
            pip_data.to_csv(outfile, columns=out_columns)
    return regions

In [None]:
# Split the SAM catalogs into per-zone files; the returned list of zone names is the cell output
assign_events_and_save('SAM')

In [None]:
# Prepare a catalog for plotting

# Prepare catalog:
def prep_cat_zaliapin(cat_init):
    """ Reads a raw catalog from CSV and derives the columns needed for further processing
    # cat_init: file path to a CSV document containing labelled columns:
    #           Index, year, month, day, hour, minute, second, lat, lon, depth_km, mag
    # returns: DataFrame sorted by index, with an added 'time' datetime column, 'depth_km'
    #          multiplied by 0.001, and 'lat_rad' / 'lon_rad' holding lat / lon in radians
    """
    date_fields = ['year', 'month', 'day', 'hour', 'minute', 'second']

    # Read the file and order the events by their index
    catalog = pd.read_csv(cat_init, index_col=0).sort_index()

    # One timestamp per event, assembled from its date/time components
    catalog["time"] = pd.to_datetime(catalog[date_fields])

    # Tidy up dtypes, then rescale the depth column
    catalog = catalog.infer_objects()
    catalog.loc[:, 'depth_km'] *= 0.001

    # Radian copies of the coordinates, for the spherical distance calculation
    for deg_col, rad_col in (('lat', 'lat_rad'), ('lon', 'lon_rad')):
        catalog[rad_col] = np.radians(catalog[deg_col])
    return catalog

# Haversine formula for computing spherical distances
def hav(theta):
    """Haversine function, sin^2(theta / 2)
    Takes in arguments in radians
    """
    half_angle_sine = np.sin(theta / 2)
    return np.square(half_angle_sine)

def haversine(lat_rad_1, lat_rad_2, lon_rad_1, lon_rad_2, earth_radius=6.3781e3):
    """Haversine distance - distance between 2 pts on a sphere
    # lat_rad_1, lat_rad_2, lon_rad_1, lon_rad_2 must all be in radians
    # earth_radius: radius of the sphere; the distance comes out in the same length unit
    #               (km with the default value)
    """
    # Latitude and longitude contributions to the haversine of the central angle
    lat_term = hav(lat_rad_1 - lat_rad_2)
    lon_term = np.cos(lat_rad_1) * np.cos(lat_rad_2) * hav(lon_rad_1 - lon_rad_2)
    half_central_angle = np.arcsin(np.sqrt(lat_term + lon_term))
    return 2 * earth_radius * half_central_angle

## Synchronisation detection

Following methods outlined in Jara et al. (2017)

In [None]:
# Synchronisation detection
# Following Jara et al. (2017)
def _fill_sync_columns(ref_cat, other_cat, deltas, col_labels_p, col_labels_m, look_back):
    """
    Fill the sync coefficient / magnitude columns of ref_cat (modified in place).

    ref_cat: events for which coefficients are computed (needs a 'time' column)
    other_cat: events searched for inside each time window (needs 'time' and 'mag' columns)
    deltas: window lengths (datetime.timedelta), parallel to col_labels_p / col_labels_m
    look_back: if True the window is [t - delta, t) before each reference event,
               otherwise it is (t, t + delta] after it
    """
    other_times = other_cat['time']
    other_mags = other_cat['mag']
    ref_kind = 'shallow' if look_back else 'deep'
    for t_delta, col_label_p, col_label_m in zip(deltas, col_labels_p, col_labels_m):
        print('Looking for synchronisations within {} of {} event'.format(str(t_delta), ref_kind))
        coeffs = []
        max_mags = []
        for ev_time in ref_cat['time']:
            # Specify the search time period
            if look_back:
                in_window = (other_times < ev_time) & (other_times >= ev_time - t_delta)
            else:
                in_window = (other_times > ev_time) & (other_times <= ev_time + t_delta)
            if in_window.any():
                coeffs.append(1.0)  # set sync coefficient to 1
                # max magnitude of the events found in the window
                max_mags.append(other_mags[in_window.to_numpy()].max())
            else:
                coeffs.append(0.0)
                max_mags.append(np.nan)
        # One column assignment per window instead of one .loc write per event
        ref_cat[col_label_p] = np.asarray(coeffs, dtype=float)
        ref_cat[col_label_m] = np.asarray(max_mags, dtype=float)


def detect_sync(region, zone, method, search_type, shallow_0, shallow_1, deep_0, deep_1):
    """
    Detect synchronisations between deep and shallow seismicity

    For every test window (1, 3, 6, 12 hours and 1-10, 15, 20, 25, 30, 60, 90 days) a
    synchronisation coefficient column ('<n>h' / '<n>d', 1.0 or 0.0) and a magnitude column
    ('<n>h_mag' / '<n>d_mag', largest magnitude found in the window, NaN if none) are
    computed, and the result is saved as
    '<region>_EQ_data/zonal_cat/<zone>/sync_coeff_<search_type>_data_<shallow_0>-<shallow_1>_<deep_0>-<deep_1>.csv'.

    region: large-scale target area, type str (e.g. SAM, Japan)
    zone: subregion , type str
    method: Preferred declustered catalog, type str (Mizrahi or Zaliapin)
    search_type: Method to compute sync coefficients - if 'S', function will look backwards from
                 shallow events (deep events in [t - window, t)).
                 If 'D', function will look forwards from deep events (shallow events in
                 (t, t + window])
    shallow_0, shallow_1: depth range (km) of shallow events, shallow_0 <= depth_km < shallow_1
    deep_0, deep_1: depth range (km) of deep events, deep_0 <= depth_km < deep_1

    returns: the shallow catalog ('S') or the deep catalog ('D') with the coefficient columns
    raises: ValueError if method or search_type is not one of the supported values
    """
    catalog_files = {'Zaliapin': 'declustered_catalog_data.csv',
                     'Mizrahi': 'ETAS_declustered_cat.csv'}
    if method not in catalog_files:
        raise ValueError("method must be 'Zaliapin' or 'Mizrahi', got {!r}".format(method))
    if search_type not in ('S', 'D'):
        raise ValueError("search_type must be 'S' or 'D', got {!r}".format(search_type))

    # Build filepaths
    root_dir = region+'_EQ_data/'
    fn_cat = root_dir+'zonal_cat/'+zone+'/'+catalog_files[method]

    # Load catalog from file (copy so that the new columns are not written to a slice):
    cat = pd.read_csv(fn_cat, index_col=0, parse_dates=['time'])
    cat = cat.sort_index()
    cat = cat[['time', 'lat', 'lon', 'depth_km', 'mag']].copy()

    # Define lengths of test windows from Jara et al.
    test_periods_h = [1, 3, 6, 12]
    test_periods_d = [1,2,3,4,5,6,7,8,9,10,15,20,25,30,60,90]
    deltas = ([dt.timedelta(hours=period) for period in test_periods_h]
              + [dt.timedelta(days=period) for period in test_periods_d])
    col_labels_p = (['{}h'.format(period) for period in test_periods_h]
                    + ['{}d'.format(period) for period in test_periods_d])  # test period labels
    col_labels_m = [label + '_mag' for label in col_labels_p]  # magnitude labels

    # Initialise columns to store synchronisation coeffs
    for label_p, label_m in zip(col_labels_p, col_labels_m):
        cat[label_p] = 0.0
        cat[label_m] = 0.0

    # Create deep and shallow catalogs (independent copies of the selected rows):
    deep_cat = cat.loc[(cat['depth_km'] >= deep_0) & (cat['depth_km'] < deep_1)].copy()
    shallow_cat = cat.loc[(cat['depth_km'] >= shallow_0) & (cat['depth_km'] < shallow_1)].copy()

    # Perform search
    if search_type == 'S':
        print("SHALLOW MODE")
        result = shallow_cat
        _fill_sync_columns(shallow_cat, deep_cat, deltas, col_labels_p, col_labels_m,
                           look_back=True)
    else:
        print("DEEP MODE")
        result = deep_cat
        _fill_sync_columns(deep_cat, shallow_cat, deltas, col_labels_p, col_labels_m,
                           look_back=False)

    # Save to file
    outfile = '{}zonal_cat/{}/sync_coeff_{}_data_{}-{}_{}-{}.csv'.format(
        root_dir, zone, search_type, str(int(shallow_0)), str(int(shallow_1)),
        str(int(deep_0)), str(int(deep_1)))
    result.to_csv(outfile)
    return result
                

In [None]:
# Run the synchronisation search for every SAM zone, in both shallow and deep modes.
# Each entry is (zone, shallow_0, shallow_1, deep_0, deep_1), passed straight to detect_sync.
for mode in ['S', 'D']:
    for zone_name, sh_0, sh_1, dp_0, dp_1 in [
        ('Bucaramanga', 0.0, 50.0, 80.0, 200.0),
        ('Colombia', 0.0, 60.0, 80.0, 200.0),
        ('Ecuador', 0.0, 60.0, 80.0, 250.0),
        ('Peru', 0.0, 40.0, 80.0, 200),
        ('Arequipa', 0.0, 50.0, 90.0, 250.0),
        ('North_Chile', 0.0, 70.0, 70.0, 150.0),
        ('North_Chile', 0.0, 70.0, 150.0, 250.0),
        ('Atacama', 0.0, 70.0, 70.0, 120.0),
        ('Atacama', 70.0, 120.0, 120.0, 250.0),
        ('Central_Chile', 0.0, 60.0, 80.0, 100.0),
        ('Central_Chile', 80.0, 100.0, 100.0, 250.0),
        ('Central_Chile', 0.0, 60.0, 80.0, 250.0),
        ('Maule', 0.0, 40.0, 80.0, 250.0),
        ('Valdivia', 0.0, 40.0, 80.0, 250.0),
    ]:
        detect_sync('SAM', zone_name, 'Zaliapin', mode, sh_0, sh_1, dp_0, dp_1)

In [None]:
def plot_sync_coeffs_pts(region, zones, search_type, cat_start, cat_end):
    """
    Plot sync coeffs for all subregions in a specific region, for a specific timewindow

    One figure (5 x 3 panels, one panel per day-based test window from 2d to 90d) is drawn for
    every 'sync_coeff_<search_type>*.csv' file found in '<region>_EQ_data/zonal_cat/<zone>/'
    and saved as a PDF in '<region>_EQ_data/figs-PDF/sync_coeffs/'. Large events from
    '<region>_EQ_data/large_eq.csv' within 1000 km of the zone centroid are marked with dashed
    vertical lines and labelled with their name: red / blue text for events inside the zone
    polygon with depth <= 60 km / > 60 km, grey text for events outside the polygon.

    region: the region of interest, type str (e.g. SAM)
    zones: list of subregions (str) we want to plot sync coeffs for
    search_type: specify type of search - i.e. shallow syncing with deep before it (S)
                or deep syncing with shallow after (D)
    cat_start: start of the plotting period (dt.datetime)
    cat_end: end of the plotting period (dt.datetime)
    raises: ValueError if search_type is neither 'S' nor 'D'
    """
    from pathlib import Path
    from matplotlib.colors import ListedColormap

    mode_names = {'S': 'Shallow', 'D': 'Deep'}
    if search_type not in mode_names:
        raise ValueError("search_type must be 'S' or 'D', got {!r}".format(search_type))
    fsearch = 'sync_coeff_{}*.csv'.format(search_type)

    # Filenames
    root_dir = region+'_EQ_data/'
    fn_large_ev = root_dir+'large_eq.csv'
    outdir = root_dir+'figs-PDF/sync_coeffs/'

    test_periods_d = [2,3,4,5,6,7,8,9,10,15,20,25,30,60,90]
    test_periods = ['{}d'.format(dur) for dur in test_periods_d]
    # Binary colourmap: 0 -> red, 1 -> green
    cmap = ListedColormap(['red', 'green'])

    # Inputs that do not depend on the zone or on the file are loaded once:
    # catalog of large events (for plotting milestone lines) and the target regions
    large_ev_cat = prep_cat_zaliapin(fn_large_ev)
    large_ev_cat = large_ev_cat.loc[(large_ev_cat['time'] <= cat_end) &
                                    (large_ev_cat['time'] >= cat_start)]
    polys, _ = load_polys(region)

    for zone in zones:
        basedir = root_dir+'zonal_cat/'+zone+'/'
        print(basedir)
        sync_files = sorted(Path(basedir).glob(fsearch))
        if not sync_files:
            continue

        # Only include large events within 1000 km of centroid of zone
        zone_row = polys.loc[polys['name']==zone].iloc[0]
        zone_geom = zone_row['geometry']
        centroid = zone_row['centroid_geog']
        dist = haversine(np.radians(centroid.y), large_ev_cat['lat_rad'],
                         np.radians(centroid.x), large_ev_cat['lon_rad'])
        large_ev = large_ev_cat.loc[dist<=1000.0].set_index('time').sort_index()

        for fpath in sync_files:
            # Read in sync_coeffs and keep the plotting period
            sync_coeffs = pd.read_csv(fpath, index_col=0, parse_dates=['time'])
            sample_cat = sync_coeffs.loc[(sync_coeffs['time'] <= cat_end) &
                                         (sync_coeffs['time'] >= cat_start)]
            dates = sample_cat['time']
            # Depth ranges are encoded in the file name after 'data_'
            dep_range = fpath.name.split(sep='data_')[1].split(sep='.c')[0]

            fig, axs = plt.subplots(nrows=5, ncols=3, figsize=(30,32))
            fig.suptitle('Sync coefficients for {}, z: {} km ({} mode)'.format(
                zone, dep_range, mode_names[search_type]), y=0.89)
            print(test_periods)

            for ax, test_per in zip(axs.flat, test_periods):
                coeffs = sample_cat[test_per]
                # Fixed vmin/vmax: without them a panel whose coefficients are all equal is
                # normalised to 0 and drawn entirely in the first colour (red)
                ax.scatter(dates, coeffs, c=coeffs, cmap=cmap, vmin=0.0, vmax=1.0,
                           label='{} (Total {} events)'.format(test_per, len(dates)))

                ## Plot important large evs ##
                for event in large_ev.itertuples():
                    ax.axvline(event.Index,color='k', ls='--')
                    if Point(event.lon, event.lat).within(zone_geom):
                        props = dict(boxstyle='round',facecolor='white',alpha=1.0)
                        if event.depth_km <= 60.0:
                            colour = 'r'
                        elif event.depth_km > 60.0:
                            colour = 'b'
                        else:
                            # depth is NaN: only the vertical line is drawn
                            continue
                    else:
                        props = dict(boxstyle='round',facecolor='white',ls='--',alpha=1.0)
                        colour = 'grey'
                    ax.text(event.Index,ax.get_ylim()[1]/2,event.name,ha='center',va='center',
                            fontsize='large', rotation=90, color=colour, bbox=props)

                ax.set_ylabel('Synchronisation coefficient', fontsize=14)
                ax.xaxis.set_tick_params(labelsize=14)
                ax.yaxis.set_tick_params(labelsize=14)
                ax.legend(loc='lower left')

            # Save the figure (before showing it), creating 'figs-PDF/sync_coeffs/' if needed
            os.makedirs(outdir, exist_ok=True)
            fn_output = outdir+zone+'_'+dep_range+'_'+search_type+'_sync_coeffs.pdf'
            fig.savefig(fn_output, dpi=300, bbox_inches='tight')

            plt.show()
            # Release the figure: many figures are created in this loop
            plt.close(fig)

In [None]:
# Scatter plots of the sync coefficients for every SAM zone, shallow mode first, then deep mode
zones = ['Bucaramanga', 'Colombia', 'Ecuador', 'Peru', 'Arequipa', 'North_Chile',
         'Atacama', 'Central_Chile', 'Maule', 'Valdivia']
for mode in ['S', 'D']:
    plot_sync_coeffs_pts('SAM', zones, mode, dt.datetime(2000,1,1), dt.datetime(2021,7,1))

In [None]:
def plot_sync_coeffs_bar(region, zones, search_type, cat_start, cat_end):
    """
    Plot sync coeffs for all subregions in a specific region, for a specific timewindow, in 6 month bins, and counting
    the number of positive and negative synchronisations in each bin

    One figure (5 x 3 panels, one panel per day-based test window from 2d to 90d) is drawn for
    every 'sync_coeff_<search_type>*.csv' file found in '<region>_EQ_data/zonal_cat/<zone>/'
    and saved as a PDF in '<region>_EQ_data/figs-PDF/sync_coeffs_bars/'. Each panel stacks the
    count of coefficients equal to 1 on top of the count of coefficients equal to 0. Large
    events from '<region>_EQ_data/large_eq.csv' within 1000 km of the zone centroid are marked
    with dashed vertical lines and labelled with their name: red / blue text for events inside
    the zone polygon with depth <= 60 km / > 60 km, grey text for events outside the polygon.

    region: the region of interest, type str (e.g. SAM)
    zones: list of subregions (str) we want to plot sync coeffs for
    search_type: specify type of search - i.e. shallow syncing with deep before it (S)
                or deep syncing with shallow after (D)
    cat_start: start of the plotting period (dt.datetime)
    cat_end: end of the plotting period (dt.datetime)
    raises: ValueError if search_type is neither 'S' nor 'D'
    """
    from pathlib import Path

    mode_names = {'S': 'Shallow', 'D': 'Deep'}
    if search_type not in mode_names:
        raise ValueError("search_type must be 'S' or 'D', got {!r}".format(search_type))
    fsearch = 'sync_coeff_{}*.csv'.format(search_type)

    # Filenames
    root_dir = region+'_EQ_data/'
    fn_large_ev = root_dir+'large_eq.csv'
    outdir = root_dir+'figs-PDF/sync_coeffs_bars/'

    test_periods_d = [2,3,4,5,6,7,8,9,10,15,20,25,30,60,90]
    test_periods = ['{}d'.format(dur) for dur in test_periods_d]

    # Inputs that do not depend on the zone or on the file are loaded once:
    # catalog of large events (for plotting milestone lines) and the target regions
    large_ev_cat = prep_cat_zaliapin(fn_large_ev)
    large_ev_cat = large_ev_cat.loc[(large_ev_cat['time'] <= cat_end) &
                                    (large_ev_cat['time'] >= cat_start)]
    polys, _ = load_polys(region)

    for zone in zones:
        basedir = root_dir+'zonal_cat/'+zone+'/'
        print(basedir)
        sync_files = sorted(Path(basedir).glob(fsearch))
        if not sync_files:
            continue

        # Only include large events within 1000 km of centroid of zone
        zone_row = polys.loc[polys['name']==zone].iloc[0]
        zone_geom = zone_row['geometry']
        centroid = zone_row['centroid_geog']
        dist = haversine(np.radians(centroid.y), large_ev_cat['lat_rad'],
                         np.radians(centroid.x), large_ev_cat['lon_rad'])
        large_ev = large_ev_cat.loc[dist<=1000.0].set_index('time').sort_index()

        for fpath in sync_files:
            # Read in sync_coeffs and keep the plotting period
            sync_coeffs = pd.read_csv(fpath, index_col=0, parse_dates=['time'])
            sample_cat = sync_coeffs.loc[(sync_coeffs['time'] <= cat_end) &
                                         (sync_coeffs['time'] >= cat_start)]
            # Depth ranges are encoded in the file name after 'data_'
            dep_range = fpath.name.split(sep='data_')[1].split(sep='.c')[0]

            fig, axs = plt.subplots(nrows=5, ncols=3, figsize=(30,32))
            fig.suptitle('Sync coefficients for {}, z: {} km ({} mode)'.format(
                zone, dep_range, mode_names[search_type]), y=0.89)
            print(test_periods)

            for ax, test_per in zip(axs.flat, test_periods):
                # Keep a value only where the coefficient is 0 (resp. 1); everything else
                # becomes NaN so that count() below counts the two outcomes separately
                coeffs = sample_cat[test_per]
                cat_plot = pd.DataFrame({'time': sample_cat['time'],
                                         'zeros': coeffs.where(coeffs == 0.0),
                                         'ones': coeffs.where(coeffs == 1.0)})

                # 6 month bins
                # NOTE(review): recent pandas versions spell this alias '6ME' - confirm
                # against the pandas version in use
                cat_plot = cat_plot.groupby(pd.Grouper(key='time', freq='6M')).count()
                cat_plot.index.freq = None

                # Plot the bars (width in days), SC=1 stacked on top of SC=0
                ax.bar(cat_plot.index, cat_plot['zeros'].values,
                       label='SC=0 ({})'.format(test_per), width=182)
                ax.bar(cat_plot.index, cat_plot['ones'].values,
                       label='SC=1 ({})'.format(test_per),
                       bottom=cat_plot['zeros'].values, width=182)

                # Add markers for large events
                for event in large_ev.itertuples():
                    ax.axvline(event.Index,color='k', ls='--')
                    if Point(event.lon, event.lat).within(zone_geom):
                        props = dict(boxstyle='round',facecolor='white',alpha=1.0)
                        if event.depth_km <= 60.0:
                            colour = 'r'
                        elif event.depth_km > 60.0:
                            colour = 'b'
                        else:
                            # depth is NaN: only the vertical line is drawn
                            continue
                    else:
                        props = dict(boxstyle='round',facecolor='white',ls='--',alpha=1.0)
                        colour = 'grey'
                    ax.text(event.Index,ax.get_ylim()[1]/2,event.name,ha='center',va='center',
                            fontsize='large', rotation=90, color=colour, bbox=props)

                ax.set_ylabel('Count', fontsize=14)
                ax.xaxis.set_tick_params(labelsize=14)
                ax.yaxis.set_tick_params(labelsize=14)
                ax.legend(loc='upper left')

            # Save the figure (before showing it), creating 'figs-PDF/sync_coeffs_bars/' if needed
            os.makedirs(outdir, exist_ok=True)
            fn_output = outdir+zone+'_'+dep_range+'_'+search_type+'_bars_sync_coeffs.pdf'
            fig.savefig(fn_output, dpi=300, bbox_inches='tight')

            plt.show()
            # Release the figure: many figures are created in this loop
            plt.close(fig)

In [None]:
# Bar charts of the sync coefficients for every SAM zone, shallow mode first, then deep mode
zones = ['Bucaramanga', 'Colombia', 'Ecuador', 'Peru', 'Arequipa', 'North_Chile',
         'Atacama', 'Central_Chile', 'Maule', 'Valdivia']
for mode in ['S', 'D']:
    plot_sync_coeffs_bar('SAM', zones, mode, dt.datetime(2000,1,1), dt.datetime(2021,7,1))

### Detection of accelerations (incomplete)
Following Marsan et al. (2017): <br>
 - Separate catalog into nodes with 0.05 degree spacing
 - For each node, consider EQ within 50 km of the node, and discard nodes with fewer than 50 EQ
 - For each of the remaining nodes, compute the best quadratic fit $ \hat{N}(t) = at^2 + bt + c $ to the cumulative time series of background earthquakes $ N(t) $ produced by each of the 2 declustering methods
 - Impose constraint that initial rate $2at_s + b$ and final rate $2at_e + b$ must be positive
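 - Characterise the time series using the constant $\phi$, the ratio of the fitted rate at the end of the series ($t_e$) to the fitted rate at its start ($t_s$): $$ \phi = \frac{\dot{\hat{N}}(t_e)}{\dot{\hat{N}}(t_s)} = \frac{2at_e + b}{2at_s + b} $$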

In [None]:
# Build a regular grid of square cells covering the extent of the events in `gdf`
# NOTE(review): `gdf` is not defined at notebook level in the cells above (it only exists as a
# local variable inside assign_events_and_save) - it has to be created before this cell runs.
# `box` is imported here because the name `shapely` itself is not bound by the notebook imports
from shapely.geometry import box

# total area for the grid
xmin, ymin, xmax, ymax = gdf.total_bounds
# how many cells across and down
n_cells = 30
cell_size = (xmax-xmin)/n_cells
# projection of the grid
# NOTE(review): the cell corners come from gdf.total_bounds, but the grid is labelled with a
# sinusoidal projection in metres - confirm that `gdf` is expressed in this CRS
crs = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
# create the cells in a loop
grid_cells = []
for x0 in np.arange(xmin, xmax+cell_size, cell_size):
    for y0 in np.arange(ymin, ymax+cell_size, cell_size):
        # bounds: each cell spans [x0 - cell_size, x0] x [y0, y0 + cell_size]
        x1 = x0-cell_size
        y1 = y0+cell_size
        grid_cells.append(box(x0, y0, x1, y1))
cell = gpd.GeoDataFrame(grid_cells, columns=['geometry'], crs=crs)

In [None]:
## Detection of accelerations following Marsan et al. (2017)

# Calculate acceleration coefficients
def _accel_phi(cat_in_region):
    """
    Acceleration coefficient phi of one catalog: final rate / initial rate of the quadratic
    fit N(t) = a*t**2 + b*t + c to the cumulative event count, with t in days since the
    first event. Returns NaN when fewer than 3 events are available for the fit.
    """
    from numpy.polynomial import polynomial as npoly

    # Cumulative counts only make sense on time-ordered events
    times = pd.to_datetime(cat_in_region['time']).sort_values()
    if len(times) < 3:
        return np.nan
    days = (times - times.iloc[0]).dt.total_seconds().to_numpy() / (24.*60.*60.)
    counts = np.arange(1, len(days) + 1, dtype=float)

    # polyfit returns the coefficients in increasing degree, in the unscaled time variable
    c, b, a = npoly.polyfit(days, counts, 2)
    # t_s = 0 by construction: initial rate is b, final rate is 2*a*t_e + b
    return (2.0 * a * days[-1] + b) / b


def calc_accel_coeffs(fpath_cat1, fpath_cat2, regions_fpath):
    """
    Calculate background rate acceleration/deceleration coefficients

    For every polygon of the KML file, phi = (2*a*t_e + b) / (2*a*t_s + b) is computed from
    the quadratic fit to the cumulative count of each declustered catalog ('phi_1', 'phi_2').
    Regions are then filtered and the mean of the two coefficients is stored in 'phi_ave'.

    fpath_cat1: filepath (str) to CSV file containing declustered catalog using Method 1 (Z-BZ 2013)
    fpath_cat2: filepath (str) to CSV file containing declustered catalog using Method 2 (ETAS)
    regions_fpath: Path of KML file defining geographic regions of interest in the study area as polygons
    returns: GeoDataFrame of the regions passing the filter, with 'phi_1', 'phi_2', 'phi_ave'
    """
    # Read in the polygons and initialise cols to store the coeffs
    regions = gpd.read_file(regions_fpath, driver='KML')
    regions['phi_1'] = 0.0
    regions['phi_2'] = 0.0

    # Read in the declustered catalogs
    cat1 = pd.read_csv(fpath_cat1, index_col=0)
    cat2 = pd.read_csv(fpath_cat2, index_col=0)

    # Calculate the coefficients for each region
    for region in regions.itertuples():
        region_name = region.Name
        # NOTE(review): assign_events is not defined in the visible part of the notebook;
        # it is expected to return the events of the catalog located in the named region,
        # with a 'time' column - confirm
        cat1_in_region = assign_events(cat1, regions, region_name)
        cat2_in_region = assign_events(cat2, regions, region_name)

        regions.loc[region.Index, 'phi_1'] = _accel_phi(cat1_in_region)
        regions.loc[region.Index, 'phi_2'] = _accel_phi(cat2_in_region)

    # Filter once all regions have been processed (criteria kept as originally coded;
    # NOTE(review): confirm them against Marsan et al. (2017))
    keep = (((regions['phi_1']>1.0) & (regions['phi_2']>1.0)) |
            ((regions['phi_1']<1.0) & (regions['phi_2']<1.0)) |
            (np.abs(regions['phi_1']-regions['phi_2'])<0.3))
    regions = regions.loc[keep].copy()
    regions['phi_ave'] = (regions['phi_1'] + regions['phi_2'])/2.0

    return regions

## Plotting routines for figures

In [None]:
#### PLOTTING ####

def plot_cumulative(cat_declust_fn, cat_orig_fn, large_ev_fn, cat_start, mc, method, fig_fpath):
    """
    Plot the cumulative number of events with time for the original and a declustered catalog.

    Large events are drawn as labelled vertical dashed lines. The figure is displayed and then
    written to fig_fpath.

    # cat_declust_fn: path to CSV file containing a declustered catalog (needs a 'time' column)
    # cat_orig_fn: path to CSV file with the original catalog (loaded with prep_cat_zaliapin)
    # large_ev_fn: path to CSV file of large events (loaded with prep_cat_zaliapin; the
                   'time', 'name' and 'mag' columns are used)
    # cat_start: time (datetime) past which rate data is considered reliable; earlier events
                 of the original catalog are not counted
    # mc: the catalog completeness magnitude; original-catalog events with mag < mc are not counted
    # method: name of the declustering method, type str (Zaliapin or Mizrahi) - used in the legend only
    # fig_fpath: path to output PDF file for plot
    """
    def cumulative_count(cat):
        # Running event number (1, 2, 3, ...) in catalog row order, indexed by event time.
        # Only the counter is accumulated; the other catalog columns are left out.
        times = cat.set_index('time').index
        return pd.DataFrame({'count': 1.0}, index=times).cumsum()

    # Load the catalogs
    cat_declust = pd.read_csv(cat_declust_fn, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})
    cat_orig = prep_cat_zaliapin(cat_orig_fn)

    # Load catalog of large events (for plotting milestone lines), in time order
    large_ev = prep_cat_zaliapin(large_ev_fn).set_index('time').sort_index()

    # Counting: original catalog restricted to the reliable period and to mag >= mc
    keep = (cat_orig['time'] >= cat_start) & (cat_orig['mag'] >= mc)
    cum_cat = cumulative_count(cat_orig.loc[keep])
    print('Total events: {}'.format(len(cum_cat.index)))

    # And repeat on declustered catalog (used as supplied, no extra filtering)
    cum_cat_declustered = cumulative_count(cat_declust)
    print('Events in declustered catalog: {}'.format(len(cum_cat_declustered)))

    # Plot cumulative no of events with time
    fig, ax1 = plt.subplots(figsize=(12, 7))
    ax1.plot(cum_cat.index.values, cum_cat['count'], 'b', label='Original catalog')
    ax1.plot(cum_cat_declustered.index.values, cum_cat_declustered['count'], 'r',
             label='Declustered catalog - {}'.format(method))

    # Plot important large evs as full-height dashed lines.
    # The y-limits are read once, before the lines are added, so every line spans the same range.
    bottom, top = ax1.get_ylim()
    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='white', alpha=1.0)
    for date, name, m in zip(large_ev.index.values, large_ev['name'], large_ev['mag']):
        ax1.plot([date, date], [bottom, top], 'k--')
        # Label is anchored 400 counts above the bottom of the axis
        ax1.text(date, bottom+400, (name+' $M_w$'+str(m)), size="large", rotation=90,
                 horizontalalignment='center', verticalalignment='center',
                 rotation_mode='anchor', bbox=props)

    # Tidy up plot
    ax1.xaxis.set_tick_params(labelsize=14)
    ax1.yaxis.set_tick_params(labelsize=14)
    ax1.set_ylabel('Cumulative number of events', fontsize=14)
    ax1.legend(loc='best', fontsize=14)
    plt.show()
    fig.savefig(fig_fpath, dpi=300, bbox_inches='tight')

In [None]:
def plot_cumulative_compare(data_path, cat_start, mc):
    """
    Function to compare declustering methods against the original catalog

    Plots the cumulative number of events of the original catalog and of the Zaliapin and
    Mizrahi-ETAS declustered catalogs, marks the large events with depth-coloured dashed lines,
    shows the figure and saves it to <data_path>figs-PDF/cum_ev_compare_methods.pdf.

    # data_path: filepath to folder holding the 2 declustered catalogs and the original catalog
                 (must end with a path separator, it is concatenated directly with the file names)
    # cat_start: time (datetime) past which rate data is considered reliable
    # mc: the catalog completeness magnitude

    NOTE(review): a later cell defines another function with this same name (deep/shallow
    comparison); once that cell has run, this definition is replaced in the kernel namespace.
    """
    def cumulative_count(cat):
        # Running event number (1, 2, 3, ...) in catalog row order, indexed by event time.
        # Only the counter is accumulated; the other catalog columns are left out.
        times = cat.set_index('time').index
        return pd.DataFrame({'count': 1.0}, index=times).cumsum()

    def depth_colour(dep):
        # Shallow (<= 60 km) red, intermediate (60-200 km) royal blue, anything else midnight blue.
        # 60 km itself counts as shallow.
        if dep <= 60.0:
            return 'r'
        if dep < 200.0:
            return 'royalblue'
        return 'midnightblue'

    # Build filepaths
    cat_declust_fn1 = data_path + 'Zaliapin_decluster/declustered_catalog_data.csv'
    cat_declust_fn2 = data_path + 'Mizrahi_ETAS_decluster/ETAS_declustered_cat.csv'
    cat_orig_fn = data_path + 'raw_catalog_data.csv'

    fn_large_ev = data_path + 'large_eq.csv'
    fn_output = data_path + 'figs-PDF/' + 'cum_ev_compare_methods.pdf'

    # Load the catalogs
    cat_declust1 = pd.read_csv(cat_declust_fn1, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})
    cat_declust2 = pd.read_csv(cat_declust_fn2, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})
    cat_orig = prep_cat_zaliapin(cat_orig_fn)

    # Load catalog of large events (for plotting milestone lines), in time order
    large_ev = prep_cat_zaliapin(fn_large_ev).set_index('time').sort_index()

    # Counting: the magnitude cut sits half a 0.1 bin below mc
    # (presumably so that events binned at mc are kept - confirm the catalog bin width)
    m_cut = mc-(0.1/2)
    keep = (cat_orig['time'] >= cat_start) & (cat_orig['mag'] >= m_cut)
    cum_cat = cumulative_count(cat_orig.loc[keep])
    print('Total events: {}'.format(len(cum_cat.index)))

    # And repeat on declustered catalog 1
    cum_cat_declustered1 = cumulative_count(cat_declust1)
    print('Events in Zaliapin declustered catalog: {}'.format(len(cum_cat_declustered1)))

    # And repeat on declustered catalog 2
    cum_cat_declustered2 = cumulative_count(cat_declust2)
    print('Events in Mizrahi declustered catalog: {}'.format(len(cum_cat_declustered2)))

    # Plot cumulative no of events with time
    fig, ax1 = plt.subplots(figsize=(20, 10))
    ax1.plot(cum_cat.index.values, cum_cat['count'], 'k', label='Original catalog')
    ax1.plot(cum_cat_declustered1.index.values, cum_cat_declustered1['count'], 'g',
             label='Declustered catalog - Zaliapin')
    ax1.plot(cum_cat_declustered2.index.values, cum_cat_declustered2['count'], 'orange',
             label='Declustered catalog - Mizrahi-ETAS')

    # Plot important large evs as full-height dashed lines, labelled at the top of the axis.
    # The y-limits are read once, before the lines are added, so every line spans the same range.
    bottom, top = ax1.get_ylim()
    # these are matplotlib.patch.Patch properties
    props = dict(boxstyle='round', facecolor='white', alpha=1.0)
    for date, name, m, dep in zip(large_ev.index.values, large_ev['name'], large_ev['mag'], large_ev['depth_km']):
        colour = depth_colour(dep)
        ax1.plot([date, date], [bottom, top], ls='--', c=colour)
        ax1.text(date, top, (name+' $M_w$'+'{:.1f}'.format(m)), size="medium", rotation=45,
                 horizontalalignment='center', verticalalignment='center', c=colour,
                 rotation_mode='anchor', bbox=props)

    # Tidy up plot
    ax1.xaxis.set_tick_params(labelsize=14)
    ax1.yaxis.set_tick_params(labelsize=14)
    ax1.set_ylabel('Cumulative number of events', fontsize=14)
    ax1.legend(loc=4, fontsize=16)
    plt.show()
    fig.savefig(fn_output, dpi=300, bbox_inches='tight')

In [None]:
# Compare both declustering methods against the raw SAM catalog (events from 1980 on, mc = 4.8)
plot_cumulative_compare(data_path='SAM_EQ_data/', cat_start=dt.datetime(1980, 1, 1), mc=4.8)

In [None]:
## Compare deep and shallow
def plot_cumulative_compare(data_path, deep_lim, shallow_lim, method):
    """
    Compare the deep and shallow catalogs

    Plots the normalised cumulative number of deep and shallow events of one declustered
    catalog, shows the figure and saves it to
    <data_path>figs-PDF/compare_deep_shallow<method>_cat.pdf.

    # data_path: filepath to folder holding the declustered catalog
                 (must end with a path separator, it is concatenated directly with the file names)
    # deep_lim: define deep catalog containing events of depth greater than this depth
    # shallow_lim: define shallow catalog containing events of depth smaller than this depth
    # method: name of the declustering method, type str (Zaliapin or Mizrahi)

    Raises ValueError if method is neither "Zaliapin" nor "Mizrahi".

    NOTE(review): this definition reuses the name of the earlier method-comparison function
    (data_path, cat_start, mc) and replaces it in the kernel namespace once this cell has run.
    """
    def normalised_cumulative(cat):
        # Running event number in catalog row order divided by the total number of events,
        # indexed by event time. Only the counter is accumulated.
        times = cat.set_index('time').index
        counts = pd.Series(1.0, index=times, name='count').cumsum()
        return counts/len(times)

    # Build filepaths
    if method == 'Zaliapin':
        declust_cat_fn = data_path + 'Zaliapin_decluster/declustered_catalog_data.csv'
    elif method == 'Mizrahi':
        declust_cat_fn = data_path + 'Mizrahi_ETAS_decluster/ETAS_declustered_cat.csv'
    else:
        raise ValueError('method must be type str, "Mizrahi" or "Zaliapin".')

    fn_output = data_path + 'figs-PDF/' + 'compare_deep_shallow' + method + '_cat.pdf'

    # Load the catalog
    cat_declustered = pd.read_csv(declust_cat_fn, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})

    # Deep and shallow subsets of the declustered catalog (strict inequalities)
    deep_curve = normalised_cumulative(cat_declustered.loc[cat_declustered['depth_km'] > deep_lim])
    shallow_curve = normalised_cumulative(cat_declustered.loc[cat_declustered['depth_km'] < shallow_lim])

    # Plot cumulative no of events with time.
    # Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
    # NOTE(review): the labels read >= / <= while the filters above are strict.
    fig, ax1 = plt.subplots(figsize=(12, 7))
    ax1.plot(deep_curve.index.values, deep_curve,
             'b', label=r'Deep EQs ($z\geq${}km)'.format(deep_lim))
    ax1.plot(shallow_curve.index.values, shallow_curve,
             'r', label=r'Shallow EQs($z\leq${}km)'.format(shallow_lim))

    # Tidy up plot
    ax1.set_title('Catalog using {} declustering'.format(method))
    ax1.xaxis.set_tick_params(labelsize=14)
    ax1.yaxis.set_tick_params(labelsize=14)
    ax1.set_ylabel('Normalised cumulative \n # of events', fontsize=14)
    ax1.legend(loc='best', fontsize=14)
    plt.show()
    fig.savefig(fn_output, dpi=300, bbox_inches='tight')

In [None]:
# Deep (> 80 km) vs shallow (< 40 km) comparison for the Jara search area, once per declustering method
for declust_method in ('Zaliapin', 'Mizrahi'):
    plot_cumulative_compare('Jara_search_area/', 80.0, 40.0, declust_method)

In [None]:
## Rolling averages ##
# Plot the removed EQs vs the ones remaining in the declustered catalog
def plot_and_compare_30D_rates_rolling(region, box, dep_lims, method):
    """
    Plots bar chart of 30d sliding window rate for the declustered and rejected catalogs

    One pair of panels (declustered, rejected) is drawn per depth band. Large events within
    1000 km of the zone centroid are marked on every panel and named on the top panel. The
    figure is shown and then saved under <region>_EQ_data/figs-PDF/rates/.

    # region: The study region (type str) - e.g. Japan, SAM...
    # box: the subregion of interest -e.g. Arequipa, Atacama etc.
                directory <region>_EQ_data must contain subfolder zonal_cat/<box>/ -  for each 'box', CSV files for
                declustered catalog, rejected catalog must be present
    # dep_lims: list of depth limits (compared against the 'depth_km' column)
                3 numbers [d0, d1, d2]     -> 2 depth bands: z < d0 and d1 < z <= d2
                4 numbers [d0, d1, d2, d3] -> 3 depth bands: z < d0, d1 <= z <= d2 and d2 < z <= d3
    # method: name of the declustering method, type str (Zaliapin or Mizrahi)

    Raises ValueError if method is not recognised, if dep_lims does not hold 3 or 4 values,
    or if box is not among the polygons returned by load_polys.
    """
    import matplotlib.dates as mdates

    # Fail early on an unsupported number of depth limits (before any file is read)
    bands = _rate_plot_depth_bands(dep_lims)

    # Build filepaths
    root_dir = region + '_EQ_data/'
    if method == 'Zaliapin':
        declust_cat_fn = root_dir + 'zonal_cat/' + box + '/declustered_catalog_data.csv'
        rejected_cat_fn = root_dir + 'zonal_cat/' + box + '/rejected_ev.csv'
    elif method == 'Mizrahi':
        declust_cat_fn = root_dir + 'zonal_cat/' + box + '/ETAS_declustered_cat.csv'
        rejected_cat_fn = root_dir + 'zonal_cat/' + box + '/ETAS_rejected_evs.csv'
    else:
        raise ValueError('method must be type str, "Mizrahi" or "Zaliapin".')

    fn_large_ev = root_dir + 'large_eq.csv'

    # Load the catalogs
    declust_cat = pd.read_csv(declust_cat_fn, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})
    rejected_cat = pd.read_csv(rejected_cat_fn, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})

    # Load catalog of large events (for plotting milestone lines)
    large_ev_cat = prep_cat_zaliapin(fn_large_ev)

    # Get the target region polygon (looked up once, the argument `region` is left untouched)
    polys, _ = load_polys(region)
    zone = polys.loc[polys['name'] == box].reset_index(drop=True)
    if zone.empty:
        raise ValueError('No polygon named "{}" found for region "{}".'.format(box, region))
    zone_geom = zone.at[0, 'geometry']

    # Only include large events within 1000 km of centroid of zone (applies to every depth layout)
    clonrad = np.radians(zone['centroid_geog'].x.values)
    clatrad = np.radians(zone['centroid_geog'].y.values)
    large_ev_cat['dist'] = haversine(clatrad, large_ev_cat['lat_rad'], clonrad, large_ev_cat['lon_rad'])
    large_ev = large_ev_cat.loc[large_ev_cat['dist'] <= 1000.0].set_index('time').sort_index()

    # Two stacked panels per depth band: declustered on top, rejected below
    fig, axes = plt.subplots(2*len(bands), sharex=True, figsize=(30, 10*len(bands)))
    fig.suptitle('{} 30-day earthquake rate \n (using {} declustering)'.format(box, method),
                 x=0.5, y=0.93, fontsize=18)
    for row, (in_band, colour, band_label) in enumerate(bands):
        for offset, (cat, kind) in enumerate(((declust_cat, 'declustered'), (rejected_cat, 'rejected'))):
            rate = _rolling_30d_counts(cat, in_band(cat))
            axes[2*row + offset].bar(rate.index.values, rate.to_numpy(), color=colour,
                                     label='{} - {}'.format(band_label, kind))

    # x ticks and limit
    axes[-1].xaxis.set_tick_params(labelsize=12, labelrotation=0)

    # Finalise plots
    for ax in axes:
        ax.xaxis.set_major_locator(mdates.YearLocator(5))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        ax.yaxis.set_tick_params(labelsize=12)
        ax.set_ylabel('EQs/30 days', fontsize=14)
        ax.legend(loc='upper left', fontsize=14)
        # Plot the large events
        for date in large_ev.index.values:
            ax.axvline(date, color='k', ls='--')

    # Name the large events along the top edge of the first panel:
    # inside the zone -> red (depth <= 60 km) or blue (deeper), solid box;
    # outside the zone -> grey text in a dashed box.
    top_ax = axes[0]
    for event in large_ev.itertuples():
        if Point(event.lon, event.lat).within(zone_geom):
            if event.depth_km <= 60.0:
                colour = 'r'
            elif event.depth_km > 60.0:
                colour = 'b'
            else:
                # depth is missing (NaN): no label is drawn
                continue
            props = dict(boxstyle='round', facecolor='white', alpha=1.0)
        else:
            colour = 'grey'
            props = dict(boxstyle='round', facecolor='white', ls='--', alpha=1.0)
        top_ax.text(event.Index, top_ax.get_ylim()[1], event.name, ha='center', va='center',
                    fontsize='large', rotation=0, color=colour, bbox=props)

    axes[-1].set_xlabel('Start of 30 day period', fontsize=14)
    plt.show()
    # Save to file (creates missing parent directories as well)
    outdir = root_dir+'figs-PDF/rates/'
    os.makedirs(outdir, exist_ok=True)
    fn_output = outdir+box+'_'+method +'_declust_'+'30d_rate_comparisonsbar.pdf'
    fig.savefig(fn_output, dpi=300, bbox_inches='tight')


def _rate_plot_depth_bands(dep_lims):
    """Return [(mask function, bar colour, legend label)] for the depth bands described by dep_lims."""
    shallow = (lambda cat: cat['depth_km'] < dep_lims[0],
               'r', '$z<${:.1f} km'.format(dep_lims[0]))
    if len(dep_lims) == 3:
        deep = (lambda cat: (cat['depth_km'] > dep_lims[1]) & (cat['depth_km'] <= dep_lims[2]),
                'b', '{:.1f}$<z<${:.1f} km'.format(dep_lims[1], dep_lims[2]))
        return [shallow, deep]
    if len(dep_lims) == 4:
        d1 = (lambda cat: (cat['depth_km'] >= dep_lims[1]) & (cat['depth_km'] <= dep_lims[2]),
              'royalblue', '{:.1f}$<z<${:.1f} km'.format(dep_lims[1], dep_lims[2]))
        d2 = (lambda cat: (cat['depth_km'] > dep_lims[2]) & (cat['depth_km'] <= dep_lims[3]),
              'midnightblue', '{:.1f}$<z<${:.1f} km'.format(dep_lims[2], dep_lims[3]))
        return [shallow, d1, d2]
    raise ValueError('dep_lims must hold 3 or 4 depth limits, got {}.'.format(len(dep_lims)))


def _rolling_30d_counts(cat, in_band):
    """Return the number of events in the 30 days up to each selected event, indexed by event time."""
    times = cat.loc[in_band].set_index('time').index
    return pd.Series(1.0, index=times, name='count').rolling('30D').sum()

In [None]:
# Depth limits used for each SAM zone: 3 values -> 2 depth bands, 4 values -> 3 depth bands
sam_rate_cases = [
    ('Colombia', [60.0, 80.0, 250.0]),
    ('Ecuador', [60.0, 80.0, 250.0]),
    ('Peru', [60.0, 80.0, 200.0]),
    ('Arequipa', [50.0, 90.0, 250.0]),
    ('North_Chile', [70.0, 70.0, 150.0, 250.0]),
    ('Atacama', [70.0, 70.0, 120.0, 250.0]),
    ('Central_Chile', [60.0, 80.0, 100.0, 250.0]),
    ('Maule', [40.0, 80.0, 250.0]),
    ('Jara_target', [40.0, 80.0, 900.0]),
]

# One rate figure per zone and per declustering method
for d_method in ['Zaliapin', 'Mizrahi']:
    for zone_name, zone_dep_lims in sam_rate_cases:
        plot_and_compare_30D_rates_rolling('SAM', zone_name, zone_dep_lims, d_method)

In [None]:
## Convert a linewidth given in data units into points (for use with logscale)

def linewidth_from_data_units(linewidth, axis, reference='y'):
    """
    Convert a linewidth in data units to linewidth in points.

    Parameters
    ----------
    linewidth: float
        Linewidth in data units of the respective reference-axis
    axis: matplotlib axis
        The axis which is used to extract the relevant transformation
        data (data limits and size must not change afterwards)
    reference: string
        The axis that is taken as a reference for the data width.
        Possible values: 'x' and 'y'. Defaults to 'y'.

    Returns
    -------
    linewidth: numpy.ndarray
        One-element array holding the linewidth in points (the span of the
        axis limits is taken with np.diff, which keeps the array shape)

    Raises
    ------
    ValueError
        If reference is neither 'x' nor 'y'.

    NOTE(review): the scaling uses the linear span of the current axis limits.
    """
    fig = axis.get_figure()
    if reference == 'x':
        # Physical width of the axes in inches and the data range it covers
        length = fig.bbox_inches.width * axis.get_position().width
        value_range = np.diff(axis.get_xlim())
    elif reference == 'y':
        # Physical height of the axes in inches and the data range it covers
        length = fig.bbox_inches.height * axis.get_position().height
        value_range = np.diff(axis.get_ylim())
    else:
        raise ValueError("reference must be 'x' or 'y', got {!r}".format(reference))
    # Convert length to points (72 points per inch)
    length *= 72
    # Scale linewidth to value range
    return linewidth * (length / value_range)

In [None]:
# Jara 'periodogram'
def plot_periodogram(region, params_dict):
    """
    Plots similar figure to Jara et al. (2017), Fig.3a-b

    For 20 window lengths between 1 and ~700 days, the centred rolling event count of the
    depth-filtered declustered catalog is normalised by its maximum and drawn as a coloured
    horizontal band at that window length. Large events within 1000 km of the zone centroid
    are overlaid. The figure is shown and saved under <region>_EQ_data/figs-PDF/periodograms/.

    # region: the region of focus, type str (e.g. Japan, SAM)
    # params_dict: dict of function parameters, necessary keywords:
        # box: subregion, type str (e.g. Arequipa)
        # dep_lim: type list, depth filtering limits [upper, lower] with upper < lower;
                   events with upper <= depth_km < lower are used
        # method: name of the declustering method, type str (Zaliapin or Mizrahi)
        # c_palette: cmap to use for plotting the periodogram

    Raises ValueError if method is not recognised or box is not among the polygons returned by
    load_polys; AssertionError if dep_lim is not a valid [upper, lower] pair.
    """
    import matplotlib.dates as mdates
    from matplotlib.collections import LineCollection

    # Build filepaths
    box = params_dict["box"]
    root_dir = region + '_EQ_data/'
    method = params_dict["method"]
    if method == 'Zaliapin':
        fn_declust_cat = root_dir + 'zonal_cat/' + box +  '/declustered_catalog_data.csv'
    elif method == 'Mizrahi':
        fn_declust_cat = root_dir + 'zonal_cat/' + box + '/ETAS_declustered_cat.csv'
    else:
        raise ValueError('method must be type str, "Mizrahi" or "Zaliapin".')

    fn_large_ev = root_dir + 'large_eq.csv'

    # Load the catalogs
    declust_cat = pd.read_csv(fn_declust_cat, index_col=0, parse_dates=["time"], dtype={"url": str, "alert": str})

    # Declustered catalog with correct depth filter
    assert (len(params_dict["dep_lim"]) == 2), ("Provide upper and lower depth limits.")
    assert (params_dict["dep_lim"][0] < params_dict["dep_lim"][1]), ("Need upper depth limit < lower depth limit")

    upper_lim, lower_lim = params_dict["dep_lim"]
    in_band = (declust_cat['depth_km'] >= upper_lim) & (declust_cat['depth_km'] < lower_lim)
    # One unit count per selected event, indexed by event time
    event_counts = pd.Series(1.0, index=declust_cat.loc[in_band].set_index('time').index, name='count')

    # Window lengths in days, log-spaced; the rolling window uses the integer part of each length
    win_lengths = np.logspace(0, 2.85, 20, base=10.0, endpoint=True)
    windows = ['{}D'.format(i) for i in win_lengths.astype('int')]

    fig, ax = plt.subplots(figsize=(13,7))
    fig.suptitle('{}:{} events at {:.1f}$<z<${:.1f} km using {} declustering'.format(region, box, upper_lim,
                lower_lim, method),x=0.5, y=1, fontsize=14)

    for win, win_length in zip(windows, win_lengths):
        rolling_counts = event_counts.rolling(win, center=True).sum()
        # Normalise against the largest count seen with this window length
        norm_vals = rolling_counts.to_numpy()/rolling_counts.to_numpy().max()
        # convert dates to numbers first
        x_num = mdates.date2num(pd.DatetimeIndex(rolling_counts.index.values).to_pydatetime())
        # Horizontal line at y = window length, split into one segment per consecutive event pair
        points = np.array([x_num, np.full(len(x_num), win_length)]).T.reshape(-1,1,2)
        segments = np.concatenate([points[:-1],points[1:]], axis=1)
        norm = plt.Normalize(norm_vals.min(), norm_vals.max())
        lc = LineCollection(segments, cmap=params_dict["c_palette"], linewidth=18.5, norm=norm)
        # colour the segments by the normalised counts
        lc.set_array(norm_vals)
        line = ax.add_collection(lc)

    # The colorbar is built from the last collection added
    fig.colorbar(line, ax=ax, label='# of events normalised against max #events in window')

    ## Plot important large evs ##
    # Load catalog of large events (for plotting milestone lines)
    large_ev_cat = prep_cat_zaliapin(fn_large_ev)

    # Get the target region polygon (looked up once, the argument `region` is left untouched)
    polys, _ = load_polys(region)
    zone = polys.loc[polys['name'] == box].reset_index(drop=True)
    if zone.empty:
        raise ValueError('No polygon named "{}" found for region "{}".'.format(box, region))
    zone_geom = zone.at[0, 'geometry']

    # Only include large events within 1000 km of centroid of zone
    clonrad = np.radians(zone['centroid_geog'].x.values)
    clatrad = np.radians(zone['centroid_geog'].y.values)
    large_ev_cat['dist'] = haversine(clatrad,large_ev_cat['lat_rad'],clonrad,large_ev_cat['lon_rad'])
    large_ev = large_ev_cat.loc[large_ev_cat['dist']<=1000.0].set_index('time').sort_index()

    # Mark and name the large events: red for depth <= 60 km, blue for deeper events;
    # solid line/box inside the zone, dashed line/box and fainter text outside it.
    for event in large_ev.itertuples():
        if event.depth_km <= 60.0:
            colour = 'r'
        elif event.depth_km > 60.0:
            colour = 'b'
        else:
            # depth is missing (NaN): the event is not drawn
            continue
        if Point(event.lon, event.lat).within(zone_geom):
            line_style = '-'
            props = dict(boxstyle='round',facecolor='white',alpha=1.0)
            text_kwargs = {}
        else:
            line_style = '--'
            props = dict(boxstyle='round',facecolor='white',ls='--',alpha=1.0)
            text_kwargs = {'alpha': 0.7}
        ax.axvline(event.Index,color=colour, ls=line_style)
        ax.text(event.Index,ax.get_ylim()[1]-0.2,event.name,ha='center',va='center',
                rotation=45, color=colour, rotation_mode='anchor', bbox=props, **text_kwargs)

    # Tidy up the plot
    ax.set_yscale('log')
    ax.invert_yaxis()
    ax.xaxis.set_major_locator(mdates.YearLocator(5))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.yaxis.set_tick_params(labelsize=12)
    ax.set_ylabel('Length of reference time window [days]', fontsize=12)
    ax.set_xlabel('Start of sliding time window', fontsize=12)

    # x ticks and limit
    ax.xaxis.set_tick_params(labelsize=12, labelrotation=0)

    ax.autoscale_view()
    plt.show()

    # Save to file (creates missing parent directories as well); limits appear as integers in the name
    lower_lim = str(int(lower_lim))
    upper_lim = str(int(upper_lim))
    outdir = root_dir + 'figs-PDF/periodograms/'
    os.makedirs(outdir, exist_ok=True)
    fn_output = outdir+box+'_'+method+'_'+upper_lim+'-'+lower_lim+'_declust_cat_periodogram.pdf'
    fig.savefig(fn_output, dpi=300, bbox_inches='tight')

In [None]:
## Produce figs for SAM depth bands
# One row per (zone, depth band) case; the 'id' column is used as the index
cases = pd.read_csv('SAM_EQ_data/SAM_periodogram_params.csv', index_col='id')

# Draw every case once per declustering method
for method in ('Zaliapin', 'Mizrahi'):
    for case in cases.itertuples():
        params = dict(box=case.box_name,
                      dep_lim=[case.u_lim, case.l_lim],
                      method=method,
                      c_palette=case.color)
        plot_periodogram('SAM', params)

### Generic figures

In [None]:
#### PLOTTING ####

# NOTE(review): cat_preprocessed is not defined in this cell - it must already exist in the
# kernel (presumably the preprocessed catalog with 'time', 'mag', 'lat', 'depth_km' columns).
alt_cat = cat_preprocessed.set_index('time')
# Figure 1: magnitude against time, one vertical stem per event with a dot at its magnitude
fig, ax2 = plt.subplots(figsize=(15, 10))
for date, magnitude in zip(alt_cat.index.values, alt_cat['mag']):
    ax2.plot([date, date],[0, magnitude],'k')
ax2.plot(alt_cat.index.values, alt_cat['mag'], 'ko')
# Stems start at 0 but the axis is cut at magnitude 4
ax2.set_ylim(bottom=4.0)
ax2.xaxis.set_tick_params(labelsize=14)
ax2.yaxis.set_tick_params(labelsize=14)
ax2.set_xlabel('Years', fontsize=14)
ax2.set_ylabel('Magnitude', fontsize=14)

# Figure 2: latitude against time, coloured by depth, marker area = 2*mag**2.5
# (`fig` is rebound here, so it refers to this second figure from now on)
fig, ax3 = plt.subplots(figsize=(15, 10))
im = ax3.scatter(alt_cat.index.values, alt_cat['lat'], c=alt_cat['depth_km'], s=2*((alt_cat['mag'])**2.5), cmap='viridis')
ax3.xaxis.set_tick_params(labelsize=14)
ax3.yaxis.set_tick_params(labelsize=14)
ax3.set_xlabel('Years', fontsize=14)
ax3.set_ylabel('Latitude', fontsize=14)

# produce a legend with sizes from the scatter;
# func inverts the marker-size formula above so the legend entries read as magnitudes
kw = dict(prop="sizes", num=8, color='k', fmt="{x:.1f}",
          func=lambda s: (s/2)**(1/2.5))
legend2 = ax3.legend(*im.legend_elements(**kw),
                    loc="best", title="Magnitude", bbox_to_anchor=(1.3, 1.0), fontsize=12)
legend2.get_title().set_fontsize('12')

# Add a colorbar for depth (the label is set again only to change its font size)
cbar = fig.colorbar(im, ax=ax3, label='depth[km]')
cbar.set_label('depth[km]', fontsize=12)

#plt.show()
#fig.savefig('rate_graph.pdf', dpi=200, bbox_inches='tight')

In [None]:
#### PLOTTING for Iquique 2014 ##

# Keep events after 1 Jan 2013 ...
# (pd.Timestamp is used because the notebook's import cell only binds the datetime
#  module as `dt`; a bare `datetime(...)` call is not available from those imports.)
alt_cat = cat_preprocessed.loc[(cat_preprocessed['time'] > pd.Timestamp('2013-01-01'))]
# ... inside the box 24S-18S, 74W-68W
alt_cat = alt_cat.loc[(alt_cat['lat'] > -24.0) & (alt_cat['lat'] < -18.0) & (alt_cat['lon'] > -74.0) & (alt_cat['lon'] < -68.0)]
alt_cat = alt_cat.set_index('time')

# Plot events magnitude and time (stem plot: one vertical line per event, topped by a marker)
fig, ax2 = plt.subplots(figsize=(15, 10))
# All stems are drawn with a single vlines call; creating one line artist per event
# becomes very slow for catalogues with many events.
ax2.vlines(alt_cat.index.values, 0, alt_cat['mag'].values, colors='k')
ax2.plot(alt_cat.index.values, alt_cat['mag'], 'ko')
# Stems start at 0 but only magnitudes from 4.0 upwards are shown
ax2.set_ylim(bottom=4.0)
ax2.xaxis.set_tick_params(labelsize=14)
ax2.yaxis.set_tick_params(labelsize=14)
ax2.set_xlabel('Years', fontsize=14)
ax2.set_ylabel('Magnitude', fontsize=14)

# Map view of the events (longitude vs latitude); colour = event time, marker area = 2 * mag**3
# NOTE: `fig` now refers to this second figure; the magnitude-time figure is no longer reachable through `fig`.
fig, ax3 = plt.subplots(figsize=(15, 10))
im = ax3.scatter(alt_cat['lon'], alt_cat['lat'], s=2*((alt_cat['mag'])**3), c=alt_cat.index.values, cmap='viridis')
ax3.xaxis.set_tick_params(labelsize=14)
ax3.yaxis.set_tick_params(labelsize=14)
ax3.set_xlabel('Longitude', fontsize=14)
ax3.set_ylabel('Latitude', fontsize=14)

# produce a legend with sizes from the scatter
# (disabled; note that its `func` inverts s = 2 * mag**2.5, whereas the marker size above uses mag**3)
#kw = dict(prop="sizes", num=8, color='k', fmt="{x:.1f}",
          #func=lambda s: (s/2)**(1/2.5))
#legend2 = ax3.legend(*im.legend_elements(**kw),
                    #loc="best", title="Magnitude", bbox_to_anchor=(1.3, 1.0), fontsize=12)
#legend2.get_title().set_fontsize('12')

# Add a colorbar for the event date
cbar = fig.colorbar(im, ax=ax3, label='date')
#cbar.set_label('depth[km]', fontsize=12)

#plt.show()
#fig.savefig('rate_graph.pdf', dpi=200, bbox_inches='tight')

#alt_cat.head(60)

### G-R analysis

In [None]:
## MAG-FREQ PLOT

def G_R_plotting(cat, region, zone, dmag, mc):
    """ Plot the cumulative Gutenberg-Richter (frequency-magnitude) plot for a catalog and save it as a PDF
        # cat must be a dataframe containing a column for event magnitudes (col label ='mag')
        # region: prefix of the data directory; the figure is written to '<region>_EQ_data/figs-PDF/G-R_plots/'
        # zone: name of the subregion of interest (used in the figure title and the output file name)
        # dmag is the magnitude step
        # mc: completeness magnitude, only shown in the figure title
        # raises ValueError if the catalog holds no valid magnitudes
    """
    mags = cat['mag'].dropna().to_numpy(dtype=float)
    if mags.size == 0:
        raise ValueError('catalog contains no magnitudes to plot')

    # Magnitude levels from the smallest to the largest observed magnitude (both included)
    n_levels = int(np.floor((mags.max() - mags.min()) / dmag + 0.5)) + 1
    M = mags.min() + dmag * np.arange(n_levels)

    # Cumulative count N(M): number of events with magnitude >= M.
    # Counting against M - dmag/2 keeps an event that sits exactly on a level in that
    # level's count, independent of floating point noise in the level values.
    # (Right-closed histogram bins would instead push such events into the bin below,
    #  so the value plotted at M would count events > M.)
    N = np.array([np.count_nonzero(mags >= level - dmag / 2) for level in M])

    # Make sure all values of N are finite (since can't plot zero on log scale)
    ind_finite = N > 0
    N = N[ind_finite]
    M = M[ind_finite]

    # Plot G-R plot
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(M, np.log10(N), 'bx')
    ax.xaxis.set_tick_params(labelsize=14)
    ax.yaxis.set_tick_params(labelsize=14)
    ax.set_xlabel('Magnitude', fontsize=14)
    ax.set_ylabel('$log(N)$', fontsize=14)
    ax.set_title('{} ($M_c$={})'.format(zone, mc))

    # Save file before showing it: some backends release the figure once it has been shown
    root_dir = region+'_EQ_data/'
    outdir = root_dir + 'figs-PDF/G-R_plots/'
    # makedirs also creates a missing 'figs-PDF' parent and tolerates an existing directory
    os.makedirs(outdir, exist_ok=True)
    fn_output = outdir+zone+'_G-R_plot.pdf'
    fig.savefig(fn_output, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
## First find the best-fitting completeness magnitude and the best-fitting b-value
## Using code by Mizrahi et al. (2021) as sourced from https://github.com/lmizrahi/etas/blob/main/mc_b_est.py


##############################################################################
# joint beta and completeness magnitude estimation
# using p-value of Kolmogorov-Smirnov distance to fitted Gutenberg-Richter law
#
# as described by Mizrahi et al., 2021
# Leila Mizrahi, Shyam Nandan, Stefan Wiemer;
# The Effect of Declustering on the Size Distribution of Mainshocks.
# Seismological Research Letters 2021; doi: https://doi.org/10.1785/0220200231
# inspired by method of Clauset et al., 2009
##############################################################################

# mc is the binned completeness magnitude,
# so the 'true' completeness magnitude is mc - delta_m / 2


def round_half_up(n, decimals=0):
    """Round n to `decimals` decimal places, always sending exact halves upwards.

    Used instead of numpy's own rounding, which does not round halves up.
    Works on scalars and on numpy arrays alike.
    """
    scale = 10 ** decimals
    # Adding 0.5 before flooring turns "floor" into "round half up"
    shifted = n * scale + 0.5
    return np.floor(shifted) / scale


def estimate_beta_tinti(magnitudes, mc, weights=None, axis=None, delta_m=0):
    """Estimate the G-R beta value of a magnitude sample.

    Reference:
    Tinti, S., & Mulargia, F. (1987). Confidence intervals of b values for grouped magnitudes.
    Bulletin of the Seismological Society of America, 77(6), 2125-2134.

    magnitudes: array of magnitudes
    mc: (binned) completeness magnitude
    weights, axis: forwarded to np.average
    delta_m: magnitude bin width; if it is not positive the un-binned formula is used
    returns: beta (the b-value is beta / ln(10))
    """
    if not delta_m > 0:
        # un-binned magnitudes: inverse of the mean magnitude excess
        mean_excess = np.average((magnitudes - (mc - delta_m / 2)), weights=weights, axis=axis)
        return 1 / mean_excess

    # binned magnitudes
    mean_excess = np.average(magnitudes - mc, weights=weights, axis=axis)
    p = (1 + (delta_m / (mean_excess)))
    return 1 / delta_m * np.log(p)


def simulate_magnitudes(n, beta, mc):
    """Draw n random magnitudes above mc from an exponential law with rate beta.

    Uniform numbers from numpy's global random state are mapped through the
    inverse of the exponential CDF and shifted by mc.
    """
    uniform_draws = np.random.uniform(size=n)
    return mc + (-1 * np.log(1 - uniform_draws) / beta)


def fitted_cdf_discrete(sample, mc, delta_m, x_max=None, beta=None):
    """Evaluate the fitted (binned) exponential CDF on the magnitude bins from mc upwards.

    sample: array of magnitudes (used to estimate beta and/or the largest bin when those are not given)
    mc: completeness magnitude, i.e. the first bin
    delta_m: magnitude bin width
    x_max: largest magnitude to cover; defaults to the sample maximum
    beta: beta value of the fitted law; estimated from the sample if None
    returns: (x, y) with x the bin magnitudes and y the CDF value of each bin
    """
    if beta is None:
        beta = estimate_beta_tinti(sample, mc=mc, delta_m=delta_m)

    upper = sample.max() if x_max is None else x_max
    bin_idx = np.arange((upper - mc) / delta_m + 1)

    x = mc + bin_idx * delta_m
    # CDF at the upper end of every bin
    cdf = 1 - np.exp(-beta * delta_m * (bin_idx + 1))

    # keep a single value (the last one) per distinct x
    x, multiplicity = np.unique(x, return_counts=True)
    return x, cdf[np.cumsum(multiplicity) - 1]


def empirical_cdf(sample, weights=None):
    """Empirical (optionally weighted) CDF of a sample, with one value per distinct sample value.

    sample: magnitudes as a numpy array, pandas Series or plain sequence
    weights: optional weights, one per sample element (same accepted types)
    returns: (x, y) with x the sorted distinct sample values and y the cumulative
             fraction (of the count, or of the total weight) of elements <= x
    """
    # Explicit conversion instead of a try/except around `.values`: pandas objects,
    # arrays and lists all end up as numpy arrays and no unrelated error is swallowed.
    sample = np.asarray(sample)
    if weights is not None:
        weights = np.asarray(weights)

    sample_idxs_sorted = np.argsort(sample)
    sample_sorted = sample[sample_idxs_sorted]
    if weights is not None:
        weights_sorted = weights[sample_idxs_sorted]
        x, y = sample_sorted, np.cumsum(weights_sorted) / weights_sorted.sum()
    else:
        x, y = sample_sorted, np.arange(1, len(sample) + 1) / len(sample)

    # only return one value per bin: the cumulative value reached at the last occurrence of each x
    x, y_count = np.unique(x, return_counts=True)
    return x, y[np.cumsum(y_count) - 1]


def ks_test_gr(sample, mc, delta_m, ks_ds=None, n_samples=10000, beta=None):
    """Kolmogorov-Smirnov test of a magnitude sample against the fitted Gutenberg-Richter law.

    sample: array of magnitudes; only values >= mc - delta_m / 2 are used
    mc: completeness magnitude to test
    delta_m: magnitude bin width
    ks_ds: KS distances of simulated samples; simulated here when None
    n_samples: number of simulated samples (each as large as the truncated sample)
    beta: beta value of the G-R law; estimated from the truncated sample when None
    returns: (KS distance of the sample, fraction of simulated KS distances that are
              >= that distance, list of simulated KS distances);
             (1, 0, []) when the truncated sample is empty or holds a single distinct value
    """
    sample = sample[sample >= mc - delta_m / 2]
    if len(sample) == 0:
        print("no sample")
        return 1, 0, []
    if len(np.unique(sample)) == 1:
        print("sample contains only one value")
        return 1, 0, []
    if beta is None:
        beta = estimate_beta_tinti(sample, mc=mc, delta_m=delta_m)

    if ks_ds is not None:
        # simulated distances supplied by the caller: only the fitted CDF is needed
        x_fit, y_fit = fitted_cdf_discrete(sample, mc=mc, delta_m=delta_m, beta=beta)
    else:
        size = len(sample)
        # draw all simulated catalogues at once and bin them to delta_m
        raw = simulate_magnitudes(mc=mc - delta_m / 2, beta=beta, n=n_samples * size)
        simulated_all = round_half_up(raw / delta_m) * delta_m

        # the fitted CDF has to reach the largest simulated magnitude
        x_fit, y_fit = fitted_cdf_discrete(sample, mc=mc, delta_m=delta_m,
                                           x_max=np.max(simulated_all), beta=beta)

        ks_ds = []
        for start in range(0, n_samples * size, size):
            x_emp, y_emp = empirical_cdf(simulated_all[start:start + size].copy())
            fit_at_emp = np.interp(x_emp, x_fit, y_fit)
            ks_ds.append(np.max(np.abs(y_emp - fit_at_emp)))

    # KS distance of the observed sample, evaluated on the fitted bins
    x_emp, y_emp = empirical_cdf(sample)
    emp_at_fit = np.interp(x_fit, x_emp, y_emp)
    orig_ks_d = np.max(np.abs(y_fit - emp_at_fit))

    p_value = sum(ks_ds >= orig_ks_d) / len(ks_ds)
    return orig_ks_d, p_value, ks_ds


def estimate_mc(sample, mcs_test, delta_m, p_pass, stop_when_passed=True, verbose=False, beta=None,
                n_samples=10000):
    """
    Find the first completeness magnitude whose KS test against the G-R law is passed.

    sample: np array of magnitudes to test
    mcs_test: completeness magnitudes to test
    delta_m: magnitude bins (sample has to be rounded to bins beforehand)
    p_pass: p-value with which the test is passed
    stop_when_passed: stop calculations when first mc passes the test
    verbose: verbose
    beta: if beta is 'known', only estimate mc
    n_samples: number of magnitude samples to be generated in p-value calculation of KS distance

    returns: (mcs_test, ks_ds, ps, best_mc, b_value)
        ks_ds, ps: KS distances and p-values of the mcs tested so far
        best_mc: first mc reaching p_pass
        b_value: the Tinti beta divided by ln(10), i.e. the b-value
        If no mc passes, mcs_test, best_mc and the last element are returned as None.
    """
    ks_ds, ps = [], []
    for mc in mcs_test:
        if verbose:
            print('\ntesting mc', mc)
        ks_d, p, _ = ks_test_gr(sample, mc=mc, delta_m=delta_m, n_samples=n_samples, beta=beta)
        ks_ds.append(ks_d)
        ps.append(p)
        if verbose:
            print('..p-value: ', p)

        if p >= p_pass and stop_when_passed:
            break

    ps = np.array(ps)
    passed = ps >= p_pass
    if not np.any(passed):
        if verbose:
            print("None of the mcs passed the test.")
        return None, ks_ds, ps, None, None

    # argmax of a boolean array gives the position of the first True
    best_mc = mcs_test[np.argmax(passed)]
    if beta is None:
        beta = estimate_beta_tinti(sample[sample >= best_mc - delta_m / 2], mc=best_mc, delta_m=delta_m)
    if verbose:
        print("\n\nFirst mc to pass the test:", best_mc, "\nwith a b-value of:", beta/np.log(10))
    return mcs_test, ks_ds, ps, best_mc, beta/np.log(10)

In [None]:
# GR analysis for SAM
# For every zone: load the raw zonal catalogue, estimate the completeness magnitude (Mc)
# and b-value with estimate_mc, then draw and save the Gutenberg-Richter plot.
for zone in ['Arequipa','Bucaramanga','Colombia','Jara_target','North_Chile','Valdivia',
 'Atacama','Central_Chile','Ecuador','Maule','Peru']: 
    cat = pd.read_csv('SAM_EQ_data/zonal_cat/{}/raw_cat_in_region.csv'.format(zone), index_col=0)
    magnitude_sample = cat['mag'].values
    # Candidate Mc values: np.arange(2.0, 5.5, 0.1) rounded to one decimal
    mcs = round_half_up(np.arange(2.0, 5.5, 0.1), 1)
    print('NOW PROCESSING: {}'.format(zone))
    # stop_when_passed=False: every candidate is tested; mc_winner is still the first
    # candidate whose p-value reaches p_pass, or None if no candidate passes.
    # The p-values come from random simulations (np.random), so results can differ
    # between runs unless a seed is set elsewhere.
    mcs_tested, ks_distances, p_values, mc_winner, b_value_winner = estimate_mc(magnitude_sample,mcs,delta_m=0.1,
                                        p_pass=0.05,stop_when_passed=False,verbose=True,n_samples=1000)
    print('Mc for {} is {}'.format(zone, mc_winner))
    # mc_winner is only displayed in the figure title
    G_R_plotting(cat, 'SAM', zone, 0.1, mc_winner)

### References

 - Jara, J., Socquet, A., Marsan, D., Bouchon, M., 2017. Long-Term Interactions Between Intermediate Depth and Shallow Seismicity in North Chile Subduction Zone. *Geophysical Research Letters 44*, 9283–9292. https://doi.org/10.1002/2017GL075029 <br>
 - Marsan, D., Bouchon, M., Gardonio, B., Perfettini, H., Socquet, A., Enescu, B., 2017. Change in seismicity along the Japan trench, 1990–2011, and its relationship with seismic coupling. *Journal of Geophysical Research: Solid Earth 122*, 4645–4659. https://doi.org/10.1002/2016JB013715 <br>
 - Marsan, D., Reverso, T., Helmstetter, A., Enescu, B., 2013. Slow slip and aseismic deformation episodes associated with the subducting Pacific plate offshore Japan, revealed by changes in seismicity. *Journal of Geophysical Research: Solid Earth 118*, 4900–4909. https://doi.org/10.1002/jgrb.50323 <br>
