In [None]:
import math
# !pip install numpy
import numpy as np
# !pip install matplotlib
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime as dt
# !pip install seaborn
import seaborn as sns
import pandas as pd
from os import listdir
pd.options.display.float_format = '{:.4f}'.format
# !pip install geopy
from geopy import distance
from geopy import Point
import geopandas
import shapely
# !pip install geopandas
# !pip install shapely
# !pip install scikit-learn
from sklearn.linear_model import LinearRegression

In [None]:
# routine to read in data
def readData(mpath):
    """Read every file found in ``mpath`` as CSV and stack the rows into one DataFrame.

    Parameters
    ----------
    mpath : str
        Directory prefix. Each file name is appended to it by plain string
        concatenation, so it has to end with a path separator.

    Returns
    -------
    pandas.DataFrame
        Rows of all files, in the order ``listdir`` reported the files; each
        file keeps its own row index. An empty DataFrame is returned when the
        directory contains no files.
    """
    frames = []
    for f in listdir(mpath):
        # the with-block closes the handle even when parsing fails
        with open(mpath + f, mode='r') as fileToRead:
            frames.append(pd.read_csv(fileToRead))
    if not frames:
        # pd.concat refuses an empty list; keep the "no files -> empty frame" result
        return pd.DataFrame()
    # one concat at the end instead of growing the frame file by file
    # (DataFrame.append no longer exists in pandas 2.x)
    return pd.concat(frames, sort=False)

In [None]:
# map a magnitude onto the 0.5-wide bin used to look up the shock_dt table of distances and times
# (foreshock/aftershock algorithm); bins are 2.0, 2.5, 3.0, 3.5, etc.
def myfloor(mag):
    """Round ``mag`` down to the nearest multiple of 0.5 and return it as a float.

    ``mag`` may be anything ``float()`` accepts (number or numeric string).
    """
    bin_width = .5
    whole_bins = math.floor(float(mag) / bin_width)
    return whole_bins * bin_width

In [None]:
def fore_after_shocks_distance(df):
    """Label each event in ``df`` as mainshock ('S'), foreshock ('F') or aftershock ('A').

    For event ``k`` the time window (days) and the distance window (km) are
    taken from the module-level ``shock_dt`` table, from the row whose ``mag``
    equals ``myfloor`` of the event's magnitude. The foreshock window is half
    the aftershock window.

    * Foreshock: a later event, at most the foreshock window (in whole days)
      after ``k``, has magnitude >= that of ``k`` and lies within the
      distance window.
    * Aftershock: an earlier event, strictly less than the aftershock window
      (in whole days) before ``k``, has magnitude > that of ``k`` and lies
      within the distance window. This test runs second, so an event that
      satisfies both ends up labelled 'A'.

    Both scans stop at the first neighbour that falls outside the time window,
    so rows are presumably expected in chronological order; the function
    neither sorts nor checks this.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``mag`` and ``datetime`` columns, plus the ``latitude`` and
        ``longitude`` columns read by ``findDist``.

    Returns
    -------
    list of str
        One label per row, in row order.

    Raises
    ------
    IndexError
        If ``shock_dt`` has no row for an event's magnitude bin.
    """
    num_records = len(df)
    # pull the two columns out once instead of materialising a full row per access
    mags = df['mag']
    times = df['datetime']
    # magnitude bin -> (aftershock window in days, distance window in km)
    windows_by_bin = {}

    foreshocks = 0
    aftershocks = 0
    shock = ['S'] * num_records
    for k in range(num_records):
        mag_k = mags.iloc[k]
        time_k = times.iloc[k]

        mag_bin = myfloor(mag_k)
        if mag_bin not in windows_by_bin:
            # single table lookup per bin supplies both windows
            row = shock_dt[shock_dt['mag'] == mag_bin]
            windows_by_bin[mag_bin] = (row['days'].iloc[0], row['km'].iloc[0])
        after_shock_win, shock_dist_win = windows_by_bin[mag_bin]
        fore_shock_win = after_shock_win / 2

        # identify foreshocks: walk forward while still inside the time window
        for later in range(k + 1, num_records):
            # written as "not (<=)" so a NaN/NaT difference ends the scan,
            # exactly like a failed loop condition would
            if not ((times.iloc[later] - time_k).days <= fore_shock_win):
                break
            # distance is the expensive test, so it is evaluated last and only when needed
            if (mags.iloc[later] >= mag_k) and (findDist(df, k, later) <= shock_dist_win):
                shock[k] = 'F'
                foreshocks = foreshocks + 1
                break

        # identify aftershocks: walk backward while still inside the time window
        for earlier in range(k - 1, -1, -1):
            if not ((time_k - times.iloc[earlier]).days < after_shock_win):
                break
            if (mags.iloc[earlier] > mag_k) and (findDist(df, k, earlier) <= shock_dist_win):
                # may overwrite an 'F' set just above
                shock[k] = 'A'
                aftershocks = aftershocks + 1
                break

    # NOTE: the foreshock tally counts every event that passed the foreshock test,
    # including those relabelled 'A' afterwards, so it can exceed the number of 'F'
    # labels in the returned list.
    print("foreshocks: ", foreshocks, "aftershocks", aftershocks, "shock:", shock)
    return shock

In [None]:
# read in table of distances and times per magnitude for foreshock / aftershock algorithm
# NOTE: data_dir is a machine-specific absolute Windows path; adjust it to your own data folder
data_dir = "C:\\Users\\User\\Debbie\\Data\\"
file_path = data_dir + "example_raw_data\\shock_dist_time.csv"
# the with-block closes the handle even if read_csv raises
with open(file_path, mode='r') as fileToRead:
    shock_dt = pd.read_csv(fileToRead)

In [None]:
# show the foreshock / aftershock window table read from shock_dist_time.csv
shock_dt

In [None]:
# floor to .1
def mytenths(mag):
    """Round ``mag`` down to one decimal place and return it as a float.

    ``mag`` may be anything ``float()`` accepts (number or numeric string).
    Values that already sit on a tenth are returned unchanged, e.g.
    ``mytenths(0.3) == 0.3`` and ``mytenths(3.34) == 3.3``.
    """
    # float division by .1 can land a hair under the true quotient
    # (0.3 / .1 == 2.9999999999999996), which would floor one step too low;
    # rounding the quotient to 9 decimals strips that representation error first
    tenths = math.floor(round(float(mag) / .1, 9))
    return round(.1 * tenths, 2)

In [None]:
# spot-check: 3.34 should come back as 3.3
mytenths(3.34)

In [None]:
# distance between two catalog rows, computed with geopy from their latitude / longitude
def findDist(df,a,b):
    """Return the distance in kilometres between rows ``a`` and ``b`` of ``df``.

    ``a`` and ``b`` are positional row numbers (used with ``iloc``); ``df``
    must provide ``latitude`` and ``longitude`` columns. The value comes from
    geopy's ``distance.distance``.
    """
    row_a, row_b = df.iloc[a], df.iloc[b]
    start = Point(row_a.latitude, row_a.longitude)
    end = Point(row_b.latitude, row_b.longitude)
    return distance.distance(start, end).kilometers

In [None]:
def points_from_xy(lat, long):
    """Pair the two coordinate sequences element-wise and build one ``Point`` per pair.

    The result is a list as long as the shorter input. ``Point`` is whatever
    that name is bound to in the notebook -- presumably geopy's ``Point`` from
    the import cell, which takes latitude first; confirm before feeding the
    result to geopandas.
    """
    return list(map(Point, lat, long))

In [None]:
# shape file for Cyprus
shp_path = data_dir + "shape\\is\\CYP_adm0.shp"
# reading Cyprus data from shape file (geopandas is already imported in the first cell)
gdf1 = geopandas.read_file(shp_path)

In [None]:
# shape file for Israel
shp_path = data_dir + "shape\\is\\ISR_adm0.shp"
# reading Israel data from shape file (geopandas is already imported in the first cell)
gdf = geopandas.read_file(shp_path)

In [None]:
# read all of israel data
file_path = data_dir + "example_raw_data\\cat_1983_2019.csv"
# the with-block closes the handle even if read_csv raises
with open(file_path, mode='r') as fileToRead:
    isd = pd.read_csv(fileToRead)
# build one timestamp per event from the separate year/month/day/hour/minute columns
isd['datetime'] = pd.to_datetime(isd[['year', 'month', 'day', 'hour','minute']])
# an event's magnitude is the largest of its Md / Mb / Mw values
isd['mag'] = isd[["Md", "Mb","Mw"]].max(axis=1)
# keep only the columns used further down
isd = isd[['datetime','mag','depth','latitude','longitude','region','felt']]
print ('length isd: ', len(isd))
isd.columns

In [None]:
# keep events of magnitude 2.0 and above; this drops earthquakes < 2 and those recorded as 0 (undefined)
# (author's note: 11287 events remain)
isrd = isd.loc[isd['mag'].ge(2.0)]
len(isrd)

In [None]:
# number of magnitude >= 2.0 events per region, largest first
isrd.groupby('region')['mag'].count().sort_values(ascending=False)

In [None]:
# took the top 10 regions, which is 82% of the data = 19372 / 23676
# Lebanon was not in Marks analysis; Saudia-Arabia was, but isn't included here
# The total is 8820, which is 78% of the earthquakes with magnitude > 2.0
regs = [
    'Eilat-Deep',
    'Aragonese-Deep',
    'Arava',
    'E.Mediter.Sea',
    'Cyprus',
    'Dead-Sea-Basin',
    'Lebanon',
    'Sinai',
    'Arnona-Dakar-Deep',
    'Suez',
]
# restrict to the selected regions and renumber the rows from 0
isdr = isrd.loc[isrd['region'].isin(regs)].reset_index(drop=True)
print(len(isdr))
isdr.head()

In [None]:
# Bar chart of the number of earthquakes per region, most active region first
eq = isdr.groupby(['region'])['mag'].count().sort_values(ascending=False)
fig, ax = plt.subplots(figsize=(12,8))
ax.set_title('Clusters of Earthquakes in Israel 1983-2019', fontsize=15)
eq.plot.bar(ax=ax)

In [None]:
# perform foreshock / aftershock labelling on the region-filtered Israel catalog (isdr)
shocks = fore_after_shocks_distance(isdr)

# put shock labels back into the df
isdr['shocks'] = shocks
file_path = data_dir + "output\\israel_shocks.csv"
# save file to disk
isdr.to_csv(file_path, encoding='utf-8', index=False)

In [None]:
# read back in data that was saved to disk
# the file was written as UTF-8, so decode it the same way instead of relying on the platform default;
# the with-block closes the handle even if read_csv raises
with open(file_path, mode='r', encoding='utf-8') as fileToRead:
    isdr = pd.read_csv(fileToRead)
# CSV stores the timestamps as text; convert the column back to datetimes
isdr['datetime'] = pd.to_datetime(isdr['datetime'])

isdr

In [None]:
# inspect the events catalogued under the 'Arnona-Dakar-Deep' region
isdr[isdr['region']=='Arnona-Dakar-Deep']

In [None]:
# check what type the datetime column holds after the CSV round-trip and to_datetime conversion
type(isdr['datetime'][0])

In [None]:
# count the events per shock label (the labels assigned are 'S', 'F' and 'A');
# author's note: the catalog is reduced to less than 1/4
isdr.groupby('shocks')['mag'].count()