# Catalog Comparison 

This notebook compares the available earthquake catalogs (AFAD, Kandilli, Kara74, and Sezim Hoca) with each other for the Silivri earthquake data covering 2019-09-01 to 2019-11-30.

In [4]:
import os 
import pandas as pd

In [3]:
# Directory that holds the processed catalog CSV files.
catalogs_dir = (
    '/home/boxx/Public/earthquake_model_evaluations/data/'
    'SilivriPaper_2019-09-01__2019-11-30/processed_catalogs'
)

# Full path of each catalog CSV, in the order AFAD, Kara74, Sezim, Kandilli.
afad_cat, kara74_cat, sezim_cat, kandilli_cat = (
    os.path.join(catalogs_dir, csv_name)
    for csv_name in (
        'afad_catalog.csv',
        'kara74a_phase_picks.csv',
        'sezim_hoca_silivri2019.csv',
        'kandilli_catalog.csv',
    )
)

In [5]:
# Load every catalog CSV into its own DataFrame (read in the order listed).
afad_df, kara74_df, sezim_df, kandilli_df = (
    pd.read_csv(csv_path)
    for csv_path in (afad_cat, kara74_cat, sezim_cat, kandilli_cat)
)

In [6]:
# Preview the first rows of the AFAD catalog (head() defaults to 5 rows).
afad_df.head()

Unnamed: 0,orgtime,station,p_arrival_time,s_arrival_time,event_lat,event_lon,event_dep
0,2019-08-31T21:07:15,GCAM,2019-08-31T21:07:23.670000,,37.559,26.785,6.9
1,2019-08-31T21:07:15,DDIM,2019-08-31T21:07:23.970000,,37.559,26.785,6.9
2,2019-08-31T21:07:15,DGB,2019-08-31T21:07:25.660000,,37.559,26.785,6.9
3,2019-08-31T21:07:15,ZEYE,2019-08-31T21:07:29.740000,,37.559,26.785,6.9
4,2019-08-31T21:07:15,BDRM,2019-08-31T21:07:29.760000,,37.559,26.785,6.9


In [8]:
# Preview the first rows of the Kara74 catalog (head() defaults to 5 rows).
kara74_df.head()

Unnamed: 0,orgtime,station,p_arrival_time,s_arrival_time,event_lat,event_lon,event_dep
0,2019-01-11T06:46:35.210000,SLVT,2019-01-11T06:46:43.046741,2019-01-11T06:46:48.797343,40.8649,28.2378,10.0
1,2019-01-11T06:46:35.210000,BGKT,2019-01-11T06:46:44.702938,2019-01-11T06:46:51.709377,40.8649,28.2378,10.0
2,2019-01-11T06:46:35.210000,ISK,2019-01-11T06:46:47.390843,2019-01-11T06:46:56.035244,40.8649,28.2378,10.0
3,2019-01-11T06:46:35.210000,KLYT,2019-01-11T06:46:48.628002,2019-01-11T06:46:58.414964,40.8649,28.2378,10.0
4,2019-01-11T06:46:35.210000,CTYL,,2019-01-11T06:46:55.147751,40.8649,28.2378,10.0


In [9]:
# Preview the first rows of the Sezim Hoca catalog (head() defaults to 5 rows).
sezim_df.head()

Unnamed: 0,station,p_arrival_time,s_arrival_time,event_lat,event_lon,event_dep
0,ARMT,2019-09-20T16:53:49.840,,40.963,28.308,29.2
1,BGKT,2019-09-20T16:53:49.690,2019-09-20T16:53:56.210,40.963,28.308,29.2
2,CTKS,2019-09-20T16:53:48.020,2019-09-20T16:53:53.990,40.963,28.308,29.2
3,EDRB,2019-09-20T16:54:08.520,,40.963,28.308,29.2
4,ERIK,2019-09-20T16:54:05.310,,40.963,28.308,29.2


## Compare Each Catalog's Unique Stations with the Stations Available on Disk

In [10]:
# Root of the prepared day-by-day waveforms; the directory names found under
# this path are collected as station codes later in the notebook.
stations_dir = (
    '/home/boxx/Public/earthquake_model_evaluations/data/'
    'SilivriPaper_2019-09-01__2019-11-30/prepared_waveforms/day_by_day'
)

In [None]:
# Station codes are the names of the *immediate* subdirectories of
# `stations_dir`. The previous os.walk() loop descended through the whole tree,
# so any directory nested below a station folder would also have been recorded
# as a "station"; it also shadowed the builtin `dir`.
# os.scandir() raises FileNotFoundError when `stations_dir` is missing instead
# of silently producing an empty list that would make every catalog comparison
# report nothing.
with os.scandir(stations_dir) as dir_entries:
    stations = [entry.name for entry in dir_entries if entry.is_dir()]

# Display the discovered station codes as the cell output.
stations

['ADVT',
 'CTKS',
 'ERIK',
 'TKR',
 'IZI',
 'SLVT',
 'UKOP',
 'BGKT',
 'MRMT',
 'ISK',
 'KRBG',
 'KLYT',
 'YLV',
 'KCTX',
 'ARMT',
 'ORLT',
 'OSMT',
 'LAP',
 'KAVV',
 'GONE',
 'CRLT',
 'GELI',
 'HRTX',
 'RKY',
 'CTYL',
 'MDNY',
 'SILT',
 'EZN',
 'CAVI']

In [26]:
def compare_df_stations(station_list, df):
    """Print which of the expected stations appear in a catalog DataFrame.

    Args:
        station_list: Iterable of station codes to look for. Assumed to hold
            mutually comparable values (e.g. all strings) so they can be sorted.
        df: pandas DataFrame with a 'station' column.

    Returns:
        None. Two lines are printed: the stations from ``station_list`` that
        occur in ``df['station']`` and the ones that do not.

    Raises:
        KeyError: If ``df`` has no 'station' column.
    """
    catalog_stations = set(df['station'].unique())
    expected_stations = set(station_list)
    # Sort before printing: set iteration order for strings varies between
    # interpreter sessions (hash randomisation), which made the report
    # non-reproducible and hard to compare across catalogs or runs.
    print('Existing stations on the df: ', sorted(expected_stations & catalog_stations))
    print('Missing stations from the df: ', sorted(expected_stations - catalog_stations))

In [28]:
# Run the same station-coverage report for each catalog, printing a heading
# before each one so the results can be told apart.
catalog_reports = (
    ('AFAD CATALOG:', afad_df),
    ('KANDILLI CATALOG:', kandilli_df),
    ('KARA74 CATALOG:', kara74_df),
    ('SEZIM HOCA CATALOG:', sezim_df),
)
for heading, catalog_df in catalog_reports:
    print(heading)
    compare_df_stations(station_list=stations, df=catalog_df)

AFAD CATALOG:
Existing stations on the df:  []
Missing stations from the df:  ['SILT', 'KRBG', 'CAVI', 'KAVV', 'LAP', 'CTYL', 'HRTX', 'OSMT', 'GONE', 'ADVT', 'IZI', 'CTKS', 'UKOP', 'RKY', 'SLVT', 'BGKT', 'CRLT', 'TKR', 'ORLT', 'KCTX', 'ISK', 'MRMT', 'ARMT', 'YLV', 'GELI', 'EZN', 'KLYT', 'ERIK', 'MDNY']
KANDILLI CATALOG:
Existing stations on the df:  ['SILT', 'KRBG', 'CAVI', 'KAVV', 'LAP', 'CTYL', 'HRTX', 'OSMT', 'GONE', 'ADVT', 'IZI', 'CTKS', 'UKOP', 'RKY', 'SLVT', 'BGKT', 'CRLT', 'TKR', 'ORLT', 'KCTX', 'ISK', 'MRMT', 'ARMT', 'YLV', 'GELI', 'EZN', 'KLYT', 'ERIK', 'MDNY']
Missing stations from the df:  []
KARA74 CATALOG:
Existing stations on the df:  ['SILT', 'KRBG', 'CAVI', 'KAVV', 'LAP', 'CTYL', 'HRTX', 'OSMT', 'GONE', 'ADVT', 'IZI', 'CTKS', 'UKOP', 'SLVT', 'BGKT', 'TKR', 'ORLT', 'KCTX', 'ISK', 'MRMT', 'ARMT', 'YLV', 'GELI', 'KLYT', 'ERIK', 'MDNY']
Missing stations from the df:  ['CRLT', 'EZN', 'RKY']
SEZIM HOCA CATALOG:
Existing stations on the df:  ['SILT', 'KRBG', 'KAVV', 'LAP', 'C

#### Verdict: 

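The Kandilli and Kara74 catalogs are usable: Kandilli contains all 29 stations found on disk, and Kara74 contains 26 of them (CRLT, EZN, and RKY are missing), whereas the AFAD catalog contains none of these stations.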