In [62]:
import numpy as np
import pandas as pd

from geo_utils import calculate_dist

In [20]:
# Load the input tables used in this notebook.
# NOTE(review): read_pickle unpickles arbitrary objects -- only load files from a trusted source.
# df_eq: earthquake catalog (file name suggests USGS events, 2016-02-01 to 2020-08-01 -- confirm).
df_eq = pd.read_pickle('../data/USGS_Earthquake_20160201_20200801.pkl')
# df_event: alert records; later cells filter it on its 'system' and 'type' columns.
df_event = pd.read_hdf('../data/event_20160201_20190929.h5')
# df_dmreview: loaded here but not referenced in the visible part of this notebook.
df_dmreview = pd.read_hdf('../data/dmreview_20160201_20190929.h5')

# Get metrics
In this section, we compute the true positives, false positives, and false negatives in terms of detection.

* True positive - SA sent out an alert for the USGS event.
* False positive - SA sent out an alert, but there is no corresponding USGS event.
* False negative - SA didn't send out an alert for the USGS event.

For true positives, this is what I did:
* First, check whether an earthquake event has corresponding alerts whose origin time is within ± origin_time_difference_sec of the event origin time.
* If there are, I keep all the alerts that have an alert time later than the origin time.
* Then I check the distance error and keep the alert only if it is no larger than location_difference_km.
* If multiple alerts satisfy these rules, I select only the one with the fastest alert. I tried selecting by the closest origin time instead, but it seems that multiple alerts were then selected for multiple events.

### Get the True positive and False negative

In [36]:
# Magnitude threshold (not referenced by the cells visible in this part of the notebook).
mag_threshold = 3.5

# Association rules between a catalog earthquake and an alert:
# maximum origin-time mismatch (seconds) and maximum location mismatch (km).
origin_time_difference_sec = 30
location_difference_km = 100

# Keep only the rows with system == 'dm' and type == 'new'
# (per the original note: the first alert of each ShakeAlert event).
is_dm_system = df_event['system'] == 'dm'
is_new_type = df_event['type'] == 'new'
df_event_select = df_event[is_dm_system & is_new_type]

In [42]:
def datetime_to_timestamp(x):
    """Convert a timezone-naive pandas datetime to Unix time in seconds.

    The offset from 1970-01-01 00:00:00 is floored to whole milliseconds and
    then expressed as float seconds, so sub-millisecond precision is dropped.

    Parameters
    ----------
    x : pandas.Timestamp
        Timezone-naive timestamp (it is differenced against a naive epoch).

    Returns
    -------
    float
        Seconds since the Unix epoch, with millisecond resolution.
    """
    epoch = pd.Timestamp("1970-01-01")
    whole_milliseconds = (x - epoch) // pd.Timedelta('1ms')
    return whole_milliseconds / 1000.

In [132]:
# Match every catalog earthquake against the selected alerts.
#   true_positive_list  : one record per matched earthquake, laid out as
#       [evid, alert id (str), dist_km, alert_time_rel_s, origin_time_diff_s]
#   false_negative_list : ids of earthquakes with no qualifying alert
false_negative_list = []
true_positive_list = []

for origin_time, row in df_eq.iterrows():
    # The index label of df_eq is the origin time; strip the timezone so it can
    # be differenced against the timezone-naive epoch in datetime_to_timestamp.
    evtime = datetime_to_timestamp(origin_time.tz_localize(None))
    evla = row['latitude']
    evlo = row['longitude']
    evid = row['id']

    t_start = evtime - origin_time_difference_sec
    t_end = evtime + origin_time_difference_sec

    # Alerts whose estimated origin time falls inside the association window.
    in_window = (df_event_select['time'] >= t_start) & (df_event_select['time'] <= t_end)
    df_alert = df_event_select[in_window]

    alert_list = []
    for _, alert in df_alert.iterrows():
        # The alert must have been issued after the earthquake origin time.
        if not (alert['alert_time'] > evtime):
            continue

        dist_km = calculate_dist(evla, evlo, alert['lat'], alert['lon'])
        # The alert location must lie within location_difference_km of the event.
        if dist_km <= location_difference_km:
            alert_time_rel_s = alert['alert_time'] - evtime
            origin_time_diff_s = abs(alert['time'] - evtime)
            alert_list.append([evid, str(alert['id']), dist_km,
                               alert_time_rel_s, origin_time_diff_s])

    if alert_list:
        # If several alerts satisfy the rules, keep the fastest one, i.e. the
        # smallest alert_time_rel_s (field 3). The records stay plain lists:
        # converting them to a NumPy array would coerce every field to a
        # string and make the comparison lexicographic instead of numeric.
        fastest_alert = min(alert_list, key=lambda record: record[3])
        true_positive_list.append(fastest_alert)
    else:
        # No alert in the time window passed both checks: a false negative.
        false_negative_list.append(evid)

### Get the False positive

In [133]:
# Load a second earthquake catalog (file name suggests USGS events with M2.5-M3.5 -- confirm).
# NOTE(review): read_pickle unpickles arbitrary objects -- only load files from a trusted source.
df_eq_smaller = pd.read_pickle('../data/USGS_Earthquake_20160201_20200801_M2.5_M3.5.pkl')

In [134]:
# Collect the alert id (second field of each record) for every true positive.
true_positive_alert_ids = [alert_id for _evid, alert_id, *_rest in true_positive_list]

In [152]:
# Load the two matched-event tables that the following cells combine into one.
df_matched_events = pd.read_csv('../data/matched_event.csv')
# The 2016 table is read through a 'catalog' column where the other table uses 'catalogid'
# (see the cell that appends the 2016 rows).
df_matched_events_2016 = pd.read_csv('../data/matched_event_2016.csv')

In [166]:
# Columns copied out of df_matched_events, in output order.
matched_columns = ['id', 'ver', 'catid',
                   'time', 'day', 'dsecs',
                   'dkm', 'dmag', 'modtime',
                   'id.1', 'catalogid', 'lat',
                   'lon', 'depth', 'mag',
                   'time.1', 'day.1', 'modtime.1']

# Start the combined row list: one list of field values per matched event.
data_list = [[event_row[column] for column in matched_columns]
             for _, event_row in df_matched_events.iterrows()]

In [169]:
# Same field order as the rows already in data_list, except that the 2016
# table is read through its 'catalog' column.
matched_columns_2016 = ['id', 'ver', 'catid',
                        'time', 'day', 'dsecs',
                        'dkm', 'dmag', 'modtime',
                        'id.1', 'catalog', 'lat',
                        'lon', 'depth', 'mag',
                        'time.1', 'day.1', 'modtime.1']

# NOTE: this appends to the existing data_list, so running the cell twice
# adds the 2016 rows twice.
for _, event_row_2016 in df_matched_events_2016.iterrows():
    data_list.append([event_row_2016[column] for column in matched_columns_2016])

In [173]:
# Column labels for the combined table, in the same order as the fields of
# each row stored in data_list.
combined_columns = [
    'id', 'ver', 'catid', 'time', 'day', 'dsecs', 'dkm', 'dmag', 'modtime',
    'id.1', 'catalogid', 'lat', 'lon', 'depth', 'mag', 'time.1', 'day.1',
    'modtime.1',
]

# Build the combined frame, order it by 'time', then write it to CSV
# (to_csv is called with its defaults, so the index is written as well).
# NOTE: this rebinds df_matched_events, replacing the frame loaded from CSV.
df_matched_events = pd.DataFrame(data_list, columns=combined_columns).sort_values('time')
df_matched_events.to_csv('matched_events_from_SA_20160201_20201006.csv')

In [178]:
# Number of true positives: the matching loop appends one record per matched earthquake.
len(true_positive_list)

508

In [179]:
# Number of false negatives: earthquakes for which no alert passed the association rules.
len(false_negative_list)

280