# Jupyter notebook to find the seismic activity across the Solar System

# Read the Apollo 12 Grade A catalog

In [1]:
'''
    Seismic Detection Across the Solar System.
    2024 NASA Space Apps Challenge.
    The task of this project is to detect seismic activity on the Moon and Mars.
    The notebook is based on demo_notebook.ipynb provided by the organizers in "Space Apps 2024 Seismic Detection Data Packet".
    It is extended with a differential evolution search that tunes the STA/LTA parameters for each file.
'''

# Import libraries
import numpy as np
import pandas as pd
from obspy import read
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import os

Let's take a look at the training data for the lunar dataset. In addition to the data itself, we include a catalog that tells you when each event occurs in the data. The catalog includes the name of the file, the absolute time, the relative time in seconds (relative to the start of the file), the event ID (evid), and the type of moonquake. The types of moonquakes include impacts, deep moonquakes, and shallow moonquakes. You do not have to worry about predicting the type of moonquake; that's just fun information for you to know!

**Note**: For your prediction, feel free to include either the absolute time or relative time, just make sure to mark it using the same header in the CSV file so we can easily score it!

In [2]:
# Location of the Apollo 12 Grade A training catalog.
lunar_directory = './data/lunar/training/catalogs/'
lunar_file = f'{lunar_directory}apollo12_catalog_GradeA_final.csv'

# Load the catalog; the bare name on the last line displays it as the cell output.
lunar_df = pd.read_csv(filepath_or_buffer=lunar_file)
lunar_df

Unnamed: 0,filename,time_abs(%Y-%m-%dT%H:%M:%S.%f),time_rel(sec),evid,mq_type
0,xa.s12.00.mhz.1970-01-19HR00_evid00002,1970-01-19T20:25:00.000000,73500.0,evid00002,impact_mq
1,xa.s12.00.mhz.1970-03-25HR00_evid00003,1970-03-25T03:32:00.000000,12720.0,evid00003,impact_mq
2,xa.s12.00.mhz.1970-03-26HR00_evid00004,1970-03-26T20:17:00.000000,73020.0,evid00004,impact_mq
3,xa.s12.00.mhz.1970-04-25HR00_evid00006,1970-04-25T01:14:00.000000,4440.0,evid00006,impact_mq
4,xa.s12.00.mhz.1970-04-26HR00_evid00007,1970-04-26T14:29:00.000000,52140.0,evid00007,deep_mq
...,...,...,...,...,...
71,xa.s12.00.mhz.1974-10-14HR00_evid00156,1974-10-14T17:43:00.000000,63780.0,evid00156,impact_mq
72,xa.s12.00.mhz.1975-04-12HR00_evid00191,1975-04-12T18:15:00.000000,65700.0,evid00191,impact_mq
73,xa.s12.00.mhz.1975-05-04HR00_evid00192,1975-05-04T10:05:00.000000,36300.0,evid00192,impact_mq
74,xa.s12.00.mhz.1975-06-24HR00_evid00196,1975-06-24T16:03:00.000000,57780.0,evid00196,impact_mq


In [3]:
# Location of the Mars InSight training catalog.
mars_directory = './data/mars/training/catalogs/'
mars_file = f'{mars_directory}Mars_InSight_training_catalog_final.csv'

# Load the catalog; the bare name on the last line displays it as the cell output.
mars_df = pd.read_csv(filepath_or_buffer=mars_file)
mars_df

Unnamed: 0,filename,time_abs(%Y-%m-%dT%H:%M:%S.%f),time_rel(sec),evid
0,XB.ELYSE.02.BHV.2022-02-03HR08_evid0005.csv,2022-02-03T08:08:27.000000,507.0,evid0005
1,XB.ELYSE.02.BHV.2022-01-02HR04_evid0006.csv,2022-01-02T04:35:30.000000,2130.0,evid0006


# Sample short-term average / long-term average (STA/LTA) detection algorithm

A STA/LTA algorithm moves two time windows of different lengths (one short, one long) across the seismic data. The algorithm calculates the average amplitude in both windows, and calculates the ratio between them. If the data contains an earthquake, then the average amplitude in the short-term window containing the earthquake will be much larger than the average amplitude in the long-term window -- resulting in a detection.

Next, we define the values of the characteristic function (i.e. amplitude ratio between short-term and long-term windows) where we flag a seismic detection. These values are called triggers. There are two types of triggers -- "on" and "off", defined as follows:

1. "on" : If the characteristic function is above this value, then a seismic event begins. 
2. "off" : If the characteristic function falls below this value (after an "on" trigger), then a seismic event ends.

In [4]:
from obspy.signal.invsim import cosine_taper
from obspy.signal.filter import highpass
from obspy.signal.trigger import classic_sta_lta, plot_trigger, trigger_onset
from scipy.optimize import differential_evolution
from glob import glob

def read_sample(mseed_file):
    """Read a seismic file with ObsPy and return a copy of its first trace.

    Parameters
    ----------
    mseed_file : str
        Path handed unchanged to ``obspy.read``.

    Returns
    -------
    A copy of ``traces[0]`` from the stream that was read; any further traces
    in the stream are ignored.
    """
    stream = read(mseed_file)
    first_trace = stream.traces[0]
    return first_trace.copy()

def run_sta_lta_algorithm(sta_len, lta_len, thr_on, thr_off, df, tr_data):
    """Compute the classic STA/LTA characteristic function and extract triggers.

    Parameters
    ----------
    sta_len, lta_len : float
        Short-term and long-term window lengths in seconds.
    thr_on, thr_off : float
        Characteristic-function thresholds that switch a trigger on and off.
    df : float
        Sampling rate used to convert the window lengths into sample counts.
    tr_data : array-like
        Samples of the trace to analyse.

    Returns
    -------
    numpy.ndarray
        Result of ``trigger_onset`` wrapped in an array: one row per trigger,
        with the "on" index in the first column and the "off" index in the
        second. The array is empty when nothing triggered.
    """
    # Window lengths are converted to whole sample counts (truncating).
    n_sta = int(sta_len * df)
    n_lta = int(lta_len * df)

    # Ratio of short-term to long-term amplitude, evaluated along the trace.
    characteristic = classic_sta_lta(tr_data, n_sta, n_lta)

    onsets = trigger_onset(characteristic, thr_on, thr_off)
    return np.array(onsets)

def min_func(values, df, tr_data):
    """Objective function: how far the trigger count is from exactly one.

    Parameters
    ----------
    values : sequence of four floats
        ``(sta_len, lta_len / sta_len, thr_on, thr_off / thr_on)``. The long
        window and the "off" threshold are expressed relative to the short
        window and the "on" threshold respectively.
    df : float
        Sampling rate of the trace.
    tr_data : array-like
        Samples of the trace.

    Returns
    -------
    int
        ``abs(number_of_triggers - 1)``; zero means exactly one trigger.
    """
    sta_len, lta_ratio, thr_on, off_ratio = values
    detections = run_sta_lta_algorithm(
        sta_len,
        sta_len * lta_ratio,
        thr_on,
        thr_on * off_ratio,
        df,
        tr_data,
    )
    return abs(len(detections) - 1)

def run_algorithm(filepath, bounds, seed=42):
    """Detect one seismic onset per file using per-file tuned STA/LTA settings.

    Every file matching ``filepath`` is processed in sorted order. For each
    one, a differential-evolution search looks for STA/LTA settings that
    produce exactly one trigger. If the search reaches a loss of zero its
    settings are used; otherwise fixed fallback settings are applied. The
    onset of the first trigger is reported. When nothing triggers, a relative
    time of 0.0 (the start of the trace) is reported instead.

    One summary line per file is printed while running.

    Parameters
    ----------
    filepath : str
        Glob pattern selecting the files to process.
    bounds : sequence of (min, max) pairs
        Search ranges, in order: ``sta_len`` (seconds), ``lta_len`` as a
        multiple of ``sta_len``, ``thr_on``, and ``thr_off`` as a fraction of
        ``thr_on``.
    seed : int or None, optional
        Seed forwarded to ``differential_evolution``. The search is
        stochastic, so a fixed seed is used by default to keep the tuned
        parameters and exported onsets the same from run to run. Pass
        ``None`` to run the search unseeded.

    Returns
    -------
    fnames : list of str
        Paths of the processed files, as returned by ``glob``.
    time_rels : list of float
        Onset times in seconds relative to the start of each trace.
    detection_times : list of str
        Absolute onset times formatted as ``%Y-%m-%dT%H:%M:%S.%f``.
    """
    fnames, time_rels, detection_times = [], [], []
    print('Index, filename, sta_len, lta_len, thr_on, thr_off, loss, value.')

    for i, filename in enumerate(sorted(glob(filepath))):
        tr = read_sample(filename)

        # Sampling frequency of our trace
        df = tr.stats.sampling_rate
        tr_data = tr.data

        # Seeded so the stochastic search is repeatable for a given file.
        result = differential_evolution(min_func, bounds, args=(df, tr_data), seed=seed)
        loss_value = result.fun

        if loss_value == 0.0:
            # The search found settings that give exactly one trigger.
            sta_len, lta_len_coeff, thr_on, thr_off_coeff = result.x
            lta_len = sta_len * lta_len_coeff
            thr_off = thr_on * thr_off_coeff
        else:
            # Fallback settings: window lengths in seconds and on/off thresholds.
            sta_len = 120
            lta_len = 600
            thr_on = 4.0
            thr_off = 1.5

        on_off = run_sta_lta_algorithm(sta_len, lta_len, thr_on, thr_off, df, tr_data)

        if len(on_off) > 0:
            # Only the first trigger's "on" sample is reported.
            onset_index = on_off[0][0]
            time_rel = tr.times()[onset_index]
        else:
            # No trigger at all: report the start of the trace.
            time_rel = 0.0

        starttime = tr.stats.starttime.datetime
        on_time = starttime + timedelta(seconds=time_rel)
        on_time_str = on_time.strftime('%Y-%m-%dT%H:%M:%S.%f')

        fnames.append(filename)
        detection_times.append(on_time_str)
        time_rels.append(time_rel)

        print(f'{i + 1} {filename} {sta_len:.2f} {lta_len:.2f} {thr_on:.2f} {thr_off:.2f} {loss_value} {len(on_off)}')

    return fnames, time_rels, detection_times

# Tune and run STA/LTA on every Apollo 12 Grade A lunar training trace.
fnames, time_rels, detection_times = run_algorithm(
    './data/lunar/training/data/S12_GradeA/*.mseed',
    bounds=[
        (10, 1000),   # sta_len
        (1.05, 5.0),  # lta_len as a multiple of sta_len
        (3.5, 4.5),   # thr_on
        (0.05, 0.5),  # thr_off as a fraction of thr_on
    ],
)

Index, filename, sta_len, lta_len, thr_on, thr_off, loss, value.
1 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-01-19HR00_evid00002.mseed 120.00 600.00 4.00 1.50 1.0 19
2 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-03-25HR00_evid00003.mseed 527.73 2555.07 3.69 1.16 0.0 1
3 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-03-26HR00_evid00004.mseed 270.05 1042.48 3.59 1.15 0.0 1
4 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-04-25HR00_evid00006.mseed 562.75 2438.50 3.67 1.32 0.0 1
5 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-04-26HR00_evid00007.mseed 469.73 2158.26 3.88 0.38 0.0 1
6 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-06-15HR00_evid00008.mseed 517.34 1902.16 3.66 0.54 0.0 1
7 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-06-26HR00_evid00009.mseed 544.68 2640.50 4.18 0.46 0.0 1
8 ./data/lunar/training/data/S12_GradeA/xa.s12.00.mhz.1970-07-20HR00_evid00010.mseed 575.50 2663.48 4.38 1.40 0.0 1
9 ./dat

In [5]:
# Tune and run STA/LTA on every Mars training trace.
fnames, time_rels, detection_times = run_algorithm(
    './data/mars/training/data/*.mseed',
    bounds=[
        (10, 500),    # sta_len
        (1.05, 5.0),  # lta_len as a multiple of sta_len
        (3.5, 4.5),   # thr_on
        (0.05, 0.5),  # thr_off as a fraction of thr_on
    ],
)

Index, filename, sta_len, lta_len, thr_on, thr_off, loss, value.
1 ./data/mars/training/data/XB.ELYSE.02.BHV.2022-01-02HR04_evid0006.mseed 120.00 600.00 4.00 1.50 1.0 0
2 ./data/mars/training/data/XB.ELYSE.02.BHV.2022-02-03HR08_evid0005.mseed 55.72 248.74 3.55 1.61 0.0 1


**Note**: You do not have to worry about marking the end of the seismic trace (as you can see, even for us it's not very accurate!). For this challenge, all we care about is the start of the seismic waveform.

## Sample detection export into a catalog
There are many ways to do this, but we'll show a way to do it using pandas. 

In [6]:
# Tune and run STA/LTA on every lunar test trace (one sub-folder per station/grade).
fnames, time_rels, detection_times = run_algorithm(
    './data/lunar/test/data/**/*.mseed',
    bounds=[
        (10, 1000),   # sta_len
        (1.05, 5.0),  # lta_len as a multiple of sta_len
        (3.5, 4.5),   # thr_on
        (0.05, 0.5),  # thr_off as a fraction of thr_on
    ],
)

Index, filename, sta_len, lta_len, thr_on, thr_off, loss, value.
1 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1969-12-16HR00_evid00006.mseed 185.45 281.81 4.39 1.51 0.0 1
2 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-01-09HR00_evid00007.mseed 416.77 1861.07 3.69 0.89 0.0 1
3 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-02-07HR00_evid00014.mseed 238.69 1178.51 4.30 0.70 0.0 1
4 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-02-18HR00_evid00016.mseed 355.17 1551.23 3.88 1.39 0.0 1
5 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-03-14HR00_evid00018.mseed 120.00 600.00 4.00 1.50 1.0 0
6 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-03-30HR00_evid00020.mseed 371.83 1511.57 3.96 1.65 0.0 1
7 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-04-03HR00_evid00021.mseed 628.12 2863.23 3.62 1.27 0.0 1
8 ./data/lunar/test/data/S12_GradeB/xa.s12.00.mhz.1970-05-20HR00_evid00026.mseed 878.65 3541.18 3.77 1.14 0.0 1
9 ./data/lunar/test/data/S12_GradeB/xa.s1

In [7]:
# Assemble the lunar detections into a catalog-shaped dataframe and preview it.
detect_df = pd.DataFrame(
    data={
        'filename': fnames,
        'time_abs(%Y-%m-%dT%H:%M:%S.%f)': detection_times,
        'time_rel(sec)': time_rels,
    }
)
detect_df.head()

Unnamed: 0,filename,time_abs(%Y-%m-%dT%H:%M:%S.%f),time_rel(sec)
0,./data/lunar/test/data/S12_GradeB/xa.s12.00.mh...,1969-12-16T22:09:36.027057,79775.849057
1,./data/lunar/test/data/S12_GradeB/xa.s12.00.mh...,1970-01-09T17:32:26.993925,63146.867925
2,./data/lunar/test/data/S12_GradeB/xa.s12.00.mh...,1970-02-07T18:55:32.495528,68131.924528
3,./data/lunar/test/data/S12_GradeB/xa.s12.00.mh...,1970-02-18T12:14:24.601830,44064.45283
4,./data/lunar/test/data/S12_GradeB/xa.s12.00.mh...,1970-03-14T00:00:00.520000,0.0


This can then be exported to a csv.

In [8]:
# Create the target folder if needed so the export also works on a fresh checkout.
os.makedirs('output', exist_ok=True)
detect_df.to_csv('output/lunar_catalog.csv', index=False)

In [9]:
# Tune and run STA/LTA on every Mars test trace.
fnames, time_rels, detection_times = run_algorithm(
    './data/mars/test/data/*.mseed',
    bounds=[
        (10, 500),    # sta_len
        (1.05, 5.0),  # lta_len as a multiple of sta_len
        (3.5, 4.5),   # thr_on
        (0.05, 0.5),  # thr_off as a fraction of thr_on
    ],
)

Index, filename, sta_len, lta_len, thr_on, thr_off, loss, value.
1 ./data/mars/test/data/XB.ELYSE.02.BHV.2019-05-23HR02_evid0041.mseed 99.88 493.03 3.70 1.44 0.0 1
2 ./data/mars/test/data/XB.ELYSE.02.BHV.2019-07-26HR12_evid0033.mseed 305.88 1459.24 3.67 1.07 0.0 1
3 ./data/mars/test/data/XB.ELYSE.02.BHV.2019-07-26HR12_evid0034.mseed 183.94 830.54 4.13 0.85 0.0 1
4 ./data/mars/test/data/XB.ELYSE.02.BHV.2019-09-21HR03_evid0032.mseed 320.53 1301.18 3.73 0.90 0.0 1
5 ./data/mars/test/data/XB.ELYSE.02.BHV.2021-05-02HR01_evid0017.mseed 67.67 321.29 3.63 0.74 0.0 1
6 ./data/mars/test/data/XB.ELYSE.02.BHV.2021-10-11HR23_evid0011.mseed 16.61 79.63 3.83 1.75 0.0 1
7 ./data/mars/test/data/XB.ELYSE.02.BHV.2021-12-24HR22_evid0007.mseed 388.34 1888.93 4.04 1.74 0.0 1
8 ./data/mars/test/data/XB.ELYSE.02.BHV.2022-04-09HR22_evid0002.mseed 120.00 600.00 4.00 1.50 1.0 0
9 ./data/mars/test/data/XB.ELYSE.02.BHV.2022-05-04HR23_evid0001.mseed 398.20 1978.22 3.84 1.08 0.0 1


In [10]:
# Assemble the Mars detections into a catalog-shaped dataframe and preview it.
detect_df = pd.DataFrame(
    data={
        'filename': fnames,
        'time_abs(%Y-%m-%dT%H:%M:%S.%f)': detection_times,
        'time_rel(sec)': time_rels,
    }
)
detect_df.head()

Unnamed: 0,filename,time_abs(%Y-%m-%dT%H:%M:%S.%f),time_rel(sec)
0,./data/mars/test/data/XB.ELYSE.02.BHV.2019-05-...,2019-05-23T02:38:10.132000,2290.1
1,./data/mars/test/data/XB.ELYSE.02.BHV.2019-07-...,2019-07-26T12:54:14.260000,3254.25
2,./data/mars/test/data/XB.ELYSE.02.BHV.2019-07-...,2019-07-26T12:13:50.460000,830.45
3,./data/mars/test/data/XB.ELYSE.02.BHV.2019-09-...,2019-09-21T03:40:15.737000,2415.7
4,./data/mars/test/data/XB.ELYSE.02.BHV.2021-05-...,2021-05-02T01:27:39.125000,1659.1


This can then be exported to a csv.

In [11]:
# Create the target folder if needed so the export also works on a fresh checkout.
os.makedirs('output', exist_ok=True)
detect_df.to_csv('output/mars_catalog.csv', index=False)

# Good luck!