In [4]:
import os
import pdb
import pywt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from obspy import read
import pickle
import re

from scipy.ndimage import median_filter
from sklearn.preprocessing import MinMaxScaler
from obspy.signal.filter import bandpass

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [5]:
lunar_cat = 'data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'
lunar_df = pd.read_csv(lunar_cat)

# Build `lunar_event`: date-trimmed catalog filename -> list of the
# 'time_rel(sec)' values catalogued under it.  The key is everything in the
# catalog's 'filename' value up to and including its last YYYY-MM-DD, so
# several catalog rows for the same day end up in the same list.
date_prefix_pattern = re.compile(r"^.*\d{4}-\d{2}-\d{2}")

lunar_event = {}
for index, row in lunar_df.iterrows():
    filename = row['filename']

    prefix_match = date_prefix_pattern.match(filename)
    if prefix_match is None:
        # Previously this surfaced as an opaque
        # "'NoneType' object has no attribute 'group'"; name the bad row instead.
        raise ValueError(
            f"Catalog row {index}: filename {filename!r} does not contain a YYYY-MM-DD date"
        )
    cleaned_filename = prefix_match.group()

    detection_time = row['time_rel(sec)']

    # setdefault creates the list on first sight of a key, then appends.
    lunar_event.setdefault(cleaned_filename, []).append(detection_time)

In [12]:
# Inspect the catalog mapping: date-trimmed filename -> list of 'time_rel(sec)' values.
lunar_event

{'xa.s12.00.mhz.1970-01-19': [73500.0],
 'xa.s12.00.mhz.1970-03-25': [12720.0],
 'xa.s12.00.mhz.1970-03-26': [73020.0],
 'xa.s12.00.mhz.1970-04-25': [4440.0],
 'xa.s12.00.mhz.1970-04-26': [52140.0],
 'xa.s12.00.mhz.1970-06-15': [68400.0],
 'xa.s12.00.mhz.1970-06-26': [72060.0],
 'xa.s12.00.mhz.1970-07-20': [18360.0, 42240.0],
 'xa.s12.00.mhz.1970-09-26': [71820.0],
 'xa.s12.00.mhz.1970-10-24': [41460.0],
 'xa.s12.00.mhz.1970-11-12': [46200.0],
 'xa.s12.00.mhz.1970-12-11': [26520.0],
 'xa.s12.00.mhz.1970-12-27': [74040.0],
 'xa.s12.00.mhz.1970-12-31': [56460.0],
 'xa.s12.00.mhz.1971-01-15': [45600.0],
 'xa.s12.00.mhz.1971-01-28': [53940.0],
 'xa.s12.00.mhz.1971-01-29': [66060.0],
 'xa.s12.00.mhz.1971-02-09': [13320.0],
 'xa.s12.00.mhz.1971-03-25': [55080.0],
 'xa.s12.00.mhz.1971-04-13': [46500.0],
 'xa.s12.00.mhz.1971-04-17': [25440.0],
 'xa.s12.00.mhz.1971-05-12': [29100.0, 35100.0],
 'xa.s12.00.mhz.1971-05-13': [10800.0],
 'xa.s12.00.mhz.1971-05-23': [80400.0],
 'xa.s12.00.mhz.1971-06

In [8]:
def wavelet_transform(data, wavelet, scales):
    """Run a continuous wavelet transform and return coefficient magnitudes.

    Parameters
    ----------
    data : array-like
        Signal handed to ``pywt.cwt``.
    wavelet : str or wavelet object
        Wavelet specification forwarded to ``pywt.cwt``.
    scales : array-like
        Scales forwarded to ``pywt.cwt``.

    Returns
    -------
    tuple
        ``(magnitudes, frequencies)`` where ``magnitudes`` is the element-wise
        absolute value of the coefficients and ``frequencies`` is the second
        value returned by ``pywt.cwt``, passed through unchanged.
    """
    cwt_coeffs, cwt_freqs = pywt.cwt(data, scales, wavelet)
    magnitudes = np.abs(cwt_coeffs)
    return magnitudes, cwt_freqs

In [9]:
def plot_trace(tr_times, tr_data, arrival, coefficients, scales):
    """Plot a seismic trace above an image of its wavelet coefficients.

    Parameters
    ----------
    tr_times : array-like
        Sample times; used as the x-axis of both panels.
    tr_data : array-like
        Trace samples plotted against ``tr_times``.
    arrival : float or iterable of float
        Relative arrival time(s). One red vertical line is drawn per value, so
        both a single number and a list of arrivals are accepted.
    coefficients : 2-D array
        Image shown in the lower panel. Row ``i`` is labelled with
        ``scales[i]`` (assumes one row per scale, in the same order).
    scales : sequence
        Scale values used to label the vertical axis of the lower panel.
    """
    arrivals = np.atleast_1d(arrival)
    t_start, t_end = np.min(tr_times), np.max(tr_times)

    fig, (ax, ax2) = plt.subplots(2, 1, figsize=(10, 6))

    ax.plot(tr_times, tr_data)

    for position, arrival_time in enumerate(arrivals):
        # Label only the first line so the legend shows a single entry.
        line_label = 'Rel. Arrival' if position == 0 else '_nolegend_'
        ax.axvline(x=arrival_time, color='red', label=line_label)
    if arrivals.size:
        ax.legend(loc='upper left')

    ax.set_xlim([t_start, t_end])
    ax.set_ylabel('Velocity (m/s)')
    ax.set_title('Seismic Trace', fontweight='bold')

    # origin='lower' puts row 0 at the bottom edge, which the extent labels
    # with scales[0].  With the default origin='upper' row 0 was drawn at the
    # edge labelled with the largest scale, i.e. the scale axis was mislabelled.
    ax2.imshow(coefficients,
               extent=[t_start, t_end, scales[0], scales[-1]],
               origin='lower',
               aspect='auto', interpolation='bilinear', cmap='jet')
    ax2.set_ylabel('Scales')
    ax2.set_xlabel('Time (s)')
    ax2.set_title('Wavelet Coefficients', fontweight='bold')

    plt.tight_layout()
    plt.show()

In [10]:
def _fill_nans_linear(samples):
    """Return a float64 copy of ``samples`` with NaNs filled by linear interpolation.

    NaNs before the first / after the last valid sample take the nearest valid
    value (``np.interp`` clamps at the ends).  Raises ``ValueError`` when every
    sample is NaN, because there is nothing to interpolate from.
    """
    values = np.array(samples, dtype=np.float64)
    missing = np.isnan(values)
    if values.size and missing.all():
        raise ValueError("cannot interpolate: every sample is NaN")
    if missing.any():
        positions = np.arange(values.size)
        values[missing] = np.interp(positions[missing], positions[~missing], values[~missing])
    return values


def preprocess_mseed(file_path, arrival, minfreq=0.5, maxfreq=1.5,
                     wavelet='cmor1.0-0.5', scales=None, plot=False):
    """Read an mseed file, band-pass filter its first trace and wavelet-transform it.

    Parameters
    ----------
    file_path : str
        Path of the file passed to ``obspy.read``.
    arrival : float or list of float
        Relative arrival time(s).  Only used when ``plot`` is true, in which
        case it is forwarded unchanged to ``plot_trace``.
    minfreq, maxfreq : float
        Corner frequencies of the 4-corner, zero-phase band-pass filter.
    wavelet : str
        Wavelet forwarded to ``wavelet_transform``.  In PyWavelets' ``cmorB-C``
        naming, B is the bandwidth and C the center frequency, so the default
        is a complex Morlet with bandwidth 1.0 and center frequency 0.5.
    scales : array-like, optional
        Scales for the wavelet transform; defaults to ``np.arange(1, 20)``.
    plot : bool
        When true, show the filtered trace and its coefficients via ``plot_trace``.

    Returns
    -------
    tuple
        ``(filtered_data, coefficients, basename, times)``: the filtered trace
        samples, the coefficient magnitudes from ``wavelet_transform``, the
        base name of ``file_path`` and the array returned by ``tr.times()``.
    """
    scales = np.arange(1, 20) if scales is None else np.asarray(scales)

    st = read(file_path)
    tr = st[0]  # only the first trace in the stream is processed
    if np.isnan(tr.data).any():
        print(f"Warning: Missing values found in {tr.id}. Interpolation may be needed.")
        # Trace.interpolate() resamples the trace; it does not fill NaN
        # samples, so the gaps are filled here before filtering.  Otherwise
        # the NaNs would propagate through the band-pass filter.
        tr.data = _fill_nans_linear(tr.data)

    tr_times = tr.times()

    # Filters tr.data in place.
    tr.filter("bandpass", freqmin=minfreq, freqmax=maxfreq, corners=4, zerophase=True)

    coefficients, frequencies = wavelet_transform(tr.data, wavelet, scales)

    if plot:
        # Visual check of the seismic trace and its wavelet coefficients.
        plot_trace(tr_times, tr.data, arrival, coefficients, scales)
    return tr.data, coefficients, os.path.basename(file_path), tr_times

In [11]:
def process_file(file_path, event_time):
    """Preprocess one mseed file, reporting instead of raising on failure.

    Calls ``preprocess_mseed(file_path, event_time)`` and reorders its result.

    Returns
    -------
    tuple or None
        ``(filename, filtered_data, wavelet_coefficients, timeline)`` on
        success.  If anything raises, the error is printed and ``None`` is
        returned, so callers must check for ``None`` before unpacking.
    """
    try:
        trace_data, cwt_magnitudes, base_name, times = preprocess_mseed(file_path, event_time)
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None
    return base_name, trace_data, cwt_magnitudes, times

In [None]:
mseed_training_directory = 'data/lunar/training/data/S12_GradeA'
mseed_testing_directory = 'data/lunar/test/data/'

# Loop through each mseed file and apply the data preprocessing.  For every
# file that is processed successfully three things are collected:
#   * LSTM input: a list of [sample, time] pairs,
#   * CNN input:  the wavelet coefficients,
#   * the file name.

lstm_train_input = []
cnn_train_input = []
train_filenames = []

lstm_test_input = []
cnn_test_input = []
test_filenames = []


def _lookup_event_times(mseed_name):
    """Return the catalogued arrival times for an mseed file name, or None.

    The file stem is tried first; if that is not a key of ``lunar_event`` the
    stem is trimmed to everything up to its last YYYY-MM-DD and tried again,
    which is the key form shown in the ``lunar_event`` output.
    """
    stem = mseed_name.replace('.mseed', '')
    if stem in lunar_event:
        return lunar_event[stem]
    date_prefix = re.match(r"^.*\d{4}-\d{2}-\d{2}", stem)
    if date_prefix is None:
        return None
    return lunar_event.get(date_prefix.group())


def _collect_preprocessed(file_path, event_time, lstm_inputs, cnn_inputs, filenames):
    """Preprocess one file and append its outputs to the given lists.

    Returns False (appending nothing) when ``process_file`` reported a failure.
    """
    result = process_file(file_path, event_time)
    if result is None:
        # process_file already printed the error; unpacking None would raise TypeError.
        return False
    processed_name, filtered_data, wavelet_coefficients, timeline = result
    lstm_inputs.append([list(val) for val in zip(filtered_data, timeline)])
    cnn_inputs.append(wavelet_coefficients)
    filenames.append(processed_name)
    return True


# sorted() makes the processing order reproducible; os.listdir order is arbitrary.
for filename in sorted(os.listdir(mseed_training_directory)):
    if not filename.endswith(".mseed"):
        continue
    event_times = _lookup_event_times(filename)
    if event_times is None:
        print(f"Skipping {filename}: no matching entry in lunar_event")
        continue
    file_path = os.path.join(mseed_training_directory, filename)
    print(f"Processing file: {filename}")
    _collect_preprocessed(file_path, event_times,
                          lstm_train_input, cnn_train_input, train_filenames)

for directory in sorted(os.listdir(mseed_testing_directory)):
    subdirectory_path = os.path.join(mseed_testing_directory, directory)
    if not os.path.isdir(subdirectory_path):
        # Stray files next to the station folders would make os.listdir fail.
        continue
    for filename in sorted(os.listdir(subdirectory_path)):
        if not filename.endswith(".mseed"):
            continue
        file_path = os.path.join(subdirectory_path, filename)
        print(f"Processing file: {filename}")
        # Test files have no catalogued arrival; 0 is passed as a placeholder.
        _collect_preprocessed(file_path, 0,
                              lstm_test_input, cnn_test_input, test_filenames)

# file_path = os.path.join(mseed_training_directory, 'xa.s12.00.mhz.1970-07-20HR00_evid00010.mseed')
# arrival_time = lunar_event['xa.s12.00.mhz.1970-07-20HR00_evid00010']
# print(f"Processing file: {file_path}")
# filtered_data, wavelet_coefficients, filename, timeline = preprocess_mseed(file_path, arrival_time)
# print(filtered_data)
# print(wavelet_coefficients)

