In [1]:
import os
import pdb
import pywt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from obspy import read
from sklearn.preprocessing import MinMaxScaler
from obspy.signal.filter import bandpass

In [2]:
lunar_cat = 'data/lunar/training/catalogs/apollo12_catalog_GradeA_final.csv'
lunar_df = pd.read_csv(lunar_cat)

lunar_event = dict(zip(lunar_df['filename'],  lunar_df['time_rel(sec)']))

In [3]:
def wavelet_transform(data, wavelet, scales):
    coefficients, frequencies = pywt.cwt(data, scales, wavelet)
    return np.abs(coefficients), frequencies

In [4]:

def plot_trace(tr_times, tr_data, arrival, coefficients, scales):
    fig, (ax, ax2) = plt.subplots(2, 1, figsize=(10, 6))

    ax.plot(tr_times,tr_data)

    ax.axvline(x = arrival, color='red',label='Rel. Arrival')
    ax.legend(loc='upper left')

    ax.set_xlim([min(tr_times),max(tr_times)])
    ax.set_ylabel('Velocity (m/s)')
    ax.set_title('Seismic Trace', fontweight='bold')

    ax2.imshow(coefficients, extent=[tr_times.min(), tr_times.max(), scales.min(), scales.max()],
               aspect='auto', interpolation='bilinear', cmap='jet')
    ax2.set_ylabel('Scales')
    ax2.set_xlabel('Time (s)')
    ax2.set_title('Wavelet Coefficients', fontweight='bold')

    plt.tight_layout()
    plt.show()

In [5]:
def preprocess_mseed(file_path, arrival, minfreq=0.5, maxfreq=3.0):
    wavelet='cmor2.0-1.0' #adjust the center frequency (0.5 to 10 Hz) and bandwidth (around 1.5)
    scales = np.arange(1, 100)

    st = read(file_path)    
    tr = st[0]
    if np.count_nonzero(np.isnan(tr.data)) > 0:
        print(f"Warning: Missing values found in {tr.id}. Interpolation may be needed.")
        tr.interpolate(method='linear', tolerance=0.1, sampling_rate=tr.stats.sampling_rate)
    
    data = tr.data.reshape(-1, 1)
    tr_times = tr.times()
    scaler = MinMaxScaler()
    normalized_data = scaler.fit_transform(data)
    tr.data = normalized_data.flatten() 
    tr.filter("bandpass", freqmin=minfreq, freqmax=maxfreq)
    
    coefficients, frequencies = wavelet_transform(tr.data, wavelet, scales)
    
    # plot_trace(tr_times, tr.data, arrival, coefficients, scales) #Use this to check the plot for Seismic trace and Waveleet Coefficients
    return tr.data, coefficients

In [6]:
mseed_directory = 'data/lunar/training/data/S12_GradeA'

'''
Looping through each mseed file to apply data preprocessing.
'''
# for filename in os.listdir(mseed_directory):
#     if filename.endswith(".mseed"):
#         file_path = os.path.join(mseed_directory, filename)
#         print(f"Processing file: {filename}")
#         filtered_data, wavelet_coefficients = preprocess_mseed(file_path)
#         break
'''
test for a single file
'''
file_path = os.path.join(mseed_directory, 'xa.s12.00.mhz.1970-06-26HR00_evid00009.mseed')
arrival_time = lunar_event['xa.s12.00.mhz.1970-06-26HR00_evid00009']
print(f"Processing file: {file_path}")
filtered_data, wavelet_coefficients = preprocess_mseed(file_path, arrival_time)
print(filtered_data)
print(wavelet_coefficients)



Processing file: data/lunar/training/data/S12_GradeA\xa.s12.00.mhz.1970-06-26HR00_evid00009.mseed
[ 1.85364131e-01  9.80416292e-02 -2.48695679e-01 ...  2.26957859e-09
  7.47876546e-10 -1.07908346e-09]
[[6.21278043e-03 1.14884139e-02 2.03014734e-02 ... 7.33170752e-11
  1.74725577e-10 1.40907769e-10]
 [4.08877177e-03 1.09146009e-02 2.22570602e-02 ... 6.30123278e-11
  1.17384794e-10 1.67403286e-10]
 [5.11031264e-02 5.82908498e-02 5.67632122e-02 ... 1.52680140e-10
  2.09403520e-10 2.42679232e-10]
 ...
 [4.02946566e-03 1.21133671e-02 8.22306556e-03 ... 1.49151428e-09
  1.01692772e-09 2.37608287e-10]
 [4.88351926e-03 1.23654256e-02 7.75771153e-03 ... 2.20175417e-09
  1.38534958e-09 7.67545717e-10]
 [4.44544394e-03 9.59582014e-03 8.50902422e-03 ... 1.19292700e-09
  8.23772567e-10 1.80470473e-09]]
