In [1]:
import obspy_local
import obspy
from obspy_local.obspy_local.io.segy.core import _read_segy
import glob
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
import os

In [2]:
# define function for converting files
def downsample_file(f, out_path, nx_old, nx_new, fs_new):
    """Resample one SEG-Y file to ``fs_new`` Hz and write it into ``out_path``.

    The file is read with ``_read_segy``, resampled, its binary file header is
    updated with the new trace length and sample interval, every trace is cast
    to float32, and the result is written as SEG-Y under the input file's own
    base name.

    Failures are reported with ``print`` rather than raised, so a batch run
    carries on with the next file. Interrupts (Ctrl-C / kernel interrupt) are
    deliberately NOT caught and still stop the run.

    Parameters
    ----------
    f : str
        Path of the SEG-Y file to read.
    out_path : str
        Output directory, with or without a trailing separator.
    nx_old : int
        Forwarded to ``_read_segy`` as ``npts``.
    nx_new : int
        Stored in the header as ``number_of_samples_per_data_trace``. It is
        not checked against the actual length of the resampled traces.
    fs_new : float
        Target sampling rate in Hz; also used to derive the header's sample
        interval in microseconds.

    Returns
    -------
    None
    """
    try:
        st = _read_segy(f,npts=nx_old)

        # resample
        st = st.resample(fs_new)

        # correct metadata
        header = st.stats.binary_file_header
        header.number_of_samples_per_data_trace = nx_new
        # round instead of truncating so floating-point error in the division
        # cannot leave the interval one microsecond short
        header.sample_interval_in_microseconds = int(round(1e6/fs_new))

        # convert to float32
        for tr in st:
            tr.data = tr.data.astype('float32')

        # write file under the same base name as the input
        st.write(os.path.join(out_path, os.path.basename(f)),format="SEGY")
    except Exception as err:
        # keep the batch going, but say why this file failed
        print("Issue processing file " + f + ": " + repr(err))

In [None]:
# Root folder of the Store Glacier DAS recordings; the 4 kHz files are
# everything matched inside its "4kHz" subfolder.
path_4khz = "/1-fnp/petasaur/p-wd03/greenland/Store Glacier DAS data/"
pattern_4khz = path_4khz + "4kHz/*"
files_4khz = glob.glob(pattern_4khz)

# Base names of everything already present in the output folder. This cell
# only builds the listing; the actual skipping of processed files happens
# where the work list is assembled.
out_path = "/1-fnp/pnwstore1/p-wd05/greenland/resampled/"
out_files = []
for existing in glob.glob(out_path + "*"):
    out_files.append(existing.rsplit("/", 1)[-1])

In [None]:
'''

Read 4khz segy files using obspy, downsample, and write

'''

# values handed to downsample_file for every input file
nx_old = 120000
nx_new = 30000
fs_new = 1000

# work list: every 4 kHz file whose base name is not already in the
# listing of existing outputs
arg_list = [
    path for path in files_4khz
    if path.rsplit("/", 1)[-1] not in out_files
]

# process the files one after another (no parallelism)
for path in arg_list:
    downsample_file(path, out_path, nx_old, nx_new, fs_new)

In [3]:
# glob patterns for the natively 1 kHz files and for the resampled files
# NOTE(review): this pattern points at ".../greenland/data/resampled/",
# whereas the resampling step's out_path is ".../greenland/resampled/"
# (no "data/") - confirm which location actually holds the files.
path_1khz = "/1-fnp/petasaur/p-wd03/greenland/Store Glacier DAS data/1kHz/*"
path_resampled = "/1-fnp/pnwstore1/p-wd05/greenland/data/resampled/*"

# list both sets of files
files_1khz = glob.glob(path_1khz)
files_resampled = glob.glob(path_resampled)

# one combined list, ordered by plain string comparison of the full paths
files_1khz_all = sorted(files_1khz + files_resampled)

# set output path
# NOTE(review): this rebinds out_path, which the resampling cells also use;
# re-running them after this cell would write into this directory instead.
out_path = "/fd1/solinger/"

In [None]:
'''

Combine files into mseed files for each channel for the entire dataset

'''

# choose which channels to write continuous files for
# (every 10th index from start_channel; np.arange stops before end_channel)
start_channel = 331
end_channel = 1362
channels = np.arange(start_channel,end_channel,10)

# make empty stream to fill
st = obspy.Stream()

# iterate through each 30 second file
for f in files_1khz_all:
    try:
        # read in all channels for current file
        st_tmp = _read_segy(f)

        # pick out and label every requested channel BEFORE touching the
        # cumulative stream, so a file that fails part-way through (e.g. it
        # holds fewer traces than expected) contributes nothing instead of
        # an incomplete subset of channels
        selected = []
        for channel in channels:
            tr = st_tmp[channel]
            tr.stats.station = str(channel)
            selected.append(tr)
    except Exception as err:
        # Exception rather than a bare except: a kernel interrupt / Ctrl-C
        # must still be able to stop this long-running loop
        print("Issue processing file " + f + ": " + repr(err))
        continue

    # add each channel to cumulative stream
    for tr in selected:
        st += tr

# merge into one stream, filling gaps between files with zeros
st.merge(fill_value=0)

# write a separate file for each channel
for channel in channels:
    st_channel = st.select(station=str(channel))
    # nothing was read for this channel (e.g. every file failed); skip it
    # rather than attempting to write an empty stream
    if len(st_channel) == 0:
        print("No data for channel " + str(channel) + ", skipping")
        continue
    st_channel.write(os.path.join(out_path, "channel_"+str(channel)+".mseed"),format="MSEED")

Issue processing file /1-fnp/petasaur/p-wd03/greenland/Store Glacier DAS data/1kHz/Greenland_iDAS15040_ContinuousAQ_190705120337.sgy
