# Welcome to the NoisePy Colab Tutorial!

This tutorial will walk you through the basic steps of using NoisePy to compute ambient noise cross correlation functions.


First, we install the noisepy-seis package

In [1]:
# Uncomment and run this line if the environment doesn't have noisepy already installed.
# The %pip magic is preferred over "! pip" because it installs into the environment of the running kernel:
# %pip install noisepy-seis

__Warning__: NoisePy uses ```obspy``` as a core Python module to manipulate seismic data. Restart the runtime now for proper installation of ```obspy``` on Colab.

Then we import the basic modules

In [2]:
import os
import shutil

from dateutil.parser import isoparse
from noisepy.seis import download, cross_correlate, stack, plotting_modules, __version__
from noisepy.seis.asdfstore import ASDFRawDataStore, ASDFCCStore, ASDFStackStore
from noisepy.seis.datatypes import ConfigParameters
# Report which NoisePy version this notebook is running against.
print(f"Using NoisePy version {__version__}")

# Root folder for everything this tutorial writes to disk; created on first run.
path = "./get_started_data"
os.makedirs(path, exist_ok=True)

# One sub-folder per workflow stage: raw downloads, cross-correlations, stacks.
# Only the path strings are built here; the folders themselves are not created by this cell.
raw_data_path, cc_data_path, stack_data_path = (
    os.path.join(path, stage_name) for stage_name in ("RAW_DATA", "CCF", "STACK")
)

  from .autonotebook import tqdm as notebook_tqdm


Using NoisePy version 0.9.72.dev12


## Ambient Noise Project Configuration

We store the metadata information about the ambient noise cross correlation workflow in a ConfigParameters() object. We first initialize it, then we tune the parameters for this cross correlation.

In [3]:
# Start from the default config parameters, then customize them for this tutorial.
# Each parameter is set exactly once so the cell has a single source of truth per value.
config = ConfigParameters()

# --- Time chunking and sampling ---
config.inc_hours = 12  # length (in hours) of each time chunk the data is stored and processed in
config.samp_freq = 20  # (int) Sampling rate in Hz of desired processing (it can be different than the data sampling rate)
config.cc_len = 3600.0  # (float) basic unit of data length for fft (sec)

# --- Criteria for data selection ---
config.ncomp = 3  # 1 or 3 component data (needed to decide whether to do rotation)

config.acorr_only = False  # only perform auto-correlation or not
config.xcorr_only = True  # only perform cross-correlation or not

# Geographic bounding box and network codes
config.lamin = 31  # min latitude
config.lamax = 42  # max latitude
config.lomin = -124  # min longitude
config.lomax = -115  # max longitude
config.net_list = ["*"]  # look for all network codes

# --- Pre-processing parameters ---
config.step = 1800.0  # (float) overlapping between each cc_len (sec)
config.stationxml = False  # station.XML file used to remove instrument response for SAC/miniseed data
config.rm_resp = "inv"  # select 'no' to not remove response and use 'inv' if you use the stationXML, 'spectrum', ...
config.freqmin = 0.05
config.freqmax = 2.0
config.max_over_std = 10  # threshold to remove window of bad signals: set it to 10*9 if you prefer not to remove them

# --- Temporal and spectral normalisation ---
config.freq_norm = "rma"  # choose between "rma" for a soft whitening or "no" for no whitening. Pure whitening is not implemented correctly at this point.
# here, choose smoothspect_N for the case of a strict whitening (e.g., phase_only)
config.smoothspect_N = 10  # moving window length to smooth spectrum amplitude (points)

# TODO: change time_norm option from "no" to "None"
config.time_norm = "no"  # 'no' for no normalization, or 'rma', 'one_bit' for normalization in time domain
config.smooth_N = 10  # moving window length for time domain normalization if selected (points)

# FOR "COHERENCY" PLEASE set freq_norm to "rma", time_norm to "no" and cc_method to "xcorr"
config.cc_method = "xcorr"  # 'xcorr' for pure cross correlation OR 'deconv' for deconvolution

# --- Outputs ---
config.substack = True  # True = smaller stacks within the time chunk. False: it will stack over inc_hours
# if substack=True, substack_len=2*cc_len, then you pre-stack every 2 correlation windows.
# for instance: substack=True, substack_len=cc_len means that you keep ALL of the correlations
config.substack_len = config.cc_len  # how long to stack over (for monitoring purpose): needs to be a multiple of cc_len

config.maxlag = 200  # lags of cross-correlation to save (sec)

## Step 0: download data


This step will download data using obspy and save them into ASDF files locally. The data will be stored for each time chunk defined in hours by inc_hours.

The download will clean up the raw data by detrending, removing the mean, bandpassing (broadly), removing the instrumental response, merging gaps, ignoring too-gappy data.

Use the function ```download``` with the following arguments: 
* ```path```: where to put the data
* ```config```: configuration settings, in particular:
    * ```channels```: list of the seismic channels to download; an example is shown below
    * ```stations```: list of the seismic stations; it can be "\*" (not "all") 
    * ```start_date```
    * ```end_date```
    * ```client_url_key```: the string for FDSN clients


In [4]:
# Select what to download: stations matching "A*", three BH channels, and a one-day window.
config.stations = ["A*"]
config.channels =  ["BHE","BHN","BHZ"]
config.start_date =  isoparse("2019-02-01")
config.end_date = isoparse("2019-02-02")

# Download data locally. The call takes the raw data path and the config;
# the station, channel and date selections above are carried by the config.
download(raw_data_path, config)

2023-09-10 15:11:26,414 4302980480 INFO S0A_download_ASDF_MPI.download(): Download
        From: 2019-02-01T00:00:00.000000Z
        To: 2019-02-02T00:00:00.000000Z
        Stations: ['A*']
        Channels: ['BHE', 'BHN', 'BHZ']
        
2023-09-10 15:11:30,849 4302980480 INFO S0A_download_ASDF_MPI.download(): Fetched inventory
2023-09-10 15:11:30,850 4302980480 INFO utils.log_raw(): TIMING: 4.4360 secs. for Getting inventory
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ADO.BHN)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ADO.BHZ)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ALP.BHE)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ARV.BHN)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ARV.BHZ)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(ARV.BHE)
HTTP Status code: 500
Detailed response of server:

 for get_waveforms(AVM.BHN)
HTTP Stat

List the files that were downloaded, just to make sure !

In [5]:
# Show the contents of the raw data folder written by the download step.
print(os.listdir(raw_data_path))

['station.csv', '2019_02_01_00_00_00T2019_02_01_12_00_00.h5', '2019_02_01_12_00_00T2019_02_02_00_00_00.h5']


Plot the raw data, make sure it's noise!

In [6]:
# Plot the waveforms of one station from the first downloaded time chunk.
# plot_waveform takes for input: filename, network, station, freqmin, freqmax for a bandpass filter
file = os.path.join(raw_data_path, "2019_02_01_00_00_00T2019_02_01_12_00_00.h5")
plotting_modules.plot_waveform(file,'CI','ADO',0.01,0.4)

## Step 1: Cross-correlation

This step will perform the cross correlation. For each time chunk, it will read the data, perform classic ambient noise pre-processing (time and frequency normalization), FFT, cross correlation, and substacking, then save the cross correlations into a temp ASDF file (this is not fast and will be improved).


In [7]:
# For this tutorial make sure the previous run is empty.
# shutil.rmtree is used instead of shelling out to `rm -rf`: it does not depend on a Unix shell
# and is unaffected by spaces or shell-special characters in the path.
# ignore_errors=True keeps the `-f` behaviour of not failing when the folder does not exist yet.
shutil.rmtree(cc_data_path, ignore_errors=True)

0

In [8]:
# Spectral normalisation method used for the cross-correlation run below.
config.freq_norm = "rma"
raw_store = ASDFRawDataStore(raw_data_path) # Store for reading raw data
cc_store = ASDFCCStore(cc_data_path) # Store for writing CC data

# print the configuration parameters. Some are chosen by default but we can modify them
print(config)

client_url_key='SCEDC' start_date=datetime.datetime(2019, 2, 1, 0, 0) end_date=datetime.datetime(2019, 2, 2, 0, 0) samp_freq=20 single_freq=True cc_len=3600.0 lamin=31 lamax=42 lomin=-124 lomax=-115 down_list=False net_list=['*'] stations=['A*'] channels=['BHE', 'BHN', 'BHZ'] step=1800.0 freqmin=0.05 freqmax=2.0 freq_norm='rma' time_norm='no' cc_method='xcorr' smooth_N=10 smoothspect_N=10 substack=True substack_len=3600.0 maxlag=200 inc_hours=12 max_over_std=10 ncomp=3 stationxml=False rm_resp='inv' rm_resp_out='VEL' respdir='./get_started_data/RAW_DATA/../resp' acorr_only=False xcorr_only=True stack_method=<StackMethod.LINEAR: 'linear'> keep_substack=False rotation=True correction=False correction_csv=None storage_options=defaultdict(<class 'dict'>, {})


Perform the cross correlation

In [9]:
# Cross-correlate the data read through raw_store, using config, and write the results through cc_store.
cross_correlate(raw_store, config, cc_store)

2023-09-10 15:11:41,069 4302980480 INFO S1_fft_cc_MPI.cross_correlate(): Starting Cross-Correlation with 10 cores
2023-09-10 15:11:41,088 4302980480 INFO utils.log_raw(): TIMING CC Main: 0.0181 secs. for get 6 channels
2023-09-10 15:11:41,089 4302980480 INFO S1_fft_cc_MPI.cross_correlate(): Checking for stations already done: 6 pairs
2023-09-10 15:11:41,093 4302980480 INFO utils.log_raw(): TIMING CC Main: 0.0043 secs. for check for stations already done
2023-09-10 15:11:41,094 4302980480 INFO S1_fft_cc_MPI.cross_correlate(): Still need to process: 3/3 stations, 6/6 channels, 6/6 pairs for 2019-02-01T00:00:00+0000 - 2019-02-01T12:00:00+0000
Read channel data. Memory:  1026 MB: 100%|██████████| 6/6 [00:00<00:00, 24.08it/s]
2023-09-10 15:11:41,359 4302980480 INFO S1_fft_cc_MPI._filter_channel_data(): Picked 20.0 as the closest sampling frequence to 20. 
2023-09-10 15:11:41,359 4302980480 INFO S1_fft_cc_MPI._filter_channel_data(): Filtered to 6/6 channels with sampling rate == 20.0
2023-09

Plot a single set of the cross correlation

In [10]:
# Ask the CC store which time chunks it holds and plot the first one.
timespans = cc_store.get_timespans()
# NOTE(review): the trailing arguments (0.1, 1, 200, False) are presumably freqmin, freqmax,
# the lag to display and a save-figure flag — confirm against plotting_modules.plot_substack_cc.
plotting_modules.plot_substack_cc(cc_store, timespans[0], 0.1, 1, 200, False)

2023-09-10 15:11:45,447 4302980480 ERROR plotting_modules.plot_substack_cc(): No data available for plotting in 2019-02-01T00:00:00+0000 - 2019-02-01T12:00:00+0000/(CI.ALP, CI.ARV)


## Step 2: Stack the cross correlation

This combines the time-chunked ASDF files to stack over each time chunk and at each station pair.

In [11]:
# open a new cc store in read-only mode since we will be doing parallel access for stacking
cc_store = ASDFCCStore(cc_data_path, mode="r")
# Store rooted at the stack output folder; it is passed to stack() alongside the CC store.
stack_store = ASDFStackStore(stack_data_path)
stack(cc_store, stack_store, config)

2023-09-10 15:11:45,455 4302980480 INFO S2_stacking.initializer(): Station pairs: [(CI.ALP, CI.ARV), (CI.ALP, CI.ALP), (CI.ADO, CI.ARV), (CI.ADO, CI.ADO), (CI.ALP, CI.AVM), (CI.ADO, CI.ALP), (CI.AVM, CI.AVM), (CI.ARV, CI.ARV), (CI.ADO, CI.AVM), (CI.ARV, CI.AVM)], timespans:[2019-02-01T00:00:00+0000 - 2019-02-01T12:00:00+0000, 2019-02-01T12:00:00+0000 - 2019-02-02T00:00:00+0000]
2023-09-10 15:11:48,581 4337485184 INFO utils.log_raw(): TIMING: 0.0145 secs. for loading CCF data
2023-09-10 15:11:48,807 4338697600 INFO utils.log_raw(): TIMING: 0.0075 secs. for loading CCF data
2023-09-10 15:11:48,808 4338697600 INFO utils.log_raw(): TIMING: 0.0013 secs. for stack/rotate all station pairs (CI.ADO, CI.ARV)
2023-09-10 15:11:48,854 4342891904 INFO utils.log_raw(): TIMING: 0.0045 secs. for loading CCF data
2023-09-10 15:11:48,855 4342891904 INFO utils.log_raw(): TIMING: 0.0009 secs. for stack/rotate all station pairs (CI.ADO, CI.ALP)
2023-09-10 15:11:48,866 4311467392 INFO utils.log_raw(): TIMIN

List the output files, then plot the stacks

In [12]:
# Show the contents of the cross-correlation and stack output folders.
print(os.listdir(cc_data_path))
print(os.listdir(stack_data_path))

['2019_02_01_00_00_00T2019_02_01_12_00_00.h5', '2019_02_01_12_00_00T2019_02_02_00_00_00.h5']
['CI.ALP', 'CI.ADO', 'CI.ARV', 'CI.AVM']


In [13]:
# NOTE(review): the arguments after the store are presumably the stack name, freqmin, freqmax,
# the component pair and a distance increment — confirm against plotting_modules.plot_all_moveout.
plotting_modules.plot_all_moveout(stack_store, 'Allstack_linear', 0.1, 0.2, 'ZZ', 1)

2023-09-10 15:11:49,076 4302980480 ERROR plotting_modules.plot_all_moveout(): No data available for plotting Allstack_linear/ZZ
