## Visualize selected call samples

### A selected Fin Whale Detection

In [None]:
from obspy import read, UTCDateTime

# Plot the band-passed waveform and spectrogram of one hand-picked fin whale
# detection, read from the raw SAC file of the matching day/station/channel.
data_path = '/network/projects/aia/whale_call/RAW/'
station_name = 'ICQ'
channel = 'HHN'
call_t1 = UTCDateTime("20201029051032")  # nominal start of the detection
det_win = 120  # length of the displayed window (added to a UTCDateTime, i.e. seconds)

# Calendar date of the detection; both the directory and the file name use it.
data_year, data_month, data_day = call_t1.year, call_t1.month, call_t1.day
date_string = '{}{:02d}{:02d}'.format(data_year, data_month, data_day)
day_dir = data_path + date_string + '/'
day_prefix = '{}.{:02d}.{:02d}.CN'.format(data_year, data_month, data_day)

# Wildcard pattern for this station; it is assigned but not used in this cell.
file_patterns = day_dir + day_prefix + '*' + station_name + '*Z*.SAC'

st = read(day_dir + day_prefix + '.{}..{}.SAC'.format(station_name, channel))
# st.taper(max_percentage=0.,type='cosine')
st.filter("bandpass", freqmin=12, freqmax=32, zerophase=True)

# Shift the detection time by twice the difference between the start of the
# day (T0) and the start time of the first trace (T1).
# NOTE(review): the reason for the factor 2 is not visible here -- confirm.
T0 = UTCDateTime(date_string)
T1 = st[0].stats.starttime
rel_t = 2 * (T0 - T1)
call_t1 = call_t1 - rel_t
call_t2 = call_t1 + det_win

# Cut the detection window out of the filtered stream and display it.
st_sliced = st.slice(starttime=call_t1, endtime=call_t2)
st_sliced.plot()
st_sliced.spectrogram();



### A selected BW Detection

In [None]:
from obspy import read, UTCDateTime

# Plot the band-passed waveform and spectrogram of one hand-picked BW
# detection, read from the raw SAC file of the matching day/station/channel.
data_path = '/network/projects/aia/whale_call/RAW/'
station_name = 'SNFQ'
channel = 'HHZ'
call_t1 = UTCDateTime("20210822063600")  # nominal start of the detection
det_win = 720  # length of the displayed window (added to a UTCDateTime, i.e. seconds)

# Calendar date of the detection; both the directory and the file name use it.
data_year, data_month, data_day = call_t1.year, call_t1.month, call_t1.day
date_string = '{}{:02d}{:02d}'.format(data_year, data_month, data_day)
day_dir = data_path + date_string + '/'
day_prefix = '{}.{:02d}.{:02d}.CN'.format(data_year, data_month, data_day)

st = read(day_dir + day_prefix + '.{}..{}.SAC'.format(station_name, channel))
# st.taper(max_percentage=0.,type='cosine')
st.filter("bandpass", freqmin=12, freqmax=32, zerophase=True)

# Shift the detection time by twice the difference between the start of the
# day (T0) and the start time of the first trace (T1).
# NOTE(review): the reason for the factor 2 is not visible here -- confirm.
T0 = UTCDateTime(date_string)
T1 = st[0].stats.starttime
rel_t = 2 * (T0 - T1)
call_t1 = call_t1 - rel_t
call_t2 = call_t1 + det_win

# Cut the detection window out of the filtered stream and display it.
st_sliced = st.slice(starttime=call_t1, endtime=call_t2)
st_sliced.plot()
st_sliced.spectrogram();



## Visualize random samples from raw data

### FW Calls

#### Quality metric visualization

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt

# Load the FW label table and draw side-by-side histograms of its two quality
# columns, R and SNR.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'
fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for panel, metric in zip(ax, ('R', 'SNR')):
    panel.hist(fw_filt[metric], bins=100)
    panel.set_xlabel(metric)
    panel.set_ylabel('Count')
plt.show()



In [None]:
# Print the 0.1, 0.25, 0.5, 0.75 and 0.9 quantiles of R, then of SNR.
for metric in ('R', 'SNR'):
    print(fw_filt[metric].quantile([0.1, 0.25, 0.5, 0.75, 0.9]))

In [None]:
## Scatter plot of SNR against R, one point per row of fw_filt
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(fw_filt['R'], fw_filt['SNR'])
ax.set(xlabel='R', ylabel='SNR')
plt.show()

#### A random FWC with high quality

In [None]:
# Thresholds are set here so the cell also works on a top-to-bottom run of a
# fresh kernel; the values match the high-quality cell that follows.
R0 = 5
SNR0 = 5
# Number of rows with R > R0 and SNR > SNR0, divided by 3.
# NOTE(review): the divisor presumably stands for three components per call -- confirm.
len(fw_filt[(fw_filt['R'] > R0) & (fw_filt['SNR'] > SNR0)]) / 3

In [None]:
# Plot one randomly chosen FW call whose R and SNR both exceed the thresholds:
# the waveform with the R-max time marked, then the spectrogram with a
# +/- 0.5 s bracket around that time.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'
fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')
R0 = 5    # rows are kept only if R is above this value
SNR0 = 5  # rows are kept only if SNR is above this value

# Build the selection once and reuse it for the printed fraction and the draw.
high_quality = fw_filt[(fw_filt['R'] > R0) & (fw_filt['SNR'] > SNR0)]
print(len(high_quality) / len(fw_filt))
if high_quality.empty:
    raise ValueError(
        'No row of fw_filt.csv has R > {} and SNR > {}'.format(R0, SNR0))
fw_data_sample = high_quality.sample(n=1).iloc[0]

sac_file = fw_data_sample['file_path']
t0 = UTCDateTime(fw_data_sample['time_window_start'])
t1 = UTCDateTime(fw_data_sample['time_window_end'])
call_t = UTCDateTime(fw_data_sample['time_R_max'])
call_offset = call_t - t0  # seconds between the window start and the R-max time

st = read(sac_file)
st_sliced = st.slice(starttime=t0, endtime=t1)
data_len = len(st_sliced[0].data)

# Waveform: the time axis uses the trace's own sample spacing instead of a
# hard-coded 0.01 s, so it stays correct for any sampling rate.
fig, ax = plt.subplots(figsize=(12, 2))
ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
ax.axvline(x=call_offset, color='red')

# Spectrogram with the bracket around the R-max time.
fig, ax2 = plt.subplots(figsize=(5, 4))
st_sliced.spectrogram(axes=ax2);
ax2.axvline(x=call_offset - 0.5, color='green')
ax2.axvline(x=call_offset + 0.5, color='green')


#### A random FWC with low quality

In [None]:
# Plot one randomly chosen FW call whose R and SNR are both below the
# thresholds: the waveform with the R-max time marked, then the spectrogram.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'
fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')
R0 = 1    # rows are kept only if R is below this value
SNR0 = 1  # rows are kept only if SNR is below this value

# Build the selection once and reuse it for the printed fraction and the draw.
low_quality = fw_filt[(fw_filt['R'] < R0) & (fw_filt['SNR'] < SNR0)]
print(len(low_quality) / len(fw_filt))
if low_quality.empty:
    raise ValueError(
        'No row of fw_filt.csv has R < {} and SNR < {}'.format(R0, SNR0))
fw_data_sample = low_quality.sample(n=1).iloc[0]

sac_file = fw_data_sample['file_path']
t0 = UTCDateTime(fw_data_sample['time_window_start'])
t1 = UTCDateTime(fw_data_sample['time_window_end'])
call_t = UTCDateTime(fw_data_sample['time_R_max'])

st = read(sac_file)
st_sliced = st.slice(starttime=t0, endtime=t1)
data_len = len(st_sliced[0].data)

# Waveform: the time axis uses the trace's own sample spacing instead of a
# hard-coded 0.01 s, so it stays correct for any sampling rate.
fig, ax = plt.subplots(figsize=(12, 2))
ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
ax.axvline(x=call_t - t0, color='red')
st_sliced.spectrogram();



#### A random FW detection with high quality

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt

# Plot every component of one randomly chosen FW detection group whose
# group-mean R and group-mean SNR both exceed the thresholds below.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'
R0 = 5    # lower bound on the group-mean R
SNR0 = 5  # lower bound on the group-mean SNR
fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')

# Mean SNR and mean R of each group_id, repeated on every row of the group.
fw_filt['SNR_avg'] = fw_filt.groupby('group_id')['SNR'].transform('mean')
fw_filt['R_avg'] = fw_filt.groupby('group_id')['R'].transform('mean')
# Both means are constant within a group, so one combined mask keeps or drops
# whole groups. Filtering once and copying avoids assigning a column on a
# slice of the table.
fw_filt = fw_filt[(fw_filt['SNR_avg'] > SNR0) & (fw_filt['R_avg'] > R0)].copy()

## Get a random detection group
fw_detection_group = fw_filt['group_id'].unique()
if len(fw_detection_group) == 0:
    raise ValueError(
        'No detection group has mean R > {} and mean SNR > {}'.format(R0, SNR0))
group_id = np.random.choice(fw_detection_group)

fw_data_sample = fw_filt[fw_filt['group_id'] == group_id]

# The display window and the call times are shared by all components of the
# group, so they are computed once, before the loop.
## Earliest time_window_start within the group
t0 = UTCDateTime(fw_data_sample['time_window_start'].min())
## Latest time_window_end within the group
t1 = UTCDateTime(fw_data_sample['time_window_end'].max())
## Unique call start times within the group
call_list = fw_data_sample['time_call_start'].unique()

for component in fw_data_sample.component.unique():

    one_component = fw_data_sample[fw_data_sample.component == component].copy()
    # Take the file from the rows of THIS component, so the plotted trace
    # matches the component named in the title.
    sac_file = one_component.sample(n=1).iloc[0]['file_path']

    st = read(sac_file)
    st_sliced = st.slice(starttime=t0, endtime=t1)
    data_len = len(st_sliced[0].data)

    # Waveform; the time axis uses the trace's own sample spacing.
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
    ax.set_title('Filtered SAC - Component: {}'.format(component))
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax.axvline(x=call_t - t0, color='red')

    # Spectrogram; each call is marked at its start (grey) and 0.5 s later (green).
    fig, ax2 = plt.subplots(figsize=(8, 4))
    st_sliced.spectrogram(axes=ax2);
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax2.axvline(x=call_t - t0, color='grey')
        ax2.axvline(x=call_t + 0.5 - t0, color='green')

In [None]:
# Display the rows of the detection group selected in the previous cell.
fw_data_sample

#### A random FW Detection with low quality

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt

# Plot every component of one randomly chosen FW detection group whose
# group-mean R and group-mean SNR are both below the thresholds below.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'
R0 = 1    # upper bound on the group-mean R
SNR0 = 1  # upper bound on the group-mean SNR
fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')

# Mean SNR and mean R of each group_id, repeated on every row of the group.
fw_filt['SNR_avg'] = fw_filt.groupby('group_id')['SNR'].transform('mean')
fw_filt['R_avg'] = fw_filt.groupby('group_id')['R'].transform('mean')
# Both means are constant within a group, so one combined mask keeps or drops
# whole groups. Filtering once and copying avoids assigning a column on a
# slice of the table.
fw_filt = fw_filt[(fw_filt['SNR_avg'] < SNR0) & (fw_filt['R_avg'] < R0)].copy()

## Get a random detection group
fw_detection_group = fw_filt['group_id'].unique()
if len(fw_detection_group) == 0:
    raise ValueError(
        'No detection group has mean R < {} and mean SNR < {}'.format(R0, SNR0))
group_id = np.random.choice(fw_detection_group)

fw_data_sample = fw_filt[fw_filt['group_id'] == group_id]

# The display window and the call times are shared by all components of the
# group, so they are computed once, before the loop.
## Earliest time_window_start within the group
t0 = UTCDateTime(fw_data_sample['time_window_start'].min())
## Latest time_window_end within the group
t1 = UTCDateTime(fw_data_sample['time_window_end'].max())
## Unique call start times within the group
call_list = fw_data_sample['time_call_start'].unique()

for component in fw_data_sample.component.unique():

    one_component = fw_data_sample[fw_data_sample.component == component].copy()
    # Take the file from the rows of THIS component, so the plotted trace
    # matches the component named in the title.
    sac_file = one_component.sample(n=1).iloc[0]['file_path']

    st = read(sac_file)
    st_sliced = st.slice(starttime=t0, endtime=t1)
    data_len = len(st_sliced[0].data)

    # Waveform; the time axis uses the trace's own sample spacing.
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
    ax.set_title('Filtered SAC - Component: {}'.format(component))
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax.axvline(x=call_t - t0, color='red')

    # Spectrogram; each call is marked at its start (grey) and 0.5 s later (green).
    fig, ax2 = plt.subplots(figsize=(8, 4))
    st_sliced.spectrogram(axes=ax2);
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax2.axvline(x=call_t - t0, color='grey')
        ax2.axvline(x=call_t + 0.5 - t0, color='green')

### A selected FW detection

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt

# Plot every component of one hand-picked FW detection group.
fw_path = '/network/projects/aia/whale_call/LABELS/FW/'

fw_filt = pd.read_csv(fw_path + 'fw_filt.csv')

## Select the detection group to display (fixed id, not random)
group_id = 2395
fw_data_sample = fw_filt[fw_filt['group_id'] == group_id]
if fw_data_sample.empty:
    # Fail with a clear message instead of silently plotting nothing.
    raise ValueError('group_id {} not found in fw_filt.csv'.format(group_id))

# The display window and the call times are shared by all components of the
# group, so they are computed once, before the loop.
## Earliest time_window_start within the group
t0 = UTCDateTime(fw_data_sample['time_window_start'].min())
## Latest time_window_end within the group
t1 = UTCDateTime(fw_data_sample['time_window_end'].max())
## Unique call start times within the group
call_list = fw_data_sample['time_call_start'].unique()

for component in fw_data_sample.component.unique():

    one_component = fw_data_sample[fw_data_sample.component == component].copy()
    # Take the file from the rows of THIS component, so the plotted trace
    # matches the component named in the title.
    sac_file = one_component.sample(n=1).iloc[0]['file_path']

    st = read(sac_file)
    st_sliced = st.slice(starttime=t0, endtime=t1)
    data_len = len(st_sliced[0].data)

    # Waveform; the time axis uses the trace's own sample spacing. Each call
    # is marked at its start (red) and 0.5 s later (green).
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax.axvline(x=call_t - t0, color='red')
        ax.axvline(x=call_t + 0.5 - t0, color='green')
    ax.set_title('Filtered SAC - Component: {}'.format(component))

    # Spectrogram; each call is marked at its start (grey) and 0.5 s later (green).
    fig, ax2 = plt.subplots(figsize=(8, 4))
    st_sliced.spectrogram(axes=ax2);
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax2.axvline(x=call_t - t0, color='grey')
        ax2.axvline(x=call_t + 0.5 - t0, color='green')


### BW

#### Quality Metric Visualization

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt

# Load the BW label table and draw side-by-side histograms of its two quality
# columns, R and SNR. (The path variable keeps its original name, fw_path.)
fw_path = '/network/projects/aia/whale_call/LABELS/BW/'
bw_filt = pd.read_csv(fw_path + 'bw_filt.csv')

fig, ax = plt.subplots(1, 2, figsize=(7, 3))
for panel, metric in zip(ax, ('R', 'SNR')):
    panel.hist(bw_filt[metric], bins=100)
    panel.set_xlabel(metric)
    panel.set_ylabel('Count')
plt.show()



In [None]:
# Print the 0.1, 0.25, 0.5, 0.75 and 0.9 quantiles of R, then of SNR.
for metric in ('R', 'SNR'):
    print(bw_filt[metric].quantile([0.1, 0.25, 0.5, 0.75, 0.9]))

In [None]:
## a heatmap showing the number of data points in each bin of R and SNR
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.scatter(bw_filt['R'],bw_filt['SNR'])
ax.set_xlabel('R')
ax.set_ylabel('SNR')
plt.show()

#### A random BWC with high quality

In [None]:
# Plot one randomly chosen BW call whose R and SNR both exceed the thresholds:
# the waveform with the R-max time marked, then the spectrogram with a
# +/- 4 s bracket around that time.
fw_path = '/network/projects/aia/whale_call/LABELS/BW/'

bw_filt = pd.read_csv(fw_path + 'bw_filt.csv')
R0 = 5    # rows are kept only if R is above this value
SNR0 = 5  # rows are kept only if SNR is above this value

# Build the selection once and reuse it for the printed fraction and the draw.
high_quality = bw_filt[(bw_filt['R'] > R0) & (bw_filt['SNR'] > SNR0)]
print(len(high_quality) / len(bw_filt))
if high_quality.empty:
    raise ValueError(
        'No row of bw_filt.csv has R > {} and SNR > {}'.format(R0, SNR0))
bw_data_sample = high_quality.sample(n=1).iloc[0]

sac_file = bw_data_sample['file_path']
t0 = UTCDateTime(bw_data_sample['time_window_start'])
t1 = UTCDateTime(bw_data_sample['time_window_end'])
call_t = UTCDateTime(bw_data_sample['time_R_max'])
call_offset = call_t - t0  # seconds between the window start and the R-max time

st = read(sac_file)
st_sliced = st.slice(starttime=t0, endtime=t1)
data_len = len(st_sliced[0].data)

# Waveform: the time axis uses the trace's own sample spacing instead of a
# hard-coded 0.01 s, so it stays correct for any sampling rate.
fig, ax = plt.subplots(figsize=(12, 2))
ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
ax.axvline(x=call_offset, color='red')

# Spectrogram with the bracket around the R-max time.
fig, ax2 = plt.subplots(figsize=(8, 4))
st_sliced.spectrogram(axes=ax2);
ax2.axvline(x=call_offset - 4, color='green')
ax2.axvline(x=call_offset + 4, color='green')


#### A random BWC with low quality

In [None]:

# Plot one randomly chosen BW call whose R and SNR are both below the
# thresholds: the waveform with the R-max time marked, then the spectrogram
# with markers 4 s before (grey) and 4 s after (green) that time.
fw_path = '/network/projects/aia/whale_call/LABELS/BW/'
bw_filt = pd.read_csv(fw_path + 'bw_filt.csv')
R0 = 1    # rows are kept only if R is below this value
SNR0 = 1  # rows are kept only if SNR is below this value

# Build the selection once and reuse it for the printed fraction and the draw.
low_quality = bw_filt[(bw_filt['R'] < R0) & (bw_filt['SNR'] < SNR0)]
print(len(low_quality) / len(bw_filt))
if low_quality.empty:
    raise ValueError(
        'No row of bw_filt.csv has R < {} and SNR < {}'.format(R0, SNR0))
bw_data_sample = low_quality.sample(n=1).iloc[0]

sac_file = bw_data_sample['file_path']
t0 = UTCDateTime(bw_data_sample['time_window_start'])
t1 = UTCDateTime(bw_data_sample['time_window_end'])
call_t = UTCDateTime(bw_data_sample['time_R_max'])
call_offset = call_t - t0  # seconds between the window start and the R-max time

st = read(sac_file)
st_sliced = st.slice(starttime=t0, endtime=t1)
data_len = len(st_sliced[0].data)

# Waveform: the time axis uses the trace's own sample spacing instead of a
# hard-coded 0.01 s, so it stays correct for any sampling rate.
fig, ax = plt.subplots(figsize=(12, 2))
ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
ax.axvline(x=call_offset, color='red')

fig, ax2 = plt.subplots(figsize=(8, 4))
st_sliced.spectrogram(axes=ax2);
ax2.axvline(x=call_offset - 4, color='grey')
ax2.axvline(x=call_offset + 4, color='green')

#### A random BW detection with high quality

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt  # used below; imported here like the FW cells do

# Plot every component of one randomly chosen BW detection group whose
# group-mean R and group-mean SNR both exceed the thresholds below.
fw_path = '/network/projects/aia/whale_call/LABELS/BW/'

bw_filt = pd.read_csv(fw_path + 'bw_filt.csv')
R0 = 5    # lower bound on the group-mean R
SNR0 = 5  # lower bound on the group-mean SNR

# Mean SNR and mean R of each group_id, repeated on every row of the group.
bw_filt['SNR_avg'] = bw_filt.groupby('group_id')['SNR'].transform('mean')
bw_filt['R_avg'] = bw_filt.groupby('group_id')['R'].transform('mean')
# Both means are constant within a group, so one combined mask keeps or drops
# whole groups. Filtering once and copying avoids assigning a column on a
# slice of the table.
bw_filt = bw_filt[(bw_filt['SNR_avg'] > SNR0) & (bw_filt['R_avg'] > R0)].copy()

## Get a random detection group
bw_detection_group = bw_filt['group_id'].unique()
if len(bw_detection_group) == 0:
    raise ValueError(
        'No detection group has mean R > {} and mean SNR > {}'.format(R0, SNR0))
group_id = np.random.choice(bw_detection_group)

bw_data_sample = bw_filt[bw_filt['group_id'] == group_id]

# The display window and the call times are shared by all components of the
# group, so they are computed once, before the loop.
## Earliest time_window_start within the group
t0 = UTCDateTime(bw_data_sample['time_window_start'].min())
## Latest time_window_end within the group
t1 = UTCDateTime(bw_data_sample['time_window_end'].max())
## Unique time_R_max values within the group
call_list = bw_data_sample['time_R_max'].unique()

for component in bw_data_sample.component.unique():

    one_component = bw_data_sample[bw_data_sample.component == component].copy()
    # Take the file from the rows of THIS component, so the plotted trace
    # matches the component named in the title.
    sac_file = one_component.sample(n=1).iloc[0]['file_path']

    st = read(sac_file)
    st_sliced = st.slice(starttime=t0, endtime=t1)
    data_len = len(st_sliced[0].data)

    # Waveform; the time axis uses the trace's own sample spacing.
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
    ax.set_title('Filtered SAC - Component: {}'.format(component))
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax.axvline(x=call_t - t0, color='red')

    # Spectrogram; each R-max time is bracketed 4 s before (grey) and 4 s after (green).
    fig, ax2 = plt.subplots(figsize=(8, 4))
    st_sliced.spectrogram(axes=ax2);
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax2.axvline(x=call_t - t0 - 4, color='grey')
        ax2.axvline(x=call_t + 4 - t0, color='green')

#### A random BW detection with low quality

In [None]:
import pandas as pd
import numpy as np
from obspy import read, UTCDateTime
import matplotlib.pyplot as plt  # used below; imported here like the FW cells do

# Plot every component of one randomly chosen BW detection group whose
# group-mean R and group-mean SNR are both below the thresholds below.
fw_path = '/network/projects/aia/whale_call/LABELS/BW/'

bw_filt = pd.read_csv(fw_path + 'bw_filt.csv')
R0 = 1    # upper bound on the group-mean R
SNR0 = 1  # upper bound on the group-mean SNR

# Mean SNR and mean R of each group_id, repeated on every row of the group.
bw_filt['SNR_avg'] = bw_filt.groupby('group_id')['SNR'].transform('mean')
bw_filt['R_avg'] = bw_filt.groupby('group_id')['R'].transform('mean')
# Both means are constant within a group, so one combined mask keeps or drops
# whole groups. Filtering once and copying avoids assigning a column on a
# slice of the table.
bw_filt = bw_filt[(bw_filt['SNR_avg'] < SNR0) & (bw_filt['R_avg'] < R0)].copy()

## Get a random detection group
bw_detection_group = bw_filt['group_id'].unique()
if len(bw_detection_group) == 0:
    raise ValueError(
        'No detection group has mean R < {} and mean SNR < {}'.format(R0, SNR0))
group_id = np.random.choice(bw_detection_group)

bw_data_sample = bw_filt[bw_filt['group_id'] == group_id]

# The display window and the call times are shared by all components of the
# group, so they are computed once, before the loop.
## Earliest time_window_start within the group
t0 = UTCDateTime(bw_data_sample['time_window_start'].min())
## Latest time_window_end within the group
t1 = UTCDateTime(bw_data_sample['time_window_end'].max())
## Unique time_R_max values within the group
call_list = bw_data_sample['time_R_max'].unique()

for component in bw_data_sample.component.unique():

    one_component = bw_data_sample[bw_data_sample.component == component].copy()
    # Take the file from the rows of THIS component, so the plotted trace
    # matches the component named in the title.
    sac_file = one_component.sample(n=1).iloc[0]['file_path']

    st = read(sac_file)
    st_sliced = st.slice(starttime=t0, endtime=t1)
    data_len = len(st_sliced[0].data)

    # Waveform; the time axis uses the trace's own sample spacing.
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.plot(np.arange(data_len) * st_sliced[0].stats.delta, st_sliced[0].data)
    ax.set_title('Filtered SAC - Component: {}'.format(component))
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax.axvline(x=call_t - t0, color='red')

    # Spectrogram; each R-max time is bracketed 4 s before (grey) and 4 s after (green).
    fig, ax2 = plt.subplots(figsize=(8, 4))
    st_sliced.spectrogram(axes=ax2);
    for call_t in call_list:
        call_t = UTCDateTime(call_t)
        ax2.axvline(x=call_t - t0 - 4, color='grey')
        ax2.axvline(x=call_t + 4 - t0, color='green')