### imports

In [None]:
# standard lib
import pathlib

# third party
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import scipy
import scipy.signal as signal
import pywt
import matplotlib.pyplot as plt

### file load

In [None]:
%%time
root_path = pathlib.Path('/kaggle/input/g2net-gravitational-wave-detection')
train_files = sorted(root_path.joinpath('train').glob('**/*.*'))
test_files = sorted(root_path.joinpath('test').glob('**/*.*'))
print(f"train_num: {len(train_files)}, test_num: {len(test_files)}")
sub_file = pd.read_csv(root_path.joinpath('sample_submission.csv'))
# print(sub_file.loc[:10,:])
train_labels = pd.read_csv(root_path.joinpath('training_labels.csv'))
print(f"label stats: \n{train_labels['target'].value_counts()}")

### label splitting

In [None]:
# Row labels of the samples belonging to each class.
# NOTE(review): later cells use these values as positions into the sorted
# `train_files` list, which presumes the label rows are in the same order as
# the sorted file paths -- confirm against the CSV.
is_target0 = train_labels["target"] == 0
is_target1 = train_labels["target"] == 1
train_target0_index = train_labels.index[is_target0]
train_target1_index = train_labels.index[is_target1]

### plot 3 samples

In [None]:
# Raw time series of the first three samples of each class:
# left column = target 0, right column = target 1, one row per sample.
detector_names = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
class_indices = (train_target0_index, train_target1_index)
title_suffixes = (" (target=0)", " (target=1)")

# make_subplots consumes titles row by row, so the two classes are interleaved.
subplot_titles = [
    str(train_files[class_index[row]].stem) + suffix
    for row in range(3)
    for class_index, suffix in zip(class_indices, title_suffixes)
]

fig = make_subplots(rows=3, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=subplot_titles)
colormap = px.colors.sequential.Turbo
colors = [colormap[2], colormap[6], colormap[11]]

for col, class_index in enumerate(class_indices, start=1):
    for row in range(3):
        sample = np.load(train_files[class_index[row]])
        # Transpose so that each detector becomes one DataFrame column.
        signal_df = pd.DataFrame(sample.transpose(), columns=detector_names)
        subfig = px.line(signal_df)

        # Copy the px traces into the shared grid with a fixed colour per detector.
        for k, trace in enumerate(subfig.data):
            restyled = go.Scatter(x=trace['x'], y=trace['y'], name=trace['name'], line=dict(color=colors[k]))
            fig.add_trace(restyled, row=row + 1, col=col)

fig.show()

### FFT analysis (full signal duration)
#### amplitude spectrum

In [None]:
# Log-amplitude spectrum of the first three samples of each class:
# left column = target 0, right column = target 1, one row per sample.
detector_names = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
class_indices = (train_target0_index, train_target1_index)

# Build the titles here rather than reusing the global `subplot_titles`,
# which other cells overwrite with different content.
fft_titles = [
    f"{train_files[class_index[row]].stem} (target={label})"
    for row in range(3)
    for label, class_index in enumerate(class_indices)
]

fig = make_subplots(rows=3, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=fft_titles)
colormap = px.colors.sequential.Turbo
colors = [colormap[2], colormap[6], colormap[11]]

for col, class_index in enumerate(class_indices, start=1):
    for row in range(3):
        sig = np.load(train_files[class_index[row]])
        # FFT along the last axis, i.e. independently for each detector row.
        F = scipy.fft.fft(sig)
        # Keep the first half of the bins only; assuming the samples are
        # real-valued, the second half mirrors the first.
        F_log = np.log10(np.abs(F[:, :F.shape[1] // 2]))
        bins = np.arange(F_log.shape[1])  # x axis is the FFT bin index

        for k, name in enumerate(detector_names):
            trace = go.Scatter(x=bins, y=F_log[k], name=name, line=dict(color=colors[k], width=1))
            fig.add_trace(trace, row=row + 1, col=col)

fig.show()

#### phase spectrum (diff)

In [None]:
# Inter-detector phase difference of the FFT for the first three samples of
# each class: left column = target 0, right column = target 1.
pair_names = ["Hanford-Livingston", "Livingston-Virgo", "Virgo-Hanford"]
detector_pairs = [(0, 1), (1, 2), (2, 0)]  # row indices into the sample array
class_indices = (train_target0_index, train_target1_index)

# Build the titles here rather than reusing the global `subplot_titles`,
# which other cells overwrite with different content.
phase_titles = [
    f"{train_files[class_index[row]].stem} (target={label})"
    for row in range(3)
    for label, class_index in enumerate(class_indices)
]

fig = make_subplots(rows=3, cols=2, shared_xaxes=True, shared_yaxes=True, subplot_titles=phase_titles)
colormap = px.colors.sequential.Turbo
colors = [colormap[2], colormap[6], colormap[11]]

for col, class_index in enumerate(class_indices, start=1):
    for row in range(3):
        # Load the sample that matches this row's title. The previous code
        # always loaded sample 0, so all three rows showed the same data.
        sig = np.load(train_files[class_index[row]])
        F = scipy.fft.fft(sig)
        # First half of the bins only, matching the amplitude-spectrum cell
        # (the previous `// 1` kept the whole spectrum).
        half = F[:, :F.shape[1] // 2]
        bins = np.arange(half.shape[1])  # x axis is the FFT bin index

        for k, ((a, b), name) in enumerate(zip(detector_pairs, pair_names)):
            # Phase of the ratio = phase(a) - phase(b), wrapped to (-pi, pi].
            phase_diff = np.angle(half[a] / half[b])
            trace = go.Scatter(x=bins, y=phase_diff, name=name, line=dict(color=colors[k], width=1))
            fig.add_trace(trace, row=row + 1, col=col)

fig.show()

### Short-time Fourier transform (STFT)

#### amplitude spectrum

In [None]:
# Log-power spectrogram of one sample per class, one row per detector:
# left column = target 0, right column = target 1.
sample_index = 0
detector_names = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
# One file per column. The target=1 title previously used the target=0 file
# name, so the right column was labelled with the wrong file.
sample_files = (
    train_files[train_target0_index[sample_index]],
    train_files[train_target1_index[sample_index]],
)

# make_subplots consumes titles row by row: (site, target 0), (site, target 1), ...
subplot_titles = [
    f"file={path.stem}, site={site}, (target={label})"
    for site in detector_names
    for label, path in enumerate(sample_files)
]
fig = make_subplots(rows=3, cols=2, subplot_titles=subplot_titles)

for col, path in enumerate(sample_files, start=1):
    time_signal = np.load(path)
    # Computed along the last axis, so Sxx[k] is the (frequency, time)
    # spectrogram of detector row k.
    f, t, Sxx = signal.spectrogram(time_signal, fs=2048, nfft=512, nperseg=256)
    for row in range(len(detector_names)):
        # Transposed to (time, frequency) to match x=f, y=t.
        heatmap = go.Heatmap(x=f, y=t, z=np.log10(Sxx[row].transpose()), colorbar=None)
        fig.add_trace(heatmap, row=row + 1, col=col)

fig.show()

#### amplitude spectrum diff

In [None]:
# Log amplitude ratio between detector pairs in the STFT domain, for the last
# sample of each class: left column = target 0, right column = target 1.
sample_index = -1
path_target0 = train_files[train_target0_index[sample_index]]
path_target1 = train_files[train_target1_index[sample_index]]

subplot_titles = []
for site in ["Hanford-Livingston", "Livingston-Virgo", "Virgo-Hanford"]:
    subplot_titles.extend([
        f"file={str(path_target0.stem)}, site={site}, (target=0)",
        f"file={str(path_target1.stem)}, site={site}, (target=1)",
    ])
fig = make_subplots(rows=3, cols=2, subplot_titles=subplot_titles, shared_xaxes=True, shared_yaxes=True)

# (numerator, denominator) detector rows, in the same order as the titles.
detector_pairs = ((0, 1), (1, 2), (2, 0))
for col, path in enumerate((path_target0, path_target1), start=1):
    time_signal = np.load(path)
    # mode='complex' returns the complex STFT instead of the power.
    f, t, Sxx = signal.spectrogram(time_signal, fs=2048, nfft=512, nperseg=256, mode='complex')
    for row, (num, den) in enumerate(detector_pairs, start=1):
        log_ratio = np.log10(np.abs(Sxx[num] / Sxx[den]))
        # Transposed to (time, frequency) to match x=f, y=t.
        fig.add_trace(go.Heatmap(x=f, y=t, z=log_ratio.transpose(), colorbar=None), row=row, col=col)

fig.show()

#### phase spectrum diff

In [None]:
# Phase difference between detector pairs in the STFT domain, for one sample
# of each class: left column = target 0, right column = target 1.
sample_index = 0
path_target0 = train_files[train_target0_index[sample_index]]
path_target1 = train_files[train_target1_index[sample_index]]

subplot_titles = []
for site in ["Hanford-Livingston", "Livingston-Virgo", "Virgo-Hanford"]:
    subplot_titles.extend([
        f"file={str(path_target0.stem)}, site={site}, (target=0)",
        f"file={str(path_target1.stem)}, site={site}, (target=1)",
    ])
fig = make_subplots(rows=3, cols=2, subplot_titles=subplot_titles, shared_xaxes=True, shared_yaxes=True)

# (numerator, denominator) detector rows, in the same order as the titles.
detector_pairs = ((0, 1), (1, 2), (2, 0))
for col, path in enumerate((path_target0, path_target1), start=1):
    time_signal = np.load(path)
    # mode='complex' returns the complex STFT, which carries the phase.
    f, t, Sxx = signal.spectrogram(time_signal, fs=2048, nfft=512, nperseg=256, mode='complex')
    for row, (num, den) in enumerate(detector_pairs, start=1):
        ratio = Sxx[num] / Sxx[den]
        # np.angle(z) is arctan2(z.imag, z.real): the phase of the ratio.
        phase = np.angle(ratio)
        # Transposed to (time, frequency) to match x=f, y=t.
        fig.add_trace(go.Heatmap(x=f, y=t, z=phase.transpose(), colorbar=None), row=row, col=col)

fig.show()

### wavelet transform

In [None]:
# Continuous wavelet transform (log magnitude) of one sample per class, one
# row per detector: left column = target 0, right column = target 1.
sample_index = 0
detector_names = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
# One file per column. The target=1 title previously used the target=0 file
# name, so the right column was labelled with the wrong file.
sample_files = (
    train_files[train_target0_index[sample_index]],
    train_files[train_target1_index[sample_index]],
)

# make_subplots consumes titles row by row: (site, target 0), (site, target 1), ...
subplot_titles = [
    f"file={path.stem}, site={site}, (target={label})"
    for site in detector_names
    for label, path in enumerate(sample_files)
]
fig = make_subplots(rows=3, cols=2, subplot_titles=subplot_titles, shared_xaxes=True, shared_yaxes=True)

fs = 2048  # Hz
scales = np.arange(1, 31, 0.25)

for col, path in enumerate(sample_files, start=1):
    time_signal = np.load(path)
    # cwt works along the last axis and prepends a scale axis, so the result
    # is indexed as [scale, detector row, time sample].
    cwt_mat, freqs = pywt.cwt(time_signal, scales=scales, wavelet='cmor1.5-1.0', sampling_period=1/fs, method='fft')
    seconds = np.arange(0, time_signal.shape[1]) / fs  # sample index -> seconds
    for row in range(len(detector_names)):
        log_magnitude = np.log10(np.abs(cwt_mat[:, row, :]))
        fig.add_trace(go.Heatmap(x=seconds, y=freqs, z=log_magnitude, colorbar=None), row=row + 1, col=col)

fig.show()