# Prepare Environment

In [None]:
import os
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import pickle
import cv2
from IPython.lib.display import Audio

import scipy
from scipy.interpolate import interp1d
from scipy.signal import butter, filtfilt, iirdesign, zpk2tf, freqz

import librosa
import librosa.display
import torch

from sklearn.model_selection import train_test_split

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

from tensorflow import keras

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

In [None]:
import warnings
# Silence every warning category for the rest of the session (this also hides
# numeric warnings such as complex-to-real casts raised by later cells).
warnings.filterwarnings("ignore")

In [None]:
def merge_history(hlist):
    """Concatenate the per-epoch metrics of several training runs.

    Parameters
    ----------
    hlist : sequence
        Objects exposing a ``history`` dict that maps a metric name to a
        sequence of per-epoch values (for example the results of successive
        ``fit`` calls), in chronological order.

    Returns
    -------
    dict
        Metric name -> one flat list holding the values of every run, in the
        order the runs appear in ``hlist``. Empty when ``hlist`` is empty.
    """
    if not hlist:
        # Nothing to merge; indexing hlist[0] below would raise IndexError.
        return {}

    # The first run decides which metrics are merged. Flattening with a
    # comprehension is linear, unlike repeated list addition via sum(..., []).
    return {
        k: [value for h in hlist for value in h.history[k]]
        for k in hlist[0].history
    }

In [None]:
def vis_training(h, start=1):
    """Plot training (and, when present, validation) curves per metric.

    Parameters
    ----------
    h : dict
        Metric name -> list of per-epoch values. Must contain ``'loss'``;
        the validation series of a metric is looked up under
        ``'val_' + name``.
    start : int, default 1
        First epoch (1-based) to display; earlier epochs are hidden.
    """
    epoch_range = range(start, len(h['loss'])+1)
    s = slice(start-1, None)

    # Pick training metrics by name instead of assuming that exactly the
    # first half of the keys are training metrics with a 'val_' twin each.
    metrics = [k for k in h if not k.startswith('val_')]
    n = len(metrics)

    plt.figure(figsize=[14,4])

    for i, k in enumerate(metrics):
        plt.subplot(1, n, i+1)
        plt.plot(epoch_range, h[k][s], label='Training')
        # Histories recorded without validation data have no 'val_' series.
        if 'val_' + k in h:
            plt.plot(epoch_range, h['val_' + k][s], label='Validation')
        plt.xlabel('Epoch'); plt.ylabel(k); plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
# Root folder of the competition data, and the folder holding the training
# samples directly beneath it.
HOME = '../input/g2net-gravitational-wave-detection'
TRAIN = f'{HOME}/train'

# Load Data

In [None]:
# Read the label table; dtype=str loads every column as strings, so the
# `target` column holds text rather than numbers.
train_full = pd.read_csv(f'{HOME}/training_labels.csv', dtype=str)
train_full.shape

In [None]:
# File name of each sample: its id followed by the .npy extension.
train_full['filename'] = train_full.id + '.npy'

In [None]:
# Preview the first rows of the label table.
train_full.head()

# Explore Data

In [None]:
def id_to_path(idx, is_train=True):
    """Return the path of the ``.npy`` file that stores one sample.

    Files live three directory levels deep, each level named after one of
    the first three characters of the sample id.

    Parameters
    ----------
    idx : int or str
        Index label looked up in ``train_full.id``, or the sample id itself
        when a string is given.
    is_train : bool, default True
        When true the file is located under ``TRAIN``; otherwise under the
        ``test`` folder of ``HOME``.

    Returns
    -------
    str
        Path of the form ``<root>/<c0>/<c1>/<c2>/<id>.npy``.
    """
    # Accept a ready-made id so callers working on other tables (subsets,
    # test ids) do not have to go through train_full.
    f = idx if isinstance(idx, str) else train_full.id[idx]

    # The split used to be computed and then ignored (the path always
    # pointed at TRAIN). NOTE(review): assumes the test folder mirrors the
    # train layout -- confirm against the dataset.
    root = TRAIN if is_train else f'{HOME}/test'
    return f'{root}/{f[0]}/{f[1]}/{f[2]}/{f}.npy'

# Load the sample in row 0 of the label table; the cells below index its
# first axis once per detector.
x = np.load(id_to_path(0))
print(x.shape)

In [None]:
# Plot the raw series of the loaded sample, one stacked subplot per row of x,
# titled with the detector names in the order given below.
plt.figure(figsize=[12,6])
for i,t in enumerate(["LIGO Hanford", "LIGO Livingston", "Virgo"]):
    plt.subplot(3, 1, i+1)
    plt.plot(x[i,:])
    plt.title(t)

plt.tight_layout()    
plt.show()

# Processing

In [None]:
# Acquisition parameters shared by the processing functions below.
sample_rate = 2048 #Hz (1/seconds)
time_span = 2 #seconds
samples_total = time_span * sample_rate
# Sampling interval derived from the rate. It was hard-coded as 10/5000
# (0.002 s), which contradicts the 2048 Hz rate declared above.
dt = 1 / sample_rate
# Pass band [low, high] in Hz used by the band-pass filter.
fband = [35.0, 200.0]

In [None]:
def whiten(strain, samples_total, dt):
    """Flatten the spectrum of a series using its own periodogram.

    Every FFT bin is divided by the square root of the power estimated for
    that bin, so all bins of the result share the same magnitude.

    Parameters
    ----------
    strain : array_like, 1-D
        Input series.
    samples_total : int
        FFT length; ``np.fft.fft`` zero-pads or truncates ``strain`` to it.
    dt : float
        Sampling interval. Only used to build the frequency axis on which
        the PSD is tabulated.

    Returns
    -------
    w_strain : numpy.ndarray
        Whitened series of length ``samples_total``. Real-valued when
        ``strain`` is real, complex otherwise.
    psd : numpy.ndarray
        Real-valued power estimate for each frequency bin.
    """
    fhat = np.fft.fft(strain, samples_total)
    # |fhat|^2 / N computed as a real array; fhat * conj(fhat) would carry a
    # zero imaginary part and make everything downstream complex.
    PSD = (fhat.real**2 + fhat.imag**2) / samples_total
    freq = 1/(dt*samples_total) * np.arange(samples_total)

    # scipy interp1d interpolation, evaluated once on the FFT grid
    interp_psd = interp1d(freq, PSD, "nearest")
    psd = interp_psd(freq)

    w_fhat = fhat/np.sqrt(psd)
    w_strain = np.fft.ifft(w_fhat)
    if np.isrealobj(strain):
        # For real input the whitened spectrum stays Hermitian, so the
        # imaginary part of the inverse transform is only round-off.
        w_strain = w_strain.real
    return w_strain, psd

In [None]:
# Whiten the first row of the sample and plot the result; `ip` receives the
# PSD values returned alongside the whitened series.
whitened, ip = whiten(x[0,:], samples_total, dt)

plt.plot(whitened)
plt.show()

In [None]:
def bandpass(x, N, fband, fs):
    """Apply a forward-backward Butterworth band-pass filter to ``x``.

    Parameters
    ----------
    x : array_like
        Data to filter; ``filtfilt`` works along its default (last) axis.
    N : int
        Filter order handed to ``scipy.signal.butter``.
    fband : sequence of two floats
        Lower and upper edge of the pass band, in the same unit as ``fs``.
    fs : float
        Sampling frequency used to normalise the band edges.

    Returns
    -------
    numpy.ndarray
        Filtered data divided by ``sqrt((high - low) / (fs / 2))``.
    """
    low, high = fband[0], fband[1]

    # butter expects the edges as fractions of the Nyquist frequency (fs/2).
    edges = [low*2./fs, high*2./fs]
    numerator, denominator = scipy.signal.butter(N, edges, btype='band')

    filtered = scipy.signal.filtfilt(numerator, denominator, x)

    # Rescale by the square root of the share of the Nyquist band kept.
    scale = np.sqrt((high-low)/(fs/2))
    return filtered / scale

In [None]:
# Band-pass the raw sample and plot one subplot per detector.
# bandpass() normalises the band edges by the sampling frequency, so it must
# receive sample_rate (Hz). Passing samples_total (the number of samples per
# series, twice the rate here) halved the real pass band.
bandpassed_strain = bandpass(x, 4, fband, sample_rate)

plt.figure(figsize=[12,6])
for i,t in enumerate(["LIGO Hanford", "LIGO Livingston", "Virgo"]):
    plt.subplot(3, 1, i+1)
    plt.plot(bandpassed_strain[i,:])
    plt.title(t)

plt.tight_layout()
plt.show()

In [None]:
# Whiten every detector series, then band-pass the result and plot it.
w = x.copy()

for i in range(3):
    w_i, _ = whiten(x[i,:], samples_total, dt)
    # Keep the real part explicitly: w is a real array, and a complex result
    # would otherwise be cast silently when stored.
    w[i,:] = np.real(w_i)

# bandpass() needs the sampling frequency in Hz (sample_rate), not the number
# of samples per series (samples_total), to place the band edges correctly.
bpw = bandpass(w, 4, fband, sample_rate)

plt.figure(figsize=[12,6])
for i,t in enumerate(["LIGO Hanford", "LIGO Livingston", "Virgo"]):
    plt.subplot(3, 1, i+1)
    plt.plot(bpw[i,:])
    plt.title(t)

plt.tight_layout()
plt.show()

# Data Generator

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads, whitens and band-passes sample files.

    Every batch is a pair ``(X, y)``: ``X`` has shape
    ``(batch, samples_total, 3)`` and ``y`` holds the targets of the same
    rows of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``id`` and a ``target`` column; rows are addressed by
        position.
    batch_size : int, default 32
        Number of samples per batch (the last batch may be smaller).
    n_batches : int or None, default None
        Upper bound on the batches per epoch; ``None`` uses all rows.
    shuffle : bool, default True
        Reshuffle the row order at construction and after every epoch.
    is_train : bool, default True
        Read files from ``TRAIN`` when true, otherwise from the ``test``
        folder under ``HOME``.
    """

    #####################################################################
    # Constructor
    #####################################################################
    def __init__(self, df, batch_size=32, n_batches=None, shuffle=True, is_train=True):
        # Recent Keras releases expect the base-class initializer to run.
        super().__init__()
        self.df = df
        self.n = len(df)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.is_train = is_train
        self.n_batches = n_batches
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the row order, shuffling it when requested."""
        self.indices = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch."""
        available = math.ceil(self.n / self.batch_size)
        if self.n_batches is None:
            return available
        # Never advertise more batches than the data can fill; the surplus
        # ones would come back empty.
        return min(self.n_batches, available)

    def __getitem__(self, batch_index):
        """Return batch number ``batch_index`` as ``(X, y)``."""
        start = batch_index * self.batch_size
        end = (batch_index + 1) * self.batch_size

        indices = self.indices[start:end]

        return self.__data_generation(indices)

    def __data_generation(self, batch_indices):
        """Load and preprocess the rows at the given positions of ``df``."""
        batch_size = len(batch_indices)

        X = np.zeros(shape=(batch_size, samples_total, 3))
        y = np.zeros(batch_size)

        ids = self.df.id.values
        targets = self.df.target.values
        # NOTE(review): assumes the test folder mirrors the train layout.
        root = TRAIN if self.is_train else f'{HOME}/test'

        for i, idx in enumerate(batch_indices):
            ID = ids[idx]
            y[i] = targets[idx]

            # Build the path from this row's own id. Looking the position up
            # in the full label table paired labels with the wrong files
            # whenever df was a subset or reordering of that table.
            path = f'{root}/{ID[0]}/{ID[1]}/{ID[2]}/{ID}.npy'
            x = np.load(path)

            # Whitening
            for j in range(3):
                whitened, _ = whiten(x[j,:], samples_total, dt)
                # Store the real part explicitly instead of relying on a
                # silent complex-to-real cast.
                x[j,:] = np.real(whitened)

            # Bandpass filter: fs is the sampling frequency in Hz, not the
            # number of samples per series.
            x = bandpass(x, 4, fband=fband, fs=sample_rate)

            X[i,:,:] = x.T

        return X, y
            
# Smoke-test the generator on the full label table: fetch the first batch of
# eight unshuffled samples, print its shape and plot the three channels of the
# first sample side by side.
temp_gen = DataGenerator(train_full, batch_size=8, shuffle=False)
X,y = temp_gen.__getitem__(0)

print(X.shape)

plt.figure(figsize=(16,4))
for i in range(3):
    plt.subplot(1,3,i+1)
    plt.plot(X[0,:,i])
plt.show()

In [None]:
# Stratified split on the target: test_size=0.9 sends 90% of the rows to
# `valid` and leaves 10% in `train`.
# NOTE(review): confirm the 10/90 ratio is intended and not a swapped 0.1.
train, valid = train_test_split(train_full, test_size=0.9, random_state=1, stratify=train_full.target)
# Both generators are limited to 20 batches of 512 samples and do not shuffle,
# so every epoch iterates over the same leading rows of each split.
# NOTE(review): confirm that unshuffled training data is intended.
train_gen = DataGenerator(train, batch_size=512, n_batches=20, shuffle=False)
valid_gen = DataGenerator(valid, batch_size=512, n_batches=20, shuffle=False)

# Build Network

In [None]:
# Fix the TensorFlow seed before any layer is created so that weight
# initialisation is repeatable.
tf.random.set_seed(1)

# 1-D CNN over inputs of shape (4096, 3): six Conv1D -> BatchNormalization ->
# MaxPooling1D stages with growing filter counts (64 -> 256) and shrinking
# kernels (64 -> 16), followed by a dense head with dropout and a single
# sigmoid output unit.
# NOTE(review): with Keras' default pool size each stage presumably halves the
# time axis (4096 -> 64 after six stages) -- confirm with the summary below.
cnn = Sequential([
    Conv1D(64, input_shape=(4096, 3), kernel_size=64, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),
    
    Conv1D(64, kernel_size=32, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),
    
    Conv1D(128, kernel_size=32, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),
    
    Conv1D(128, kernel_size=16, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),
    
    Conv1D(256, kernel_size=16, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),

    Conv1D(256, kernel_size=16, activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling1D(),

    Flatten(),
    Dropout(0.2),
    
    Dense(128, activation='relu'),
    Dropout(0.2),
    
    Dense(64, activation='relu'),
    Dropout(0.1),
    
    Dense(1, activation='sigmoid')
])

# Print the layer-by-layer output shapes and parameter counts.
cnn.summary()

In [None]:
%%time

# Adam with a learning rate of 1e-5; binary cross-entropy matches the single
# sigmoid output, and accuracy plus AUC are tracked alongside the loss.
opt = tf.keras.optimizers.Adam(1e-5)
cnn.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy', tf.keras.metrics.AUC()])

# Train for five epochs, validating on valid_gen after each one; h1 keeps the
# per-epoch metrics of this run.
h1 = cnn.fit(train_gen, epochs=5, verbose=1, validation_data=valid_gen)