G2Net gravitational-wave binary classification
- G2Net waveform input (3 channels per sample)
- band-pass filter
- rescale each channel to [-1, 1]
- CWT scalogram
- EfficientNet (CNN-based) image recognizer

reference
https://www.kaggle.com/c/g2net-gravitational-wave-detection/discussion/261721

In [None]:
# cwt
!pip install git+https://github.com//Kevin-McIsaac/cmorlet-tensorflow@Performance --no-deps > /dev/null

In [None]:
# data
from tensorflow.keras.utils import Sequence
from random import shuffle
from CWT.cwt import ComplexMorletCWT
from scipy import signal
import numpy as np
import math

# Band-pass filter configuration; fs, lower_freq, upper_freq and
# wavelet_width are plain module-level settings shared with later cells.
fs = 2048          # sampling frequency handed to the filter design, in Hz
lower_freq = 20    # lower edge of the pass band, in Hz
upper_freq = 500   # upper edge of the pass band, in Hz
wavelet_width = 1

# Butterworth band-pass design (N=8) in transfer-function (b, a) form.
bHP, aHP = signal.butter(N=8, Wn=[lower_freq, upper_freq], btype='bandpass', fs=fs)


def bandpass_filter(wav):
    """Band-pass filter ``wav`` along its last axis.

    The Butterworth coefficients designed above are applied forwards and
    backwards with ``signal.filtfilt``, keeping the band between
    ``lower_freq`` and ``upper_freq``. The result has the same shape as
    ``wav``.
    """
    filtered = signal.filtfilt(bHP, aHP, wav, axis=-1)
    return filtered

class Dataset(Sequence):
    """Keras ``Sequence`` serving batches of band-passed, peak-normalised waveforms.

    Args:
        df: DataFrame with an ``id`` column and a ``file`` column (path to a
            ``.npy`` waveform). A ``target`` column is required when
            ``is_train`` is True and optional otherwise.
        is_train: When True, ``__getitem__`` returns ``(x, y)`` and the
            samples may be reshuffled after each epoch. When False, only
            ``x`` is returned and every batch is served in the original order.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the samples at the end of every epoch (only
            honoured when ``is_train`` is True).
        epoch_fraction: Fraction of the available batches served per epoch
            when ``is_train`` is True. Combined with shuffling, each epoch
            sees a different random subset of the data.

    Raises:
        ValueError: If ``is_train`` is True but ``df`` has no ``target`` column.
    """

    def __init__(self, df, is_train=True, batch_size=32, shuffle=True, epoch_fraction=0.4):
        super().__init__()
        self.id = np.asarray(df["id"])
        self.file = np.asarray(df["file"])
        # Unlabelled frames (e.g. a test set) simply have no targets.
        self.y = np.asarray(df["target"]) if "target" in df else None
        if is_train and self.y is None:
            raise ValueError("is_train=True requires a 'target' column in df")
        self.batch_size = batch_size
        self.is_train = is_train
        self.shuffle = shuffle
        self.epoch_fraction = epoch_fraction

    def __len__(self):
        batch_cnt = math.ceil(len(self.file) / self.batch_size)  # all batch times
        if batch_cnt == 0 or not self.is_train:
            # Inference must cover every sample, so no subsampling here.
            return batch_cnt
        # Serve only a fraction of the batches per epoch, but never zero
        # batches when there is data.
        return max(1, min(batch_cnt, int(batch_cnt * self.epoch_fraction)))

    def __getitem__(self, i):
        rows = slice(i * self.batch_size, (i + 1) * self.batch_size)
        x = np.stack([self.feature(path) for path in self.file[rows]])
        if self.is_train:
            return x, self.y[rows]
        return x

    def feature(self, file):  # for one sample
        """Load one waveform file, band-pass it and scale each channel to [-1, 1]."""
        wav = bandpass_filter(np.load(file))[:3]  # keep the three channels
        # Scale by the largest absolute value so both signs stay in [-1, 1].
        peak = np.abs(wav).max(axis=-1, keepdims=True)
        peak[peak == 0] = 1.0  # an all-zero channel stays zero instead of NaN
        return wav / peak

    def on_epoch_end(self):
        """Reshuffle the samples between epochs in training mode."""
        if not (self.shuffle and self.is_train):
            return
        # Permute through an index list so id/file/y stay aligned and remain
        # NumPy arrays (zip(*...) would turn them into tuples).
        order = list(range(len(self.file)))
        shuffle(order)
        self.id = self.id[order]
        self.file = self.file[order]
        if self.y is not None:
            self.y = self.y[order]

In [None]:
# model
from tensorflow.keras.layers import InputLayer, GlobalAveragePooling2D, Dense, Permute, Input, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanSquaredError, AUC

# Network input: one sample is a (3, 4096) array.
input_wav = Input(shape=(3, 4096))

# cwt: waveform -> scalogram magnitude, computed as a layer of the model
cwt_layer = ComplexMorletCWT(
    n_scales=64,
    stride=64,
    output='magnitude',
    data_format='channels_first',
    fs=fs,
    lower_freq=lower_freq,
    upper_freq=upper_freq,
    wavelet_width=wavelet_width,
)
x = cwt_layer(input_wav)
# Move the channel axis last, as the image backbone is fed channels-last.
x = Permute(dims=(2, 3, 1))(x)  # -> [:, time, n_scales, channels]

# ImageNet-initialised EfficientNetB4 without its classification top.
cnn = EfficientNetB4(include_top=False, weights='imagenet')
x = cnn(x)
x = GlobalAveragePooling2D()(x)

# Two Dense+Dropout stages (64, then 32 units) ahead of the sigmoid output.
for units in (32 * 2, 32):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.2)(x)
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[input_wav], outputs=x)

model.compile(
    optimizer="sgd",  # alternative: optimizer=Adam(learning_rate=1e-4)
    loss='binary_crossentropy',
    metrics=[AUC(), "binary_accuracy"],
)
model.summary()

In [None]:
# train
import pandas as pd
from sklearn.model_selection import train_test_split
# Root directory of the competition data, and the label table read from it.
dd = "../input/g2net-gravitational-wave-detection"
train = pd.read_csv(dd + '/training_labels.csv')
def get_train_file(img_id):
    """Return the ``.npy`` path of training sample ``img_id`` under ``dd``.

    The file sits three directories deep, one level per leading character
    of the id.
    """
    template = "{}/train/{}/{}/{}/{}.npy"
    first, second, third = img_id[0], img_id[1], img_id[2]
    return template.format(dd, first, second, third, img_id)
# Attach the waveform path of every sample to the label table.
train['file'] = train['id'].apply(get_train_file)

# Stratified 90/10 split so both parts keep the same target balance.
train_df, val_df = train_test_split(
    train,
    test_size=0.1,
    random_state=32,
    stratify=train["target"],
)
tr_d = Dataset(train_df)
val_d = Dataset(val_df)

# Keep the returned History object for later inspection.
train_h = model.fit(tr_d, epochs=30, validation_data=val_d)