In [None]:
!pip install -q efficientnet

import gc
import warnings
import sklearn.exceptions
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

import os
import re
import numpy as np

import tensorflow as tf
import tensorflow_addons as tfa
import efficientnet.tfkeras as efn
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm

from IPython.display import Image, display
from matplotlib.cm import ScalarMappable
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Layer, InputSpec
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import regularizers, activations, initializers, constraints, Sequential
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import UnitNorm, Constraint

from tqdm.notebook import tqdm
from kaggle_datasets import KaggleDatasets
from glob import glob

<img align="left" src="https://raw.githubusercontent.com/kabartay/kaggle-g2net-gravitational-wave-detection/main/pics/header.png" data-canonical-src="https://raw.githubusercontent.com/kabartay/kaggle-g2net-gravitational-wave-detection/main/pics/header.png" width="1350" />

<a id="0"></a>
<h1 style='background:#0788f0; font-size:200%; border:0; color:white;'><center> Table of Contents</center></h1>

1. [Load Data](#1)
2. [AutoEncoder](#2)  
    2.1 [Utils](#2.1)  
    2.2 [Train AutoEncoder](#2.2)  
    2.3 [Frozen Pretrained EfficientNetB1 with Encoder](#2.3)
3. [Train dataset analysis](#3)  
    3.1 [Group 1: Without gravitation wave](#3.1)  
    3.2 [Group 2: With GW & Probability > 0.8 ](#3.2)  
    3.3 [Group 3: With GW & Probability <= 0.8](#3.3)
4. [Train/Test dataset comparison](#4) 
5. [Grad-CAM](#5)
6. [Conclusion](#6)
7. [References](#7)

<a id="1"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 1. Load Data </center></h2>

Train Datasets
* [Q-Transform TFRecords](https://www.kaggle.com/miklgr500/q-transform-tfrecords)
    * [CQT G2Net V2 [0 - 1]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-0-1)
    * [CQT G2Net V2 [2 - 3]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-2-3)
    * [CQT G2Net V2 [4 - 5]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-4-5)
    * [CQT G2Net V2 [6 - 7]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-6-7)
    * [CQT G2Net V2 [8 - 9]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-8-9)
    * [CQT G2Net V2 [10 - 11]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-10-11)
    * [CQT G2Net V2 [12 - 13]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-12-13)
    * [CQT G2Net V2 [14 - 15]](https://www.kaggle.com/miklgr500/cqt-g2net-v2-14-15)
    
Test Datasets
* [CQT G2Net Test [0 - 1]](https://www.kaggle.com/miklgr500/cqt-g2net-test-0-1)
* [CQT G2Net Test [2 - 3]](https://www.kaggle.com/miklgr500/cqt-g2net-test-2-3)
* [CQT G2Net Test [4 - 5]](https://www.kaggle.com/miklgr500/cqt-g2net-test-4-5)
* [CQT G2Net Test [6 - 7]](https://www.kaggle.com/miklgr500/cqt-g2net-test-6-7)

In [None]:
# Default side length (pixels) of the square images produced by prepare_image.
IMG_SIZES = 256
# Per-replica batch size; get_dataset multiplies it by the number of replicas.
BATCH_SIZE = 32
# Despite the name, the autoencoder training loop uses this as a number of batches (iterations).
EPOCHS = 256

# Number of batches pushed through the encoder when collecting embeddings for the plots.
EPOCHS_VIS = 500

# From https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training
def auto_select_accelerator():
    """Choose a tf.distribute strategy, preferring a TPU when one can be set up.

    Returns:
        A `(strategy, tpu_found)` tuple. `tpu_found` is True only when the
        whole TPU initialisation sequence succeeded; otherwise the current
        default strategy is returned together with False.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
        tpu_found = True
    except ValueError:
        # Any ValueError raised during TPU setup means "no usable TPU":
        # fall back to whatever strategy is currently active.
        strategy = tf.distribute.get_strategy()
        tpu_found = False

    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    return strategy, tpu_found


# Resolve the accelerator once; the resulting globals are read by get_dataset.
strategy, TPU_DETECTED = auto_select_accelerator()
# Let tf.data pick the parallelism / prefetch buffer sizes.
AUTO = tf.data.experimental.AUTOTUNE
# Number of replicas; get_dataset uses it to scale the per-replica batch size.
REPLICAS = strategy.num_replicas_in_sync

In [None]:
# Collect the TFRecord shards of every train dataset (one Kaggle dataset per fold pair),
# sorted within each dataset so that the file order is reproducible.
files_train_g = []
for i,k in tqdm([(0, 1), (2, 3), (4,5), (6, 7), (8, 9) ,(10,11), (12, 13), (14, 15)]):
    GCS_PATH = KaggleDatasets().get_gcs_path(f'cqt-g2net-v2-{i}-{k}')
    files_train_g.extend(np.sort(np.array(tf.io.gfile.glob(GCS_PATH + '/train*.tfrec'))).tolist())
num_train_files = len(files_train_g)
print('train_files:',num_train_files)

# Same for the test datasets. The count gets its own name so that it no longer
# overwrites `num_train_files` with the number of test files.
files_test_g = []
for i,k in tqdm([(0, 1), (2, 3), (4, 5), (6, 7)]):
    GCS_PATH = KaggleDatasets().get_gcs_path(f'cqt-g2net-test-{i}-{k}')
    files_test_g.extend(np.sort(np.array(tf.io.gfile.glob(GCS_PATH + '/test*.tfrec'))).tolist())
num_test_files = len(files_test_g)
print('test_files:',num_test_files)

In [None]:
def read_labeled_tfrecord(example):
    """Parse one serialized training example into an `(image, target)` pair.

    Args:
        example: serialized example carrying the scalar features 'image'
            (string), 'image_id' (string) and 'target' (int64).

    Returns:
        The image returned by `prepare_image` and the target cast to float32
        and reshaped to shape [1].
    """
    feature_spec = {
        name: tf.io.FixedLenFeature([], dtype)
        for name, dtype in (
            ('image', tf.string),
            ('image_id', tf.string),
            ('target', tf.int64),
        )
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image = prepare_image(parsed['image'])
    target = tf.cast(parsed['target'], tf.float32)
    return image, tf.reshape(target, [1])


def read_unlabeled_tfrecord(example, return_image_id):
    """Parse one serialized test example into an `(image, id_or_zero)` pair.

    Args:
        example: serialized example carrying the scalar string features
            'image' and 'image_id'.
        return_image_id: when truthy the second element is the 'image_id'
            feature, otherwise it is the constant 0.

    Returns:
        The image returned by `prepare_image` and either the image id or 0.
    """
    feature_spec = {
        name: tf.io.FixedLenFeature([], tf.string)
        for name in ('image', 'image_id')
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image = prepare_image(parsed['image'])
    second = parsed['image_id'] if return_image_id else 0
    return image, second

 
def prepare_image(img, dim=IMG_SIZES):    
    """Decode PNG bytes into a float32 image of shape (dim, dim, 3), divided by 255.

    Args:
        img: scalar string tensor holding an encoded PNG.
        dim: side length of the square output image.

    Returns:
        float32 tensor reshaped to [dim, dim, 3].
    """
    decoded = tf.image.decode_png(img, channels=3)
    resized = tf.image.resize(decoded, size=(dim, dim))
    scaled = tf.cast(resized, tf.float32) / 255.0
    # The explicit reshape presumably pins a fully static shape for downstream layers.
    return tf.reshape(scaled, [dim,dim, 3])

def count_data_items(fileids):
    """Sum the item counts encoded in the given file names.

    Every name must contain a '-<digits>.' fragment; the digits of the first
    such fragment are read as the number of items stored in that file.

    Args:
        fileids: iterable of file names / paths.

    Returns:
        The NumPy sum of all extracted counts.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    counts = []
    for fileid in fileids:
        counts.append(int(count_pattern.search(fileid).group(1)))
    return np.sum(counts)


def get_dataset(files, shuffle = False, repeat = False, 
                labeled=True, return_image_ids=True, batch_size=BATCH_SIZE, dim=IMG_SIZES):
    """Build a batched, prefetched tf.data pipeline over TFRecord files.

    Args:
        files: TFRecord file paths.
        shuffle: shuffle with a 2048-element buffer and allow non-deterministic order.
        repeat: repeat the dataset indefinitely (applied before shuffling).
        labeled: parse with `read_labeled_tfrecord` when True, otherwise with
            `read_unlabeled_tfrecord`.
        return_image_ids: forwarded to `read_unlabeled_tfrecord`.
        batch_size: per-replica batch size; the actual batch is `batch_size * REPLICAS`.
        dim: accepted for interface compatibility; not used in this function's body.

    Returns:
        The resulting `tf.data.Dataset`.
    """
    dataset = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO)

    if repeat:
        dataset = dataset.repeat()

    if shuffle:
        dataset = dataset.shuffle(1024*2)
        options = tf.data.Options()
        options.experimental_deterministic = False
        dataset = dataset.with_options(options)

    if labeled:
        parse_fn = read_labeled_tfrecord
    else:
        def parse_fn(example):
            return read_unlabeled_tfrecord(example, return_image_ids)

    dataset = dataset.map(parse_fn, num_parallel_calls=AUTO)
    return dataset.batch(batch_size * REPLICAS).prefetch(AUTO)

<a id="2"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 2. AutoEncoder </center></h2>

To visualize 2D/3D image data we need to reduce its dimensionality while preserving the useful information. There are a number of techniques for doing this, one of which is the autoencoder.

Pretrained model:
* [CQT G2Net EfficientNetB1[TPU Training]](https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb1-tpu-training)
* [CQT G2Net EfficientNetB1[TPU Inference] ](https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb1-tpu-inference)

<a href="https://ibb.co/k4m1MCc"><img src="https://i.ibb.co/ZB2mNDT/Untitled-Diagram-1.png" alt="Untitled-Diagram-1" border="0"></a>

<a id="2.1"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 2.1 Utils </center></h2>

In [None]:
class UncorrelatedFeaturesConstraint (Constraint):
    """Penalty that grows with the off-diagonal covariance of an encoding.

    Calling the object on a 2-D tensor returns `weightage` times the sum of
    squared off-diagonal entries of the covariance-like matrix built from its
    first `encoding_dim` columns.
    """

    def __init__(self, encoding_dim, weightage = 1.0):
        self.encoding_dim = encoding_dim
        self.weightage = weightage

    def get_covariance(self, x):
        """Return the (encoding_dim, encoding_dim) matrix of centered column products."""
        # One row per encoding component, each centered around its own mean.
        x_centered = tf.stack(
            [x[:, i] - K.mean(x[:, i]) for i in range(self.encoding_dim)]
        )
        products = K.dot(x_centered, K.transpose(x_centered))
        # NOTE(review): get_shape()[0] is the leading axis of the stacked tensor,
        # i.e. the number of stacked components rather than the number of
        # samples - confirm that this divisor is intended.
        return products / tf.cast(x_centered.get_shape()[0], tf.float32)

    # Constraint penalty
    def uncorrelated_feature(self, x, covariance):
        """Sum of squared off-diagonal covariance entries (0.0 for a single component)."""
        if self.encoding_dim <= 1:
            return 0.0
        # band_part(…, 0, 0) keeps only the main diagonal.
        off_diagonal = covariance - tf.linalg.band_part(covariance, 0, 0)
        return K.sum(K.square(off_diagonal))

    def __call__(self, x):
        return self.weightage * self.uncorrelated_feature(x, self.get_covariance(x))

In [None]:
def build_model(size):
    """Build and compile the EfficientNetB1 binary classifier.

    The EfficientNet base is wrapped as a single nested layer, followed by
    global average pooling, dropout (0.2) and a one-unit sigmoid head.

    Args:
        size: side length of the square RGB input.

    Returns:
        Compiled Keras model (SGD lr=1e-3, binary cross-entropy, AUC metric).
    """
    inp = tf.keras.layers.Input(shape=(size, size,3))
    base = efn.EfficientNetB1(input_shape=(size,size,3),weights='imagenet',include_top=False)

    features = base(inp)
    pooled = tf.keras.layers.GlobalAvgPool2D()(features)
    regularized = tf.keras.layers.Dropout(0.2)(pooled)
    probability = tf.keras.layers.Dense(1,activation='sigmoid')(regularized)

    classifier = tf.keras.Model(inputs=inp, outputs=probability)
    classifier.compile(
        optimizer=tf.optimizers.SGD(learning_rate=1e-3),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )
    return classifier

def build_model_gradcam(size):
    """Build the classifier directly on the EfficientNet graph.

    Unlike `build_model`, the head is attached to `base.output` and the model
    input is `base.input`, so the EfficientNet layers belong to the returned
    model itself instead of being nested inside one layer.

    Args:
        size: side length of the square RGB input.

    Returns:
        Compiled Keras model (SGD lr=1e-3, binary cross-entropy, AUC metric).
    """
    base = efn.EfficientNetB1(input_shape=(size,size,3),weights='imagenet',include_top=False)

    pooled = tf.keras.layers.GlobalAvgPool2D()(base.output)
    regularized = tf.keras.layers.Dropout(0.2)(pooled)
    probability = tf.keras.layers.Dense(1,activation='sigmoid')(regularized)

    classifier = tf.keras.Model(inputs=base.input, outputs=probability)
    classifier.compile(
        optimizer=tf.optimizers.SGD(learning_rate=1e-3),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )
    return classifier

def get_emb_model(size, model):
    """Build the embedding model: EfficientNetB1 followed by global average pooling.

    Args:
        size: side length of the square RGB input.
        model: not read by this function; weights are copied by the caller.

    Returns:
        Compiled Keras model whose output is the pooled EfficientNet feature vector.
    """
    inp = tf.keras.layers.Input(shape=(size, size,3))
    base = efn.EfficientNetB1(input_shape=(size,size,3),weights='imagenet',include_top=False)

    features = base(inp)
    pooled = tf.keras.layers.GlobalAvgPool2D()(features)

    embedder = tf.keras.Model(inputs=inp, outputs=pooled)
    embedder.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['AUC'],
    )
    return embedder


def get_ae_model(size, model):
    """Wrap an embedding model with a 2-unit encoder and a 1280-unit decoder.

    Args:
        size: side length of the square RGB input.
        model: embedding model applied to the input.

    Returns:
        Compiled Keras model with two outputs, in this order:
        the 1280-dimensional reconstruction and the original embedding.
    """
    inp = tf.keras.layers.Input(shape=(size, size,3))
    emb = model(inp)

    # Bottleneck: unit-norm kernel columns plus a penalty on correlated components.
    encoder = tf.keras.layers.Dense(
        2,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        activity_regularizer=UncorrelatedFeaturesConstraint(2, weightage = 0.1),
    )
    code = encoder(emb)
    reconstruction = tf.keras.layers.Dense(1280)(code)

    autoencoder = tf.keras.Model(inputs=inp, outputs=[reconstruction, emb])
    autoencoder.compile(
        optimizer=tfa.optimizers.NovoGrad(learning_rate=1e-3),
        loss='mse',
        metrics=['mse'],
    )
    return autoencoder

def ae_model_to_e_model(size, base, ae_model):
    """Build the encoder-only model and copy the trained bottleneck weights into it.

    Args:
        size: side length of the square RGB input.
        base: embedding model applied to the input.
        ae_model: autoencoder whose second-to-last layer holds the weights to copy.

    Returns:
        Compiled Keras model mapping an image to its 2-dimensional code.
    """
    inp = tf.keras.layers.Input(shape=(size, size,3))
    emb = base(inp)
    bottleneck = tf.keras.layers.Dense(
        2,
        kernel_constraint=tf.keras.constraints.UnitNorm(axis=0),
        activity_regularizer=UncorrelatedFeaturesConstraint(2, weightage = 0.1),
    )
    code = bottleneck(emb)

    encoder_model = tf.keras.Model(inputs=inp, outputs=code)
    encoder_model.compile(
        optimizer=tfa.optimizers.NovoGrad(learning_rate=1e-3),
        loss='mse',
        metrics=['mse'],
    )

    # Transfer the trained bottleneck weights from the autoencoder's second-to-last layer.
    trained_weights = ae_model.layers[-2].get_weights()
    encoder_model.layers[-1].set_weights(trained_weights)
    return encoder_model

# Classifier initialised from the fold-0 checkpoint of the training notebook.
model = build_model(256)
model.load_weights('../input/cqt-g2net-efficientnetb1-tpu-training/fold-0.h5')

# The embedding model is the classifier without its head: dropping the last two
# weight arrays removes the kernel and bias of the final Dense layer.
emb_model = get_emb_model(256, model)
emb_model.set_weights(model.get_weights()[:-2])
# Freeze every layer so that only layers added on top of it are trained.
for l in emb_model.layers:
    l.trainable = False

Frozen pretrained EfficientNetB1

In [None]:
# Print the layer / parameter overview of the embedding model.
emb_model.summary()

<a id="2.2"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 2.2. Train AutoEncoder </center></h2>

In [None]:
train_ds = get_dataset(files_train_g, shuffle = True, repeat = False, labeled=True, return_image_ids=False)

ae_model = get_ae_model(256, emb_model)
optimizer = tfa.optimizers.NovoGrad(learning_rate=1e-3)

# Custom training loop over single batches. The loop intentionally processes
# EPOCHS + 1 batches (i = 0 .. EPOCHS), so `loss_values` ends up with
# EPOCHS + 1 entries, one per-sample loss vector per batch.
loss_values = []
for i, (x, y) in tqdm(enumerate(train_ds), total=EPOCHS + 1):
    with tf.GradientTape() as tape:
        # get_ae_model returns [reconstruction, embedding], in that order.
        reconstruction, emb = ae_model(x)
        # The regularisation term must come from the autoencoder being trained:
        # `ae_model.losses` holds the activity-regulariser penalty of the
        # bottleneck for this forward pass. (The classifier `model` has no
        # such losses, so summing them silently disabled the penalty.)
        loss_value = tf.keras.losses.mse(emb, reconstruction) + sum(ae_model.losses)
    # Differentiate and update outside the tape context so that the update
    # itself is not recorded.
    gradients = tape.gradient(loss_value, ae_model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, ae_model.trainable_weights))
    loss_values.append(loss_value)
    if i == EPOCHS:
        break

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16,9), dpi= 80)
mycolors = ['tab:red', 'tab:blue', 'tab:green', 'tab:orange', 'tab:brown', 'tab:grey', 'tab:pink', 'tab:olive']     

# Per-iteration mean / std over the samples of each batch. The statistics are
# stored under new names so that `loss_values` is left untouched and the cell
# can be re-run without collapsing it further.
loss_per_sample = np.asarray(loss_values)
loss_std = np.std(loss_per_sample, axis=-1)
loss_mean = np.mean(loss_per_sample, axis=-1)
# Derive the x axis from the data instead of assuming exactly EPOCHS + 1 points.
iterations = np.arange(len(loss_mean))

ax.plot(iterations, loss_mean, '.', alpha=0.7, color=mycolors[1], linewidth=0.5)
# Shaded band: mean +/- std, with the lower edge clipped at zero.
ax.fill_between(iterations, y1=loss_mean + loss_std, y2=np.maximum(loss_mean - loss_std, 0), alpha=0.5, color=mycolors[1], linewidth=0)
ax.set_title('AE Loss')
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss');

<a id="2.3"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 2.3. Frozen Pretrained EfficientNetB1 with Encoder </center></h2>

In [None]:
# Stand-alone image -> 2-D code model: frozen embedding model plus a bottleneck
# layer whose weights are copied from the trained autoencoder.
encoder = ae_model_to_e_model(256, emb_model, ae_model)
encoder.summary()

In [None]:
# The autoencoder and its training dataset are not used below; drop the
# references and trigger a garbage-collection pass.
del ae_model, train_ds
gc.collect()

<a id="3"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 3. Train dataset analysis </center></h2>

In [None]:
# Unshuffled, single-pass datasets for the analysis. get_dataset multiplies
# batch_size by REPLICAS, so every batch holds BATCH_SIZE*2*REPLICAS examples.
train_ds = get_dataset(files_train_g, batch_size=BATCH_SIZE*2, shuffle = False, repeat = False, labeled=True, return_image_ids=False)
test_ds = get_dataset(files_test_g, batch_size=BATCH_SIZE*2, shuffle = False, repeat = False, labeled=False, return_image_ids=True)

In [None]:
# Rows the downstream cells expect per visualisation batch, and the total
# number of rows collected at most.
vis_batch_rows = BATCH_SIZE * 2
vis_capacity = EPOCHS_VIS * vis_batch_rows

train_emb = np.zeros((vis_capacity, 2))
train_target = np.zeros((vis_capacity,))
train_pred = np.zeros((vis_capacity,))
# Only the first `vis_batch_rows` images are kept; later cells index them with
# masks built from the first `vis_batch_rows` targets.
train_img = np.zeros((vis_batch_rows, 256, 256, 3))

# Write at a running offset using the real size of every batch: the dataset
# batches BATCH_SIZE*2*REPLICAS rows and its last batch may be short, so a
# fixed BATCH_SIZE*2 slice is not guaranteed to fit.
rows_filled = 0
for i, (img, target) in tqdm(enumerate(train_ds), total=EPOCHS_VIS):
    rows_here = min(int(img.shape[0]), vis_capacity - rows_filled)
    row_stop = rows_filled + rows_here
    train_emb[rows_filled:row_stop] = encoder.predict(img).astype(np.float16)[:rows_here]
    train_pred[rows_filled:row_stop] = model.predict(img).astype(np.float16)[:rows_here, 0]
    train_target[rows_filled:row_stop] = target.numpy().astype(np.uint8)[:rows_here, 0]
    if i == 0:
        img_rows = min(rows_here, len(train_img))
        train_img[:img_rows] = img.numpy()[:img_rows]
    rows_filled = row_stop
    if i == EPOCHS_VIS - 1 or rows_filled >= vis_capacity:
        break

# Drop rows that were never written so that all-zero placeholders do not show
# up in the density plots and group statistics.
train_emb = train_emb[:rows_filled]
train_target = train_target[:rows_filled]
train_pred = train_pred[:rows_filled]
train_img = train_img[:min(rows_filled, vis_batch_rows)]

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 7))

# Left panel: embedding density per target class, with the first BATCH_SIZE
# points highlighted as red stars.
sns.kdeplot(x=train_emb[:, 0], y=train_emb[:, 1], hue=train_target, ax=ax[0])
ax[0].plot(train_emb[:BATCH_SIZE, 0], train_emb[:BATCH_SIZE, 1], '*r')
ax[0].set(xlabel='component 1', ylabel='component 2', title='Train Set Embeding')

# Right panel: only the samples with target == 1, coloured by predicted probability.
pte = train_emb[train_target==1]
ptp = train_pred[train_target==1]
sns.kdeplot(x=pte[:, 0], y=pte[:, 1], ax=ax[1], alpha=0.4)
s = ax[1].scatter(x=pte[:, 0], y=pte[:, 1], c=ptp, cmap='inferno')
ax[1].set(
    xlabel='component 1',
    ylabel='component 2',
    title='Data with gravitation wave probability distribution',
)
cbar = plt.colorbar(s)
cbar.set_label('Probability')
plt.show()

Looking at components 1 and 2, we can conclude that for some of the q-transformed spectrograms EfficientNet B1 cannot detect the gravitational wave. Let's look at the three types of q-transformed spectrograms below.

<a id="3.1"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 3.1. Group 1: Without gravitation wave </center></h2>

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7, 7))

img_groups = {'Without gravitation wave' : []}

# Restrict to the first BATCH_SIZE*2 samples - the ones whose images were kept.
pte, ptp, ptt = (arr[:BATCH_SIZE*2] for arr in (train_emb, train_pred, train_target))

# Group 1: samples whose target is 0.
_ptp, _pte, _pti = ptp[ptt == 0], pte[ptt == 0], train_img[ptt==0]

sns.kdeplot(x=train_emb[:, 0], y=train_emb[:, 1], hue=train_target, ax=ax)
ax.plot(_pte[:, 0], _pte[:, 1], '*r')
ax.set(xlabel='component 1', ylabel='component 2', title='Train Set Embeding')
plt.show();

In [None]:
# Show the three channels of up to five group images and remember those images
# for the Grad-CAM section.
for i, sample in enumerate(_pti[:5]):
    fig, ax = plt.subplots(1, 3, figsize=(21, 7))
    img_groups['Without gravitation wave'].append(sample)
    for j, channel_ax in enumerate(ax[:3]):
        channel_ax.imshow(sample[..., j])
    fig.suptitle(f'Probability: {_ptp[i]}')
    plt.show();

<a id="3.2"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 3.2. Group 2: With GW & Probability > 0.8 </center></h2>

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7, 7))

img_groups['With GW & Probability > 0.8'] = []

# Restrict to the first BATCH_SIZE*2 samples - the ones whose images were kept.
pte, ptp, ptt = (arr[:BATCH_SIZE*2] for arr in (train_emb, train_pred, train_target))

# Group 2: target is 1 and the predicted probability is above 0.8.
mask = (ptt == 1) & (ptp > 0.8)
_ptp, _pte, _pti = ptp[mask], pte[mask], train_img[mask]

sns.kdeplot(x=train_emb[:, 0], y=train_emb[:, 1], hue=train_target, ax=ax)
ax.plot(_pte[:, 0], _pte[:, 1], '*r')
ax.set(xlabel='component 1', ylabel='component 2', title='Train Set Embeding')
plt.show();

In [None]:
# Show the three channels of up to five group images and remember those images
# for the Grad-CAM section.
for i, sample in enumerate(_pti[:5]):
    fig, ax = plt.subplots(1, 3, figsize=(21, 7))
    img_groups['With GW & Probability > 0.8'].append(sample)
    for j, channel_ax in enumerate(ax[:3]):
        channel_ax.imshow(sample[..., j])
    fig.suptitle(f'Probability: {_ptp[i]}')
    plt.show();

<a id="3.3"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 3.3. Group 3: With GW & Probability <= 0.8 </center></h2>

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(7, 7))

img_groups['With GW & Probability <= 0.8'] = []

# Restrict to the first BATCH_SIZE*2 samples - the ones whose images were kept.
pte, ptp, ptt = (arr[:BATCH_SIZE*2] for arr in (train_emb, train_pred, train_target))

# Group 3: target is 1 but the predicted probability is at most 0.8.
mask = (ptt == 1) & (ptp <= 0.8)
_ptp, _pte, _pti = ptp[mask], pte[mask], train_img[mask]

sns.kdeplot(x=train_emb[:, 0], y=train_emb[:, 1], hue=train_target, ax=ax)
ax.plot(_pte[:, 0], _pte[:, 1], '*r')
ax.set(xlabel='component 1', ylabel='component 2', title='Train Set Embeding')
plt.show();

In [None]:
# Show the three channels of up to five group images and remember those images
# for the Grad-CAM section.
for i, sample in enumerate(_pti[:5]):
    fig, ax = plt.subplots(1, 3, figsize=(21, 7))
    img_groups['With GW & Probability <= 0.8'].append(sample)
    for j, channel_ax in enumerate(ax[:3]):
        channel_ax.imshow(sample[..., j])
    fig.suptitle(f'Probability: {_ptp[i]}')
    plt.show();

On q-transformed spectrograms with a high predicted probability, gravitational wave patterns are clearly visible. In turn, the spectrograms for which the model gives a low probability of containing a gravitational wave signal look very similar to the spectrograms without gravitational waves. 
Let's calculate the fraction of samples that falls into each group:

In [None]:
pte = train_emb
ptp = train_pred
ptt = train_target

# Fraction of all collected train samples that falls into each group.
for group_label, group_mask in (
    ('Group 1: Without gravitation wave\t', (ptt == 0)),
    ('Group 2: With GW & Probability > 0.8\t', (ptt == 1) & (ptp > 0.8)),
    ('Group 3: With GW & Probability <= 0.8\t', (ptt == 1) & (ptp <= 0.8)),
):
    print(group_label, group_mask.sum() / len(ptt))

<a id="4"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 4. Train/Test dataset comparison </center></h2>

In [None]:
# Maximum number of test rows collected for the comparison.
test_capacity = EPOCHS_VIS * BATCH_SIZE*2

test_emb = np.zeros((test_capacity, 2))
test_pred = np.zeros((test_capacity,))

# Write at a running offset using the real size of every batch: the dataset
# batches BATCH_SIZE*2*REPLICAS rows and its last batch may be short, so a
# fixed BATCH_SIZE*2 slice is not guaranteed to fit.
test_rows_filled = 0
for i, (img, idx) in tqdm(enumerate(test_ds), total=EPOCHS_VIS):
    rows_here = min(int(img.shape[0]), test_capacity - test_rows_filled)
    row_stop = test_rows_filled + rows_here
    test_emb[test_rows_filled:row_stop] = encoder.predict(img).astype(np.float16)[:rows_here]
    test_pred[test_rows_filled:row_stop] = model.predict(img).astype(np.float16)[:rows_here, 0]
    test_rows_filled = row_stop
    if i == EPOCHS_VIS - 1 or test_rows_filled >= test_capacity:
        break

# Drop rows that were never written so that all-zero placeholders do not
# distort the train/test comparison.
test_emb = test_emb[:test_rows_filled]
test_pred = test_pred[:test_rows_filled]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8, 7))
# Overlay the 2-D densities of both sets on one axis (test first, then train).
for set_label, set_emb in (('Test', test_emb), ('Train', train_emb)):
    sns.kdeplot(x=set_emb[:, 0], y=set_emb[:, 1], ax=ax, label=set_label)
ax.set(xlabel='component 1', ylabel='component 2')
fig.suptitle('Train/Test Set Embeding Comparision')
plt.legend();
plt.show()

In [None]:
def _component_l1_distance(train_values, test_values, ax, xlabel, bins=96):
    """Plot the normalised histograms of one component and return their L1 distance.

    Both histograms share one set of bin edges spanning the combined value
    range, so no train sample is dropped for lying outside the test range.

    Args:
        train_values: 1-D array of the component on the train set.
        test_values: 1-D array of the component on the test set.
        ax: matplotlib axis to draw on (train curve first, then test).
        xlabel: label of the x axis.
        bins: number of histogram bins.

    Returns:
        Sum of absolute differences between the two normalised histograms.
    """
    lo = min(np.min(train_values), np.min(test_values))
    hi = max(np.max(train_values), np.max(test_values))
    te_hist, edges = np.histogram(test_values, bins=bins, range=(lo, hi))
    tr_hist, _ = np.histogram(train_values, bins=bins, range=(lo, hi))

    # Normalise counts once so that each histogram sums to 1.
    tr_hist = tr_hist / np.sum(tr_hist)
    te_hist = te_hist / np.sum(te_hist)

    # Curves are drawn against the right bin edges.
    ax.plot(edges[1:], tr_hist, '-')
    ax.plot(edges[1:], te_hist, '-')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('densety')

    return np.sum(np.abs(te_hist - tr_hist))


fig, ax = plt.subplots(1, 2, figsize=(15, 7))

p1 = _component_l1_distance(train_emb[:, 0], test_emb[:, 0], ax[0], 'component 1')
p2 = _component_l1_distance(train_emb[:, 1], test_emb[:, 1], ax[1], 'component 2')

# Similarity score: one minus the mean of the two per-component L1 distances.
# NOTE(review): an L1 distance between two normalised histograms can reach 2,
# so this score is not confined to [0, 1] - confirm the intended scale.
p = 1 - (p1 + p2) / 2
fig.suptitle(f'Train/Test Set Similarity: {p}');

So, based on the graph above, it is possible to say that the train and test sets are very similar, and solutions with the right cross-validation will have close LB & CV scores.

<a id="5"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center> 5. Grad-CAM </center></h2>

In [None]:
# Reload the classifier, then copy its weights into the Grad-CAM variant. That
# variant is built directly on the EfficientNet graph (base.input / base.output),
# so its inner layers can be looked up by name with model.get_layer(...).
model = build_model(256)
model.load_weights('../input/cqt-g2net-efficientnetb1-tpu-training/fold-0.h5')
model_gradcam = build_model_gradcam(256)
model_gradcam.set_weights(model.get_weights())

In [None]:
from tensorflow import keras 

# https://keras.io/examples/vision/grad_cam/
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single image.

    Args:
        img_array: one image without a batch axis (the batch axis is added here).
        model: Keras model that contains a layer named `last_conv_layer_name`.
        last_conv_layer_name: name of the layer whose activations are weighted.
        pred_index: index of the output to explain; defaults to the argmax of
            the prediction for this image.

    Returns:
        NumPy array with the heatmap, clipped at zero and divided by its
        maximum. When no activation is positive the heatmap is all zeros
        instead of NaN.
    """
    # First, we create a model that maps the input image to the activations
    # of the last conv layer as well as the output predictions
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Then, we compute the gradient of the top predicted class for our input image
    # with respect to the activations of the last conv layer
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(np.expand_dims(img_array, axis=0))
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # This is the gradient of the output neuron (top predicted or chosen)
    # with regard to the output feature map of the last conv layer
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # This is a vector where each entry is the mean intensity of the gradient
    # over a specific feature map channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # We multiply each channel in the feature map array
    # by "how important this channel is" with regard to the top predicted class
    # then sum all the channels to obtain the heatmap class activation
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # For visualization purpose, we will also normalize the heatmap between 0 & 1.
    # Clip first and divide by the maximum of the clipped map: for a map with a
    # positive maximum this is identical to dividing by the raw maximum, while a
    # map without positive values yields zeros rather than NaN / negative zeros.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

def display_gradcam(img, heatmap,alpha, ax):
    """Draw `img` with a jet-coloured Grad-CAM heatmap blended on top.

    Args:
        img: image accepted by `keras.preprocessing.image.img_to_array`.
        heatmap: 2-D array with values in [0, 1].
        alpha: weight of the coloured heatmap; the image gets `1 - alpha`.
        ax: matplotlib axis to draw on.
    """
    # Load the original image
    img = keras.preprocessing.image.img_to_array(img)

    # Rescale heatmap to a range 0-255
    heatmap = np.uint8(255 * heatmap)

    # Use jet colormap to colorize heatmap. `plt.get_cmap` is used because
    # `matplotlib.cm.get_cmap` has been removed from recent matplotlib releases.
    jet = plt.get_cmap("jet")

    # Use RGB values of the colormap
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Create an image with RGB colorized heatmap
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)

    # Superimpose the heatmap on original image
    # NOTE(review): the heatmap has been converted to an image and back at this
    # point, while `img` keeps the value range the caller passed in - confirm
    # that `alpha` is chosen with both scales in mind.
    superimposed_img = jet_heatmap * alpha + img * (1 - alpha)
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)

    ax.imshow(superimposed_img)

In [None]:
# One row of up to five Grad-CAM overlays per image group.
for title, group_images in img_groups.items():
    fig, ax = plt.subplots(1, 5, figsize=(25, 5))
    # Bound the loop by the size of the current group; a leftover array from an
    # earlier cell may hold a different number of images and would either raise
    # an IndexError or skip images.
    for i in range(0, min(5, len(group_images))):
        img = group_images[i]
        heatmap = make_gradcam_heatmap(img, model_gradcam, 'top_activation')
        display_gradcam(img, heatmap, 0.001, ax[i])
    fig.suptitle(title);
    plt.show()

The images above clearly show that the trained EfficientNet B1 is confused on the subset of samples with gravitational waves where the gravitational wave pattern is invisible. Do the samples of the third group really contain gravitational waves? Or are the extracted q-transform features invalid for this subset of the data?

<a id="6"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center>6. Conclusion </center></h2>

* It is possible to conclude that the extracted q-transform features are very good for separating Group 2, but they are not sufficient for separating Group 3 from Group 1.
* Based on the research above, it is also possible to conclude that the train and test datasets are very similar, and solutions with the right cross-validation will have close LB & CV scores. 

<a id="7"></a>
<h2 style='background:#0788f0; font-size:200%; border:0; color:white'><center>7. References </center></h2>

* [Build the right Autoencoder — Tune and Optimize using PCA principles. Part I](https://towardsdatascience.com/build-the-right-autoencoder-tune-and-optimize-using-pca-principles-part-i-1f01f821999b)
* [Build the right Autoencoder — Tune and Optimize using PCA principles. Part II](https://towardsdatascience.com/build-the-right-autoencoder-tune-and-optimize-using-pca-principles-part-ii-24b9cca69bd6)
* [Grad-CAM class activation visualization](https://keras.io/examples/vision/grad_cam/)
* [CQT G2Net EfficientNetB1[TPU Training]](https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb1-tpu-training)
* [CQT G2Net EfficientNetB1[TPU Inference] ](https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb1-tpu-inference)

P.S. I would be grateful if you could suggest statistical tests for comparing the distributions of the random variables from section 4. 