In [None]:
import sys,glob
import librosa
import cv2,os
import numpy as np # linear algebra
import pandas as pd
import seaborn as sns
from pathlib import Path
import os,random
from scipy import signal
from scipy import optimize
import matplotlib.pyplot as plt
import gc
import matplotlib.image as immg
from tqdm.notebook import tqdm
import albumentations
import torch
import torch.nn as nn
from torch.nn import functional as F

In [None]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation warnings from the
# libraries used below are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
!pip install -q nnAudio

In [None]:
# Load the competition tables. `train` is read from training_labels.csv and
# `test` from sample_submission.csv; later cells use the `id` column of both
# and the `target` column of `train`.
train = pd.read_csv('../input/g2net-gravitational-wave-detection/training_labels.csv')
test = pd.read_csv('../input/g2net-gravitational-wave-detection/sample_submission.csv')

def get_train_file_path(image_id):
    """Return the path of the training ``.npy`` file for ``image_id``.

    The file lives three directory levels deep, each level named after one of
    the first three characters of the id, followed by ``<image_id>.npy``.
    Indexing the first three characters means an id shorter than three
    characters raises ``IndexError``.
    """
    template = "../input/g2net-gravitational-wave-detection/train/{}/{}/{}/{}.npy"
    first, second, third = image_id[0], image_id[1], image_id[2]
    return template.format(first, second, third, image_id)

def get_test_file_path(image_id):
    """Return the path of the test ``.npy`` file for ``image_id``.

    The file lives three directory levels deep, each level named after one of
    the first three characters of the id, followed by ``<image_id>.npy``.
    Indexing the first three characters means an id shorter than three
    characters raises ``IndexError``.
    """
    template = "../input/g2net-gravitational-wave-detection/test/{}/{}/{}/{}.npy"
    first, second, third = image_id[0], image_id[1], image_id[2]
    return template.format(first, second, third, image_id)

In [None]:
# Derive the on-disk location of every sample from its id and store it in a
# new `file_path` column on each table.
train['file_path'] = train['id'].apply(get_train_file_path)
test['file_path'] = test['id'].apply(get_test_file_path)

In [None]:
# Quick look at the first rows of `train`, now including `file_path`.
train.head()

In [None]:
# Quick look at the first rows of `test`, now including `file_path`.
test.head()

In [None]:
def apply_bandpass(x, lf=25, hf=500, order=4, sr=2048):
    """Band-pass ``x`` between ``lf`` and ``hf`` Hz and rescale the result.

    A Butterworth band-pass of the given ``order`` is designed in
    second-order-section form for sampling rate ``sr`` and applied with
    ``signal.sosfiltfilt`` (forward-backward filtering). The filtered signal
    is then divided by ``sqrt((hf - lf) / (sr / 2))``, i.e. the square root
    of the pass-band width expressed as a fraction of the Nyquist frequency.

    Returns an array with the same shape as ``x``.
    """
    nyquist = sr / 2
    band_fraction = (hf - lf) / nyquist
    sos_sections = signal.butter(order, [lf, hf], btype="bandpass", output="sos", fs=sr)
    filtered = signal.sosfiltfilt(sos_sections, x)
    return filtered / np.sqrt(band_fraction)

In [None]:
# Keyword arguments forwarded verbatim to nnAudio's CQT1992v2 when the default
# transform of apply_qtransform is built. Note fmax equals sr / 2.
# NOTE(review): the exact meaning of each key is defined by nnAudio — confirm
# against its documentation before tuning.
qtransform_params={"sr": 2048, "fmin": 20, "fmax": 1024, "hop_length": 32, "bins_per_octave": 8}

In [None]:
import torch
from nnAudio.Spectrogram import CQT1992v2

def apply_qtransform(waves, transform=CQT1992v2(**qtransform_params)):
    """Convert one 1-D waveform into a constant-Q spectrogram.

    Pipeline: peak-normalise, taper with a Tukey window (alpha = 0.2),
    band-pass 35-500 Hz via ``apply_bandpass``, convert to a float32 torch
    tensor and run ``transform`` on it.

    Parameters
    ----------
    waves : np.ndarray
        A single waveform (one row of a loaded ``.npy`` sample). The window is
        sized from ``waves.shape[-1]``, so any length is accepted.
    transform : callable
        Spectrogram transform. The default ``CQT1992v2`` instance is built once,
        when this function is defined, from ``qtransform_params``.

    Returns
    -------
    Whatever ``transform`` returns; the rest of this notebook indexes the
    result as ``image[0, :, :]``.
    """
    # Normalise by the largest magnitude rather than the largest signed value:
    # a non-positive maximum would otherwise flip the sign or divide by zero.
    peak = np.max(np.abs(waves))
    if peak > 0:
        waves = waves / peak
    # `scipy.signal.tukey` was removed from the top-level namespace in recent
    # SciPy releases; the window functions live in `scipy.signal.windows`.
    # Multiplying out-of-place also guarantees the caller's array is untouched.
    waves = waves * signal.windows.tukey(waves.shape[-1], 0.2)
    waves = apply_bandpass(waves, 35, 500)
    waves = torch.from_numpy(waves).float()
    return transform(waves)

# Preview: for the first five training rows, plot the spectrogram of the
# first waveform in the file and show the row's label in the title.
for i in range(5):
    target = train.loc[i, 'target']
    waves = np.load(train.loc[i, 'file_path'])
    image = apply_qtransform(waves[0])
    plt.imshow(image[0])
    plt.title(f"target: {target}")
    plt.show()


In [None]:
# Shape of the last spectrogram produced by the preview loop above.
image.shape

In [None]:
#from https://www.kaggle.com/daisukelab/creating-fat2019-preprocessed-data
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """Standardise an array and rescale it to 8-bit intensities.

    Parameters
    ----------
    X : np.ndarray
        Input array (a single spectrogram plane in this notebook).
    mean, std : float, optional
        Statistics used for standardisation. When ``None`` they are computed
        from ``X`` (``std`` is computed after the mean has been subtracted).
    norm_max, norm_min : float, optional
        Clipping bounds applied to the standardised values before scaling.
        When ``None`` the observed max / min of the standardised array is used.
    eps : float
        Added to ``std`` to avoid division by zero, and used as the threshold
        below which the array is treated as constant.

    Returns
    -------
    np.ndarray
        ``uint8`` array with the same shape as ``X``: values in ``[0, 255]``,
        or all zeros when the standardised range does not exceed ``eps``.
    """
    # Explicit `is None` checks: the previous `value or default` idiom treated
    # a legitimate 0 (e.g. mean=0 or norm_min=0) as "not provided".
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min
    if (_max - _min) > eps:
        # Clip to the requested bounds, then map [norm_min, norm_max] -> [0, 255].
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # (Nearly) constant input: there is nothing to stretch, return black.
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

def build_spectrogram(file_loc,ax=1):
    """Load one sample file and return its three spectrograms as one image.

    The first three rows of the array stored at ``file_loc`` are each passed
    through ``apply_qtransform``; the first plane of every result is converted
    to ``uint8`` with ``mono_to_color`` and the three planes are concatenated
    along axis ``ax``.
    """
    waves = np.load(file_loc)
    spectrograms = [apply_qtransform(waves[k]) for k in range(3)]
    # Move the leading dimension last and keep its first entry, as a numpy array.
    planes = [spec.permute(1, 2, 0).numpy()[:, :, 0] for spec in spectrograms]
    tiles = [mono_to_color(plane) for plane in planes]
    return np.concatenate(tiles, axis=ax)

In [None]:
# Build the concatenated spectrogram for the row labelled 3566 (ax=1) and
# display its shape as a sanity check.
img = build_spectrogram(train.loc[3566,'file_path'],1);img.shape

In [None]:
# Visual check of the concatenated spectrogram built in the previous cell.
plt.imshow(img)

In [None]:
import os
import gc
import zipfile
from joblib import Parallel, delayed

In [None]:
# Re-display the first rows of `train` before exporting the images.
train.head()

In [None]:
# Paths of all training samples, and the name of the zip archive that will
# receive one PNG per sample.
files = train['file_path'].values
OUT_TRAIN = 'TrainG2NET.zip'

In [None]:
# Per-image mean and mean-of-squares of the pixels scaled to [0, 1]; they are
# combined into dataset-level statistics in the next cell.
x_tot = []
x2_tot = []
batch = 50
with zipfile.ZipFile(OUT_TRAIN, 'w') as img_out:
    for idx in tqdm(range(0, len(files), batch)):
        names = files[idx:idx + batch]
        # Build the spectrograms of one batch in parallel on all cores.
        out = Parallel(n_jobs=-1)(delayed(build_spectrogram)(path) for path in names)
        for path, img in zip(names, out):
            scaled = img / 255.0
            x_tot.append(scaled.mean())
            x2_tot.append((scaled ** 2).mean())
            # Archive member name: file name without directories or extension.
            name = path.split('/')[-1].split('.')[0]
            png_buffer = cv2.imencode('.png', img)[1]
            img_out.writestr(name + '.png', png_buffer)

In [None]:
# Dataset-level pixel statistics from the per-image moments collected during
# the export: mean = E[x], std = sqrt(E[x^2] - E[x]^2).
# NOTE(review): every image is weighted equally, which matches the per-pixel
# statistics only if all images have the same number of pixels — confirm.
img_avr =  np.array(x_tot).mean()
img_std =  np.sqrt(np.array(x2_tot).mean() - img_avr**2)
print('mean:',img_avr, ', std:', img_std)

In [None]:
# Paths of all test samples, and the name of the zip archive that will
# receive one PNG per sample.
tsfiles = test['file_path'].values
OUT_TEST = 'TestG2NET.zip'

In [None]:
# Per-image mean and mean-of-squares of the pixels scaled to [0, 1]; they are
# combined into dataset-level statistics in the next cell. These lists replace
# the ones filled during the training export.
x_tot = []
x2_tot = []
batch = 50
with zipfile.ZipFile(OUT_TEST, 'w') as img_out:
    for idx in tqdm(range(0, len(tsfiles), batch)):
        names = tsfiles[idx:idx + batch]
        # Build the spectrograms of one batch in parallel on all cores.
        out = Parallel(n_jobs=-1)(delayed(build_spectrogram)(path) for path in names)
        for path, img in zip(names, out):
            scaled = img / 255.0
            x_tot.append(scaled.mean())
            x2_tot.append((scaled ** 2).mean())
            # Archive member name: file name without directories or extension.
            name = path.split('/')[-1].split('.')[0]
            png_buffer = cv2.imencode('.png', img)[1]
            img_out.writestr(name + '.png', png_buffer)

In [None]:
# Same statistics as for the training set, now over the test images:
# mean = E[x], std = sqrt(E[x^2] - E[x]^2). This overwrites the values
# computed for the training set.
# NOTE(review): every image is weighted equally, which matches the per-pixel
# statistics only if all images have the same number of pixels — confirm.
img_avr =  np.array(x_tot).mean()
img_std =  np.sqrt(np.array(x2_tot).mean() - img_avr**2)
print('mean:',img_avr, ', std:', img_std)