In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

import os
import math
import cmath
import shutil

In [None]:
!pip install -q nnAudio

In [None]:
import glob
import pathlib
from pathlib import Path
from numba import njit, jit, cuda, guvectorize
from scipy.signal import butter, filtfilt, sosfiltfilt
from nnAudio.Spectrogram import *
import torch
import gc

import joblib
from tqdm.auto import tqdm

In [None]:
# Gather every .npy file under the competition's train folder (three levels
# of sub-directories), sorted by path string, and wrap each one in a Path.
train_files = list(map(Path, sorted(glob.glob("../input/g2net-gravitational-wave-detection/train/*/*/*/*.npy"))))

In [None]:
wave = np.load(train_files[0])

In [None]:
@njit(nogil=True)
def min_max_scaler(wave):
    """Rescale each row of a 2-D array to the [0, 1] range independently.

    The input is left untouched: the result is written to a new array of the
    same shape and dtype, so callers can keep using the raw signal afterwards.
    ``wave`` is expected to be a floating-point array (an integer array would
    truncate the scaled values when they are stored).

    A row whose values are all equal has no range to scale by; it is mapped
    to zeros instead of dividing by zero.
    """
    scaled = np.empty_like(wave)
    for i in range(len(wave)):
        row = wave[i]
        lowest = row.min()
        span = row.max() - lowest
        if span == 0:
            # Constant row: avoid 0/0 and return a flat, finite signal.
            scaled[i, :] = 0.0
        else:
            scaled[i, :] = (row - lowest) / span
            # To map onto [-1, 1] instead, apply 2 * scaled[i] - 1 here.

    return scaled

In [None]:
# Taken from https://www.kaggle.com/anjum48/continuous-wavelet-transform-cwt-in-pytorch#Test-on-GW-data
def butter_bandpass_filter(data, low, high, fs, order):
    """Apply a zero-phase Butterworth band-pass filter to ``data``.

    Parameters
    ----------
    data : array-like
        Signal(s) to filter; ``sosfiltfilt`` filters along its default axis
        (the last one).
    low, high : float
        Lower and upper band edges, in the same units as ``fs``.
    fs : float
        Sampling frequency of ``data``.
    order : int
        Order passed to ``scipy.signal.butter``.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``data``.

    Notes
    -----
    The referenced implementation additionally divides the result by
    ``sqrt((high - low) / (fs / 2))``. That normalization is deliberately not
    applied here, so the (previously unused) factor is no longer computed.
    """
    sos = butter(order, [low, high], btype="bandpass", output="sos", fs=fs)
    return sosfiltfilt(sos, data)

In [None]:
# Signal / filter settings shared by the cells below.
fs = 2048.0                    # sampling frequency handed to the band-pass filter
T = 2                          # signal duration (presumably seconds -- confirm)
n = int(T * fs)                # number of samples in one signal of length T
nyq = 0.5 * fs                 # Nyquist frequency
cutoff = 2.5                   # cutoff frequency, same units as fs
normal_cutoff = cutoff / nyq   # cutoff expressed as a fraction of Nyquist
order = 4                      # default filter order

In [None]:
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), wave[0])

Note: from this point on, all wavelet transform equations are taken from https://pywavelets.readthedocs.io/en/latest/ref/cwt.html

Let's start with the Shannon wavelet transform. We will first implement it in NumPy and see what the signal looks like after the transformation. Note that this is the least optimized way of performing the calculation.

In [None]:
# B = bandwidth = wavelet width (Hz)
# C = center frequency (Hz)
def shannon_wavelets(wave, B=1, C=1):
    """Evaluate the complex Shannon wavelet at every value in ``wave``.

    Computes ``sqrt(B) * sinc(B * wave) * exp(2j * pi * C * wave)`` where
    ``sinc(x) = sin(pi * x) / (pi * x)``.

    ``np.sinc`` is used for the fraction so that an input value of exactly 0
    yields the limit value 1 instead of 0/0 = NaN. Min-max scaled signals
    always contain an exact 0, so the hand-written fraction produced a NaN
    sample for them.

    Returns a complex array with the same shape as ``wave``.
    """
    sinc_term = np.sinc(B * wave)
    exp_term = np.exp(1j * 2 * np.pi * C * wave)
    return np.sqrt(B) * sinc_term * exp_term

In [None]:
wave = np.load(train_files[0])

In [None]:
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), np.abs(shannon_wavelets(min_max_scaler(wave)[0])))

In [None]:
wave = np.load(train_files[0])
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), np.abs(shannon_wavelets(
    min_max_scaler(wave)[0], B=3)))

Next, let's try the original complex Morlet wavelet.

In [None]:
def complex_morlet(wave, B=1, C=1):
    """Return the modulus of the complex Morlet wavelet evaluated at ``wave``.

    The wavelet is a Gaussian envelope ``exp(-wave**2 / B)`` multiplied by the
    complex carrier ``exp(2j * pi * C * wave)`` and scaled by
    ``1 / sqrt(pi * B)``. Only the absolute value is returned, so the result
    is a real array with the same shape as ``wave``.
    """
    scale = 1 / np.sqrt(np.pi * B)
    envelope = np.exp(-(wave**2 / B))
    carrier = np.exp(1j * 2 * np.pi * C * wave)
    return np.abs(scale * envelope * carrier)

In [None]:
wave = np.load(train_files[0])
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), np.abs(complex_morlet(min_max_scaler(wave)[0])))

In [None]:
wave = np.load(train_files[0])
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), complex_morlet(min_max_scaler(wave)[0], B=20))

Try Complex Gaussian Derivative Wavelets

In [None]:
def cgauss_deriv(wave, C=1):
    """Return the modulus of ``C * exp(-wave**2) * exp(-1j * wave)``.

    The real and imaginary parts are built separately from cosine and sine
    and then combined into the magnitude, so the result is a real array with
    the same shape as ``wave``.
    """
    envelope = C * np.exp(-(wave**2))
    re_part = np.cos(-wave) * envelope
    im_part = np.sin(-wave) * envelope
    return np.sqrt(re_part**2 + im_part**2)

In [None]:
wave = np.load(train_files[0])
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), cgauss_deriv(min_max_scaler(wave)[0], C=1))

Try Mexican hat Wavelet

In [None]:
def mexican_hat(wave):
    """Evaluate the Mexican hat wavelet at every value in ``wave``.

    Computes ``2 / (sqrt(3) * pi**0.25) * (1 - wave**2) * exp(-wave**2 / 2)``
    element-wise and returns an array with the same shape as ``wave``.
    """
    norm = 2 / (math.sqrt(3) * (np.pi**0.25))
    shape_term = (1 - wave**2) * np.exp(-(wave**2 / 2))
    return norm * shape_term

In [None]:
wave = np.load(train_files[0])
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), mexican_hat(min_max_scaler(wave)[0]))

Let's look at frequency B-Spline Wavelets

In [None]:
def b_spline_opt(wave, M=2, B=1, C=1):
    """Return the modulus of the frequency B-spline wavelet evaluated at ``wave``.

    Computes the magnitude of
    ``sqrt(B) * sinc(B * wave / M)**M * exp(2j * pi * C * wave)``, where
    ``sinc(x) = sin(pi * x) / (pi * x)``. The complex exponential is split
    into its cosine and sine parts.

    ``np.sinc`` is used for the fraction so that an input value of exactly 0
    yields the limit value 1 instead of 0/0 = NaN. Min-max scaled signals
    always contain an exact 0, so the hand-written fraction injected a NaN
    sample into them.

    Parameters
    ----------
    wave : numpy.ndarray
        Values at which the wavelet is evaluated.
    M : int
        Spline order (exponent applied to the sinc term).
    B : float
        Bandwidth parameter.
    C : float
        Center frequency parameter.

    Returns
    -------
    numpy.ndarray
        Real, non-negative array with the same shape as ``wave``.
    """
    frac_term = (np.sinc(B * wave / M) ** M) * np.sqrt(B)
    phase = 2 * np.pi * C * wave
    real = np.cos(phase) * frac_term
    imag = np.sin(phase) * frac_term
    return np.sqrt(real**2 + imag**2)

It looks like the complex Morlet does the reinforcement for us, so we'll stick with that for now. We will rewrite the functions; in particular, splitting the complex exponentials into cosine and sine terms will speed up the calculations.

In [None]:
def shannon_wavelets_opt(wave, B=1, C=1):
    """Return the modulus of the complex Shannon wavelet evaluated at ``wave``.

    Computes the magnitude of
    ``sqrt(B) * sinc(B * wave) * exp(2j * pi * C * wave)`` with the complex
    exponential split into cosine and sine parts, where
    ``sinc(x) = sin(pi * x) / (pi * x)``.

    ``np.sinc`` is used for the fraction so that an input value of exactly 0
    yields the limit value 1 instead of 0/0 = NaN.

    Returns a real array with the same shape as ``wave``.
    """
    frac_term = np.sinc(B * wave) * np.sqrt(B)
    phase = 2 * np.pi * C * wave
    real = np.cos(phase) * frac_term
    imag = np.sin(phase) * frac_term
    return np.sqrt(real**2 + imag**2)

In [None]:
def complex_morlet_opt(wave, B=1, C=1):
    """Modulus of the complex Morlet wavelet without complex arithmetic.

    The Gaussian envelope ``exp(-wave**2 / B) / sqrt(pi * B)`` is multiplied
    by the cosine and sine of ``2 * pi * C * wave`` separately, and the two
    parts are combined into the magnitude. Returns a real array with the same
    shape as ``wave``.
    """
    envelope = np.exp(-(wave**2 / B)) / np.sqrt(np.pi * B)
    re_part = envelope * np.cos(2 * np.pi * C * wave)
    im_part = envelope * np.sin(2 * np.pi * C * wave)
    return np.sqrt(re_part**2 + im_part**2)

In [None]:
%timeit complex_morlet(wave[0], B=20)

In [None]:
%timeit complex_morlet_opt(wave[0], B=20)

Check that the results are the same. We won't use a real assert here; instead we plot the output and confirm that it is "about the same", since the values will most likely differ slightly.

In [None]:
plt.figure(dpi=120)
plt.plot(range(len(wave[0])), complex_morlet_opt(min_max_scaler(wave)[0], B=20))

Okay let's continue building a function. 

In [None]:
%timeit _ = np.ascontiguousarray(wave)

In [None]:
fmin = 21.83
fmax = 350
def apply_qtransform(waves, 
                     transform=CQT2010v2(sr=2048, fmin=fmin, fmax=fmax, n_bins=32, hop_length=64), 
                     cuda=False, order=4):
    """Turn a multi-row signal array into a constant-Q spectrogram image.

    Pipeline: band-pass filter between ``fmin`` and ``fmax`` -> per-row
    min-max scaling -> B-spline wavelet magnitude -> concatenate all rows
    into one 1-D signal -> ``transform``.

    Parameters
    ----------
    waves : numpy.ndarray
        Input signals; assumed to be 2-D (rows are concatenated by
        ``np.hstack`` before the transform).
    transform : callable
        Spectrogram layer applied to the concatenated signal. The default
        instance is created once, when the function is defined, and shared by
        every call.
    cuda : bool
        Run the transform on the GPU. Both the input tensor and the transform
        are moved to the selected device; moving only the input leaves the
        transform's kernels on the CPU and the call fails with a device
        mismatch.
    order : int
        Used both as the Butterworth filter order and as the ``M`` parameter
        of ``b_spline_opt``.

    Returns
    -------
    torch.Tensor
        Output of ``transform`` with NaN entries replaced by 0. It stays on
        the device the transform ran on.
    """
    waves = butter_bandpass_filter(waves, fmin, fmax, fs, order)
    waves = min_max_scaler(waves)
    # Alternative wavelet that was tried here: complex_morlet_opt(waves, B=21)
    waves = b_spline_opt(waves, M=order, B=35)
    waves = np.hstack(waves)

    device = torch.device("cuda" if cuda else "cpu")
    waves = torch.from_numpy(waves).float().to(device)
    if isinstance(transform, torch.nn.Module):
        # Module.to() moves the module in place; keeping it on the same device
        # as the input also lets a later cuda=False call work again.
        transform = transform.to(device)

    image = transform(waves)
    image[torch.isnan(image)] = 0
    return image

In [None]:
img = apply_qtransform(np.load(train_files[0]))
img.shape

In [None]:
img

In [None]:
plt.figure(dpi=80)
plt.imshow(img.squeeze())

In [None]:
for i in range(1, 11):
    img = apply_qtransform(np.load(train_files[0]), order=i)
    plt.figure(dpi=80)
    plt.imshow(img.squeeze())

In [None]:
for i in range(1, 11):
    img = apply_qtransform(np.load(train_files[i]))
    plt.figure(dpi=80)
    plt.imshow(img.squeeze())

In [None]:
gc.collect()

In [None]:
train_files[0].name[:-4]

In [None]:
plt.imsave("test.jpg", img.squeeze())

In [None]:
def save_images(file_path, out_dir):
    """Convert one ``.npy`` signal file into a spectrogram and save it as a JPEG.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the ``.npy`` file to load.
    out_dir : str or pathlib.Path
        Directory that receives ``<file stem>.jpg``. It must already exist; a
        trailing slash is optional.
    """
    file_path = Path(file_path)
    waves = np.load(file_path).astype(np.float32)  # (3, 4096)
    image = apply_qtransform(waves)
    # Path.stem drops the extension whatever its length, and os.path.join
    # inserts the separator only when out_dir does not already end with one.
    out_path = os.path.join(out_dir, file_path.stem + ".jpg")
    plt.imsave(out_path, image.squeeze())

In [None]:
# Load the training labels shipped with the competition data.
labels = pd.read_csv("../input/g2net-gravitational-wave-detection/training_labels.csv")
# NOTE(review): this is a positional assignment -- it assumes the CSV has one
# row per entry of train_files and that its rows are in the same order as the
# sorted file paths. Confirm, or align on the sample identifier instead.
labels["file_path"] = train_files
# Show full cell contents so the long file paths are not truncated.
pd.set_option("display.max_colwidth", None)
labels.head()

In [None]:
# ones_train = labels[labels["target"] == 1]["file_path"].to_numpy()
# ones_train[0]

# folder_name = "train/ones/"
# os.makedirs(folder_name, exist_ok=True)

# _ = joblib.Parallel(n_jobs=8, prefer="threads")(
#     joblib.delayed(save_images)(file_path, out_dir=folder_name) for file_path in tqdm(ones_train)
# )

In [None]:
# folder_name = "train/zero/"
# zeroes_train = labels[labels["target"] == 0]["file_path"].to_numpy()

# os.makedirs(folder_name, exist_ok=True)

# _ = joblib.Parallel(n_jobs=8, prefer="threads")(
#     joblib.delayed(save_images)(file_path, out_dir=folder_name) for file_path in tqdm(zeroes_train)
# )

In [None]:
# def move_to_destination(origin, destination, percentage_split):
#     num_images = int(len(os.listdir(origin))*percentage_split)
#     for image_name, image_number in zip(sorted(os.listdir(origin)), range(num_images)):
#         shutil.move(os.path.join(origin, image_name), destination)

In [None]:
# os.makedirs("./valid/ones")
# os.makedirs("./valid/zero")
# move_to_destination("./train/ones", "./valid/ones", 0.2)
# move_to_destination("./train/zero", "./valid/zero", 0.2)

In [None]:
# Render every training file to a JPEG inside OUT_DIR using 8 worker threads.
OUT_DIR = "train/"
# exist_ok=True keeps the cell re-runnable: os.mkdir raises FileExistsError
# when the folder is left over from a previous (possibly interrupted) run.
os.makedirs(OUT_DIR, exist_ok=True)

_ = joblib.Parallel(n_jobs=8, prefer="threads")(
    joblib.delayed(save_images)(file_path, out_dir=OUT_DIR) for file_path in tqdm(train_files)
)

In [None]:
%%time
# base_name is the archive path WITHOUT the extension. With a trailing slash
# ("train/") the archive can end up as "train/.zip" -- inside the very folder
# that is deleted on the next line (depends on the Python version). Using
# "train" always writes "train.zip" next to the folder.
shutil.make_archive("train", 'zip', "train/")
shutil.rmtree("train/")

# shutil.make_archive("valid/", "zip", "valid/")
# shutil.rmtree("valid/")

In [None]:
# Render every test file to a JPEG inside OUT_DIR using 8 worker threads.
OUT_DIR = "test/"
# exist_ok=True keeps the cell re-runnable: os.mkdir raises FileExistsError
# when the folder is left over from a previous (possibly interrupted) run.
os.makedirs(OUT_DIR, exist_ok=True)
test_files = [Path(path) for path in sorted(glob.glob("../input/g2net-gravitational-wave-detection/test/*/*/*/*.npy"))]

_ = joblib.Parallel(n_jobs=8, prefer="threads")(
    joblib.delayed(save_images)(file_path, out_dir=OUT_DIR) for file_path in tqdm(test_files)
)

In [None]:
%%time
# base_name is the archive path WITHOUT the extension. With a trailing slash
# ("test/") the archive can end up as "test/.zip" -- inside the very folder
# that is deleted on the next line (depends on the Python version). Using
# "test" always writes "test.zip" next to the folder.
shutil.make_archive("test", 'zip', "test/")
shutil.rmtree("test/")