# Install the PyCBC gravitational-wave analysis toolkit
* https://github.com/gwastro/pycbc

PyCBC is a Python package developed by the gravitational-wave (GW) astronomy community to help analyze gravitational-wave data, detect signals, and estimate the parameters of a source binary. It is meant to be accessible and welcomes contributions.

In [None]:
!pip install pycbc

# Get labels

In [None]:
import pandas as pd
# Load the training labels table; the code below reads its `id` and `target` columns.
df = pd.read_csv('../input/g2net-gravitational-wave-detection/training_labels.csv')

def train_filepath_from_id(id):
    """Return the path of the training ``.npy`` file for a sample id.

    The path nests three directory levels, one per leading character of the
    id: ``train/<id[0]>/<id[1]>/<id[2]>/<id>.npy``.
    """
    train_root = '../input/g2net-gravitational-wave-detection/train'
    shard = f'{id[0]}/{id[1]}/{id[2]}'
    return f'{train_root}/{shard}/{id}.npy'

# Add a column with each sample's .npy path, then report the table's (rows, columns).
df['filepath'] = df['id'].map(train_filepath_from_id)
print(df.shape)

# Plot the q-transform of the data

A constant-Q transform (https://en.wikipedia.org/wiki/Constant-Q_transform) is a common method to visualize gravitational-wave data. Almost all time-frequency figures in the gravitational-wave literature use this method.

In [None]:
import pylab, glob, os
import numpy as np
import pycbc.types
from time import time

def plot_cqf(filepath, label):
    """Plot whitened q-transform power maps for one sample file.

    Loads the array stored at ``filepath`` and, for each of its first three
    rows, builds a PyCBC ``TimeSeries`` sampled at 2048 Hz, whitens it and
    computes a q-transform over 20-512 Hz. The three power maps and their
    element-wise mean are drawn as four stacked panels with a logarithmic
    frequency axis and a colour scale clipped to [0, 15].

    Args:
        filepath: Path to a ``.npy`` file; rows 0-2 are read as separate series.
        label: Value shown in the figure title next to the file's base name.
    """
    data = np.load(filepath)

    fig, axes = pylab.subplots(4, 1, figsize=[9, 8], dpi=100)
    # Title the whole figure: pylab.title() would only title whichever single
    # panel happens to be current, not the figure.
    fig.suptitle(f"{label} / {os.path.basename(filepath)}")

    powers = []
    for i in range(3):
        # convert the data to a TimeSeries instance
        ts = pycbc.types.TimeSeries(data[i, :], epoch=0, delta_t=1.0/2048)

        # whiten the data (i.e. normalize the noise power at different frequencies)
        ts = ts.whiten(0.125, 0.125, remove_corrupted=True)

        # calculate the qtransform; t and freq from the last row are reused
        # as the plotting grid for every panel below
        t, freq, power = ts.qtransform(.002, logfsteps=100, qrange=(10, 10), frange=(20, 512))
        powers.append(power)

    # Fourth panel: element-wise mean of the per-row power maps.
    mean_of_all_sites = sum(powers) / len(powers)
    powers.append(mean_of_all_sites)

    for ax, power in zip(axes, powers):
        ax.pcolormesh(t, freq, power, vmax=15, vmin=0)
        ax.set_yscale('log')
    pylab.show()
    print()

# Plot one randomly sampled example with target == 1.
for _, row in df[df.target == 1].sample(n=1).iterrows():
    plot_cqf(row['filepath'], row['target'])
    
# Disabled: the same plot for ten randomly sampled examples with target == 0.
#for _, row in df[df.target == 0].sample(n=10).iterrows():
#    plot_cqf(row['filepath'], row['target'])

In [None]:
from pathlib import Path
from tqdm.notebook import tqdm

def compute_and_save_qtransform(filepath: str, output_directory: str):
    """Compute q-transform power maps for one sample file and save them.

    The last five ``/``-separated components of ``filepath`` are reproduced
    under ``output_directory`` (missing parent directories are created), and
    the power maps of rows 0-2 are written there as one stacked array.

    Args:
        filepath: Path to the input ``.npy`` file.
        output_directory: Root directory under which the result is saved.
    """
    relative_name = '/'.join(filepath.split('/')[-5:])
    destination = Path(output_directory) / relative_name
    destination.parent.mkdir(parents=True, exist_ok=True)

    channels = np.load(filepath)

    def _whitened_power(row):
        # Same processing chain as the plotting code: 2048 Hz series -> whiten -> q-transform.
        series = pycbc.types.TimeSeries(channels[row, :], epoch=0, delta_t=1.0/2048)
        whitened = series.whiten(0.125, 0.125, remove_corrupted=True)
        _, _, power = whitened.qtransform(.002, logfsteps=100, qrange=(10, 10), frange=(20, 512))
        return power

    stacked = np.array([_whitened_power(row) for row in range(3)])
    np.save(destination, stacked)
    
from joblib import Parallel, delayed
# Run compute_and_save_qtransform on the first 50 file paths with two joblib
# workers, writing results under /kaggle/working/. tqdm wraps the input
# iterable, so the bar advances as paths are consumed from it.
Parallel(n_jobs=2)(delayed(compute_and_save_qtransform)(filepath, '/kaggle/working/') 
                   for filepath in tqdm(df['filepath'].values[:50]))