![header](http://storage.googleapis.com/kaggle-competitions/kaggle/23249/logos/header.png)

## Competition

In this competition you are provided with a training set of time series data containing simulated gravitational wave measurements from a network of 3 gravitational wave interferometers (LIGO Hanford, LIGO Livingston, and Virgo). Each time series contains either detector noise or detector noise plus a simulated gravitational wave signal. The task is to identify when a signal is present in the data (target=1).

Each data sample (npy file) contains 3 time series (1 for each detector) and each spans 2 sec and is sampled at 2,048 Hz.

- train/ - the training set files, one npy file per observation; labels are provided in the training_labels.csv file described below
- test/ - the test set files; you must predict the probability that the observation contains a gravitational wave
- training_labels.csv - target values of whether the associated signal contains a gravitational wave
- sample_submission.csv - a sample submission file in the correct format

The train/test paths are in the format:
- '../input/g2net-gravitational-wave-detection/{train/test}/{id[0]}/{id[1]}/{id[2]}/{id}.npy'

## Evaluation Metrics

- Area under the ROC curve

In [None]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Lift pandas' display limits so wide frames and long cell values are shown in full.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)
#pd.set_option('display.max_rows', None)

import gc

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder

import matplotlib.pyplot as plt
import seaborn as sns

from glob import glob

from IPython.display import display

# Global plotting defaults for the notebook.
plt.rcParams["figure.figsize"] = (12, 8)
plt.rcParams['axes.titlesize'] = 16

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# removed the old names, so pick whichever spelling this installation provides.
whitegrid_style = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if style_name in plt.style.available
    ),
    None,
)
if whitegrid_style is not None:
    plt.style.use(whitegrid_style)
else:
    # Neither bundled style exists; fall back to seaborn's own white-grid theme.
    sns.set_style('whitegrid')
sns.set_palette('Set2')

import os
# List the competition input directory to confirm the dataset is mounted.
print(os.listdir('/kaggle/input/g2net-gravitational-wave-detection/'))

from time import time, strftime, gmtime
# Timestamp (seconds since the epoch) taken when this cell runs; not read by any
# cell visible here, presumably meant for reporting total run time later.
start = time()
import datetime
# Print the current local date and time as a run stamp.
print(str(datetime.datetime.now()))

import warnings
# Suppress all warnings from here on (note: this also hides deprecation warnings).
warnings.simplefilter('ignore')

In [None]:
# Root of the competition data on Kaggle; kept with a trailing slash because the
# cells below build paths by plain string concatenation.
base_dir = '/kaggle/input/g2net-gravitational-wave-detection/'

In [None]:
# Load the training labels, report the table size, then preview the first rows.
train_labels = pd.read_csv(os.path.join(base_dir, 'training_labels.csv'))
print(train_labels.shape)
train_labels.head()

In [None]:
# Load the sample submission, report the table size, then preview the first rows.
sub = pd.read_csv(os.path.join(base_dir, 'sample_submission.csv'))
print(sub.shape)
sub.head()

In [None]:
# Count the .npy files under each split; files sit three directory levels deep.
train_pattern = base_dir + 'train/*/*/*/*.npy'
n_train_files = len(glob(train_pattern))
print(f"Num of train files: {n_train_files}")

test_pattern = base_dir + 'test/*/*/*/*.npy'
n_test_files = len(glob(test_pattern))
print(f"Num of test files: {n_test_files}")

In [None]:
# Bar chart of how many observations fall in each target class.
# The series is passed as `x=`: positional data vectors are deprecated in
# seaborn 0.11 and are read as the `data` argument in newer releases.
ax = sns.countplot(x=train_labels['target'])
for p in ax.patches:
    # Write the integer count centred just above each bar.
    ax.annotate(
        str(int(p.get_height())),
        (p.get_x() + p.get_width() / 2, p.get_height()),
        ha='center',
        va='bottom',
    )

- The target classes are well balanced!

# Utils

In [None]:
def image_id_to_path(img_id, flag = None):
    """Build the relative path of the ``.npy`` file for one observation.

    Files are nested under the first three characters of the id:
    ``{flag}/{id[0]}/{id[1]}/{id[2]}/{id}.npy``.

    Parameters
    ----------
    img_id : str
        Observation id; must have at least three characters.
    flag : str
        Dataset split, either ``'train'`` or ``'test'``.

    Returns
    -------
    str
        Path relative to the notebook's working directory.

    Raises
    ------
    ValueError
        If ``flag`` is not ``'train'`` or ``'test'``. Without this check the
        default ``None`` would silently produce a ``.../None/...`` path.
    """
    if flag not in ('train', 'test'):
        raise ValueError(f"flag must be 'train' or 'test', got {flag!r}")
    path = f"../input/g2net-gravitational-wave-detection/{flag}/{img_id[0]}/{img_id[1]}/{img_id[2]}/{img_id}.npy"
    return path

def visualize_waves(img_id, target, flag = None, plot = None):
    """Show one observation as raw traces, value densities and spectrograms.

    Parameters
    ----------
    img_id : str
        Observation id; resolved to an ``.npy`` path with ``image_id_to_path``.
    target : int
        Label of the observation. Only used in the figure titles.
    flag : str, optional
        Dataset split forwarded to ``image_id_to_path``.
    plot : optional
        Unused; kept so existing calls that pass it keep working.

    Notes
    -----
    The loaded array is read as ``signal[i]`` for ``i`` in 0..2, i.e. one row per
    time series. Three figures are drawn and shown; nothing is returned.
    """
    plot_colors = ["blue", "teal", "orange"]
    signal = np.load(image_id_to_path(img_id, flag))

    # Figure 1: the three raw time series, stacked and sharing the x-axis.
    fig1, ax1 = plt.subplots(3, 1, figsize = (16, 8), sharex = True)
    ax1 = ax1.ravel()
    for i in range(len(ax1)):
        ax1[i].plot(signal[i], color = plot_colors[i])
        ax1[i].set_xlabel('Time (1/2048 sec)')
    plt.suptitle(f"Signal - Image_id: {img_id}, Target: {target}")
    plt.show()

    # Figure 2: kernel density estimate of the sample values of each series.
    fig2, ax2 = plt.subplots(1, 3, figsize = (16, 6))
    ax2 = ax2.ravel()
    for j, ax in enumerate(ax2):
        # `fill` replaces the `shade` keyword deprecated since seaborn 0.11.
        sns.kdeplot(signal[j], fill = True, color = plot_colors[j], ax = ax)
    plt.suptitle(f"Signal Distribution - Image_id: {img_id}, Target: {target}")
    plt.show()

    # Figure 3: spectrogram of each series.
    fs = 2048                      # sampling rate in Hz
    nfft = fs // 8                 # samples per FFT segment
    novl = nfft * 15 // 16         # overlap between consecutive segments
    window = np.blackman(nfft)
    # NOTE(review): "icefire" is not a stock matplotlib colormap; it appears to
    # be registered by seaborn on import - confirm if seaborn is ever dropped.
    spec_cmap = ["inferno", "seismic", "icefire"]
    # Span the x-axis over the real number of samples (same unit as figure 1)
    # instead of a hard-coded 4000.
    n_samples = signal.shape[-1]
    plt.figure(figsize = (16, 8))
    for i in range(3):
        plt.subplot(1, 3, i + 1)
        # vmin/vmax clip the colour scale; presumably tuned by hand for this
        # dataset's amplitudes - TODO confirm.
        plt.specgram(signal[i], NFFT = nfft, Fs = fs, window = window,
                     noverlap = novl, cmap = spec_cmap[i], xextent = [0, n_samples],
                     vmin = -550, vmax = -440)
        plt.xlabel('Time (1/2048 sec)')
        plt.ylabel('Frequency (Hz)')
        plt.grid(False)
    plt.suptitle(f"Signal Spectrogram- Image_id: {img_id}, Target: {target}")
    plt.show()

Check a single npy file

In [None]:
# Load the first training observation and plot its series on one set of axes.
sample_path = image_id_to_path(train_labels['id'][0], 'train')
sample = np.load(sample_path)
print(sample.shape)
plt.figure(figsize = (16, 8))
plt.title('Sample Signal')
# plt.plot draws one line per COLUMN of a 2-D array. The file stores one series
# per row, so transpose; otherwise every time step becomes its own 3-point line.
plt.plot(sample.T)
plt.xlabel('Time (1/2048 sec)')
plt.legend([f"Series {i}" for i in range(sample.shape[0])])
plt.show()

# Visualize a few samples

In [None]:
# Plot a handful of distinct training observations.
# DataFrame.sample draws without replacement, so no observation is shown twice,
# and the fixed random_state gives the same picks on every Restart & Run All.
n_examples = min(4, len(train_labels))
for row in train_labels.sample(n = n_examples, random_state = 42).itertuples(index = False):
    visualize_waves(row.id, row.target, 'train')

# WIP