# Intro
Welcome to the [G2Net Gravitational Wave Detection](https://www.kaggle.com/c/g2net-gravitational-wave-detection/overview) competition.
![](https://storage.googleapis.com/kaggle-competitions/kaggle/23249/logos/header.png)

<span style="color: royalblue;">Please vote the notebook up if it helps you. Thank you. </span>

# Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split

from keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPool1D, BatchNormalization
from keras.optimizers import RMSprop,Adam

import warnings
# Suppress every Python warning from here on; note that this also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings("ignore")

# Path

In [None]:
# Root folder of the competition data. It ends with '/' so that later cells
# can append file and folder names with `+`.
path = '/kaggle/input/g2net-gravitational-wave-detection/'
# Bare expression: the notebook shows the folder contents.
os.listdir(path)

# Load Data

In [None]:
# Read the label table and the submission template from the data folder.
train_labels = pd.read_csv(os.path.join(path, 'training_labels.csv'))
samp_subm = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

# Overview

In [None]:
# Report the row count of each table.
print('Number train samples:', train_labels.shape[0])
print('Number submission samples:', samp_subm.shape[0])

In [None]:
# Show the first rows of the label table.
train_labels.head()

# Functions
We define some helper functions.

In [None]:
def plot_data(data):
    """Draw the first three rows of ``data`` as side-by-side line plots.

    Each row is plotted against its sample index in its own panel, with a
    grid and a title of the form ``Detection <n>`` (numbered from 1).
    """
    _, axes = plt.subplots(1, 3, figsize=(20, 5))
    for pos, ax in enumerate(axes.ravel()):
        series = data[pos]
        ax.plot(range(len(series)), series)
        ax.grid()
        ax.set_title('Detection '+str((pos+1)))

# Focus On Example Sample
We consider the first example of the train data. To get familiar with npy files, see [this article](https://towardsdatascience.com/what-is-npy-files-and-why-you-should-use-them-603373c78883).

In [None]:
# Take the id stored in row 0 of the label table.
id_ = train_labels.loc[0, 'id']
# Bare expression: the notebook shows the value.
id_

The first 3 characters are used for the path:

In [None]:
# The sample lives in nested folders named after the first three characters
# of its id. `path` already ends with a separator, so os.path.join is used to
# avoid a doubled slash; the trailing '' keeps the final separator so that
# `path_in+file` still forms a valid file path.
path_in = os.path.join(path, 'train', id_[0], id_[1], id_[2], '')
file = id_+'.npy'

Each data sample (npy file) contains 3 time series (1 for each detector) and each spans 2 sec and is sampled at 2,048 Hz.

In [None]:
# Load the example sample from disk.
data_array = np.load(path_in+file)
# Bare expression: the notebook shows the array shape.
data_array.shape

In [None]:
# Plot the rows of the example sample loaded above.
plot_data(data_array)

# EDA
*Coming Soon*

# Train, Val And Test Data

In [None]:
# Split the label-table row labels 67/33 into train and validation (fixed
# seed), and take every row of the submission template as the test set.
list_IDs_train, list_IDs_val = train_test_split(
    train_labels.index.tolist(),
    test_size=0.33,
    random_state=2021,
)
list_IDs_test = samp_subm.index.tolist()

In [None]:
# Report the size of each split.
for caption, id_list in (
    ('Number train samples:', list_IDs_train),
    ('Number val samples:', list_IDs_val),
    ('Number test samples:', list_IDs_test),
):
    print(caption, len(id_list))

# Data Generator
We define a data generator to load the data on demand.

In [None]:
# Number of samples per batch, passed to every DataGenerator below.
batch_size = 64

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads ``.npy`` samples from disk batch by batch.

    Args:
        path: Directory that holds the sharded sample folders; a sample with
            id ``abc...`` is read from ``<path>/a/b/c/<id>.npy``.
        list_IDs: Row labels of ``data`` that this generator serves.
        data: Table with an ``id`` column and a ``target`` column.
        batch_size: Maximum number of samples per batch.
    """

    def __init__(self, path, list_IDs, data, batch_size):
        # Give the Keras base class the chance to initialise its own state.
        super().__init__()
        self.path = path
        self.list_IDs = list_IDs
        self.data = data
        self.batch_size = batch_size
        self.indexes = np.arange(len(self.list_IDs))

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division without floats.
        return -(-len(self.list_IDs) // self.batch_size)

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays of batch number ``index``."""
        start = index * self.batch_size
        batch_positions = self.indexes[start:start + self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in batch_positions]
        return self.__data_generation(list_IDs_temp)

    def __data_generation(self, list_IDs_temp):
        """Load and standardise the samples named in ``list_IDs_temp``.

        Returns:
            ``X`` of shape ``(len(list_IDs_temp), 3, 4096)`` and ``y`` of
            shape ``(len(list_IDs_temp), 1)``.
        """
        # Size the arrays to the samples actually present so that a short
        # final batch is not padded with all-zero samples labelled 0.
        n_samples = len(list_IDs_temp)
        X = np.zeros((n_samples, 3, 4096))
        y = np.zeros((n_samples, 1))
        for i, ID in enumerate(list_IDs_temp):
            id_ = self.data.loc[ID, 'id']
            # os.path.join copes with `self.path` ending in a separator.
            file_path = os.path.join(self.path, id_[0], id_[1], id_[2], id_ + '.npy')
            data_array = np.load(file_path)
            # Standardise with the mean and std taken over the whole sample.
            X[i] = (data_array - data_array.mean()) / data_array.std()
            y[i] = self.data.loc[ID, 'target']
        return X, y

In [None]:
# Train and validation samples share the 'train/' folder and the label table;
# the test generator reads from 'test/' and uses the submission template.
train_generator = DataGenerator(
    path=path+'train/',
    list_IDs=list_IDs_train,
    data=train_labels,
    batch_size=batch_size,
)
val_generator = DataGenerator(
    path=path+'train/',
    list_IDs=list_IDs_val,
    data=train_labels,
    batch_size=batch_size,
)
test_generator = DataGenerator(
    path=path+'test/',
    list_IDs=list_IDs_test,
    data=samp_subm,
    batch_size=batch_size,
)

# Define Model

In [None]:
# Number of passes over the training data.
epochs = 1
# Learning rate handed to the optimizer.
lernrate = 2e-4

In [None]:
# Small network: one convolution, batch normalisation, then a dense head with
# a sigmoid output for the binary target.
# NOTE(review): Keras Conv1D defaults to channels_last, i.e. (steps, channels),
# so with input_shape=(3, 4096) the kernel appears to slide over the 3 detector
# rows and treat the 4096 time samples as channels - confirm this is intended.
model = Sequential([
    Conv1D(64, input_shape=(3, 4096,), kernel_size=3, activation='relu'),
    BatchNormalization(),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy with Adam. `learning_rate` is the supported argument
# name; the older `lr` alias is deprecated and rejected by newer Keras.
model.compile(optimizer=Adam(learning_rate=lernrate),
              loss='binary_crossentropy',
              metrics=['acc'])

In [None]:
# Print the layer-by-layer overview of the model.
model.summary()

In [None]:
# Model.fit accepts Sequence objects directly; fit_generator is deprecated
# and no longer available in newer Keras releases.
history = model.fit(train_generator, validation_data=val_generator, epochs=epochs, workers=4)

Predict test data

In [None]:
# Model.predict accepts Sequence objects directly; predict_generator is
# deprecated and no longer available in newer Keras releases.
predict = model.predict(test_generator, verbose=1)

In [None]:
# Keep one prediction per submission row, dropping any surplus rows at the end.
samp_subm['target'] = predict[:samp_subm.shape[0]]

# Export

In [None]:
# Write the submission table to submission.csv without the index column.
samp_subm.to_csv('submission.csv', index=False)