# Prepare Environment

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import math
import cv2
import librosa 
import librosa.display
import IPython.display as ipd 
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K

In [None]:
def merge_history(hlist):
    """Concatenate the per-epoch metrics of several training runs.

    Parameters
    ----------
    hlist : sequence
        Objects exposing a ``history`` dict (metric name -> per-epoch
        sequence of values), such as the values returned by ``fit``, in the
        order the runs were performed.

    Returns
    -------
    dict
        Metric name -> one flat list with the values of every run placed
        back to back. Only the metrics recorded by the first run are
        included. An empty ``hlist`` yields an empty dict.

    Raises
    ------
    KeyError
        If a later run lacks a metric that the first run recorded.
    """
    # Nothing to merge; avoids an IndexError on hlist[0].
    if not hlist:
        return {}

    # A flat comprehension is linear in the number of epochs, unlike
    # sum(list_of_lists, []) which re-copies the accumulator for every run.
    return {
        key: [value for run in hlist for value in run.history[key]]
        for key in hlist[0].history
    }

In [None]:
def vis_training(h, start=1):
    """Plot the training curve of every metric, one subplot per metric.

    Parameters
    ----------
    h : dict
        Metric name -> list of per-epoch values (for example the output of
        ``merge_history``). Must contain ``'loss'``. A validation series is
        looked up under the same name prefixed with ``'val_'``.
    start : int, default 1
        1-based number of the first epoch to display; earlier epochs are
        cut from every curve.
    """
    epoch_range = range(start, len(h['loss']) + 1)
    s = slice(start - 1, None)

    # Pick the training metrics by name instead of assuming that exactly the
    # first half of the dict are training metrics with a 'val_' twin each.
    metrics = [k for k in h if not k.startswith('val_')]
    n = len(metrics)

    plt.figure(figsize=[14, 4])

    for i, k in enumerate(metrics):
        plt.subplot(1, n, i + 1)
        plt.plot(epoch_range, h[k][s], label='Training')
        # The validation curve is optional: it is absent when fit() ran
        # without validation data.
        val_key = 'val_' + k
        if val_key in h:
            plt.plot(epoch_range, h[val_key][s], label='Validation')
        plt.xlabel('Epoch'); plt.ylabel(k); plt.title(k)
        plt.grid()
        plt.legend()

    plt.tight_layout()
    plt.show()

# Load Data

In [None]:
# Read the training labels table and show its size and first rows.
train = pd.read_csv(
    '../input/g2net-gravitational-wave-detection/training_labels.csv'
)
print(train.shape, '\n')
train.head()

# Data Generators

In [None]:
# Root directory holding the pre-computed spectrogram arrays (.npy files).
SPEC_PATH = '../input/g2net-spectrograms-41x9/spectrograms'


class DataGenerator(keras.utils.Sequence):
    """Batch loader that reads one ``.npy`` array per sample from disk.

    Each row of ``df`` names a sample through its ``id`` column; the array is
    read from ``{SPEC_PATH}/train_spec/{id}.npy`` or
    ``{SPEC_PATH}/test_spec/{id}.npy`` depending on ``is_train``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``id`` column. A ``target`` column supplies the labels
        when present; without it the labels are all zeros.
    batch_size : int, default 32
        Number of samples per batch (the last batch may be smaller).
    img_size : tuple of int, default (41, 9)
        First two dimensions of each batch item; the third is fixed at 3.
        Assumes every stored array can be assigned into that shape -
        TODO confirm against the dataset.
    shuffle : bool, default True
        Reshuffle the sample order when the generator is created and after
        every epoch.
    is_train : bool, default True
        Selects the ``train_spec`` (True) or ``test_spec`` (False) folder.
    """

    def __init__(self, df, batch_size=32, img_size=(41, 9), shuffle=True, is_train=True):
        # Initialise the Keras base class; newer Keras versions expect
        # subclasses to do this and warn otherwise.
        super().__init__()
        self.df = df
        self.n = len(df)
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.is_train = is_train
        # Build the initial (possibly shuffled) sample order.
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indices = np.arange(self.n)
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __len__(self):
        """Return the number of batches per epoch."""
        return math.ceil(self.n / self.batch_size)

    def __getitem__(self, batch_index):
        """Return batch number ``batch_index`` as an ``(X, y)`` pair."""
        start = batch_index * self.batch_size
        end = (batch_index + 1) * self.batch_size

        return self.__data_generation(self.indices[start:end])

    def __data_generation(self, batch_indices):
        """Load the arrays and labels for the given row positions.

        Returns ``X`` of shape ``(len(batch_indices), img_size[0],
        img_size[1], 3)`` and ``y`` of shape ``(len(batch_indices),)``.
        """
        batch_size = len(batch_indices)

        X = np.zeros(shape=(batch_size, self.img_size[0], self.img_size[1], 3))
        y = np.zeros(batch_size)

        # Pull the columns out once per batch rather than once per sample.
        ids = self.df.id.values[batch_indices]
        # Unlabelled frames are allowed (e.g. for inference): y stays zero.
        if 'target' in self.df.columns:
            y[:] = self.df.target.values[batch_indices]

        SET = 'train_spec' if self.is_train else 'test_spec'

        for i, ID in enumerate(ids):
            path = f'{SPEC_PATH}/{SET}/{ID}.npy'
            X[i, :, :, :] = np.load(path)

        return X, y
    

# Switch for a quick sanity check of the generator; set to False to skip it.
GENERATOR_TEST = True

if GENERATOR_TEST:
    # Unshuffled, so the first batch is the first 8 rows of `train`.
    temp_gen = DataGenerator(df=train, batch_size=8, shuffle=False)
    X, y = temp_gen[0]

    print(X.shape)
    print(y)

    # Show the first channel of the first item in the batch.
    librosa.display.specshow(X[0, :, :, 0])


In [None]:
# Hold out 20% of the labelled rows for validation (fixed seed).
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,
    random_state=1,
)

print(train_df.shape, valid_df.shape, sep='\n')

In [None]:
# Training batches are reshuffled every epoch; validation order stays fixed.
train_loader = DataGenerator(df=train_df, batch_size=2048, shuffle=True)
valid_loader = DataGenerator(df=valid_df, batch_size=2048, shuffle=False)

# Build CNN

In [None]:
# Seed both NumPy (used by the generator's shuffling) and TensorFlow (weight
# initialisation, dropout). Seeding NumPy alone leaves the model's initial
# weights different on every run.
np.random.seed(1)
tf.random.set_seed(1)

# Three convolutional stages (64 -> 128 -> 256 filters), each made of two
# 3x3 'same' convolutions, 2x2 max pooling, dropout and batch normalisation,
# followed by two dense stages and a single sigmoid output unit.
cnn = Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(41, 9, 3)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Dropout(0.20),
    BatchNormalization(),

    Conv2D(128, (3, 3), activation='relu', padding='same'),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Dropout(0.20),
    BatchNormalization(),

    Conv2D(256, (3, 3), activation='relu', padding='same'),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(2, 2),
    Dropout(0.20),
    BatchNormalization(),

    Flatten(),

    Dense(128, activation='relu'),
    Dropout(0.20),
    BatchNormalization(),

    Dense(64, activation='relu'),
    Dropout(0.20),
    BatchNormalization(),

    Dense(1, activation='sigmoid'),
])

cnn.summary()

# Train Network

In [None]:
%%time

# First training phase: Adam with a learning rate of 0.01 for 10 epochs.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
cnn.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()],
)

h1 = cnn.fit(
    train_loader,
    validation_data=valid_loader,
    epochs=10,
    verbose=1,
)

In [None]:
# Plot the curves of the first training phase.
history = merge_history(hlist=[h1])
vis_training(h=history)

In [None]:
# Second training phase: lower the learning rate to 0.001 and train for
# another 10 epochs, continuing from the current weights.
K.set_value(cnn.optimizer.learning_rate, 0.001)

h2 = cnn.fit(
    train_loader,
    validation_data=valid_loader,
    epochs=10,
    verbose=1,
)

In [None]:
# Plot both training phases as one continuous run.
history = merge_history(hlist=[h1, h2])
vis_training(h=history)

In [None]:
# Persist the trained model to the working directory.
cnn.save(filepath='my_model.h5')

# Test Predictions

In [None]:
# Read the sample submission; its rows list the test samples to score.
submission = pd.read_csv(
    '../input/g2net-spectrograms-41x9/spectrograms/sample_submission.csv'
)
submission.head()

In [None]:
# Unshuffled, so the predictions line up with the rows of `submission`.
test_loader = DataGenerator(submission, is_train=False, batch_size=2048, shuffle=False)

pred = cnn.predict(test_loader)

# The model has a single output unit, so `pred` has one column; flatten it
# explicitly rather than relying on pandas to accept a 2-D array for a
# single column.
submission['target'] = pred.ravel()

submission.head()

In [None]:
# Write the predictions with a header row and without the index column.
submission.to_csv('submission.csv', index=False, header=True)