In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
from matplotlib import pyplot as plt

import os

import math

import keras

import tensorflow as tf
import tensorflow.keras.layers as Layer
from tensorflow.keras.utils import Sequence


from tensorflow.keras.applications.efficientnet import *
from tensorflow.keras.applications.efficientnet import preprocess_input


In [2]:
# Optional: cap GPU memory by creating a 5000 MB virtual device (kept disabled).
# gpus = tf.config.experimental.list_physical_devices('GPU')
# tf.config.experimental.set_virtual_device_configuration(
#         gpus[0],
#         [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5000)])
# logical_gpus = tf.config.experimental.list_logical_devices('GPU')
# print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")

# Report the TensorFlow build and the GPUs it can see.
for label, value in (
    ("Tensorflow version: ", tf.__version__),
    ("Tensorflow GPU detection: ", tf.config.list_physical_devices('GPU')),
):
    print(label, value)

Tensorflow version:  2.5.0
Tensorflow GPU detection:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
# Both locations live under the same G2Net data directory; each keeps its
# trailing slash because later cells build paths by plain concatenation.
_G2NET_DIR = '../data/G2Net/'
DATA_PATH = _G2NET_DIR + 'g2net-gravitational-wave-detection/'  # competition files
TB_PATH = _G2NET_DIR + 'tb_logs/'                               # TensorBoard logs

In [4]:
# Read the label table and the submission template from the competition folder.
train_labels, sample_submission = (
    pd.read_csv(DATA_PATH + csv_name)
    for csv_name in ('training_labels.csv', 'sample_submission.csv')
)
train_labels.head()

Unnamed: 0,id,target
0,00000e74ad,1
1,00001f4945,0
2,0000661522,0
3,00007a006a,0
4,0000a38978,1


In [5]:
def apply_raw_path(row, is_train=True):
    """Build the path of the raw ``.npy`` file belonging to one table row.

    The file is located at ``<DATA_PATH><split>/<c0>/<c1>/<c2>/<id>.npy`` where
    ``c0..c2`` are the first three characters of the id.

    Args:
        row: A row whose first element is the sample id. May be a pandas
            Series (as passed by ``DataFrame.apply(..., axis=1)``) or any
            positionally indexable sequence such as a list or tuple.
        is_train: When truthy the path points into ``train/``, otherwise
            into ``test/``.

    Returns:
        str: The path, prefixed with the module-level ``DATA_PATH``.
    """
    # On a label-indexed Series, `row[0]` relies on the deprecated positional
    # fallback of Series.__getitem__; `.iloc` is the explicit positional form.
    file_name = row.iloc[0] if hasattr(row, "iloc") else row[0]
    split = "train" if is_train else "test"
    return DATA_PATH + "{}/{}/{}/{}/{}.npy".format(
        split, file_name[0], file_name[1], file_name[2], file_name)

../data/G2Net/g2net-gravitational-wave-detection/sample_submission.csv
../data/G2Net/g2net-gravitational-wave-detection/training_labels.csv
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\0\00005bced6.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\0\0000806717.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\0\0000ef4fe1.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\0\00020de251.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\1\001045a53d.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\1\0010564a1e.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\1\0010665bcf.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\1\0010a8cba0.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\2\0020180dd6.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\2\00209dc016.npy
../data/G2Net/g2net-gravitational-wave-detection/test\0\0\2\0020d862e7.npy
../data/G2Net/g2net-gravitational-w

In [None]:
# `train` is not created anywhere earlier in the notebook, so build it here
# from the labels table instead of relying on leftover kernel state.
# `assign` returns a new frame: `train_labels` stays untouched and the cell can
# be re-run safely.
train = train_labels.assign(
    raw_path=train_labels.apply(apply_raw_path, args=(True,), axis=1)
)

In [None]:
# Raw files -> clipped log-spectrograms, built as one chained pipeline.
# NOTE(review): `tfio` is not imported in the visible part of the notebook.
dataset = (
    tf.data.FixedLengthRecordDataset(
        train['raw_path'],
        98304,              # one record = 3 * 4096 float64 values (8 bytes each)
        header_bytes=128,   # presumably the .npy header -- TODO confirm
        num_parallel_reads=4,
    )
    # Bytes -> (3, 4096) float64 array.
    .map(lambda record: tf.reshape(tf.io.decode_raw(record, tf.float64), (3, 4096)))
    .map(lambda signal: tf.cast(signal, tf.float32))
    .map(lambda signal: tfio.audio.spectrogram(signal, nfft=256, window=128, stride=16))
    # Clip to [1e-30, 1e-17], take the log, then shift and scale.
    .map(lambda spec: (tf.math.log(tf.clip_by_value(spec, 1e-30, 1e-17)) + 60) * 12)
)
# dataset = dataset.map(lambda s: tfio.audio.melscale(s, rate=2048, mels=128, fmin=20, fmax=1024))

# Reverse the axis order, then apply the EfficientNet input preprocessing.
dataset_rgb = dataset.map(lambda spec: tf.transpose(spec)).map(preprocess_input)

# Pair every spectrogram with its target, in row order.
label_dataset = tf.data.Dataset.from_tensor_slices(train['target'])
full_dataset = tf.data.Dataset.zip((dataset_rgb, label_dataset))

In [14]:
# Sequential hold-out split over the first N_SAMPLES examples.
# The sizes are derived from SPLIT_RATIO (previously declared but unused) and
# still evaluate to 90000 training / 10000 validation examples.
SPLIT_RATIO = .9
N_SAMPLES = 100_000
n_train = round(N_SAMPLES * SPLIT_RATIO)

ds_train = full_dataset.take(n_train)
ds_valid = full_dataset.skip(n_train).take(N_SAMPLES - n_train)


In [None]:
BATCH_SIZE = 32

# Training stream: shuffle within a 2048-element buffer, batch, then prefetch.
train_data = (
    ds_train
    .shuffle(2048)
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=16)
)

# Validation stream: same batching and prefetching, no shuffle.
valid_data = (
    ds_valid
    .batch(BATCH_SIZE)
    .prefetch(buffer_size=16)
)

In [None]:
# ImageNet-initialised EfficientNetB0 without its classification head, using
# average pooling on the final feature map.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=[129,256,3],
    pooling='avg',
    drop_connect_rate=0.3,
)
# Freeze the backbone so only layers stacked on top of it are trained.
base_model.trainable = False

In [15]:
# TensorBoard logging for the "V0" run; update_freq=64 logs every 64 batches.
tb_dir = os.path.join(TB_PATH, "V0")
tb_callback = tf.keras.callbacks.TensorBoard(
    tb_dir,
    update_freq=64,
    )
callbacks = [
#     tf.keras.callbacks.LearningRateScheduler(ehc_scheduler),
    tb_callback,
#     customCallback,
#     es_callback
]

metrics = [
    tf.keras.metrics.AUC(name='AUC'),
    # The model ends in a single sigmoid unit, so accuracy must threshold that
    # probability. SparseCategoricalAccuracy takes an argmax over the last axis;
    # with one unit that is always class 0, so it only reported the share of
    # negative labels.
    tf.keras.metrics.BinaryAccuracy(name='Accuracy'),
]

In [17]:
# Classification head on top of `base_model`:
# Dense(32) -> ReLU -> Dropout -> single sigmoid unit.
dropout = .2
model = tf.keras.Sequential([
    Layer.InputLayer(input_shape=(129, 256, 3)),
    base_model,
    Layer.Dense(32),
    # A LeakyReLU used to follow this ReLU. Its input was then never negative,
    # so it acted as an identity and has been removed.
    Layer.ReLU(),
    Layer.Dropout(dropout),
    Layer.Dense(1, activation='sigmoid'),
])

model.summary()
# The stray trailing `*` after this call was a syntax error and is removed.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=metrics,
)



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 27, 128, 6)        60        
_________________________________________________________________
re_lu (ReLU)                 (None, 27, 128, 6)        0         
_________________________________________________________________
reshape (Reshape)            (None, 54, 128, 3)        0         
_________________________________________________________________
efficientnetb0 (Functional)  (None, 1280)              4049571   
_________________________________________________________________
dense (Dense)                (None, 32)                40992     
_________________________________________________________________
re_lu_1 (ReLU)               (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 3

In [None]:
# Log directory for the "EffNet" run.
tb_dir = os.path.join(TB_PATH, "EffNet")

# Staircase exponential decay: multiply the rate by 0.9 every 1000 steps.
# NOTE(review): `lr_schedule` is not passed to the optimizer in the visible
# code, which uses a fixed learning_rate -- confirm whether it is still needed.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True,
)

def ehc_scheduler(epoch, lr):
    """Return the learning rate to use for `epoch`, given the rate `lr`.

    Epochs before 1 keep `lr` unchanged, epoch 1 halves it, and every later
    epoch divides it by four.

    NOTE(review): this is used with a Keras LearningRateScheduler, which
    presumably passes the optimizer's current rate, so the divisions would
    compound from epoch to epoch -- confirm this is intended.
    """
    if epoch > 1:
        return lr / 4
    return lr / 2 if epoch == 1 else lr

# TensorBoard writer for `tb_dir`, logging every 64 batches.
tb_callback = tf.keras.callbacks.TensorBoard(tb_dir, update_freq=64)

callbacks = [
    # verbose=1 makes the scheduler print the rate it sets.
    tf.keras.callbacks.LearningRateScheduler(ehc_scheduler, verbose=1),
    tb_callback,
    # customCallback,
    # es_callback
]

model.summary()

# Only learning_rate is set explicitly; the other Adam arguments are omitted.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=metrics,
)




Epoch 1/2
 120/8313 [..............................] - ETA: 1:58:06 - loss: 0.6999 - AUC: 0.5032 - Accuracy: 0.4973

In [None]:
# Four epochs on the tf.data pipeline, evaluating on the held-out batches.
model.fit(
    train_data,
    validation_data=valid_data,
    epochs=4,
    callbacks=callbacks,
)

In [19]:
# Two epochs on the current id/label split, with both splits wrapped in
# `Dataset` inline.
# NOTE(review): `Dataset`, `x_train`, `y_train`, `x_valid` and `y_valid` are not
# defined above this cell in the visible notebook -- it depends on cells that
# were run out of order.
model.fit(
    Dataset(x_train, y_train),
    validation_data=Dataset(x_valid, y_valid),
    epochs=2,
    callbacks=callbacks,
)



Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x253973d6400>

In [None]:
# Second chunk of the labels table: rows 200000 (inclusive) to 400000 (exclusive).
train_idx = train_labels['id'].iloc[200000:400000].values
y = train_labels['target'].iloc[200000:400000].values

# Hold out 5% of the chunk for validation; the seed is fixed for repeatability.
# NOTE(review): `train_test_split` is not imported in the visible notebook.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_idx,
    y,
    test_size=0.05,
    random_state=42,
)


In [None]:
# Continue training for two epochs on the split produced by the previous cell.
model.fit(
    Dataset(x_train, y_train),
    validation_data=Dataset(x_valid, y_valid),
    epochs=2,
    callbacks=callbacks,
)

In [None]:
# Final chunk of the labels table: every row from position 400000 onwards.
train_idx = train_labels['id'].iloc[400000:].values
y = train_labels['target'].iloc[400000:].values

# Hold out 5% of the chunk for validation; the seed is fixed for repeatability.
# NOTE(review): `train_test_split` is not imported in the visible notebook.
x_train, x_valid, y_train, y_valid = train_test_split(
    train_idx,
    y,
    test_size=0.05,
    random_state=42,
)

In [None]:
# Two more epochs on the split produced by the previous cell.
model.fit(
    Dataset(x_train, y_train),
    validation_data=Dataset(x_valid, y_valid),
    epochs=2,
    callbacks=callbacks,
)

In [None]:
def id2path(idx, is_train=True):
    """Return the path of the transformed ``.npy`` file for sample `idx`.

    Args:
        idx: Sample id; it is formatted into the file name as-is.
        is_train: When truthy the path points into ``transform/train``,
            otherwise into ``transform/test``.

    Returns:
        str: ``<DATA_PATH>/transform/<split>/<idx>.npy``.
    """
    split = 'train' if is_train else 'test'
    # DATA_PATH may already end with '/'; strip it so the result never
    # contains a doubled separator.
    return f"{DATA_PATH.rstrip('/')}/transform/{split}/{idx}.npy"

In [29]:
# Ids and targets of the whole labels table, plus the ids to predict for.
train_idx, y = (train_labels[column].values for column in ('id', 'target'))
test_idx = sample_submission['id'].values

In [30]:
# Turn every training id into the path of its transformed .npy file, using the
# notebook's own helper (its `is_train` default selects the train folder).
train_idx = train_labels['id'].map(id2path)

In [31]:
# Peek at the first ten entries.
train_idx.head(10)

0    ../data/G2Net/g2net-gravitational-wave-detecti...
1    ../data/G2Net/g2net-gravitational-wave-detecti...
2    ../data/G2Net/g2net-gravitational-wave-detecti...
3    ../data/G2Net/g2net-gravitational-wave-detecti...
4    ../data/G2Net/g2net-gravitational-wave-detecti...
5    ../data/G2Net/g2net-gravitational-wave-detecti...
6    ../data/G2Net/g2net-gravitational-wave-detecti...
7    ../data/G2Net/g2net-gravitational-wave-detecti...
8    ../data/G2Net/g2net-gravitational-wave-detecti...
9    ../data/G2Net/g2net-gravitational-wave-detecti...
Name: id, dtype: object

In [41]:
def tf_img_load(idx, label):
    """Load one saved ``.npy`` array as a float32 tensor, paired with its label.

    `tf.io.read_file` only yields the raw file bytes as a string tensor, which
    can neither be cast to float nor decoded as an image. The file is therefore
    read with NumPy inside `tf.numpy_function`.

    Args:
        idx: Scalar string tensor holding the path of the ``.npy`` file.
        label: Passed through unchanged.

    Returns:
        tuple: ``(img, label)`` where ``img`` is a float32 tensor with the
        file's contents.
    """
    def _load_npy(path):
        # tf.numpy_function hands string tensors over as bytes.
        return np.load(path.decode("utf-8")).astype(np.float32)

    # NOTE(review): the static shape of `img` is unknown after numpy_function;
    # call `img.set_shape(...)` here once the stored array shape is confirmed.
    img = tf.numpy_function(_load_npy, [idx], tf.float32)
    return img, label

In [42]:
# Train one epoch on each of ten consecutive 25000-row chunks of the labels
# table, holding out 5% of every chunk for validation.
# NOTE(review): `Dataset` and `train_test_split` are not defined or imported in
# the visible notebook.
for i in range(10):
    rows = slice(i * 25000, (i + 1) * 25000)
    train_idx = train_labels['id'][rows].values
    y = train_labels['target'][rows].values
    x_train, x_valid, y_train, y_valid = train_test_split(
        train_idx, y, test_size=0.05, random_state=0)
    train_dataset = Dataset(x_train, y_train)
    valid_dataset = Dataset(x_valid, y_valid)
    model.fit(train_dataset, epochs=1, validation_data=valid_dataset)
# test_dataset = Dataset(test_idx)

In [43]:
# Show the number of elements in `train_dataset`.
train_dataset.cardinality()

<tf.Tensor: shape=(), dtype=int64, numpy=157>

In [44]:
# Preview up to nine samples from `train_dataset` in a 3x3 grid.
# NOTE(review): `AUTOTUNE` and `encoder` are not defined in the visible
# notebook; this cell needs them to exist before it can run.
plt.figure(figsize=(12,12))

n_panels = 9   # capacity of the 3x3 grid
shown = 0      # panels drawn so far

preview = (
    train_dataset.skip(0).take(9)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
    .shuffle(buffer_size=18, seed=85)
)
for images, labels in preview:
    # Walk the elements actually present in the batch. The old
    # `range(BATCH_SIZE)` could index past a short batch, and its subplot index
    # grew beyond the nine available panels.
    for image, label in zip(images, labels):
        if shown == n_panels:
            break
        plt.subplot(3, 3, shown + 1)
        plt.imshow(image.numpy().astype("uint8"), cmap='gray')
        plt.title(encoder.inverse_transform(label.numpy().reshape(-1,1)))
        plt.axis('off')
        shown += 1
    if shown == n_panels:
        break

InvalidArgumentError: Unknown image file format. One of JPEG, PNG, GIF, BMP required.
	 [[{{node decode_image/DecodeImage}}]] [Op:IteratorGetNext]

<Figure size 864x864 with 0 Axes>

In [28]:
# Two epochs on the most recently built train/validation datasets.
model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=2,
)

UnimplementedError: Cast string to float is not supported
	 [[{{node Cast}}]] [Op:MakeIterator]