### Description

This notebook needs more memory than a standard Kaggle notebook provides, so it will not run there as-is.<br>
To run it on a standard Kaggle notebook, point `INPUT_DIR` at the 32x32 dataset and train on 32x32 images (set `CFG['size']` to 32).<br>
This notebook is based on the following notebook:<br>
- https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb7-tpu-training-w-b?scriptVersionId=67969914<br>

The VQT feature data was created at:<br>
- https://www.kaggle.com/snkmr0221/g2net-vqt-features-32x32-or-128x128-img-tfrecord<br>
- https://www.kaggle.com/snkmr0221/g2net-vqt-features-32x32-img<br>

### 説明

このノートブックは、ハイメモリを必要としますので、通常のkaggle notebookでは動作しません。<br>
通常のkaggle notebook上で動作させるには、INPUT_DIRを変更し、32x32の画像で学習をしてください。<br>
このノートブックは以下を参考にしています。<br>
- https://www.kaggle.com/miklgr500/cqt-g2net-efficientnetb7-tpu-training-w-b?scriptVersionId=67969914<br>

VQTのデータは、以下で作成しています。<br>
- https://www.kaggle.com/snkmr0221/g2net-vqt-features-32x32-or-128x128-img-tfrecord<br>
- https://www.kaggle.com/snkmr0221/g2net-vqt-features-32x32-img<br>

### Import modules

In [None]:
# standard modules
import os
import random
import pathlib
import sys

# third-party modules
import tensorflow as tf
!pip3 install tensorflow_addons
import tensorflow_addons as tfa
!pip3 install efficientnet
import efficientnet.tfkeras as efn
from tensorflow.python.client import device_lib
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from IPython.display import display
import seaborn as sns
import pandas as pd
!pip install wandb
import wandb
from wandb.keras import WandbCallback

# Print the TensorFlow runtime details and the accelerators it can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"Eager execution: {tf.executing_eagerly()}")
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
# IPython shell escape (notebook-only syntax): captures the output lines of `nvcc -V`.
cuda_version = !nvcc -V
print(f"{[*cuda_version]}")
# One row per local device reported by TensorFlow (name, type, description).
devices_df = pd.DataFrame(
    [ [d.name, d.device_type, d.physical_device_desc] for d in device_lib.list_local_devices()]
, columns=["name", "device_type", "phisical_device_desc"])
# devices_df = pd.DataFrame([d.split("\n") for d in device_lib.list_local_devices()])
display(devices_df)

In [None]:
# True when the `google.colab` module is already loaded, i.e. the notebook runs in Colab.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    # IPython shell escape (notebook-only syntax): captures the output lines of `nvidia-smi`.
    gpu_info = !nvidia-smi
    gpu_info = '\n'.join(gpu_info)
    # The word "failed" in the nvidia-smi output is taken to mean no GPU is attached.
    if gpu_info.find('failed') >= 0:
        print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
        print('and then re-execute this cell.')
    else:
        print(gpu_info)

    from psutil import virtual_memory
    # Total RAM of the runtime in bytes, converted to decimal gigabytes.
    ram_gb = virtual_memory().total / 1e9
    print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

    # Less than 20 GB is treated as a standard (non high-RAM) runtime.
    if ram_gb < 20:
        print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
        print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
        print('re-execute this cell.')
    else:
        print('You are using a high-RAM runtime!')
        
    # Mount Google Drive; the Colab data/output paths used later live under /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')

# NOTE(review): this is only True if `kaggle_secrets` has already been imported
# into sys.modules -- confirm that holds on Kaggle kernels, otherwise the
# "Local" path configuration is used there.
IN_KAGGLE = "kaggle_secrets" in sys.modules

### Config

In [None]:
def seed_everything(seed=777):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and
            TensorFlow; it is also exported as ``PYTHONHASHSEED``.
    """
    for seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
        seed_fn(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

# From https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training
def auto_select_accelerator():
    """Pick a TPU strategy when a TPU can be resolved, else the default strategy.

    Returns:
        tuple: ``(strategy, tpu_detected)`` where ``strategy`` is the
        ``tf.distribute`` strategy to build the model under and
        ``tpu_detected`` is True only when the TPU initialisation succeeded.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
        tpu_found = True
    except ValueError:
        # A ValueError during TPU setup is treated as "no TPU": use the default strategy.
        strategy = tf.distribute.get_strategy()
        tpu_found = False

    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    return strategy, tpu_found

# Fix all random seeds before anything else is created.
seed_everything()

# Distribution strategy (TPU if available) and the constants derived from it.
strategy, TPU_DETECTED = auto_select_accelerator()
AUTO = tf.data.experimental.AUTOTUNE  # let tf.data pick parallelism / buffer sizes
REPLICAS = strategy.num_replicas_in_sync  # number of devices the global batch is split over

print(f"strategy: {strategy}")
print(f"TPU_DETECTED: {TPU_DETECTED}")
print(f"AUTO: {AUTO}")
print(f"REPLICAS: {REPLICAS}")

# Model Eval Params
DISPLAY_PLOT = True  # draw and save the per-fold diagnostic plots in the training loop

# Experiment configuration; it is also passed to wandb.init for every fold.
# Keys without a comment on their use are not read in the visible part of the notebook.
CFG = {
    'version': 0,
    'feature_name': 'vqt',
    'fold': 0,  # overwritten with the current fold index by the training loop
    'EFFV': 7,  # index into EFNS, i.e. which EfficientNet variant (B0..B7) to build
    'size': 128,  # height and width of the model input
    'dropout': 0.2,  # dropout rate before the final dense layer
    'label_smoothing': 0.,
    'batch_size': 32,  # per-replica batch size (multiplied by REPLICAS when batching)
    'steps_multiplier': 0.25,  # scales the step count handed to build_model as `count`
    'epochs': 20,  # training epochs; also the repeat count of the training dataset
    'aug': False,  # enables the augmentation map in get_dataset
    'MIX_UP_P': 0.2,
    'S_SHIFT': 0.,
    'T_SHIFT': 0.,
    'R_ANGLE': 0. / 180 * np.pi,
    'opt': 'AdamW',  # 'AngularGrad', 'RectifiedAdam', anything else selects AdamW
    'lr': 1e-3,  # learning rate given to the optimizer at construction
    'lr_start': 1e-4,  # epoch-wise schedule: learning rate at epoch 0
    'lr_max': 0.000015 * REPLICAS * 64,  # epoch-wise schedule: peak learning rate, scaled by replica count
    'lr_min': 1e-5,  # epoch-wise schedule: floor the exponential decay approaches
    'lr_ramp_ep': 3,  # epoch-wise schedule: number of linear warm-up epochs
    'lr_sus_ep': 0,  # epoch-wise schedule: epochs held at lr_max after warm-up
    'lr_decay': 0.7,  # epoch-wise schedule: per-epoch decay factor after the hold phase
    'fold_num': 4,  # number of cross-validation folds
}

### File I/O

In [None]:
# Resolve input/output locations for the environment the notebook runs in.
if IN_COLAB or not IN_KAGGLE:
    # Colab (Google Drive) and local runs share one project layout; only the root differs.
    ROOT_PATH = "/content/drive/MyDrive/kaggle/kaggle-g2net" if IN_COLAB else "."
    INPUT_DIR = "input/G2Net-VQT-features-128x128-img-tfrecord"
    OUTPUT_DIR = "exp/exp-008-128x128-efn7-phase"
    INPUT_PATH = pathlib.Path(ROOT_PATH) / INPUT_DIR
    OUTPUT_PATH = pathlib.Path(ROOT_PATH) / OUTPUT_DIR
    INPUT_SUB_FILE = pathlib.Path(ROOT_PATH) / "input" / "sample_submission.csv"
    OUTPUT_SUB_FILE = pathlib.Path(OUTPUT_PATH) / "submission.csv"
    MODEL_PATH = OUTPUT_PATH
else:  # Kaggle
    ROOT_PATH = "/kaggle"
    INPUT_DIR = "input/g2net-vqt-features-32x32-img/G2Net-VQT-features-128x128-img-tfrecord/G2Net-VQT-features-128x128-img-tfrecord"
    # Alternative 32x32 dataset for notebooks without high memory:
    # INPUT_DIR = "input/g2net-vqt-features-32x32-img/G2Net-VQT-features-32x32-img-tfrecord/G2Net-VQT-features-32x32-img-tfrecord"
    OUTPUT_DIR = "output"
    INPUT_PATH = pathlib.Path(ROOT_PATH) / INPUT_DIR
    OUTPUT_PATH = pathlib.Path(ROOT_PATH) / OUTPUT_DIR
    INPUT_SUB_FILE = pathlib.Path(ROOT_PATH) / "input" / "g2net-gravitational-wave-detection" / "sample_submission.csv"
    # The submission is written to the current working directory on Kaggle.
    OUTPUT_SUB_FILE = pathlib.Path(".") / "submission.csv"
    # Trained weights are read from an attached dataset rather than from OUTPUT_PATH.
    MODEL_PATH = pathlib.Path(ROOT_PATH) / "input" / "g2net-vqt-128x128-tfrecord-efn7-trained-model"

# Make sure the output directory exists, then collect the TFRecord shards
# (searched recursively, sorted by path).
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
train_files = sorted(str(path) for path in INPUT_PATH.glob("**/train_*.tfrec"))
test_files = sorted(str(path) for path in INPUT_PATH.glob("**/test_*.tfrec"))

### TFRecord Parser

In [None]:
def _parse_feature_function(example_proto, labeled=True, return_image_id=True):
    """Decode one serialized ``tf.Example`` into a model input and its companion value.

    Args:
        example_proto: Serialized ``tf.Example`` read from a TFRecord file.
        labeled: When True the record must contain an int64 ``target`` field.
        return_image_id: Only used when ``labeled`` is False; selects whether
            the ``image_id`` string or the constant 0 is returned.

    Returns:
        tuple: ``(feature, target)`` for labeled records, where ``target`` is a
        float32 tensor of shape ``[1]``; otherwise ``(feature, image_id)`` or
        ``(feature, 0)``. ``feature`` is the three decoded ``vqt0``..``vqt2``
        PNG images concatenated along axis 2.
    """
    string_keys = ('vqt0', 'vqt1', 'vqt2', 'vqt0_ph', 'vqt1_ph', 'vqt2_ph', 'image_id')
    tfrec_format = {key: tf.io.FixedLenFeature([], tf.string) for key in string_keys}
    if labeled:
        tfrec_format['target'] = tf.io.FixedLenFeature([], tf.int64)

    # Interpret the serialized tf.Example protocol buffer with the schema above.
    example = tf.io.parse_single_example(example_proto, tfrec_format)

    # Only the three `vqt*` images are used; the `vqt*_ph` images are parsed
    # but not decoded.
    channels = [tf.image.decode_png(example[key]) for key in ('vqt0', 'vqt1', 'vqt2')]
    feature = tf.concat(channels, axis=2)

    if not labeled:  # test
        return feature, (example['image_id'] if return_image_id else 0)

    # train
    target = tf.cast(example['target'], tf.float32)
    return feature, tf.reshape(target, [1])

### Dataset

In [None]:
def get_dataset(ds, shuffle = False, repeat = False,
                labeled=True, return_image_ids=True, batch_size=16, dim=32, aug=False):
    """Turn a dataset of serialized examples into a batched, prefetched input pipeline.

    Pipeline order: cache -> (repeat) -> (shuffle) -> parse -> batch -> (augment) -> prefetch.

    Args:
        ds: ``tf.data.Dataset`` of serialized ``tf.Example`` records.
        shuffle: Shuffle with a 2048-element buffer and allow non-deterministic order.
        repeat: Repeat the dataset ``CFG['epochs']`` times.
        labeled: Parse records as labeled (feature, target) pairs.
        return_image_ids: For unlabeled records, return the image id instead of 0.
        batch_size: Per-replica batch size; the actual batch is ``batch_size * REPLICAS``.
        dim: Unused; kept for interface compatibility.
        aug: Apply ``aug_f`` to every batch.
            NOTE(review): ``aug_f`` is not defined in the visible part of this
            notebook, so ``aug=True`` requires it to be defined elsewhere.

    Returns:
        The prepared ``tf.data.Dataset``.
    """
    global_batch = batch_size * REPLICAS

    def parse_unlabeled(example):
        return _parse_feature_function(example, False, return_image_ids)

    def augment(x, y):
        return aug_f(x, y, global_batch)

    # Cache the raw serialized records so later epochs do not re-read the files.
    pipeline = ds.cache()

    if repeat:
        pipeline = pipeline.repeat(CFG['epochs'])

    if shuffle:
        options = tf.data.Options()
        options.experimental_deterministic = False
        pipeline = pipeline.shuffle(1024*2).with_options(options)

    parser = _parse_feature_function if labeled else parse_unlabeled
    pipeline = pipeline.map(parser, num_parallel_calls=AUTO)
    pipeline = pipeline.batch(global_batch)

    if aug:
        pipeline = pipeline.map(augment, num_parallel_calls=AUTO)

    return pipeline.prefetch(AUTO)

class CrossValidation():
    """K-fold splitter over a set of TFRecord files.

    The records are read once into a fixed, pseudo-randomly ordered stream
    and fold ``i`` is every ``fold_num``-th element starting at offset ``i``
    (``Dataset.shard``). The stream order must be identical on every
    iteration, otherwise the shards taken for training and validation would
    overlap and separate passes over the validation fold would not line up.
    Therefore:

    * the files are interleaved with ``deterministic=True`` so that a
      ``deterministic = False`` option set further down the pipeline cannot
      reorder the reads, and
    * the shuffle uses a fixed seed with ``reshuffle_each_iteration=False``.
    """

    def __init__(self, files, fold_num, seed=777):
        """Build the shared record stream and report its size.

        Args:
            files: Paths of the TFRecord files to split.
            fold_num: Number of folds.
            seed: Seed of the one-off shuffle that fixes the record order.
        """
        filenames = tf.data.Dataset.from_tensor_slices(
            tf.constant([str(f) for f in files], dtype=tf.string)
        )
        self.ds = filenames.interleave(
            tf.data.TFRecordDataset,
            num_parallel_calls=AUTO,
            deterministic=True,
        )
        self.ds = self.ds.shuffle(1024*2, seed=seed, reshuffle_each_iteration=False)
        self.FOLD_NUM = fold_num
        # Count by streaming instead of materialising every record in a list.
        sample_num = int(self.ds.reduce(np.int64(0), lambda count, _: count + 1).numpy())
        print(f"sample_num: {sample_num}")

    def get_dataset(self, fold_index, shuffle = False, repeat = False, 
                labeled=True, return_image_ids=True, batch_size=16, dim=32, aug=False):
        """Return the training and validation datasets for one fold.

        Args:
            fold_index: Index of the fold used for validation, in
                ``[0, fold_num)``.
            shuffle, repeat, labeled, return_image_ids, batch_size, dim, aug:
                Unused; kept for interface compatibility.

        Returns:
            tuple: ``(train_ds, valid_ds)`` datasets of serialized records.
            ``train_ds`` is the concatenation of all other folds.

        Raises:
            ValueError: If ``fold_index`` is out of range or there are fewer
                than two folds (no data would remain for training).
        """
        if not 0 <= fold_index < self.FOLD_NUM:
            raise ValueError(
                f"fold_index must be in [0, {self.FOLD_NUM}), got {fold_index}"
            )
        if self.FOLD_NUM < 2:
            raise ValueError("at least 2 folds are required to build a training split")

        # divide train and validation
        valid_ds = self.ds.shard(self.FOLD_NUM, fold_index)
        train_ds = None
        for idx in range(self.FOLD_NUM):
            if idx == fold_index:
                continue
            fold_ds = self.ds.shard(self.FOLD_NUM, idx)
            train_ds = fold_ds if train_ds is None else train_ds.concatenate(fold_ds)

        return train_ds, valid_ds

### Model

In [None]:
class AngularGrad(tf.keras.optimizers.Optimizer):
    """Adam-style optimizer whose step is scaled by an angular coefficient.

    Bias-corrected first and second moment estimates are kept as in Adam.
    The update is additionally multiplied by
    ``tanh(|f|) * 0.5 + 0.5`` where ``f`` is the stored cosine (or tangent,
    for ``method_angle == "tan"``) of the element-wise angle derived from the
    previous and the current gradient.

    Only dense gradients are supported.
    """

    def __init__(
          self,
          method_angle: str = "cos",
          learning_rate=1e-3,
          beta_1=0.9,
          beta_2=0.999,
          eps=1e-7,
          name: str = "AngularGrad",
          **kwargs,
      ):
        """Create the optimizer.

        Args:
            method_angle: ``"tan"`` to use the tangent of the gradient angle,
                any other value uses the cosine.
            learning_rate: Step size (``lr`` in ``kwargs`` takes precedence).
            beta_1: Decay rate of the first moment estimate.
            beta_2: Decay rate of the second moment estimate.
            eps: Constant added to the denominator of the update.
            name: Optimizer name.
            **kwargs: Forwarded to ``tf.keras.optimizers.Optimizer``.
        """
        super().__init__(name, **kwargs)

        self.method_angle = method_angle
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("beta_1", beta_1)
        self._set_hyper("beta_2", beta_2)
        self._set_hyper("eps", eps)
        self.eps = eps or tf.keras.backend.epsilon()

    def _create_slots(self, var_list):
        """Create the per-variable state slots."""
        for var in var_list:
            self.add_slot(var, "exp_avg")
            self.add_slot(var, "exp_avg_sq")
            self.add_slot(var, "previous_grad")
            # `np.pi` is used because `math` is not imported by this notebook.
            # NOTE(review): this slot starts at pi/2 while the angle compared
            # against it below is expressed in degrees -- confirm the intended unit.
            self.add_slot(var, "min", initializer=tf.keras.initializers.Constant(value=np.pi / 2))
            self.add_slot(var, "final_angle_function_theta")

    def _resource_apply_dense(self, grad, var):
        """Apply one dense update to ``var`` and refresh all of its slots.

        Returns:
            A grouped op covering the variable update and every slot update.
        """
        var_dtype = var.dtype.base_dtype

        lr = self._get_hyper("learning_rate", var_dtype)
        beta_1 = self._get_hyper("beta_1", var_dtype)
        beta_2 = self._get_hyper("beta_2", var_dtype)
        eps = self._get_hyper("eps", var_dtype)

        exp_avg = self.get_slot(var, "exp_avg")
        exp_avg_sq = self.get_slot(var, "exp_avg_sq")
        previous_grad = self.get_slot(var, "previous_grad")
        # The slot keeps its original name "min"; only the local avoids
        # shadowing the builtin.
        min_angle = self.get_slot(var, "min")
        final_angle_function_theta = self.get_slot(var, "final_angle_function_theta")

        # Bias-correction terms for the moment estimates.
        step = tf.cast(self.iterations + 1, var_dtype)
        beta_1_power = tf.pow(beta_1, step)
        beta_2_power = tf.pow(beta_2, step)

        new_exp_avg = exp_avg.assign(
            beta_1 * exp_avg + (1.0 - beta_1) * grad,
            use_locking=self._use_locking
        )
        exp_avg_corrected = new_exp_avg / (1.0 - beta_1_power)

        new_exp_avg_sq = exp_avg_sq.assign(
            beta_2 * exp_avg_sq + (1.0 - beta_2) * tf.square(grad),
            use_locking=self._use_locking,
        )
        exp_avg_sq_corrected = new_exp_avg_sq / (1.0 - beta_2_power)

        # Element-wise angle derived from the previous and current gradient.
        tan_theta = tf.abs((previous_grad - grad) / (1 + previous_grad * grad))
        cos_theta = 1 / tf.sqrt(1 + tf.square(tan_theta))

        angle = tf.atan(tan_theta) * (180 / np.pi)
        ans = tf.greater(angle, min_angle)
        # Fraction of elements whose angle exceeds the stored one.
        mean_ans = tf.reduce_mean(tf.cast(ans, tf.float32))

        def true_fn():
            new_min = min_angle.assign(angle, use_locking=self._use_locking)
            new_final_angle_function_theta = final_angle_function_theta.assign(
                tf.identity(tan_theta if self.method_angle == "tan" else cos_theta),
                use_locking=self._use_locking
            )
            return new_min, new_final_angle_function_theta

        def false_fn():
            return min_angle, final_angle_function_theta

        # Replace the stored angle state only when fewer than half of the
        # elements have an angle above the stored one.
        new_min, new_final_angle_function_theta = tf.cond(tf.less(mean_ans, 0.5), true_fn, false_fn)
        angular_coeff = tf.tanh(tf.abs(final_angle_function_theta)) * 0.5 + 0.5

        var_update = var.assign_sub(
            lr * exp_avg_corrected * angular_coeff / (tf.sqrt(exp_avg_sq_corrected) + eps),
            use_locking=self._use_locking
        )

        new_previous_grad = previous_grad.assign(grad, use_locking=self._use_locking)

        updates = [var_update, new_exp_avg, new_exp_avg_sq, new_min, new_previous_grad, new_final_angle_function_theta]
        return tf.group(*updates)

    def _resource_apply_sparse(self, grad, var, indices):
        """Sparse gradients are not supported."""
        raise NotImplementedError

    def get_config(self):
        """Return the serializable configuration, including ``method_angle``."""
        config = super().get_config()
        config.update(
            {
                "method_angle": self.method_angle,
                "learning_rate": self._serialize_hyperparameter("learning_rate"),
                "beta_1": self._serialize_hyperparameter("beta_1"),
                "beta_2": self._serialize_hyperparameter("beta_2"),
                "eps": self._serialize_hyperparameter("eps")
            }
        )
        return config

In [None]:
# EfficientNet constructors indexed by variant number (B0..B7); selected via config['EFFV'].
EFNS = [efn.EfficientNetB0, efn.EfficientNetB1, efn.EfficientNetB2, efn.EfficientNetB3, 
        efn.EfficientNetB4, efn.EfficientNetB5, efn.EfficientNetB6, efn.EfficientNetB7]

def build_model(config, count=820):
    """Build and compile the binary classifier.

    The network is an ImageNet-initialised EfficientNet backbone followed by
    global average pooling, dropout and a single sigmoid unit. It is compiled
    with binary cross-entropy and the AUC metric.

    Args:
        config: Mapping providing ``size``, ``EFFV``, ``dropout``, ``opt`` and ``lr``.
        count: Number of decay steps of the cosine schedule used on the AdamW path.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    input_shape = (config['size'], config['size'], 3)
    inp = tf.keras.layers.Input(shape=input_shape)
    base = EFNS[config['EFFV']](
        input_shape=input_shape,
        weights='imagenet',
        include_top=False
    )
    x = base(inp)
    x = tf.keras.layers.GlobalAvgPool2D()(x)
    x = tf.keras.layers.Dropout(config['dropout'])(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inp, outputs=x)

    if config['opt'] == 'AngularGrad':
        # The first positional parameter of AngularGrad is `method_angle`,
        # so the learning rate has to be passed by keyword.
        opt = AngularGrad(learning_rate=config['lr'])
    elif config['opt'] == 'RectifiedAdam':
        opt = tfa.optimizers.RectifiedAdam(learning_rate=config['lr'], weight_decay=0.001)
    else:
        # The first positional parameter of tfa's AdamW is `weight_decay`, so
        # this cosine schedule drives the weight decay while the learning
        # rate is the constant config['lr'].
        # NOTE(review): confirm a weight-decay schedule (rather than a
        # learning-rate schedule) is what was intended here.
        wd_schedule = tf.keras.experimental.CosineDecay(
            config['lr'],
            count,
        )
        opt = tfa.optimizers.AdamW(weight_decay=wd_schedule, learning_rate=config['lr'])

    loss = tf.keras.losses.BinaryCrossentropy()
    model.compile(optimizer=opt, loss=loss, metrics=['AUC'])

    return model

### Train

In [None]:
def vis_lr_callback(config=CFG):
    """Plot the epoch-wise learning-rate schedule described by ``config``.

    The schedule ramps linearly from ``lr_start`` to ``lr_max`` over
    ``lr_ramp_ep`` epochs, holds ``lr_max`` for ``lr_sus_ep`` epochs and then
    decays exponentially (factor ``lr_decay`` per epoch) towards ``lr_min``.

    Args:
        config: Mapping with the ``lr_*`` keys and ``epochs``.
    """
    start = config['lr_start']
    peak = config['lr_max']
    floor = config['lr_min']
    ramp_epochs = config['lr_ramp_ep']
    hold_epochs = config['lr_sus_ep']
    decay = config['lr_decay']

    def lrfn(epoch):
        # Linear warm-up.
        if epoch < ramp_epochs:
            return (peak - start) / ramp_epochs * epoch + start
        # Hold at the peak.
        if epoch < ramp_epochs + hold_epochs:
            return peak
        # Exponential decay towards the floor.
        return (peak - floor) * decay**(epoch - ramp_epochs - hold_epochs) + floor

    schedule = list(map(lrfn, range(config['epochs'])))
    plt.figure(figsize=(10, 7))
    plt.plot(schedule)
    plt.show()

In [None]:
def get_lr_callback(config=CFG):
    """Create the Keras callback applying the epoch-wise learning-rate schedule.

    The schedule ramps linearly from ``lr_start`` to ``lr_max`` over
    ``lr_ramp_ep`` epochs, holds ``lr_max`` for ``lr_sus_ep`` epochs and then
    decays exponentially (factor ``lr_decay`` per epoch) towards ``lr_min``.

    Args:
        config: Mapping with the ``lr_*`` keys.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` using that schedule.
    """
    start = config['lr_start']
    peak = config['lr_max']
    floor = config['lr_min']
    ramp_epochs = config['lr_ramp_ep']
    hold_epochs = config['lr_sus_ep']
    decay = config['lr_decay']

    def lrfn(epoch):
        # Linear warm-up.
        if epoch < ramp_epochs:
            return (peak - start) / ramp_epochs * epoch + start
        # Hold at the peak.
        if epoch < ramp_epochs + hold_epochs:
            return peak
        # Exponential decay towards the floor.
        return (peak - floor) * decay**(epoch - ramp_epochs - hold_epochs) + floor

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

In [None]:
# Plot the learning-rate schedule defined by CFG before training starts.
vis_lr_callback()

In [None]:

# Out-of-fold collectors. Only `oof_pred` and `oof_tar` are filled by this loop;
# the others are kept so that later cells can rely on the names existing.
oof_pred = []; oof_tar = []; oof_val = []; oof_f1 = []; oof_ids = []; oof_folds = [] 


def _count_examples(ds):
    """Return the number of elements in ``ds`` without holding them in memory."""
    return int(ds.reduce(np.int64(0), lambda total, _: total + 1).numpy())


cv = CrossValidation(train_files, fold_num = CFG['fold_num'])

for fold in range(CFG['fold_num']):

    # One offline wandb run per fold; `config` mirrors CFG for this fold.
    CFG['fold'] = fold
    run = wandb.init(project='g2net',
                     config=CFG,
                     mode='offline'
                    )
    config = wandb.config

    train_ds, valid_ds = cv.get_dataset(fold)

    print('#'*25); print('#### FOLD', fold + 1)
    # Stream over the folds to count them instead of loading every record into a list.
    train_num = _count_examples(train_ds)
    valid_num = _count_examples(valid_ds)
    print(f'#### Training: {train_num} | Validation: {valid_num}')

    # memory clear
    tf.keras.backend.clear_session()

    # build
    with strategy.scope():
        model = build_model(
            config,
            count=int(train_num)/config['batch_size']//REPLICAS*config['steps_multiplier']
        )
    print('#'*25)   

    # save best model each of fold
    # The path is passed as str because older Keras versions do not accept pathlib objects.
    weights_file = str(OUTPUT_PATH.joinpath(f'model_fold{fold}.h5'))
    sv = tf.keras.callbacks.ModelCheckpoint(
        weights_file, monitor='val_auc', verbose=0, save_best_only=True,
        save_weights_only=True, mode='max', save_freq='epoch')

    print('Training...')
    train_ds = get_dataset(train_ds, shuffle=True,  repeat=True, dim=config['size'], batch_size=config['batch_size'], aug=config['aug'])
    valid_ds = get_dataset(valid_ds, shuffle=False, repeat=False, dim=config['size'], batch_size=config['batch_size'])
    print(f"### Training Batch Num: {train_num//config['batch_size']} | Validation Batch Num: {valid_num//config['batch_size']}")
    history = model.fit(
        train_ds,
        epochs = config['epochs'], 
        callbacks = [sv, get_lr_callback(), WandbCallback()], 
        steps_per_epoch = train_num//config['batch_size']//REPLICAS,
        validation_data = valid_ds,
        validation_steps= valid_num//config['batch_size']//REPLICAS,
        verbose = 1
    )

    # Loading best model for inference
    print('Loading best model...')
    model.load_weights(weights_file)

    _, valid_ds = cv.get_dataset(fold)
    valid_ds = get_dataset(valid_ds, labeled=False, return_image_ids=False, shuffle=False, repeat=False, dim=config['size'], batch_size=config['batch_size'])
    # `steps` must be a whole number of batches; round up so the final
    # partial batch is predicted as well.
    validation_steps = int(np.ceil(valid_num / (config['batch_size'] * REPLICAS)))

    pred = model.predict(valid_ds, steps=validation_steps, verbose=0)[:valid_num,] 
    oof_pred.append(
        np.mean(
            pred.reshape((valid_num, 1), order='F'), axis=1
        )
    )

    # GET OOF TARGETS AND idS
    # NOTE: this is a second pass over the validation fold; it lines up with
    # `pred` only if the fold is produced in the same order on every pass.
    _, valid_ds = cv.get_dataset(fold)
    valid_ds = get_dataset(valid_ds, labeled=True, return_image_ids=True, shuffle=False, repeat=False, dim=config['size'])
    oof_tar.append( np.array([target.numpy() for img, target in iter(valid_ds.unbatch())]) )

    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(OUTPUT_PATH.joinpath(f'history_fold{fold}.csv'))

    # plot training result
    if DISPLAY_PLOT:

        # plot pred distribution
        plt.figure(figsize=(8, 6))
        sns.histplot(oof_pred[-1])
        # Save before show(): once the figure has been shown, savefig would
        # write a new, empty figure.
        plt.savefig(OUTPUT_PATH.joinpath(f'pred_histgram_fold{fold}.png'))
        plt.show()

        # plot AUC
        plt.figure(figsize=(15,5))
        x = np.arange(len(history.history['auc']))
        plt.plot(x, history.history['auc'    ], '-o', label='Train AUC', color='#ff7f0e')
        plt.plot(x, history.history['val_auc'], '-o', label='Valid AUC', color='#1f77b4')

        # Mark the epoch with the best validation AUC.
        x = np.argmax( history.history['val_auc'] )
        y = np.max( history.history['val_auc'] )

        xdist = plt.xlim()[1] - plt.xlim()[0]
        ydist = plt.ylim()[1] - plt.ylim()[0]

        plt.scatter(x, y, s=200, color='#1f77b4')
        plt.text(x-0.03*xdist, y-0.13*ydist, 'max auc\n%.2f'%y, size=14)

        plt.ylabel('auc', size=14)
        plt.xlabel('Epoch', size=14)
        plt.legend(loc=2)

        # plot loss
        # The loss curves share the x axis but get their own y axis.
        plt.gca().twinx()
        x = np.arange(len(history.history['loss']))

        plt.plot(x, history.history['loss'    ], '-o', label='Train Loss', color='#2ca02c')
        plt.plot(x, history.history['val_loss'], '-o', label='Vaild Loss', color='#d62728')

        # Mark the epoch with the lowest validation loss.
        x = np.argmin( history.history['val_loss'] )
        y = np.min( history.history['val_loss'] )

        ydist = plt.ylim()[1] - plt.ylim()[0]

        plt.scatter(x, y, s=200, color='#d62728')
        plt.text(x-0.03*xdist, y+0.05*ydist, 'min loss', size=14)
        plt.ylabel('Loss', size=14)
        plt.legend(loc=2)

        plt.title('FOLD %i - Image Size %i'%(fold+1, config['size']), size=18)
        plt.savefig(OUTPUT_PATH.joinpath(f'AUC_and_loss_fold{fold}.png'))
        plt.show()

    # Close the wandb run of this fold.
    run.join()

### 