## Summary

- Basically a 1D CNN starter with bandpass. Filter size hard-coded from [https://www.kaggle.com/kit716/grav-wave-detection](https://www.kaggle.com/kit716/grav-wave-detection) which uses the simple architecture from https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103 
- Added inference to @hidehisaarai1213's PyTorch starter. The iteration order differs from Y.Nakama's pipeline: instead of "iterate over the loader first, then load each model", this notebook "loads a model first, then iterates over the loader".
- Version 3: average pool+ELU
- Version 4: max pool+SiLU
- Version 5: Generalized Mean (GeM) pooling, i.e. a trainable $L^p$ mean computed per channel $c$ over each pooling window $\Omega$ (borrowing ideas from Lebesgue $L^p$ spaces), added following the comments from @hannes82:
   $$\textbf{e} = \left[\left(\frac{1}{|\Omega|}\sum_{u\in{\Omega}}x^{p}_{cu}\right)^{\frac{1}{p}}\right]_{c=1,\cdots,C} $$

## Reference
- pipeline: [Y.Nakama's notebook](https://www.kaggle.com/yasufuminakama/g2net-efficientnet-b7-baseline-training).
- dataset: @hidehisaarai1213 https://www.kaggle.com/hidehisaarai1213/g2net-read-from-tfrecord-train-with-pytorch
- 1d CNN modified from https://www.kaggle.com/kit716/grav-wave-detection

## Libraries

In [1]:
import os
import time
import math
import random
from pathlib import Path

import numpy as np
import pandas as pd
import scipy as sp
from scipy import signal
import tensorflow as tf  # for reading TFRecord Dataset
import tensorflow_datasets as tfds  # for making tf.data.Dataset to return numpy arrays
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#from kaggle_datasets import KaggleDatasets
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from tqdm import tqdm
from adamp import AdamP

In [2]:
# Directory that receives the training log, the per-fold checkpoints and the
# out-of-fold CSV written further down in this notebook.
SAVEDIR = Path("./")
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## CFG

In [3]:
class CFG:
    """Global experiment configuration, read as class attributes (never instantiated)."""
    debug = False  # not read anywhere in the visible code
    print_freq = 2500  # print a progress line every `print_freq` steps
    num_workers = 0  # not read anywhere in the visible code
    # Name of the LR scheduler built in train_loop.
    scheduler = "CosineAnnealingLR"
    #scheduler = 'ReduceLROnPlateau'
    model_name = "1dcnn"  # prefix of checkpoint and submission file names
    epochs = 8
    T_max = 8  # T_max passed to CosineAnnealingLR
    # NOTE(review): T_0 (restart period for 'CosineAnnealingWarmRestarts') is
    # not defined in this class.
    lr = 5e-4
    min_lr = 1e-7  # eta_min of the cosine schedulers
    batch_size = 32  # training batch size
    val_batch_size = 64  # batch size used for test-time inference
    weight_decay = 1e-7
    gradient_accumulation_steps = 1
    max_grad_norm = 1000  # threshold passed to clip_grad_norm_
    factor = 0.2  # ReduceLROnPlateau factor
    patience = 1  # ReduceLROnPlateau patience
    eps = 1e-7  # ReduceLROnPlateau eps
    seed = 42  # seed for the RNGs and for the KFold split
    target_size = 1  # not read anywhere in the visible code
    target_col = "target"  # label column of the out-of-fold DataFrame
    n_fold = 4  # number of KFold splits over the TFRecord files
    trn_fold = [0, 1, 2, 3]  # folds that are trained and later ensembled
    train = True  # run the cross-validation training cell when True
    # Keyword arguments forwarded to bandpass(): lower/upper cut-off frequencies.
    bandpass_params = dict(lf=30, 
                           hf=1000)

## Utils

In [4]:
# ====================================================
# Utils
# ====================================================
def get_score(y_true, y_pred):
    """Return the ROC AUC of the predictions `y_pred` against the labels `y_true`."""
    return roc_auc_score(y_true, y_pred)


def init_logger(log_file=SAVEDIR / 'train.log'):
    """Create the logger that echoes messages to the console and to a file.

    Args:
        log_file: Path of the log file; messages are appended to it.

    Returns:
        The logger registered under this module's ``__name__``, set to INFO
        and carrying exactly one stream handler and one file handler, both
        formatting records as the bare message.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # getLogger() returns the same object on every call, so re-running this
    # cell would otherwise stack a new pair of handlers on top of the old ones
    # and every message would be emitted multiple times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    for handler in (StreamHandler(), FileHandler(filename=log_file)):
        handler.setFormatter(Formatter("%(message)s"))
        logger.addHandler(handler)
    return logger

# Shared logger used by the training loop and the result summary below.
LOGGER = init_logger()


def seed_torch(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The individual generators are independent, so one loop seeds them all.
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.deterministic = True

# Seed every RNG once, up front, with the configured seed.
seed_torch(seed=CFG.seed)

## TFRecord Loader

This is the heart of this notebook. Instead of using PyTorch's `Dataset` and `DataLoader`, I define a custom loader that reads samples directly from TFRecords.

FYI, there's a library that does the same thing, but its implementation is not optimized, so it's slower.

https://github.com/vahidk/tfrecord

In [5]:
path = "train"

# Gather the training TFRecord shards in sorted order as a NumPy array so the
# KFold split can index them with integer index arrays.
all_files = np.array(sorted(tf.io.gfile.glob(path + "/train*.tfrecords")))

print("train_files: ", len(all_files))

train_files:  20


In [6]:
def count_data_items(fileids, train=True):
    """Return the total number of samples held by the given TFRecord files.

    Every file is assumed to hold a fixed number of samples: 28000 for
    training files and 22500 for test files.

    Args:
        fileids: Sized collection of TFRecord files.
        train: Truthy for training files, falsy for test files.
    """
    samples_per_file = 22500
    if train:
        samples_per_file = 28000
    return samples_per_file * len(fileids)


# Lets tf.data pick the parallelism / prefetch depth in get_dataset().
AUTO = tf.data.experimental.AUTOTUNE

## Bandpass

Modified from various notebooks and https://www.kaggle.com/c/g2net-gravitational-wave-detection/discussion/261721#1458564

In [7]:
def bandpass(x, lf=20, hf=500, order=8, sr=2048):
    '''
    Apply a Tukey window and a Butterworth band-pass filter along the last axis.

    Cell 33 of https://www.gw-openscience.org/LVT151012data/LOSC_Event_tutorial_LVT151012.html
    https://scipy-cookbook.readthedocs.io/items/ButterworthBandpass.html

    Args:
        x: Array whose last axis is time, e.g. (channels, samples) for one
            example or (batch, channels, samples) for a batch. Any number of
            leading axes and any number of samples is accepted.
        lf: Lower cut-off frequency, in the same unit as `sr`.
        hf: Upper cut-off frequency, in the same unit as `sr`.
        order: Order passed to `scipy.signal.butter`.
        sr: Sampling rate of the signal.

    Returns:
        A new array with the shape of `x` holding the windowed, filtered signal
        multiplied by sqrt((hf - lf) / (sr / 2)). Floating-point inputs keep
        their dtype; other inputs yield float64. The input is left untouched.

    Raises:
        ValueError: If `x` has no axes (a scalar).
    '''
    x = np.asarray(x)
    if x.ndim == 0:
        raise ValueError("bandpass expects an array with a time axis, got a scalar")

    sos = signal.butter(order, [lf, hf], btype="bandpass", output="sos", fs=sr)
    normalization = np.sqrt((hf - lf) / (sr / 2))
    # The window length follows the input instead of a hard-coded 4096, and the
    # window is taken from scipy.signal.windows, its documented location.
    window = signal.windows.tukey(x.shape[-1], 0.1)

    out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    # Round the windowed signal to the working dtype before filtering so that
    # float32 inputs give the same numbers as windowing the array in place.
    windowed = (x * window).astype(out_dtype, copy=False)
    # sosfilt filters every leading index independently along the given axis,
    # so no explicit loops over batch or channel are needed.
    filtered = signal.sosfilt(sos, windowed, axis=-1) * normalization
    return filtered.astype(out_dtype, copy=False)

In [8]:
def prepare_wave(wave):
    """Decode a serialized wave into a scaled (3, 4096) float32 tensor.

    The raw bytes are read as float64, reshaped to three rows of 4096 samples,
    and each row is divided by its own fixed scaling constant before the
    result is cast to float32.
    """
    channels = tf.reshape(tf.io.decode_raw(wave, tf.float64), (3, 4096))
    scaling = tf.constant([1.5e-20, 1.5e-20, 0.5e-20], dtype=tf.float64)
    # Broadcasting a (3, 1) column divides every row by its own constant.
    scaled = channels / tf.reshape(scaling, (3, 1))
    return tf.cast(scaled, tf.float32)


def read_labeled_tfrecord(example):
    """Parse one labeled record into ``(wave, target, wave_id)``.

    `wave` is the output of prepare_wave(), `target` is the label cast to
    float32 and reshaped to ``[1]``, and `wave_id` is the raw string feature.
    """
    feature_spec = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    wave = prepare_wave(parsed["wave"])
    target = tf.reshape(tf.cast(parsed["target"], tf.float32), [1])
    return wave, target, parsed["wave_id"]


def read_unlabeled_tfrecord(example, return_image_id):
    """Parse one unlabeled record into ``(wave, wave_id_or_zero)``.

    The second element is the record's `wave_id` when `return_image_id` is
    truthy and the constant 0 otherwise.
    """
    feature_spec = {
        "wave": tf.io.FixedLenFeature([], tf.string),
        "wave_id": tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    wave = prepare_wave(parsed["wave"])
    if return_image_id:
        identifier = parsed["wave_id"]
    else:
        identifier = 0
    return wave, identifier


def get_dataset(files, batch_size=16, repeat=False, cache=False,
                shuffle=False, labeled=True, return_image_ids=True):
    """Build a batched tf.data pipeline over GZIP TFRecords, yielding NumPy batches.

    Args:
        files: TFRecord file names.
        batch_size: Number of records per batch.
        repeat: Repeat the dataset indefinitely.
        cache: Cache the raw records in memory (around 15GB RAM for the
            validation set and 50~60GB for the training set).
        shuffle: Shuffle with a buffer of 2048 records and allow
            non-deterministic ordering.
        labeled: Parse with read_labeled_tfrecord when true, otherwise with
            read_unlabeled_tfrecord.
        return_image_ids: Forwarded to read_unlabeled_tfrecord.

    Returns:
        The dataset wrapped by `tfds.as_numpy`.
    """
    dataset = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO, compression_type="GZIP")

    if cache:
        dataset = dataset.cache()
    if repeat:
        dataset = dataset.repeat()
    if shuffle:
        options = tf.data.Options()
        options.experimental_deterministic = False
        dataset = dataset.shuffle(1024 * 2).with_options(options)

    if labeled:
        parse_fn = read_labeled_tfrecord
    else:
        def parse_fn(example):
            return read_unlabeled_tfrecord(example, return_image_ids)

    dataset = dataset.map(parse_fn, num_parallel_calls=AUTO)
    dataset = dataset.batch(batch_size).prefetch(AUTO)
    return tfds.as_numpy(dataset)

In [9]:
class TFRecordDataLoader:
    """Minimal DataLoader-like wrapper around the TFRecord pipeline.

    Iterating the loader restarts the underlying dataset and yields its
    batches; ``len(loader)`` is the number of batches implied by the fixed
    per-file sample counts of count_data_items().

    Args:
        files: TFRecord file names.
        batch_size: Number of records per batch.
        cache, repeat, shuffle, labeled, return_image_ids: Forwarded to
            get_dataset().
        train: Accepted for backward compatibility; not used. Whether the
            train or test per-file sample count applies is decided by
            `labeled`.
    """

    def __init__(self, files, batch_size=32, cache=False, train=True,
                 repeat=False, shuffle=False, labeled=True,
                 return_image_ids=True):
        self.ds = get_dataset(
            files,
            batch_size=batch_size,
            cache=cache,
            repeat=repeat,
            shuffle=shuffle,
            labeled=labeled,
            return_image_ids=return_image_ids)

        # `labeled` doubles as the train/test switch for the sample count.
        self.num_examples = count_data_items(files, labeled)

        self.batch_size = batch_size
        self.labeled = labeled
        self.return_image_ids = return_image_ids
        self._iterator = None

    def __iter__(self):
        """Start a fresh pass over the dataset and return its iterator."""
        self._reset()
        return self._iterator

    def _reset(self):
        """Replace the current iterator with a new one over the dataset."""
        self._iterator = iter(self.ds)

    def __next__(self):
        """Return the next batch, starting a pass first if none is active."""
        # Without this guard, next(loader) before iter(loader) fails with a
        # TypeError because the iterator is still None.
        if self._iterator is None:
            self._reset()
        return next(self._iterator)

    def __len__(self):
        """Return the number of batches per pass (ceiling division)."""
        return -(-self.num_examples // self.batch_size)

## MODEL

In [10]:
class GeM(nn.Module):
    '''
    Generalized-mean (GeM) pooling over 1D windows with a trainable exponent.

    For every window of `kernel_size` samples the layer returns
    ``(mean(clamp(x, eps) ** p)) ** (1 / p)``.

    Code modified from the 2d code in
    https://amaarora.github.io/2020/08/30/gempool.html

    Args:
        kernel_size: Width (and stride) of the pooling window.
        p: Initial value of the trainable exponent.
        eps: Lower clamp applied to the input before raising it to `p`.
    '''
    def __init__(self, kernel_size=8, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1) * p)
        self.kernel_size = kernel_size
        self.eps = eps

    def forward(self, x):
        """Pool `x` with the layer's current exponent and clamp value."""
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        """Return the generalized mean of `x` over each pooling window."""
        # The mean must be an average pool: with a max pool the power and its
        # inverse cancel, the result is a plain max pool and `p` has no effect.
        return F.avg_pool1d(x.clamp(min=eps).pow(p), self.kernel_size).pow(1. / p)

    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

In [11]:
class AttentionHead(nn.Module):
    """Additive attention pooling that reduces dim 1 of the input.

    A score is computed for every position as ``V(tanh(W(features)))``,
    softmax-normalised over dim 1, and used to take a weighted sum of
    `features` over that dimension.

    Args:
        in_features: Size of the last dimension of the input.
        hidden_dim: Width of the hidden projection `W`.
    """

    def __init__(self, in_features, hidden_dim):
        super().__init__()
        self.in_features = in_features
        # NOTE(review): out_features mirrors hidden_dim, although forward()
        # returns vectors whose last dimension is that of `features`.
        self.middle_features = self.out_features = hidden_dim
        self.W = nn.Linear(in_features, hidden_dim)
        self.V = nn.Linear(hidden_dim, 1)

    def forward(self, features):
        scores = self.V(self.W(features).tanh())
        weights = scores.softmax(dim=1)
        return (weights * features).sum(dim=1)

In [12]:
class CNN1d(nn.Module):
    """1D convolutional classifier for gravitational-wave signals.

    Six convolutional stages (``cnn1``..``cnn6``) are followed by three dense
    stages (``fc1``..``fc3``) that end in a single logit. ``fc1`` expects
    256 * 11 flattened features.

    Architecture from https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.120.141103
    """

    # (in_channels, out_channels, kernel_size, avg-pool size or None) per stage.
    _CONV_SPECS = (
        (3, 64, 64, None),
        (64, 64, 32, 8),
        (64, 128, 32, None),
        (128, 128, 16, 6),
        (128, 256, 16, None),
        (256, 256, 16, 4),
    )

    def __init__(self, debug=False):
        super().__init__()
        # Stages are registered in order as cnn1..cnn6 so that the state-dict
        # keys stay the same as with explicitly written attributes.
        for index, spec in enumerate(self._CONV_SPECS, start=1):
            setattr(self, f"cnn{index}", self._conv_stage(*spec))
        self.fc1 = self._dense_stage(256 * 11, 128)
        self.fc2 = self._dense_stage(128, 128)
        self.fc3 = nn.Sequential(
            nn.Linear(128, 1),
        )
        self.debug = debug

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, pool=None):
        """Conv1d -> optional AvgPool1d -> BatchNorm1d -> SiLU."""
        layers = [nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size)]
        if pool is not None:
            layers.append(nn.AvgPool1d(kernel_size=pool))
        layers.extend([nn.BatchNorm1d(out_channels), nn.SiLU()])
        return nn.Sequential(*layers)

    @staticmethod
    def _dense_stage(in_features, out_features):
        """Linear -> BatchNorm1d -> Dropout(0.25) -> SiLU."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.Dropout(0.25),
            nn.SiLU(),
        )

    def forward(self, x, pos=None):
        """Return one logit per example; `pos` is accepted but not used."""
        for conv_stage in (self.cnn1, self.cnn2, self.cnn3,
                           self.cnn4, self.cnn5, self.cnn6):
            x = conv_stage(x)
        x = torch.flatten(x, start_dim=1)
        for dense_stage in (self.fc1, self.fc2, self.fc3):
            x = dense_stage(x)
        return x


## Helper functions

In [13]:
class AverageMeter(object):
    """Tracks the latest value together with a weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the average, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return elapsed time since `since` and a linear estimate of the time left.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Completed fraction of the work; the total duration is
            estimated as elapsed / percent.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))


def max_memory_allocated():
    """Return ``torch.cuda.max_memory_allocated()`` formatted in whole MB."""
    bytes_per_mb = 1024.0 * 1024.0
    peak_mb = torch.cuda.max_memory_allocated() / bytes_per_mb
    return "{:.0f} MB".format(peak_mb)

In [14]:
class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization wrapper around a base optimizer.

    An update takes two forward/backward passes: `first_step` moves the
    weights along the normalised gradient by `rho`, the caller recomputes the
    gradients at the perturbed weights, and `second_step` restores the weights
    and lets the base optimizer apply those gradients.

    Args:
        params: Parameters (or parameter groups) to optimise.
        base_optimizer: Optimizer class, instantiated here with the parameter
            groups and `kwargs`.
        rho: Size of the perturbation; must be non-negative.
        **kwargs: Stored in the group defaults and forwarded to
            `base_optimizer`.
    """

    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, **kwargs)
        super(SAM, self).__init__(params, defaults)

        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        # Point at the base optimizer's groups so both objects share one list.
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb every parameter along its gradient, scaled by rho / ||grad||.

        The perturbation is stored per parameter so `second_step` can undo it.
        Gradients are cleared afterwards when `zero_grad` is true.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # 1e-12 guards against division by a zero gradient norm.
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                e_w = p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"
                self.state[p]["e_w"] = e_w

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Undo the perturbation, then step the base optimizer.

        The base optimizer uses whatever gradients are present at call time,
        i.e. the ones computed at the perturbed weights.
        """
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.sub_(self.state[p]["e_w"])  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run a full SAM update; `closure` is mandatory.

        Gradients must already be populated when this is called; `closure` is
        invoked once, between the two steps, to recompute them.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the 2-norm of all gradients, computed from per-parameter 2-norms."""
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        p.grad.norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm

## Trainer

In [15]:
def train_fn(files, model, criterion, optimizer, epoch, scheduler, device):
    """Train `model` for one epoch over the given TFRecord files with SAM.

    Args:
        files: Training TFRecord file names.
        model: Network to train; switched to train mode here.
        criterion: Loss applied to flattened logits and labels.
        optimizer: Optimizer exposing `first_step` / `second_step` (SAM).
        epoch: Zero-based epoch index, used for logging only.
        scheduler: LR scheduler, used only to report the current LR.
        device: Device the batches are moved to.

    Returns:
        The sample-weighted average of the first-pass training loss.
    """
    losses = AverageMeter()

    # switch to train mode
    model.train()
    start = time.time()

    train_loader = TFRecordDataLoader(
        files, batch_size=CFG.batch_size,
        shuffle=True)
    n_steps = len(train_loader)
    for step, d in enumerate(train_loader):
        x = torch.from_numpy(bandpass(d[0], **CFG.bandpass_params)).to(device)
        labels = torch.from_numpy(d[1]).to(device)
        batch_size = labels.size(0)

        # First pass: gradients at the current weights.
        y_preds = model(x)
        loss = criterion(y_preds.view(-1), labels.view(-1))
        # record loss
        losses.update(loss.item(), batch_size)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        loss.backward()
        # This norm is the one reported in the progress line. Clipping here
        # cannot change the update: SAM's first step rescales these gradients
        # by their own norm and then discards them.
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.first_step(zero_grad=True)
            # Second pass: gradients at the perturbed weights. These are the
            # ones the base optimizer applies, so this is where clipping has
            # to happen for max_grad_norm to take effect.
            criterion(model(x).view(-1), labels.view(-1)).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            optimizer.second_step(zero_grad=True)
        if step % CFG.print_freq == 0:
            print('Epoch: [{0}/{1}][{2}/{3}] '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  'Elapsed: {remain:s} '
                  'Max mem: {mem:s}'
                  .format(
                   epoch+1, CFG.epochs, step, n_steps,
                   loss=losses,
                   grad_norm=grad_norm,
                   lr=scheduler.get_last_lr()[0],
                   remain=timeSince(start, float(step + 1) / n_steps),
                   mem=max_memory_allocated()))
    return losses.avg


def valid_fn(files, model, criterion, device):
    """Evaluate `model` on the given TFRecord files.

    Args:
        files: Validation TFRecord file names.
        model: Network to evaluate; switched to eval mode here.
        criterion: Loss applied to flattened logits and labels.
        device: Device the batches are moved to.

    Returns:
        A tuple ``(avg_loss, predictions, targets, filenames)``: the
        sample-weighted average loss, the sigmoid probabilities as a flat
        array, the labels, and the decoded wave ids, all in loader order.
    """
    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluation mode
    model.eval()
    filenames = []
    targets = []
    preds = []
    start = end = time.time()
    # Use the configured evaluation batch size, as the inference cell does.
    valid_loader = TFRecordDataLoader(
        files, batch_size=CFG.val_batch_size, shuffle=False)
    n_steps = len(valid_loader)
    for step, d in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        targets.extend(d[1].reshape(-1).tolist())
        filenames.extend(f.decode("UTF-8") for f in d[2])
        x = torch.from_numpy(bandpass(d[0], **CFG.bandpass_params)).to(device)
        labels = torch.from_numpy(d[1]).to(device)

        # Neither the forward pass nor the loss needs an autograd graph here.
        with torch.no_grad():
            y_preds = model(x)
            loss = criterion(y_preds.view(-1), labels.view(-1))
        losses.update(loss.item(), labels.size(0))

        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0:
            print('EVAL: [{0}/{1}] '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(
                   step, n_steps,
                   data_time=data_time, loss=losses,
                   remain=timeSince(start, float(step+1)/n_steps),
                   ))
    predictions = np.concatenate(preds).reshape(-1)
    return losses.avg, predictions, np.array(targets), np.array(filenames)

## Train loop

In [16]:
# ====================================================
# Train loop
# ====================================================
def train_loop(train_tfrecords: np.ndarray, val_tfrecords: np.ndarray, fold: int):
    """Train and validate one cross-validation fold.

    After every epoch the model is evaluated, the scheduler is stepped, and a
    checkpoint (weights plus validation predictions) is written whenever the
    ROC AUC or the validation loss improves.

    Args:
        train_tfrecords: TFRecord files used for training.
        val_tfrecords: TFRecord files used for validation.
        fold: Fold index, used in log messages and checkpoint names.

    Returns:
        DataFrame with columns ``target``, ``preds`` and ``id`` for the
        validation files, where ``preds`` comes from the epoch with the lowest
        validation loss.

    Raises:
        ValueError: If ``CFG.scheduler`` is not a supported scheduler name.
    """
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the LR scheduler selected by CFG.scheduler."""
        if CFG.scheduler == 'ReduceLROnPlateau':
            return optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                        mode='max',
                                                        factor=CFG.factor,
                                                        patience=CFG.patience,
                                                        verbose=True,
                                                        eps=CFG.eps)
        if CFG.scheduler == 'CosineAnnealingLR':
            return optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                        T_max=CFG.T_max,
                                                        eta_min=CFG.min_lr,
                                                        last_epoch=-1)
        if CFG.scheduler == 'CosineAnnealingWarmRestarts':
            # CFG may not define T_0; fall back to T_max in that case.
            return optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
                                                                  T_0=getattr(CFG, "T_0", CFG.T_max),
                                                                  T_mult=1,
                                                                  eta_min=CFG.min_lr,
                                                                  last_epoch=-1)
        raise ValueError(f"Unsupported scheduler: {CFG.scheduler!r}")

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CNN1d()
    model.to(device)

    base_optimizer = optim.Adam
    optimizer = SAM(model.parameters(), base_optimizer, lr=CFG.lr, weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf
    # Kept in memory so the result never depends on re-reading a checkpoint,
    # which could be a stale file left by an earlier run.
    best_loss_preds = None

    for epoch in range(CFG.epochs):
        print("\n\n")
        start_time = time.time()

        # train
        avg_loss = train_fn(train_tfrecords, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, preds, targets, files = valid_fn(val_tfrecords, model, criterion, device)
        valid_result_df = pd.DataFrame({"target": targets, "preds": preds, "id": files})

        # scoring
        score = get_score(targets, preds)

        if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
            # The plateau scheduler is built with mode='max', so it has to
            # monitor a higher-is-better metric: the AUC, not the loss.
            scheduler.step(score)
        else:
            scheduler.step()

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_score.pth')

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            best_loss_preds = preds
            LOGGER.info(f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'preds': preds},
                        SAVEDIR / f'{CFG.model_name}_fold{fold}_best_loss.pth')

    # Report the predictions of the best-loss epoch; if the loss never
    # improved (e.g. it was NaN), the last epoch's predictions are kept.
    if best_loss_preds is not None:
        valid_result_df["preds"] = best_loss_preds

    return valid_result_df

In [None]:
def get_result(result_df):
    """Log the ROC AUC of the `preds` column against the target column."""
    score = get_score(result_df[CFG.target_col].values, result_df['preds'].values)
    LOGGER.info(f'Score: {score:<.4f}')

if CFG.train:
    # Cross-validate over the TFRecord files: each fold holds out a subset of
    # the files, and its out-of-fold predictions are appended to `oof_df`.
    oof_df = pd.DataFrame()
    kf = KFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)

    folds = list(kf.split(all_files))
    for fold in range(CFG.n_fold):
        if fold not in CFG.trn_fold:
            continue
        trn_idx, val_idx = folds[fold]
        train_files, valid_files = all_files[trn_idx], all_files[val_idx]
        _oof_df = train_loop(train_files, valid_files, fold)
        oof_df = pd.concat([oof_df, _oof_df])
        LOGGER.info(f"========== fold: {fold} result ==========")
        get_result(_oof_df)
    # Score over all trained folds, then save the out-of-fold predictions.
    LOGGER.info("========== CV ==========")
    get_result(oof_df)
    oof_df.to_csv(SAVEDIR / 'oof_df.csv', index=False)








2021-09-27 05:15:05.751092: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-27 05:15:05.753590: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-27 05:15:05.754295: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-27 05:15:05.756065: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Epoch: [1/8][0/13125] Loss: 0.6947(0.6947) Grad: 12.4073  LR: 0.000500  Elapsed: 0m 2s (remain 458m 36s) Max mem: 222 MB
Epoch: [1/8][2500/13125] Loss: 0.4307(0.4992) Grad: 0.2999  LR: 0.000500  Elapsed: 3m 2s (remain 12m 55s) Max mem: 242 MB
Epoch: [1/8][5000/13125] Loss: 0.4767(0.4808) Grad: 0.2594  LR: 0.000500  Elapsed: 6m 3s (remain 9m 51s) Max mem: 242 MB
Epoch: [1/8][7500/13125] Loss: 0.2951(0.4712) Grad: 0.3236  LR: 0.000500  Elapsed: 9m 5s (remain 6m 48s) Max mem: 242 MB
Epoch: [1/8][10000/13125] Loss: 0.3775(0.4641) Grad: 0.2439  LR: 0.000500  Elapsed: 12m 6s (remain 3m 46s) Max mem: 242 MB
Epoch: [1/8][12500/13125] Loss: 0.4355(0.4590) Grad: 0.2190  LR: 0.000500  Elapsed: 15m 7s (remain 0m 45s) Max mem: 242 MB
EVAL: [0/2188] Data 0.202 (0.202) Elapsed 0m 0s (remain 9m 2s) Loss: 0.4964(0.4964) 


Epoch 1 - avg_train_loss: 0.4579  avg_val_loss: 0.4304  time: 1087s
Epoch 1 - Score: 0.8658
Epoch 1 - Save Best Score: 0.8658 Model
Epoch 1 - Save Best Loss: 0.4304 Model





Epoch: [2/8][0/13125] Loss: 0.4112(0.4112) Grad: 0.2912  LR: 0.000481  Elapsed: 0m 1s (remain 259m 41s) Max mem: 242 MB
Epoch: [2/8][2500/13125] Loss: 0.4368(0.4299) Grad: 0.2567  LR: 0.000481  Elapsed: 3m 4s (remain 13m 1s) Max mem: 242 MB
Epoch: [2/8][5000/13125] Loss: 0.3523(0.4283) Grad: 0.2720  LR: 0.000481  Elapsed: 6m 5s (remain 9m 53s) Max mem: 242 MB
Epoch: [2/8][7500/13125] Loss: 0.4617(0.4286) Grad: 0.1784  LR: 0.000481  Elapsed: 9m 6s (remain 6m 49s) Max mem: 242 MB
Epoch: [2/8][10000/13125] Loss: 0.4410(0.4274) Grad: 0.2926  LR: 0.000481  Elapsed: 12m 8s (remain 3m 47s) Max mem: 242 MB
Epoch: [2/8][12500/13125] Loss: 0.4977(0.4268) Grad: 0.2279  LR: 0.000481  Elapsed: 15m 9s (remain 0m 45s) Max mem: 242 MB
EVAL: [0/2188] Data 0.205 (0.205) Elapsed 0m 0s (remain 9m 32s) Loss: 0.4842(0.4842) 


Epoch 2 - avg_train_loss: 0.4267  avg_val_loss: 0.4195  time: 1088s
Epoch 2 - Score: 0.8693
Epoch 2 - Save Best Score: 0.8693 Model
Epoch 2 - Save Best Loss: 0.4195 Model





Epoch: [3/8][0/13125] Loss: 0.3702(0.3702) Grad: 0.1821  LR: 0.000427  Elapsed: 0m 1s (remain 262m 42s) Max mem: 242 MB
Epoch: [3/8][2500/13125] Loss: 0.2478(0.4185) Grad: 0.2368  LR: 0.000427  Elapsed: 3m 2s (remain 12m 54s) Max mem: 242 MB
Epoch: [3/8][5000/13125] Loss: 0.4163(0.4184) Grad: 0.2713  LR: 0.000427  Elapsed: 6m 4s (remain 9m 51s) Max mem: 242 MB
Epoch: [3/8][7500/13125] Loss: 0.2918(0.4189) Grad: 0.2858  LR: 0.000427  Elapsed: 9m 6s (remain 6m 49s) Max mem: 242 MB
Epoch: [3/8][10000/13125] Loss: 0.3860(0.4181) Grad: 0.1840  LR: 0.000427  Elapsed: 12m 7s (remain 3m 47s) Max mem: 242 MB
Epoch: [3/8][12500/13125] Loss: 0.3936(0.4182) Grad: 0.1249  LR: 0.000427  Elapsed: 15m 7s (remain 0m 45s) Max mem: 242 MB
EVAL: [0/2188] Data 0.181 (0.181) Elapsed 0m 0s (remain 8m 42s) Loss: 0.4695(0.4695) 


Epoch 3 - avg_train_loss: 0.4181  avg_val_loss: 0.4105  time: 1082s
Epoch 3 - Score: 0.8724
Epoch 3 - Save Best Score: 0.8724 Model
Epoch 3 - Save Best Loss: 0.4105 Model





Epoch: [4/8][0/13125] Loss: 0.4258(0.4258) Grad: 0.1743  LR: 0.000346  Elapsed: 0m 1s (remain 252m 50s) Max mem: 242 MB
Epoch: [4/8][2500/13125] Loss: 0.3564(0.4124) Grad: 0.2472  LR: 0.000346  Elapsed: 2m 55s (remain 12m 25s) Max mem: 242 MB
Epoch: [4/8][5000/13125] Loss: 0.5982(0.4118) Grad: 0.3169  LR: 0.000346  Elapsed: 5m 50s (remain 9m 29s) Max mem: 242 MB
Epoch: [4/8][7500/13125] Loss: 0.4535(0.4128) Grad: 0.1542  LR: 0.000346  Elapsed: 8m 45s (remain 6m 34s) Max mem: 242 MB
Epoch: [4/8][10000/13125] Loss: 0.4484(0.4122) Grad: 0.2299  LR: 0.000346  Elapsed: 11m 40s (remain 3m 38s) Max mem: 242 MB
Epoch: [4/8][12500/13125] Loss: 0.4614(0.4122) Grad: 0.1456  LR: 0.000346  Elapsed: 14m 35s (remain 0m 43s) Max mem: 242 MB
EVAL: [0/2188] Data 0.243 (0.243) Elapsed 0m 0s (remain 10m 42s) Loss: 0.4800(0.4800) 


Epoch 4 - avg_train_loss: 0.4122  avg_val_loss: 0.4090  time: 1050s
Epoch 4 - Score: 0.8731
Epoch 4 - Save Best Score: 0.8731 Model
Epoch 4 - Save Best Loss: 0.4090 Model





Epoch: [5/8][0/13125] Loss: 0.4128(0.4128) Grad: 0.1599  LR: 0.000250  Elapsed: 0m 1s (remain 257m 26s) Max mem: 242 MB
Epoch: [5/8][2500/13125] Loss: 0.3468(0.4066) Grad: 0.2380  LR: 0.000250  Elapsed: 2m 54s (remain 12m 21s) Max mem: 242 MB
Epoch: [5/8][5000/13125] Loss: 0.3187(0.4068) Grad: 0.2384  LR: 0.000250  Elapsed: 5m 47s (remain 9m 24s) Max mem: 242 MB
Epoch: [5/8][7500/13125] Loss: 0.3592(0.4077) Grad: 0.1723  LR: 0.000250  Elapsed: 8m 40s (remain 6m 30s) Max mem: 242 MB
Epoch: [5/8][10000/13125] Loss: 0.3807(0.4069) Grad: 0.1399  LR: 0.000250  Elapsed: 11m 34s (remain 3m 36s) Max mem: 242 MB
Epoch: [5/8][12500/13125] Loss: 0.3000(0.4068) Grad: 0.2136  LR: 0.000250  Elapsed: 14m 28s (remain 0m 43s) Max mem: 242 MB
EVAL: [0/2188] Data 0.191 (0.191) Elapsed 0m 0s (remain 8m 43s) Loss: 0.4706(0.4706) 


Epoch 5 - avg_train_loss: 0.4068  avg_val_loss: 0.4072  time: 1042s
Epoch 5 - Score: 0.8742
Epoch 5 - Save Best Score: 0.8742 Model
Epoch 5 - Save Best Loss: 0.4072 Model





Epoch: [6/8][0/13125] Loss: 0.4495(0.4495) Grad: 0.1893  LR: 0.000154  Elapsed: 0m 1s (remain 245m 1s) Max mem: 242 MB
Epoch: [6/8][2500/13125] Loss: 0.3545(0.4018) Grad: 0.1310  LR: 0.000154  Elapsed: 2m 55s (remain 12m 24s) Max mem: 242 MB
Epoch: [6/8][5000/13125] Loss: 0.4513(0.4024) Grad: 0.2595  LR: 0.000154  Elapsed: 5m 49s (remain 9m 28s) Max mem: 242 MB
Epoch: [6/8][7500/13125] Loss: 0.4299(0.4033) Grad: 0.1758  LR: 0.000154  Elapsed: 8m 44s (remain 6m 33s) Max mem: 242 MB
Epoch: [6/8][10000/13125] Loss: 0.2992(0.4022) Grad: 0.2218  LR: 0.000154  Elapsed: 11m 40s (remain 3m 38s) Max mem: 242 MB
Epoch: [6/8][12500/13125] Loss: 0.3945(0.4023) Grad: 0.2052  LR: 0.000154  Elapsed: 14m 35s (remain 0m 43s) Max mem: 242 MB
EVAL: [0/2188] Data 0.184 (0.184) Elapsed 0m 0s (remain 8m 40s) Loss: 0.4535(0.4535) 


Epoch 6 - avg_train_loss: 0.4022  avg_val_loss: 0.4063  time: 1050s
Epoch 6 - Score: 0.8748
Epoch 6 - Save Best Score: 0.8748 Model
Epoch 6 - Save Best Loss: 0.4063 Model





Epoch: [7/8][0/13125] Loss: 0.3675(0.3675) Grad: 0.1805  LR: 0.000073  Elapsed: 0m 1s (remain 243m 49s) Max mem: 242 MB
Epoch: [7/8][2500/13125] Loss: 0.4381(0.3990) Grad: 0.1733  LR: 0.000073  Elapsed: 2m 52s (remain 12m 13s) Max mem: 242 MB
Epoch: [7/8][5000/13125] Loss: 0.3268(0.3992) Grad: 0.2063  LR: 0.000073  Elapsed: 5m 47s (remain 9m 24s) Max mem: 242 MB
Epoch: [7/8][7500/13125] Loss: 0.3865(0.3999) Grad: 0.2309  LR: 0.000073  Elapsed: 8m 42s (remain 6m 31s) Max mem: 242 MB
Epoch: [7/8][10000/13125] Loss: 0.4835(0.3991) Grad: 0.2940  LR: 0.000073  Elapsed: 11m 36s (remain 3m 37s) Max mem: 242 MB
Epoch: [7/8][12500/13125] Loss: 0.4341(0.3989) Grad: 0.1591  LR: 0.000073  Elapsed: 14m 29s (remain 0m 43s) Max mem: 242 MB
EVAL: [0/2188] Data 0.178 (0.178) Elapsed 0m 0s (remain 8m 24s) Loss: 0.4787(0.4787) 


Epoch 7 - avg_train_loss: 0.3987  avg_val_loss: 0.4072  time: 1042s
Epoch 7 - Score: 0.8750
Epoch 7 - Save Best Score: 0.8750 Model





Epoch: [8/8][0/13125] Loss: 0.3528(0.3528) Grad: 0.1948  LR: 0.000019  Elapsed: 0m 1s (remain 245m 23s) Max mem: 242 MB
Epoch: [8/8][2500/13125] Loss: 0.4338(0.3965) Grad: 0.1706  LR: 0.000019  Elapsed: 2m 55s (remain 12m 24s) Max mem: 242 MB
Epoch: [8/8][5000/13125] Loss: 0.4734(0.3967) Grad: 0.2114  LR: 0.000019  Elapsed: 5m 49s (remain 9m 27s) Max mem: 242 MB
Epoch: [8/8][7500/13125] Loss: 0.3549(0.3977) Grad: 0.1874  LR: 0.000019  Elapsed: 8m 44s (remain 6m 33s) Max mem: 242 MB
Epoch: [8/8][10000/13125] Loss: 0.4128(0.3968) Grad: 0.2423  LR: 0.000019  Elapsed: 11m 38s (remain 3m 38s) Max mem: 242 MB
Epoch: [8/8][12500/13125] Loss: 0.4951(0.3965) Grad: 0.2529  LR: 0.000019  Elapsed: 14m 33s (remain 0m 43s) Max mem: 242 MB
EVAL: [0/2188] Data 0.147 (0.147) Elapsed 0m 0s (remain 7m 10s) Loss: 0.4692(0.4692) 


## Inference

In [None]:
# Load the best-AUC checkpoint of every trained fold. map_location="cpu" lets
# checkpoints saved on a GPU load on any machine; load_state_dict() later
# copies the weights onto the model's own device.
states = [
    torch.load(os.path.join(SAVEDIR, f'{CFG.model_name}_fold{fold}_best_score.pth'),
               map_location="cpu")
    for fold in CFG.trn_fold
]

In [None]:
path = "test"

# Gather the test TFRecord shards in sorted order as a NumPy array.
all_files = np.array(sorted(tf.io.gfile.glob(path + "/test*.tfrecords")))

print("test_files: ", len(all_files))

In [None]:
# One model instance is reused: each fold's weights are loaded into it and
# the whole test set is then scored with those weights.
model = CNN1d()
model.to(device)

wave_ids = []
probs_all = []

for fold, state in enumerate(states):
    model.load_state_dict(state['model'])
    model.eval()
    fold_probs = []

    test_loader = TFRecordDataLoader(all_files, batch_size=CFG.val_batch_size,
                                     shuffle=False, labeled=False)

    with torch.no_grad():
        for d in tqdm(test_loader, total=len(test_loader)):
            x = torch.from_numpy(bandpass(d[0], **CFG.bandpass_params)).to(device)
            fold_probs.append(model(x).sigmoid().to('cpu').numpy())

            # The loader is unshuffled, so ids are identical for every fold
            # and only need to be collected on the first pass.
            if fold == 0:
                wave_ids.append(d[1].astype('U13'))

    probs_all.append(np.concatenate(fold_probs))

# Average the per-fold probabilities into a single flat prediction vector.
probs_avg = np.asarray(probs_all).mean(axis=0).flatten()
wave_ids = np.concatenate(wave_ids)

In [None]:
# Write the fold-averaged predictions to a CSV whose name lists the folds used.
folds = '_'.join(map(str, CFG.trn_fold))
test_df = pd.DataFrame({'id': wave_ids, 'target': probs_avg})
test_df.to_csv(f'{CFG.model_name}_folds_{folds}.csv', index=False)

In [None]:
!sudo shutdown -h now