In [None]:
!pip install -q loguru
!pip install -q mlcrate
!pip install -q omegaconf
!pip install -q segmentation_models
!pip install -q iterative-stratification

In [None]:
import os, glob
import sys
import random
import math
import numpy as np
import pandas as pd
import tqdm
from collections import OrderedDict
from argparse import ArgumentParser

from omegaconf import OmegaConf
from loguru import logger
import mlcrate as mlc

from sklearn.model_selection import StratifiedKFold
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

---

In [None]:
# Build the experiment configuration.
# The parser expects a mandatory --config path plus any number of positional
# "key=value" overrides.
parser = ArgumentParser()
parser.add_argument('--config', required=True)
parser.add_argument('options', nargs='*')
# The argument list is hard-coded here (instead of reading sys.argv), so the
# config path and the expr_name override are fixed inside this cell.
args = parser.parse_args('--config ./config/base.yaml expr_name=hoge'.split())

# Load the YAML file, then apply the positional overrides as an OmegaConf dotlist.
config = OmegaConf.load(args.config)
config.merge_with_dotlist(args.options)

# Derived values: copy num_epochs into the scheduler parameters and build the
# output directory as <save_root>/<expr_name>.
OmegaConf.update(config.scheduler.params, 'num_epochs', config.num_epochs, merge=True)
OmegaConf.update(config, 'save_path', os.path.join(config.save_root, config.expr_name), merge=True)

In [None]:
# Select the visible CUDA devices before TensorFlow is imported.
if config.device.name == 'gpu':
    # os.environ only accepts str values; a YAML entry such as `id: 0` is loaded
    # as an int, so convert explicitly (a str id is left unchanged by str()).
    os.environ['CUDA_VISIBLE_DEVICES'] = str(config.device.id)
elif config.device.name == 'cpu':
    # '-1' hides every GPU so that computation stays on the CPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [None]:
import tensorflow as tf
import classification_models.tfkeras as cs
import efficientnet.tfkeras as eff
import scripts.schedulers
import scripts.augmentations

def seed_everything(seed):
    """Seed every random number source used by this notebook.

    Sets PYTHONHASHSEED in the environment and seeds Python's `random`,
    NumPy's global generator and TensorFlow's global generator with `seed`.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

# Seed all RNGs with the configured seed before anything stochastic runs.
seed_everything(config.seed)


# Pick the distribution strategy: connect to and initialise the TPU when the
# config asks for one, otherwise use whatever strategy is currently active.
if config.device.name == 'tpu':
    from kaggle_datasets import KaggleDatasets
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

# Scale the batch size and the scheduler's initial/minimum learning rates by the
# number of replicas in sync.
# NOTE(review): these updates read the current config values and write back the
# multiplied result, so re-running this cell without reloading the config scales
# them again whenever num_replicas_in_sync > 1.
OmegaConf.update(config, 'batch_size', config.batch_size * strategy.num_replicas_in_sync, merge=True)
OmegaConf.update(config, 'scheduler.params.init_lr', config.scheduler.params.init_lr * strategy.num_replicas_in_sync, merge=True)
OmegaConf.update(config, 'scheduler.params.min_lr', config.scheduler.params.min_lr * strategy.num_replicas_in_sync, merge=True)

---

In [None]:
'''
Train/Val(/Test) List - CUSTOM
'''

def get_train_val_list(fold_idx):
    """Return the train/validation split for one stratified fold.

    Reads 'train.csv', prefixes every `fpath` entry with
    `config.data_root + 'images/'`, and splits the rows with a shuffled
    StratifiedKFold (config.num_folds splits, seeded with config.seed)
    stratified on the `label` column.

    Returns:
        ((train_fpath, train_label), (val_fpath, val_label)) as pandas Series
        whose indices have been reset.
    """
    dev_df = pd.read_csv('train.csv')
    dev_df['fpath'] = config.data_root + 'images/' + dev_df['fpath']

    splitter = StratifiedKFold(
        n_splits=config.num_folds,
        shuffle=True,
        random_state=config.seed,
    )
    all_splits = list(splitter.split(dev_df, y=dev_df['label']))
    train_rows, val_rows = all_splits[fold_idx]

    train_df = dev_df.iloc[train_rows].reset_index(drop=True)
    val_df = dev_df.iloc[val_rows].reset_index(drop=True)

    train_pair = (train_df['fpath'], train_df['label'])
    val_pair = (val_df['fpath'], val_df['label'])
    return train_pair, val_pair


def get_test_list():
    """Return (fpath, label) Series for the test set.

    Reads 'test.csv' and prefixes every `fpath` entry with
    `config.data_root + 'images/'`.
    """
    test_df = pd.read_csv('test.csv')
    test_df['fpath'] = config.data_root + 'images/' + test_df['fpath']
    return test_df['fpath'], test_df['label']

In [None]:
'''
Transform
'''
def load_image(num_classes, fine_size, aug_func):
    """Build the per-sample loading function used in the tf.data pipelines.

    Args:
        num_classes: depth of the one-hot label vector.
        fine_size: target size passed to tf.image.resize; it is concatenated
            with (3,) for the shape check, so it must be a tuple.
        aug_func: optional callable applied to the resized uint8 image, or
            None to skip augmentation.

    Returns:
        A tf.function mapping (fpath, label) to (image, one_hot_label), where
        the image is float32 divided by 255.
    """
    @tf.function
    def load_image_(fpath, label):
        # Read and decode the JPEG as a 3-channel image.
        encoded = tf.io.read_file(fpath)
        decoded = tf.image.decode_jpeg(encoded, channels=3)

        # Resize, then cast back to uint8 and pin the static shape.
        resized = tf.image.resize(decoded, fine_size)
        img = tf.ensure_shape(tf.cast(resized, tf.uint8), fine_size + (3,))

        if aug_func is not None:
            img = aug_func(img)

        scaled = tf.cast(img, tf.float32) / 255.
        return scaled, tf.one_hot(label, num_classes)

    return load_image_

In [None]:
'''
DataLoader
'''
def get_train_ds(train_data, num_classes, batch_size, train_size, aug_func):
    """Build the training tf.data pipeline.

    The dataset is shuffled with a buffer as large as the number of samples,
    mapped through `load_image` (parallel, non-deterministic order), batched
    with the remainder dropped, repeated indefinitely and prefetched.

    Args:
        train_data: (fpaths, labels) pair; its first element defines the
            shuffle buffer size.
        num_classes: depth of the one-hot labels.
        batch_size: number of samples per batch.
        train_size: image size forwarded to `load_image`.
        aug_func: augmentation callable forwarded to `load_image`.
    """
    shuffle_buffer = len(train_data[0])
    loader = load_image(num_classes, train_size, aug_func)

    pipeline = tf.data.Dataset.from_tensor_slices(train_data)
    pipeline = pipeline.shuffle(shuffle_buffer)
    pipeline = pipeline.map(loader, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)
    pipeline = pipeline.batch(batch_size, drop_remainder=True)
    # The dataset never ends; the caller decides how many batches form an epoch.
    pipeline = pipeline.repeat(-1)
    return pipeline.prefetch(1)


def get_val_ds(val_data, num_classes, batch_size, val_size):
    """Build the evaluation tf.data pipeline.

    Samples keep their input order (deterministic map, no shuffling), no
    augmentation is applied, and the final partial batch is kept.

    Args:
        val_data: (fpaths, labels) pair.
        num_classes: depth of the one-hot labels.
        batch_size: number of samples per batch.
        val_size: image size forwarded to `load_image`.
    """
    loader = load_image(num_classes, val_size, None)

    pipeline = tf.data.Dataset.from_tensor_slices(val_data)
    pipeline = pipeline.map(loader, num_parallel_calls=tf.data.AUTOTUNE, deterministic=True)
    pipeline = pipeline.batch(batch_size, drop_remainder=False)
    return pipeline.prefetch(1)

In [None]:
'''
Model - CUSTOM
'''
def build_model(input_shape, num_classes, dropout_rate, weight_decay, base_func, base_weights):
    """Build the classifier: backbone -> global average pooling -> dropout -> dense.

    Args:
        input_shape: shape of one input image, passed to both the backbone and
            the Keras Input layer.
        num_classes: number of units of the final sigmoid Dense layer.
        dropout_rate: rate of the Dropout layer placed before the Dense layer.
        weight_decay: L2 factor; when > 0 the model is rebuilt with L2 kernel
            regularizers attached (see `add_regularization`).
        base_func: callable creating the backbone; called with `input_shape`,
            `include_top=False` and `weights=base_weights`.
        base_weights: forwarded to `base_func` as `weights`.

    Returns:
        A tf.keras Model mapping images to `num_classes` sigmoid outputs.
    """
    import os, tempfile

    def add_regularization(model, weight_reg):
        """Attach an L2 kernel regularizer to the model's layers and rebuild it.

        The regularizer attribute is set on every layer in `model.layers` that
        exposes `kernel_regularizer`; the model is then re-created from its
        JSON config and its weights are restored from a temporary file.
        """
        # NOTE(review): only the layers listed in `model.layers` are visited;
        # layers nested inside the backbone sub-model are not touched here —
        # confirm whether that is intended.
        for layer in model.layers:
            if hasattr(layer, 'kernel_regularizer'):
                layer.kernel_regularizer = tf.keras.regularizers.l2(weight_reg)

        model_json = model.to_json()
        # Use a private temporary directory: a fixed file name in the shared temp
        # dir can be overwritten by another run doing the same thing, and it was
        # never cleaned up.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_weights_path = os.path.join(tmp_dir, 'tmp_weights.h5')
            model.save_weights(tmp_weights_path)
            model = tf.keras.models.model_from_json(model_json, custom_objects={})
            model.load_weights(tmp_weights_path, by_name=True)
        return model

    # Model definition.
    base_model = base_func(input_shape=input_shape, include_top=False, weights=base_weights)

    ip = tf.keras.layers.Input(input_shape)
    h = base_model(ip)
    h = tf.keras.layers.GlobalAveragePooling2D()(h)
    h = tf.keras.layers.Dropout(dropout_rate)(h)
    h = tf.keras.layers.Dense(num_classes, activation=tf.nn.sigmoid)(h)
    model = tf.keras.models.Model(ip, h)

    if weight_decay > 0.:
        model = add_regularization(model, weight_decay)
    return model


def model_regularizer_loss(model):
    """Sum the kernel and bias regularization penalties of a model.

    Walks `model.layers`; a layer that itself has a non-empty `layers`
    attribute is processed recursively. For each layer, a truthy
    `kernel_regularizer` is applied to `layer.kernel` and a truthy
    `bias_regularizer` to `layer.bias`.

    Returns:
        The accumulated penalty, or the integer 0 when nothing contributes.
    """
    penalty_sources = (
        ('kernel_regularizer', 'kernel'),
        ('bias_regularizer', 'bias'),
    )

    total = 0
    for layer in model.layers:
        # Nested models contribute the penalties of their own layers first.
        if getattr(layer, 'layers', None):
            total += model_regularizer_loss(layer)

        for regularizer_name, weight_name in penalty_sources:
            regularizer = getattr(layer, regularizer_name, None)
            if regularizer:
                total += regularizer(getattr(layer, weight_name))
    return total

In [None]:
'''
Metrics - CUSTOM
'''
class Metrics:
    """Running loss/score tracker with a per-epoch history table.

    `scores` holds the Keras metric objects (a single CategoricalAccuracy by
    default), `loss` is a running Mean, and `df` stores one row per epoch with
    the columns listed in `header` ('loss', 'score_1', ...).

    Every method iterates over all entries of `scores`, so adding a metric to
    the list in `__init__` keeps the header and the recorded rows consistent.
    """

    def __init__(self):
        self.scores = [tf.keras.metrics.CategoricalAccuracy()]
        self.loss = tf.keras.metrics.Mean()
        self.header = ['loss'] + [f'score_{i+1}' for i in range(len(self.scores))]
        self.df = pd.DataFrame(columns=self.header)

    def update_state(self, y_trues, y_preds, loss):
        """Feed one batch of targets/predictions to every score and the loss."""
        for score in self.scores:
            score.update_state(y_trues, y_preds)
        self.loss.update_state(loss)

    def get_loss(self):
        """Return the current running loss as a NumPy value."""
        return self.loss.result().numpy()

    def get_scores(self):
        """Return the current value of every score as a list, in `scores` order."""
        return [score.result().numpy() for score in self.scores]

    def reset_state(self):
        """Reset every score and the running loss."""
        for score in self.scores:
            score.reset_states()
        self.loss.reset_states()

    def on_epoch_end(self, e):
        """Store the current loss and scores as the history row indexed by `e`."""
        self.df.loc[e] = [self.get_loss()] + self.get_scores()

    def get_latest(self):
        """Return (index, values) of the most recently stored history row."""
        return self.df.index.tolist()[-1], self.df.iloc[-1, :].tolist()

In [None]:
'''
Train/Val(/Test) Proc on epoch - Classification
'''
def get_proc_on_batch():
    """Create fresh tf.function-compiled train/validation step functions.

    Returns:
        (train_on_batch, val_on_batch). Both return (y, p, loss) where `loss`
        is the raw output of `criterion(y, p)`.
    """
    @tf.function
    def train_on_batch(inputs, model, criterion, optimizer):
        """Run one optimisation step on a batch and return (y, p, loss)."""
        x, y = inputs
        with tf.GradientTape() as tape:
            p = model(x, training=True)
            loss = criterion(y, p)
            # Objective actually optimised: mean data loss plus the layers'
            # regularization penalties.
            total_loss = tf.reduce_mean(loss) + model_regularizer_loss(model)
        # Differentiate the full objective; taking the gradient of `loss` alone
        # silently dropped the regularization term (weight decay had no effect).
        grads = tape.gradient(total_loss, sources=model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # The reported loss stays the data loss so that it is comparable with
        # the value returned by val_on_batch.
        return y, p, loss

    @tf.function
    def val_on_batch(inputs, model, criterion):
        """Run a forward pass in inference mode and return (y, p, loss)."""
        x, y = inputs
        p = model(x, training=False)
        loss = criterion(y, p)
        return y, p, loss

    return train_on_batch, val_on_batch


def proc_on_epoch(training, proc_on_batch, dataset, metrics, criterion, model, epoch, total_epoch, iters_per_epoch=None, optimizer=None):
    """Run one training or validation epoch and record its metrics.

    Args:
        training: True for a training epoch, False for validation.
        proc_on_batch: step function; called with (inputs, model, criterion,
            optimizer) when training, otherwise (inputs, model, criterion).
            Must return (y, p, loss).
        dataset: iterable of batches. When training, iteration stops after
            `iters_per_epoch` batches, so the dataset may be endless.
        metrics: tracker providing reset_state / update_state / get_loss /
            get_scores / on_epoch_end.
        criterion: loss callable forwarded to `proc_on_batch`.
        model: model forwarded to `proc_on_batch`.
        epoch: zero-based epoch index (shown and stored as epoch + 1).
        total_epoch: total number of epochs, used in the progress bar label.
        iters_per_epoch: number of batches per training epoch (required when
            training; also used as the progress bar total).
        optimizer: optimizer forwarded to `proc_on_batch` (required when training).

    Returns:
        (metrics, model, optimizer) when training, otherwise `metrics`.
    """
    metrics.reset_state()
    if training:
        assert iters_per_epoch is not None
        assert optimizer is not None
        phase = 'train'
    else:
        phase = 'valid'

    with tqdm.tqdm(dataset, total=iters_per_epoch, ncols=0, desc=f'{phase} {(1+epoch)}/{total_epoch}') as tq:
        for iter_i, inputs in enumerate(tq):
            if training:
                # iter_i is zero-based: stop once exactly iters_per_epoch batches
                # have been processed (`>` ran one extra batch per epoch).
                if iter_i >= iters_per_epoch:
                    break
                y, p, loss = proc_on_batch(inputs, model, criterion, optimizer)
            else:
                y, p, loss = proc_on_batch(inputs, model, criterion)

            metrics.update_state(y, p, loss)
            tq.set_postfix(OrderedDict(
                loss = metrics.get_loss(),
                scores = metrics.get_scores()
            ))

    # History rows are indexed with the one-based epoch number.
    metrics.on_epoch_end(epoch+1)
    if training:
        return metrics, model, optimizer
    else:
        return metrics


def proc_eval(phase, proc_on_batch, dataset, metrics, criterion, model):
    """Evaluate a model over a dataset and collect targets and predictions.

    Args:
        phase: 'val' or 'test'; used as the progress bar label.
        proc_on_batch: step function called with (inputs, model, criterion)
            and returning (y, p, loss); `y` and `p` must expose `.numpy()`.
        dataset: iterable of batches.
        metrics: tracker that is reset first and updated with every batch.
        criterion: loss callable forwarded to `proc_on_batch`.
        model: model forwarded to `proc_on_batch`.

    Returns:
        (scores, trues, preds): the metric scores plus the per-batch targets
        and predictions concatenated into NumPy arrays.
    """
    assert phase in ['val', 'test']

    metrics.reset_state()

    batch_outputs = []
    with tqdm.tqdm(dataset, ncols=0, desc=f'{phase}') as progress:
        for batch in progress:
            y_true, y_pred, batch_loss = proc_on_batch(batch, model, criterion)
            batch_outputs.append((y_true.numpy(), y_pred.numpy()))
            metrics.update_state(y_true, y_pred, batch_loss)

    scores = metrics.get_scores()
    trues = np.concatenate([true_part for true_part, _ in batch_outputs])
    preds = np.concatenate([pred_part for _, pred_part in batch_outputs])

    return scores, trues, preds

---

In [None]:
def run_fold(fold_idx, config):
    """Train, validate and evaluate one cross-validation fold.

    For every epoch the function trains on the fold's training split,
    validates on its validation split, appends a row to `log-<fold>.csv`,
    saves `latest-<fold>.h5` and, on improvement, `best_loss-<fold>.h5` /
    `best_score-<fold>.h5`, then asks the scheduler for the next learning rate.
    Datasets and models are (re)built at epoch 0 and at every epoch listed in
    `config.stages.epochs` (when `stages` is configured); on a rebuild after
    epoch 0 the training model is restored from `latest-<fold>.h5`.

    After training, the best-score weights are reloaded, validation scores
    (and optionally predictions) are written to `config.save_path`, and the
    same is done for the test set when `config.has_test` is set.

    Args:
        fold_idx: index of the fold to run.
        config: OmegaConf experiment configuration. NOTE: stage starts merge
            their overrides into `config.transform` / `config.model` in place.
    """
    # SECURITY NOTE: several config strings (scheduler/optimizer/loss names, the
    # augmentation name and the backbone function) are passed to eval(). Only
    # run this with configuration files you trust.
    scheduler = eval(config.scheduler.name)(**config.scheduler.params)
    optimizer = eval(config.optimizer.name)(**config.optimizer.params)
    criterion = eval(config.loss.name)(**config.loss.params)

    train_metrics = Metrics()
    val_metrics = Metrics()

    train_data, val_data = get_train_val_list(fold_idx)
    train_iters_per_epoch = len(train_data[0]) // config.batch_size

    if config.has_test:
        test_metrics = Metrics()
        test_data = get_test_list()
        test_ds = get_val_ds(
            test_data,
            config.num_classes, config.batch_size,
            tuple(config.transform.test_size)
        )

    # Start below any reachable score so that the first epoch always writes
    # best_score-<fold>.h5; that file is loaded unconditionally after training.
    best_score = float('-inf')
    best_loss = 10000.

    for e in range(config.num_epochs):

        # --- init: dataset, inference method, train/val model ---
        if (e == 0) or (('stages' in config) and (e in config.stages.epochs)):
            if ('stages' in config) and (e in config.stages.epochs):
                # Stage start: merge this stage's overrides into the config.
                # NOTE(review): the merge mutates `config`, so overrides from a
                # later stage remain in place when the next fold starts —
                # confirm this is acceptable for the configs in use.
                stage_idx = config.stages.epochs.index(e)
                config.transform = OmegaConf.merge(config.transform, config.stages.transforms[stage_idx])
                config.model = OmegaConf.merge(config.model, config.stages.models[stage_idx])
                if e > 0:
                    logger.info(f'New Stage {stage_idx}')


            train_ds = get_train_ds(
                train_data,
                config.num_classes, config.batch_size,
                tuple(config.transform.train_size),
                eval(config.transform.aug_name)(**config.transform.aug_params),
            )
            val_ds = get_val_ds(
                val_data,
                config.num_classes, config.batch_size,
                tuple(config.transform.val_size)
            )

            train_on_batch, val_on_batch = get_proc_on_batch()

            with strategy.scope():
                train_model = build_model(
                    tuple(config.transform.train_size) + (3,), config.num_classes, weight_decay=config.loss.weight_decay,
                    dropout_rate=config.model.dropout_rate,
                    base_func=eval(config.model.base_func),
                    base_weights=config.model.base_weights
                )
                # A rebuild after epoch 0 can only be caused by a stage start, so
                # training must continue from the latest checkpoint. Testing
                # `e > 0` (instead of `stage_idx > 0`) avoids a NameError when
                # `stages` is configured without a stage at epoch 0, and also
                # restores the weights when the first configured stage starts
                # after epoch 0.
                if e > 0:
                    train_model.load_weights(os.path.join(config.save_path, f'latest-{fold_idx}.h5'))

                val_model = build_model(
                    tuple(config.transform.val_size) + (3,), config.num_classes, weight_decay=config.loss.weight_decay,
                    dropout_rate=config.model.dropout_rate,
                    base_func=eval(config.model.base_func),
                    base_weights=config.model.base_weights
                )

        # --- logger start ---
        if e == 0:
            csv_logger = mlc.LinewiseCSVWriter(
                os.path.join(config.save_path, f'log-{fold_idx}.csv'),
                header=['epoch'] + [f'train_{h}' for h in train_metrics.header] + [f'val_{h}' for h in val_metrics.header]
            )
            timer = mlc.time.Timer()
            logger.info(f'fold-{fold_idx} start')


        # --- train ---
        timer.add('train')
        train_metrics, train_model, optimizer = proc_on_epoch(
            True, train_on_batch, train_ds, train_metrics, criterion, train_model, e, config.num_epochs,
            iters_per_epoch=train_iters_per_epoch, optimizer=optimizer
        )
        train_elapsed = timer.fsince('train')


        # --- val ---
        # The validation model is a separate instance (built for val_size), so
        # the freshly trained weights are copied over before evaluating.
        val_model.set_weights(train_model.get_weights())
        timer.add('val')
        val_metrics = proc_on_epoch(
            False, val_on_batch, val_ds, val_metrics, criterion, val_model, e, config.num_epochs,
        )
        val_elapsed = timer.fsince('val')


        # --- log ---
        logger.info(f'epoch: {e+1} train: {train_elapsed} val: {val_elapsed}')
        logger.info(f'train: {train_metrics.get_latest()[1]}')
        logger.info(f'val: {val_metrics.get_latest()[1]}')
        csv_logger.write([e+1] + train_metrics.get_latest()[1] + val_metrics.get_latest()[1])


        # --- save ---
        val_model.save(os.path.join(config.save_path, f'latest-{fold_idx}.h5'))

        val_loss = val_metrics.get_loss()
        if val_loss < best_loss:
            logger.info(f'loss got improved: {best_loss:.4f} to {val_loss:.4f}')
            best_loss = val_loss
            val_model.save(os.path.join(config.save_path, f'best_loss-{fold_idx}.h5'))

        val_score = val_metrics.get_scores()[0]
        if val_score > best_score:
            logger.info(f'score got improved: {best_score:.4f} to {val_score:.4f}')
            best_score = val_score
            val_model.save(os.path.join(config.save_path, f'best_score-{fold_idx}.h5'))


        # --- lr update ---
        curr_lr = optimizer.learning_rate.numpy()
        next_lr = scheduler.get_next_lr(e+1, val_loss, val_score)
        optimizer.learning_rate = next_lr
        if curr_lr != next_lr:
            logger.info(f'    lr {curr_lr} -> {next_lr}')


    # --- oof: evaluate the best-score checkpoint on the validation split ---
    val_model.load_weights(os.path.join(config.save_path, f'best_score-{fold_idx}.h5'))
    val_scores, val_trues, val_preds = proc_eval('val', val_on_batch, val_ds, val_metrics, criterion, val_model)
    np.savetxt(os.path.join(config.save_path, f'val_scores-{fold_idx}.txt'), np.array(val_scores))
    if config.save_preds:
        np.save(os.path.join(config.save_path, f'val_preds-{fold_idx}.npy'), val_preds)

    logger.info(f'fold-{fold_idx} val: {val_scores[0]:.4f}')


    # --- test ---
    if config.has_test:
        test_scores, test_trues, test_preds = proc_eval('test', val_on_batch, test_ds, test_metrics, criterion, val_model)
        np.savetxt(os.path.join(config.save_path, f'test_scores-{fold_idx}.txt'), np.array(test_scores))
        if config.save_preds:
            np.save(os.path.join(config.save_path, f'test_preds-{fold_idx}.npy'), test_preds)

        logger.info(f'fold-{fold_idx} test: {test_scores[0]:.4f}')


    logger.info(f'fold-{fold_idx} end')

In [None]:
def main(config):
    """Run the experiment: every fold, then average the saved test scores.

    Creates `config.save_path`, attaches a log file, stores a copy of the
    config, runs `run_fold` for each fold (only the first one when
    `config.run_fold1_only` is set) and finally averages the first score found
    in every `test_scores-*.txt` file under `config.save_path`. The mean is
    logged and encoded in the name of an empty `<expr_name>_<score>.rslt`
    marker file written to the current working directory.

    When no test score file exists, the averaging step is skipped instead of
    producing a NaN mean and a `*_nan.rslt` marker.
    """
    os.makedirs(config.save_path, exist_ok=True)
    logger.add(os.path.join(config.save_path, 'log.txt'), mode='w')
    OmegaConf.save(config, os.path.join(config.save_path, f'{config.expr_name}.yaml'))

    logger.info(f'{config.expr_name} start')

    for fold_idx in range(config.num_folds):
        run_fold(fold_idx, config)

        if config.run_fold1_only:
            break

    # --- mean test ---
    # NOTE(review): the glob collects every test_scores-*.txt in save_path,
    # including files left over from an earlier run with the same expr_name.
    test_scores = []
    for fpath in sorted(glob.glob(os.path.join(config.save_path, f'test_scores-*.txt'))):
        score = np.loadtxt(fpath)
        # Files holding several scores load as arrays; only the first is averaged.
        if score.ndim > 0:
            score = score[0]
        test_scores.append(score)

    if test_scores:
        test_score = np.mean(test_scores)

        # Empty marker file whose name carries the result.
        with open(f'{config.expr_name}_{test_score:.6f}.rslt', 'w') as f:
            pass

        logger.info(f'mean test score: {test_score}')
    else:
        logger.info('no test scores found; skipping mean test score')

    logger.info(f'{config.expr_name} end')

---

In [None]:
if __name__ == '__main__':
    main(config)