# [TRAIN] Siim EFNB7 tf.keras (study) 
[[Notebook] using TPU](https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training)  
[[Notebook] Reference Notebook](https://www.kaggle.com/h053473666/siim-covid19-efnb7-train-study)  

* Cutmix  
    [[Notebook] CutMix and MixUp on GPU/TPU](https://www.kaggle.com/cdeotte/cutmix-and-mixup-on-gpu-tpu)


In [1]:
!pip install efficientnet

Collecting efficientnet
  Downloading efficientnet-1.1.1-py3-none-any.whl (18 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 661 kB/s eta 0:00:01
Installing collected packages: keras-applications, efficientnet
Successfully installed efficientnet-1.1.1 keras-applications-1.0.8


In [2]:
import os
import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets # what?
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras.backend as K
from sklearn.model_selection import GroupKFold

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score, precision_recall_curve
import shutil

import tensorflow_hub as tfhub
import tensorflow.keras.backend as K

import random
import albumentations
from PIL import Image, ImageOps, ImageEnhance
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.augmentations import functional as F

import glob

# Record the library versions and the devices TensorFlow can see for this run.
print('numpy version:',np.__version__)
print('TF version:', tf.__version__)
print('Hub version:', tfhub.__version__)
print('Physical devices:', tf.config.list_physical_devices())

numpy version: 1.19.5
TF version: 2.4.1
Hub version: 0.12.0
Physical devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [3]:
# Config
# Flags read by the training cells: `if v1:` guards the EfficientNetB7 loop and
# `if v2:` guards the EfficientNetV2 (TF-Hub) cells.

v1 = False
v2 = True

def seed_everything(SEED):
    """Seed Python, NumPy and TensorFlow RNGs and request deterministic cuDNN.

    Args:
        SEED: integer seed applied to `random`, `numpy.random` and
            `tf.random`, and exported as PYTHONHASHSEED.
    """
    # Environment switches are set first so they are in place before any
    # library reads them.
    os.environ['PYTHONHASHSEED'] = str(SEED)
    # This variable is an on/off switch, not a seed: it must be '1' (or 'true'),
    # so writing the seed value (e.g. '42') does not enable determinism.
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)

# Report which model branch is active, then seed every RNG with 42.
print(f"Use model Version : {'V1' if v1 else 'V2'}")
seed_everything(42)

Use model Version : V2


# CutMix

In [4]:
# Define CUTMIX

def onehot(image, label):
    """Return `image` unchanged together with `label` one-hot encoded to depth 4."""
    encoded = tf.one_hot(label, depth=4)  # 4 = number of study-level classes
    return image, encoded

def cutmix(image, label, PROBABILITY = 1.0, DIM = 768, CLASSES = 4, AUG_BATCH = 128):
    """Apply CutMix to a batch of square images.

    For each image j, a random partner k from the same batch and a random
    square box are drawn; the box region of image j is replaced with the same
    region of image k, and the labels are blended by the pasted-area fraction.

    Args:
        image: batch tensor indexed as [AUG_BATCH, DIM, DIM, 3]
            (a batch, not a single image).
        label: rank-1 tensor of class ids (one-hot encoded here), or a
            rank-2 tensor of per-class targets.
        PROBABILITY: probability in [0, 1] that a given image is mixed.
        DIM: side length of the images in `image`.
        CLASSES: number of label classes.
        AUG_BATCH: number of images in the batch; must equal the actual batch
            size because the output is reshaped to it.

    Returns:
        (images, labels) with shapes (AUG_BATCH, DIM, DIM, 3) and
        (AUG_BATCH, CLASSES).
    """
    imgs = []
    labs = []
    for j in range(AUG_BATCH):
        # P is 1 when this image is mixed and 0 otherwise (a zero-size box).
        P = tf.cast(tf.random.uniform([],0,1) <= PROBABILITY, tf.int32)
        # Choose a random partner image; tf.cast truncates the float sample to an int index.
        k = tf.cast(tf.random.uniform([],0,AUG_BATCH), tf.int32)
        # Choose a random box centre.
        x = tf.cast(tf.random.uniform([],0,DIM), tf.int32)
        y = tf.cast(tf.random.uniform([],0,DIM), tf.int32)
        b = tf.random.uniform([],0,1) # this is beta dist with alpha=1.0
        WIDTH = tf.cast(DIM * tf.math.sqrt(1-b), tf.int32) * P
        # Clip the box to the image bounds.
        ya = tf.math.maximum(0, y-WIDTH//2)
        yb = tf.math.minimum(DIM, y+WIDTH//2)
        xa = tf.math.maximum(0, x-WIDTH//2)
        xb = tf.math.minimum(DIM, x+WIDTH//2)

        # Build the mixed image: rows above/below the box come from image j; inside
        # the box rows, the left and right strips come from j and the middle from k.
        one = image[j, ya:yb, 0:xa, :]
        two = image[k, ya:yb, xa:xb, :]
        three = image[j, ya:yb, xb:DIM, :]
        middle = tf.concat([one, two, three], axis=1)
        img = tf.concat([image[j, 0:ya, :, :], middle, image[j,yb:DIM,:,:]], axis=0)
        imgs.append(img)

        # Blend labels by the fraction of pixels actually pasted. The box is
        # clipped at the borders, so the clipped area (not WIDTH*WIDTH) is the
        # true share of image k in the result.
        a = tf.cast((yb - ya) * (xb - xa), tf.float32) / float(DIM * DIM)
        if len(label.shape) == 1:
            lab1 = tf.one_hot(label[j], CLASSES)
            lab2 = tf.one_hot(label[k], CLASSES)
        else:
            lab1 = tf.cast(label[j,], tf.float32)
            lab2 = tf.cast(label[k,], tf.float32)
        labs.append((1-a)*lab1 + a*lab2)

    # Reshape so the TPU compiler knows the static shape of the outputs.
    image2 = tf.reshape(tf.stack(imgs), (AUG_BATCH,DIM,DIM,3))
    label2 = tf.reshape(tf.stack(labs), (AUG_BATCH,CLASSES))
    return image2, label2

def transform_cutmix(image, label):
    """Batch-level map function that applies CutMix (MixUp is not applied here).

    Args:
        image: batch of images, expected as (128, 768, 768, 3).
        label: batch of labels accepted by `cutmix`.

    Returns:
        (images, labels) reshaped to (128, 768, 768, 3) and (128, 4).
    """
    DIM = 768
    CLASSES = 4
    CUTMIX_PROB = 0.666  # probability that a given image in the batch is mixed
    AUG_BATCH = 128
    image2, label2 = cutmix(image, label, CUTMIX_PROB)
    # Reshape so the TPU compiler knows the static shape of the outputs. The
    # previous per-sample unstack/restack loop produced the same tensors.
    image4 = tf.reshape(image2, (AUG_BATCH, DIM, DIM, 3))
    label4 = tf.reshape(label2, (AUG_BATCH, CLASSES))
    return image4, label4

## Display CutMix Augmentation

In [5]:
# DISPLAY CUTMIX AUGMENTATION
# NOTE(review): when enabled, this cell needs `GCS_DS_PATH`, `df`, `label_cols`,
# `build_decoder` and `build_dataset`, which are defined in later cells — run
# those first or move this cell below them.
display_image = False
if display_image:
    i = 0
    detail_path = '/new_resized_data/image_512/'
    valid_paths = GCS_DS_PATH + detail_path + df[df['fold'] == i]['id'] + '.png'
    train_paths = GCS_DS_PATH + detail_path + df[df['fold'] != i]['id'] + '.png'
    valid_labels = df[df['fold'] == i][label_cols].values
    train_labels = df[df['fold'] != i][label_cols].values
    img_size = 512
    BATCH_SIZE = 128
    # train image
    decoder = build_decoder(with_labels=True,
                            target_size=(img_size, img_size),
                            ext='png')
    # valid image
    test_decoder = build_decoder(with_labels=False, 
                                 target_size=(img_size, img_size),
                                 ext='png')

    train_dataset = build_dataset(train_paths,
                                  train_labels,
                                  bsize=BATCH_SIZE,
                                  decode_fn=decoder)

    # `build_dataset` names this switch `do_cutmix`; the old `do_mix=` keyword
    # raised TypeError.
    valid_dataset = build_dataset(valid_paths,
                                  valid_labels,
                                  bsize=BATCH_SIZE,
                                  decode_fn=decoder,
                                  repeat=False,
                                  shuffle=False,
                                  augment=False,
                                  do_cutmix=False)
    # A dataset is not an iterator; repeat() yields batches we can loop over.
    sample = train_dataset.repeat()
    for (img, label) in sample:
        img = img[:16]
        label = label[:16]
        break

# AugMix Augmentation
[[github] Original AugMix github](https://github.com/google-research/augmix/blob/master/augment_and_mix.py)  
[[Notebook] Augmix for TPU](https://www.kaggle.com/szacho/augmix-data-augmentation-on-tpu)

### Transformations
These are the simple augmentations used by AugMix. Every function takes `image` and `level` (an integer from 1 to 10) as arguments. `level` controls how strong the transformation is: the higher the level, the more the image is changed.

Translate, shear and rotate augmentations are based on [this notebook](https://www.kaggle.com/cdeotte/rotation-augmentation-gpu-tpu-0-96).  

(Not implemented yet / 구현 아직 안됨)

In [6]:
import numpy as np
from PIL import Image, ImageOps, ImageEnhance

# ImageNet code should change this value
# Output side length passed to the shear/translate `pil_img.transform` calls and
# the scale for the maximum translation (IMAGE_SIZE / 3).
IMAGE_SIZE = 768


def int_parameter(level, maxval):
    """Scale `maxval` by `level / 10` and truncate the result to an int.

    Args:
      level: Strength of the operation, on a 0..10 scale.
      maxval: Value the operation takes at level 10.
    Returns:
      `int(level * maxval / 10)`.
    """
    scaled = level * maxval / 10
    return int(scaled)


def float_parameter(level, maxval):
    """Scale `maxval` by `level / 10` and return the result as a float.

    Args:
      level: Strength of the operation, on a 0..10 scale.
      maxval: Value the operation takes at level 10.
    Returns:
      `float(level) * maxval / 10.`.
    """
    level_as_float = float(level)
    return level_as_float * maxval / 10.


def sample_level(n):
    """Draw one level uniformly from [0.1, n) using NumPy's global RNG."""
    drawn = np.random.uniform(0.1, n)
    return drawn


def autocontrast(pil_img, _):
    """Apply `ImageOps.autocontrast`; the level argument is ignored."""
    result = ImageOps.autocontrast(pil_img)
    return result


def equalize(pil_img, _):
    """Apply `ImageOps.equalize`; the level argument is ignored."""
    result = ImageOps.equalize(pil_img)
    return result


def posterize(pil_img, level):
    """Posterize to `4 - k` bits, where k is a random integer scaled from `level` (max 4)."""
    bits_removed = int_parameter(sample_level(level), 4)
    bits_kept = 4 - bits_removed
    return ImageOps.posterize(pil_img, bits_kept)


def rotate(pil_img, level):
    """Rotate by a random angle (scaled from `level`, max 30 degrees) with a random sign."""
    angle = int_parameter(sample_level(level), 30)
    # Flip the direction half of the time.
    angle = -angle if np.random.uniform() > 0.5 else angle
    return pil_img.rotate(angle, resample=Image.BILINEAR)


def solarize(pil_img, level):
    """Solarize with threshold `256 - k`, where k is a random integer scaled from `level` (max 256)."""
    offset = int_parameter(sample_level(level), 256)
    threshold = 256 - offset
    return ImageOps.solarize(pil_img, threshold)


def shear_x(pil_img, level):
    """Shear along x by a random factor (scaled from `level`, max 0.3) with a random sign.

    The output is always IMAGE_SIZE x IMAGE_SIZE.
    """
    shear = float_parameter(sample_level(level), 0.3)
    shear = -shear if np.random.uniform() > 0.5 else shear
    affine_coeffs = (1, shear, 0, 0, 1, 0)
    out_size = (IMAGE_SIZE, IMAGE_SIZE)
    return pil_img.transform(out_size, Image.AFFINE, affine_coeffs,
                             resample=Image.BILINEAR)


def shear_y(pil_img, level):
    """Shear along y by a random factor (scaled from `level`, max 0.3) with a random sign.

    The output is always IMAGE_SIZE x IMAGE_SIZE.
    """
    shear = float_parameter(sample_level(level), 0.3)
    shear = -shear if np.random.uniform() > 0.5 else shear
    affine_coeffs = (1, 0, 0, shear, 1, 0)
    out_size = (IMAGE_SIZE, IMAGE_SIZE)
    return pil_img.transform(out_size, Image.AFFINE, affine_coeffs,
                             resample=Image.BILINEAR)


def translate_x(pil_img, level):
    """Translate along x by a random offset (scaled from `level`, max IMAGE_SIZE / 3) with a random sign.

    The output is always IMAGE_SIZE x IMAGE_SIZE.
    """
    offset = int_parameter(sample_level(level), IMAGE_SIZE / 3)
    offset = -offset if np.random.random() > 0.5 else offset
    affine_coeffs = (1, 0, offset, 0, 1, 0)
    out_size = (IMAGE_SIZE, IMAGE_SIZE)
    return pil_img.transform(out_size, Image.AFFINE, affine_coeffs,
                             resample=Image.BILINEAR)


def translate_y(pil_img, level):
    """Translate along y by a random offset (scaled from `level`, max IMAGE_SIZE / 3) with a random sign.

    The output is always IMAGE_SIZE x IMAGE_SIZE.
    """
    offset = int_parameter(sample_level(level), IMAGE_SIZE / 3)
    offset = -offset if np.random.random() > 0.5 else offset
    affine_coeffs = (1, 0, 0, 0, 1, offset)
    out_size = (IMAGE_SIZE, IMAGE_SIZE)
    return pil_img.transform(out_size, Image.AFFINE, affine_coeffs,
                             resample=Image.BILINEAR)


# operation that overlaps with ImageNet-C's test set
def color(pil_img, level):
    """Enhance colour by a random factor: a value scaled from `level` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Color(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level):
    """Enhance contrast by a random factor: a value scaled from `level` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Contrast(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level):
    """Enhance brightness by a random factor: a value scaled from `level` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Brightness(pil_img)
    return enhancer.enhance(factor)


# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level):
    """Enhance sharpness by a random factor: a value scaled from `level` (max 1.8) plus 0.1."""
    factor = float_parameter(sample_level(level), 1.8) + 0.1
    enhancer = ImageEnhance.Sharpness(pil_img)
    return enhancer.enhance(factor)


# Operations sampled by `augment_and_mix`.
augmentations = [
    autocontrast,
    equalize,
    posterize,
    rotate,
    solarize,
    shear_x,
    shear_y,
    translate_x,
    translate_y,
]

# The same operations plus the four ImageEnhance-based ones.
augmentations_all = augmentations + [color, contrast, brightness, sharpness]

In [7]:
# Augmix
import numpy as np
from PIL import Image

# CIFAR-10 constants
# Per-channel mean/std used by `normalize` below.
# NOTE(review): these are CIFAR-10 statistics, while the training cells in this
# notebook read chest X-ray PNGs — confirm they are the intended values.
MEAN = [0.4914, 0.4822, 0.4465]
STD = [0.2023, 0.1994, 0.2010]

def numpy_to_tensor(arg):
    """Convert `arg` to a float32 TensorFlow tensor."""
    return tf.convert_to_tensor(arg, dtype=tf.float32)

def normalize(image):
    """Subtract MEAN and divide by STD per channel for an (h, w, c) array.

    The array is moved to channel-first for the arithmetic and returned in
    channel-last order.
    """
    channel_mean = np.array(MEAN).reshape(-1, 1, 1)
    channel_std = np.array(STD).reshape(-1, 1, 1)
    channels_first = image.transpose(2, 0, 1)
    standardized = (channels_first - channel_mean) / channel_std
    return standardized.transpose(1, 2, 0)


def apply_op(image, op, severity):
    """Run one PIL-based augmentation on a float image with values in [0, 1].

    Args:
        image: array-like of shape (h, w, c) with values in [0, 1].
        op: callable `(pil_img, level) -> pil_img`, e.g. an entry of `augmentations`.
        severity: level passed through to `op`.

    Returns:
        NumPy array holding the augmented image, rescaled to [0, 1].
    """
    # PIL needs a uint8 NumPy array; Image.fromarray cannot consume a TF tensor,
    # so the conversion is done with NumPy rather than the Keras backend.
    pixels = np.clip(np.asarray(image) * 255., 0, 255).astype(np.uint8)

    pil_img = Image.fromarray(pixels)  # Convert to PIL.Image
    pil_img = op(pil_img, severity)
    return np.asarray(pil_img) / 255.


def augment_and_mix(image, severity=3, width=3, depth=-1, alpha=1.):
    """Perform AugMix augmentations and compute mixture.

    Args:
        image: Raw input image of shape (h, w, c) with values in [0, 1]; a NumPy
            array or an eager tensor. The shear/translate operations emit
            IMAGE_SIZE x IMAGE_SIZE images, so h and w should equal IMAGE_SIZE.
        severity: Severity of underlying augmentation operators (between 1 to 10).
        width: Width of augmentation chain.
        depth: Depth of augmentation chain. -1 enables stochastic depth uniformly
            from [1, 3].
        alpha: Probability coefficient for Beta and Dirichlet distributions.

    Returns:
        mixed: Augmented and mixed image as a float32 tensor.
    """
    ws = np.float32(np.random.dirichlet([alpha] * width))
    m = np.float32(np.random.beta(alpha, alpha))

    # Work in NumPy throughout: `normalize` calls ndarray.transpose and the PIL
    # round-trip in `apply_op` needs arrays, neither of which a TF tensor provides.
    image = np.asarray(image, dtype=np.float32)
    mix = np.zeros_like(image)

    for i in range(width):
        image_aug = image.copy()
        d = depth if depth > 0 else np.random.randint(1, 4)
        for _ in range(d):
            op = np.random.choice(augmentations)
            image_aug = apply_op(image_aug, op, severity)
        # Preprocessing commutes since all coefficients are convex
        mix += ws[i] * normalize(image_aug)

    mixed = (1 - m) * normalize(image) + m * mix
    return numpy_to_tensor(mixed)

In [8]:
# Scratch cell: build a small constant tensor and display it.
a = tf.constant([2,3,4])
a

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 3, 4], dtype=int32)>

In [9]:
# Scratch cell: show the tensor `a` as a NumPy array.
a.numpy()

array([2, 3, 4], dtype=int32)

In [10]:
# Scratch cell: display another constant tensor.
tf.constant([2,3,5])

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 3, 5], dtype=int32)>

# Function for Datasets & TPU

In [11]:
def auto_select_accelerator():
    """Return a TPU distribution strategy when a TPU resolves, else the default strategy.

    If TPU setup raises ValueError, `tf.distribute.get_strategy()` is used
    instead. The number of replicas is printed either way.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()

    replica_count = strategy.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return strategy

# decoding
def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Build a function that reads an image file into a resized float tensor.

    Args:
        with_labels: when true, the returned function takes `(path, label)` and
            passes the label through; otherwise it takes `path` only.
        target_size: size handed to `tf.image.resize`.
        ext: 'png', 'jpg' or 'jpeg'; any other value makes the decoder raise
            ValueError when it is called.

    Returns:
        The decoding function.
    """
    def decode(path):
        raw = tf.io.read_file(path)

        if ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        elif ext == 'jpg' or ext == 'jpeg':
            pixels = tf.image.decode_jpeg(raw, channels=3)
        else:
            raise ValueError("Image extension not supported")

        # Scale to [0, 1] before resizing.
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    def decode_with_labels(path, label):
        return decode(path), label

    if with_labels:
        return decode_with_labels
    return decode

# augmentation function
def build_augmenter(with_labels=True, saturation_range=None, contrast_range=None,
                    brightness_delta=None):
    """Build a random flip / saturation / contrast / brightness augmentation function.

    Args:
        with_labels: when true, the returned function takes `(img, label)` and
            passes the label through; otherwise it takes `img` only.
        saturation_range: `(lower, upper)` for `tf.image.random_saturation`.
            Defaults to the module-level `SATURATION`.
        contrast_range: `(lower, upper)` for `tf.image.random_contrast`.
            Defaults to the module-level `CONTRAST`.
        brightness_delta: max delta for `tf.image.random_brightness`.
            Defaults to the module-level `BRIGHTNESS`.

    Returns:
        The augmentation function.

    Raises:
        NameError: when the returned function is called and a setting was
            neither passed here nor defined at module level.
    """
    def _setting(override, global_name):
        # Looked up lazily at call time so building an augmenter that is never
        # used does not require the constants to exist.
        if override is not None:
            return override
        try:
            return globals()[global_name]
        except KeyError:
            raise NameError(
                f"{global_name} is not defined: pass it to build_augmenter() "
                f"or define the module-level constant before augmenting"
            ) from None

    def augment(img):
        saturation = _setting(saturation_range, 'SATURATION')
        contrast_bounds = _setting(contrast_range, 'CONTRAST')
        max_delta = _setting(brightness_delta, 'BRIGHTNESS')

        img = tf.image.random_flip_left_right(img)
#        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_saturation(img, saturation[0], saturation[1])
        img = tf.image.random_contrast(img, contrast_bounds[0], contrast_bounds[1])
        img = tf.image.random_brightness(img, max_delta)

        return img
    # augmentation experiment -> best way : H Flip, rotate, zoom, brightness, cutout

    def augment_with_labels(img, label):
        return augment(img), label

    return augment_with_labels if with_labels else augment

# augmix
def build_augmixer(with_labels=True):
    """Build a function that applies `augment_and_mix` to one image.

    Args:
        with_labels: when true, the returned function takes `(img, label)` and
            passes the label through; otherwise it takes `img` only.

    Returns:
        The AugMix function, usable inside `tf.data.Dataset.map`.
    """
    def augmixer(img):
        # augment_and_mix runs NumPy/PIL code, which cannot be traced into a
        # tf.data graph, so it is executed eagerly through tf.py_function.
        # NOTE(review): py_function runs in the Python process that built the
        # dataset — confirm this is acceptable for the TPU setup used here.
        mixed = tf.py_function(
            lambda x: augment_and_mix(x, severity=3, width=3, depth=-1, alpha=1.),
            inp=[img],
            Tout=tf.float32,
        )
        # py_function drops static shape information; the output keeps the
        # input's shape.
        mixed.set_shape(img.shape)
        return mixed

    def augmix_with_labels(img, label):
        return augmixer(img), label

    return augmix_with_labels if with_labels else augmixer


# dataset
def build_dataset(paths, 
                  labels=None, bsize=128, cache=True,
                  decode_fn=None, 
                  augment_fn=None,
                  augmix_fn=None,
                  repeat=True, 
                  shuffle=1024,
                  augment=False, 
                  do_cutmix = False,
                  do_augmix = False,
                  cache_dir=""):
    """Build a batched `tf.data.Dataset` from image paths (and optional labels).

    Pipeline order: decode -> (augmix) -> (cache) -> (augment) -> (repeat) ->
    (shuffle) -> batch -> (cutmix) -> prefetch.

    Args:
        paths: image file paths.
        labels: labels aligned with `paths`, or None for an unlabeled dataset.
        bsize: batch size.
        cache: cache decoded elements; in `cache_dir` when given, else in memory.
        decode_fn: path decoder; defaults to `build_decoder(labels is not None)`.
        augment_fn: per-sample augmentation; defaults to `build_augmenter(...)`.
        augmix_fn: per-sample AugMix; defaults to `build_augmixer(...)`.
        repeat: repeat the dataset indefinitely.
        shuffle: shuffle buffer size, or a falsy value to disable shuffling.
        augment: apply `augment_fn`.
        do_cutmix: apply `transform_cutmix` to every batch.
        do_augmix: apply `augmix_fn`.
        cache_dir: directory for the on-disk cache ("" means in-memory).

    Returns:
        The assembled dataset.
    """
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(labels is not None)

    if augment_fn is None:
        augment_fn = build_augmenter(labels is not None)

    if augmix_fn is None:
        augmix_fn = build_augmixer(labels is not None)

    AUTO = tf.data.experimental.AUTOTUNE
    slices = paths if labels is None else (paths, labels)

    dset = tf.data.Dataset.from_tensor_slices(slices)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if do_augmix:
        # NOTE(review): AugMix sits before cache(), so with cache=True the same
        # augmented samples are replayed every epoch — confirm this is intended.
        dset = dset.map(augmix_fn, num_parallel_calls=AUTO)
    if cache:
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)

    if repeat:
        dset = dset.repeat()
    if shuffle:
        dset = dset.shuffle(shuffle)

    # transform_cutmix reshapes to a fixed batch size, so a short final batch
    # would fail; drop it whenever cutmix is enabled.
    dset = dset.batch(bsize, drop_remainder=do_cutmix)
    if do_cutmix:
        dset = dset.map(transform_cutmix, num_parallel_calls=AUTO)
    # Prefetch last so the final stage (including cutmix) overlaps with training.
    return dset.prefetch(AUTO)

In [None]:
# Dataset name, distribution strategy and global batch size (8 samples per replica).
COMPETITION_NAME = 'siim-image'
strategy = auto_select_accelerator()
BATCH_SIZE = strategy.num_replicas_in_sync * 8
# NOTE(review): with the next line commented out, GCS_DS_PATH is not defined by this
# cell; the `if v1:` training cell reads it, while the `if v2:` cell assigns its own.
#GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

In [13]:
# load df : train_study_level
# Reads the study-level labels and assigns every row a validation fold in 0..4.
load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
df = pd.read_csv('/kaggle/input/siim-covid19-detection/train_study_level.csv') # for study_level
label_cols = df.columns[1:5]  # the four columns after `id` are the targets
gkf = GroupKFold(n_splits=5)
df['fold'] = -1  # placeholder, overwritten for every row below
# The group key is the row's own `id`, so each group is split as a unit.
for fold, (train_index, valid_index) in enumerate(gkf.split(df, groups = df.id.tolist())):
    df.loc[valid_index,'fold'] = fold
df

Unnamed: 0,id,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance,fold
0,00086460a852_study,0,1,0,0,3
1,000c9c05fd14_study,0,0,0,1,4
2,00292f8c37bd_study,1,0,0,0,0
3,005057b3f880_study,1,0,0,0,1
4,0051d9b12e72_study,0,0,0,1,2
...,...,...,...,...,...,...
6049,ffcb4630f46f_study,0,1,0,0,2
6050,ffe4d6e8fbb0_study,0,1,0,0,3
6051,ffe94fcb14fa_study,0,1,0,0,4
6052,ffebf1ef4a9c_study,0,1,0,0,4


# EfficientNetB7 Model Training

In [None]:
# EfficientNetB7 5-fold training; runs only when `v1` is True.
# NOTE(review): this cell reads GCS_DS_PATH, which no earlier visible cell defines
# (its assignment is commented out) — define it before enabling v1.
if v1:
    for i in range(5): # K_FOLDS
        # Fold i is the validation split; the other folds are used for training.
        valid_paths = GCS_DS_PATH + '/study/' + df[df['fold']==i]['id'] + '.png' 
        train_paths = GCS_DS_PATH + '/study/' + df[df['fold']!=i]['id'] + '.png' 
        valid_labels = df[df['fold']==i][label_cols].values
        train_labels = df[df['fold']!=i][label_cols].values

        IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600) # study = 600
        IMS = 7
        
        # decoder
        decoder = build_decoder(with_labels=True,
                                target_size = (IMSIZE[IMS], IMSIZE[IMS]), 
                                ext='png')
        test_decoder = build_decoder(with_labels=False, 
                                     target_size = (IMSIZE[IMS], IMSIZE[IMS]), 
                                     ext='png')
        
        # dataset
        train_dataset = build_dataset(
                train_paths, 
                train_labels, 
                bsize=BATCH_SIZE, 
                decode_fn = decoder
        )
        
        valid_dataset = build_dataset(
                valid_paths, 
                valid_labels, 
                bsize=BATCH_SIZE, 
                decode_fn = decoder,
                repeat=False, 
                shuffle=False, 
                augment=False
        )

        # Width of the output layer. Only a missing second axis (1-D labels)
        # falls back to 1; any other error is no longer swallowed.
        try:
            n_labels = train_labels.shape[1]
        except IndexError:
            n_labels = 1

        with strategy.scope():
            model = tf.keras.Sequential([
                efn.EfficientNetB7(
                    input_shape = (IMSIZE[IMS], IMSIZE[IMS], 3),
                    weights='imagenet',
                    include_top=False),
                tf.keras.layers.GlobalAveragePooling2D(),
                tf.keras.layers.Dense(n_labels, activation='softmax')
            ])
            model.compile(
                optimizer = tf.keras.optimizers.Adam(),
                loss='categorical_crossentropy',
                metrics=[tf.keras.metrics.AUC(multi_label=True)])
            model.summary()

        steps_per_epoch = train_paths.shape[0] // BATCH_SIZE
        # Keep only the weights with the lowest validation loss for this fold.
        checkpoint = tf.keras.callbacks.ModelCheckpoint(
            f"model{i}.h5", save_best_only=True, monitor='val_loss', mode='min'
        )
        lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', patience=3, min_lr=1e-6, mode='min')

        history = model.fit(
            train_dataset,
            epochs=20, # default 20
            verbose=1,
            callbacks = [checkpoint, lr_reducer],
            steps_per_epoch = steps_per_epoch,
            validation_data = valid_dataset
        )
        # Persist the per-epoch metrics of this fold.
        hist_df = pd.DataFrame(history.history)
        hist_df.to_csv(f"history{i}.csv")

# EfficientNetV2-L model Training

In [14]:
# We first find the GCS path of the selected EffNetV2 architecture from the EffNetV2 weights Kaggle dataset

if v2:
    # Get the Tensorflow Hub model URL
    hub_type = 'feature_vector' # ['classification', 'feature_vector']
    model_arch = "efficientnetv2-l-21k-ft1k"
    # Get the GCS path of EfficientNet Models
    DS_GCS_PATH = KaggleDatasets().get_gcs_path('efficientnetv2-tfhub-weight-files')
    # Later cells pass this path to tfhub.KerasLayer.
    MODEL_GCS_PATH = f"{DS_GCS_PATH}/tfhub_models/{model_arch}/{hub_type}"
    print(MODEL_GCS_PATH)

gs://kds-40f526733933c01f1885087fe24fcb06ce3f5c601cd74b3d084cfe68/tfhub_models/efficientnetv2-l-21k-ft1k/feature_vector


In [15]:
# Start every run with an empty output directory for checkpoints and histories.
if v2:
    trained_model_path = '/kaggle/working/effnetV2_model'
    # Remove leftovers from a previous run, then (re)create the directory.
    # Previously an existing directory was deleted and never recreated.
    if os.path.isdir(trained_model_path):
        shutil.rmtree(trained_model_path)
    os.makedirs(trained_model_path)
    print('model path')

model path


In [17]:
# Scratch cell: construct an InputLayer for 768x768 RGB inputs and display it.
tf.keras.layers.InputLayer(input_shape = [768,768,3])

<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f801dffa510>

In [None]:
# Build the V2 model once outside the training loop for inspection.
view_model = True
if view_model:
    model = tf.keras.Sequential([
                    # Explicitly define the input shape so the model can be properly
                    # loaded by the TFLite Converter.
                    # IMSIZE/IMS/n_labels are only assigned inside the training
                    # cells further down, so the 768 image size and the number of
                    # label columns are used directly here.
                    tf.keras.layers.InputLayer(input_shape = [768,768,3]),
                    tfhub.KerasLayer(MODEL_GCS_PATH, trainable=True),
                    tf.keras.layers.Dropout(rate=0.1),
                    tf.keras.layers.Dense(len(label_cols), activation='softmax')
                ])

In [None]:
# EfficientNetV2 (TF-Hub) 5-fold training; runs only when `v2` is True.
if v2:
    print(f"Dataset Name : {COMPETITION_NAME}")
    strategy = auto_select_accelerator() #Define TPU strategy and clear TPU - try to select TPU else GPU or CPU
    GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)
    BATCH_SIZE = strategy.num_replicas_in_sync * 8
    for i in range(5):
        print(f"Fold {i} start")
        # Fold i is the validation split; the other folds are used for training.
        detail_path = '/new_resized_data/study_768/'
        valid_paths = GCS_DS_PATH + detail_path + df[df['fold'] == i]['id'] + '.png'
        train_paths = GCS_DS_PATH + detail_path + df[df['fold'] != i]['id'] + '.png'
        valid_labels = df[df['fold'] == i][label_cols].values
        train_labels = df[df['fold'] != i][label_cols].values
        
        # Images are decoded and resized to 768 x 768.
        IMSIZE = (768,)
        IMS = 0
        
        # train image decoder
        decoder = build_decoder(with_labels=True,
                                target_size=(IMSIZE[IMS], IMSIZE[IMS]),
                                ext='png')
        # label-free decoder (not used further in this cell)
        test_decoder = build_decoder(with_labels=False,
                                     target_size=(IMSIZE[IMS], IMSIZE[IMS]),
                                     ext='png')
        
        train_dataset = build_dataset(train_paths,
                                      train_labels,
                                      bsize=BATCH_SIZE,
                                      decode_fn=decoder,
                                      augment=True,
                                      do_cutmix=False,
                                      do_augmix=False)
        
        valid_dataset = build_dataset(valid_paths,
                                      valid_labels,
                                      bsize=BATCH_SIZE,
                                      decode_fn=decoder,
                                      repeat=False,
                                      shuffle=False,
                                      augment=False,
                                      do_cutmix=False,
                                      do_augmix=False)
        # n_labels is the number of classes, i.e. the Dense output size. Only a
        # missing second axis (1-D labels) falls back to 1; any other error is
        # no longer swallowed.
        try:
            n_labels = train_labels.shape[1]
        except IndexError:
            n_labels = 1
        print(n_labels)
        with strategy.scope():
            model = tf.keras.Sequential([
                # Explicitly define the input shape so the model can be properly
                # loaded by the TFLite Converter.
                tf.keras.layers.InputLayer(input_shape = [IMSIZE[IMS],IMSIZE[IMS],3]),
                tfhub.KerasLayer(MODEL_GCS_PATH, trainable=True),
                tf.keras.layers.Dropout(rate=0.1),
                tf.keras.layers.Dense(n_labels, activation='softmax')
            ])
            model.compile(optimizer = tf.keras.optimizers.Adam(),
                          loss='categorical_crossentropy',
                          metrics = [tf.keras.metrics.AUC(multi_label=False)])
            model.summary()
            
        steps_per_epoch = train_paths.shape[0] // BATCH_SIZE 
        # Keep only the weights with the lowest validation loss for this fold.
        checkpoint = tf.keras.callbacks.ModelCheckpoint(f'{trained_model_path}/model{i}_2class.h5', 
                                                        save_best_only=True, 
                                                        monitor='val_loss',
                                                        mode='min')

        lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                          patience=3,
                                                          min_lr=1e-6,
                                                          mode='min')
        # NOTE(review): the cosine schedule and `lr_callback` are constructed but
        # never passed to fit(); only `checkpoint` and `lr_reducer` are active.
        annealing = tf.keras.experimental.CosineDecayRestarts(initial_learning_rate=0.0001,
                                                              first_decay_steps=10,
                                                              t_mul=1.0,
                                                              m_mul=1.0,
                                                              alpha=0.0)
        lr_callback = tf.keras.callbacks.LearningRateScheduler(annealing, verbose = True)
        
        history = model.fit(train_dataset,
                            epochs=20,
                            verbose=1,
                            callbacks=[checkpoint, lr_reducer],
                            steps_per_epoch=steps_per_epoch,
                            validation_data=valid_dataset)

        # Persist the per-epoch metrics of this fold.
        hist_df = pd.DataFrame(history.history)
        hist_df.to_csv(f"{trained_model_path}/history{i}_2class.csv")
        
# EfficientNetV2-L parameter counts
# Total params: 117,748,129
# Trainable params: 117,235,553
# Non-trainable params: 512,576

In [None]:
#zip model
#!zip -r study_model.zip ./*

In [None]:
# Paths of the five per-fold history CSVs, and the column names of the first one.
efnb7_history = [f"../input/siimcovid19efnb7trainstudy/history{i}.csv" for i in range(5)]
cols = pd.read_csv(efnb7_history[0]).columns
cols

In [None]:
# Plot the first six history columns, one panel per column and one line per fold.
# NOTE(review): `plt` (matplotlib.pyplot) is not imported in any visible cell —
# confirm the import exists before running this cell.
fig, axes = plt.subplots(3,2, figsize=(18,18))

# Read each fold's CSV once instead of once per (column, fold) pair.
histories = [pd.read_csv(path) for path in efnb7_history]

for i in range(6): # cols
    ax = axes[i//2][i%2]
    for j, temp in enumerate(histories): # folds
        ax.plot(temp[temp.columns[i]], label = f"fold {j} {temp.columns[i]}")
    # Legend and title only need to be set once per panel.
    ax.legend(loc='best')
    ax.set_title(f"{cols[i]}", size=20)
plt.show();