In [None]:
# Install the `efficientnet` package that the import cell below loads as
# `efficientnet.tfkeras`; `-q` keeps pip's output quiet.
!pip install efficientnet -q 

In [None]:
import os 

import efficientnet.tfkeras as efn 
import numpy as np
import pandas as pd


from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf 
from sklearn.model_selection import GroupKFold 

In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one resolves.

    The TPU cluster is resolved, connected to and initialised; if any of those
    steps raises ``ValueError`` the current default strategy is used instead.
    The number of replicas of the chosen strategy is printed either way.

    Returns:
        The selected distribution strategy object.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        chosen = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU setup failed with ValueError: use whatever strategy is current.
        chosen = tf.distribute.get_strategy()

    replica_count = chosen.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return chosen


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Build a function that reads an image file and returns a float image.

    Args:
        with_labels: if true, the returned function takes ``(path, label)`` and
            returns ``(image, label)``; otherwise it takes ``path`` and returns
            the image only.
        target_size: size handed to ``tf.image.resize``.
        ext: image format to decode; one of ``'png'``, ``'jpg'`` or ``'jpeg'``.

    Returns:
        The decoding callable described above.

    Raises:
        ValueError: if ``ext`` is not supported. The check runs when the
            decoder is built, so a bad extension fails immediately rather than
            later while a dataset pipeline is being traced.
    """
    if ext not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)

        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        else:  # 'jpg' or 'jpeg' -- anything else was rejected above
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Convert to float32 and divide by 255 before resizing.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        # The label is passed through unchanged.
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Create the random-flip augmentation callable.

    Args:
        with_labels: if true, the returned callable takes ``(img, label)`` and
            returns the augmented image together with the untouched label;
            otherwise it takes and returns the image only.

    Returns:
        A callable applying a random left/right flip followed by a random
        up/down flip to the image.
    """
    def augment(img):
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    if not with_labels:
        return augment

    def augment_with_labels(img, label):
        return augment(img), label

    return augment_with_labels


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Stages, in order: slice the inputs, decode, optionally cache, optionally
    augment, optionally repeat, optionally shuffle, then batch and prefetch.

    Args:
        paths: image paths to slice over.
        labels: optional labels sliced together with ``paths``.
        bsize: batch size.
        cache: whether to add a ``cache`` stage after decoding.
        decode_fn: decoding callable; built with ``build_decoder`` if omitted.
        augment_fn: augmentation callable; built with ``build_augmenter`` if
            omitted.
        augment: whether to apply ``augment_fn``.
        repeat: whether to repeat the dataset.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: argument handed to ``Dataset.cache``; when non-empty and
            ``cache`` is ``True`` the directory is created first.

    Returns:
        The resulting dataset.
    """
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    has_labels = labels is not None
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    autotune = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    pipeline = tf.data.Dataset.from_tensor_slices(source)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
    if cache:
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
    if repeat:
        pipeline = pipeline.repeat()
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(autotune)

In [None]:
# Name of the Kaggle dataset; used both for the local input directory and to
# look up the dataset's GCS path.
COMPETITION_NAME = "hpa-768768"

# The strategy is selected first because the global batch size is derived
# from its replica count (16 samples per replica).
strategy = auto_select_accelerator()
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

In [None]:
# Show the GCS path resolved for the dataset in the previous cell.
print(GCS_DS_PATH)

In [None]:
load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
df = pd.read_csv("../input/classification-label-csv-green/df_green.csv")

# Columns at positions 2..20 of the CSV are used as the label columns.
label_cols = df.columns[2:21]
labels = df[label_cols].values

# One image path per row: <GCS bucket>/<ID>.png
paths = GCS_DS_PATH + "/" + df["ID"] + ".png"

In [None]:
# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable across runs.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    paths,
    labels,
    test_size=0.2,
    random_state=21,
)

In [None]:
# Candidate square input resolutions; IMS is the index of the one in use
# (index 7 -> 600 pixels).
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600)
IMS = 7 

# The image paths built above end in '.png', so request the PNG decoder
# explicitly instead of relying on build_decoder's default of ext='jpg'.
decoder = build_decoder(
    with_labels=True, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='png'
)
test_decoder = build_decoder(
    with_labels=False, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='png'
)

# Training pipeline: build_dataset defaults apply (cache, augment, repeat,
# shuffle).
train_dataset = build_dataset(
    train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
)

# Validation pipeline: a single pass in the original order, without
# augmentation.
valid_dataset = build_dataset(
    valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
    repeat=False, shuffle=False, augment=False
)

In [None]:
# One output unit per label column; a 1-D label array means a single target.
# `train_labels` is the NumPy array returned by train_test_split, so its rank
# is checked explicitly. The previous bare `except:` would also have hidden
# unrelated failures, e.g. `train_labels` not being defined on a fresh kernel.
n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

# Create and compile the model inside the strategy scope.
with strategy.scope():
    model = tf.keras.Sequential([
        # ImageNet-initialised EfficientNetB7 without its classification head.
        efn.EfficientNetB7(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights='imagenet',
            include_top=False
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        # Independent sigmoid per label, paired with binary cross-entropy.
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(multi_label=True)]
    )
    model.summary()

In [None]:
# Suffix used in the checkpoint and history file names.
color = "_green"

# Number of whole batches in the training split; needed because the training
# dataset repeats indefinitely.
steps_per_epoch = len(train_paths) // BATCH_SIZE

# Keep only the weights with the lowest validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    f"model{color}.h5",
    save_best_only=True,
    monitor="val_loss",
    mode="min",
)

# Lower the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    min_lr=1e-6,
    mode="min",
)



In [None]:
# Train for 20 epochs, validating on the held-out split after each epoch.
history = model.fit(
    train_dataset,
    epochs=20,
    verbose=1,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_dataset,
)

In [None]:
# Save the per-epoch metrics recorded by fit() as a CSV file.
hist_df = pd.DataFrame(data=history.history)
hist_df.to_csv(path_or_buf=f"history{color}_effb7.csv")

In [None]:
# One output unit per label column; a 1-D label array means a single target.
# `train_labels` is the NumPy array returned by train_test_split, so its rank
# is checked explicitly. The previous bare `except:` would also have hidden
# unrelated failures, e.g. `train_labels` not being defined on a fresh kernel.
n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

# Same head and compile settings as the other runs, with an EfficientNetB4
# backbone.
with strategy.scope():
    model = tf.keras.Sequential([
        efn.EfficientNetB4(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights='imagenet',
            include_top=False
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(multi_label=True)]
    )
    model.summary()

# Suffix used in the checkpoint and history file names.
color = '_green'

# Whole batches per epoch; needed because the training dataset repeats.
steps_per_epoch = train_paths.shape[0] // BATCH_SIZE

# Keep only the weights with the lowest validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    f'model{color}_b4.h5', save_best_only=True, monitor='val_loss', mode='min'
)
# Lower the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', patience=3, min_lr=1e-6, mode='min'
)
history = model.fit(
    train_dataset,
    epochs=20,
    verbose=1,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_dataset
)

# Save the per-epoch metrics recorded by fit().
hist_df = pd.DataFrame(history.history)
hist_df.to_csv(f'history{color}_effb4.csv')

In [None]:
# One output unit per label column; a 1-D label array means a single target.
# `train_labels` is the NumPy array returned by train_test_split, so its rank
# is checked explicitly. The previous bare `except:` would also have hidden
# unrelated failures, e.g. `train_labels` not being defined on a fresh kernel.
n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

# Same head and compile settings as the other runs, with an EfficientNetB1
# backbone.
with strategy.scope():
    model = tf.keras.Sequential([
        efn.EfficientNetB1(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights='imagenet',
            include_top=False
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(multi_label=True)]
    )
    model.summary()

# Suffix used in the checkpoint and history file names.
color = '_green'

# Whole batches per epoch; needed because the training dataset repeats.
steps_per_epoch = train_paths.shape[0] // BATCH_SIZE

# Keep only the weights with the lowest validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    f'model{color}_b1.h5', save_best_only=True, monitor='val_loss', mode='min'
)
# Lower the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', patience=3, min_lr=1e-6, mode='min'
)

history = model.fit(
    train_dataset,
    epochs=20,
    verbose=1,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_dataset
)

# Save the per-epoch metrics recorded by fit().
hist_df = pd.DataFrame(history.history)
hist_df.to_csv(f'history{color}_effb1.csv')

Modified Training

In [None]:
# Install the `efficientnet` package (imported below as `efficientnet.tfkeras`).
!pip install -q efficientnet 

# Install `focal-loss`, which provides the `BinaryFocalLoss` imported below.
!pip install focal-loss 

In [None]:
import os 

import efficientnet.tfkeras as efn 
import numpy as np 
import pandas as pd 
import tensorflow as tf 

from focal_loss import BinaryFocalLoss 
from kaggle_datasets import KaggleDatasets 
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import GroupKFold 



In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one resolves.

    The TPU cluster is resolved, connected to and initialised; if any of those
    steps raises ``ValueError`` the current default strategy is used instead.
    The number of replicas of the chosen strategy is printed either way.

    Returns:
        The selected distribution strategy object.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        chosen = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU setup failed with ValueError: use whatever strategy is current.
        chosen = tf.distribute.get_strategy()

    replica_count = chosen.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return chosen


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Build a function that reads an image file and returns a float image.

    Args:
        with_labels: if true, the returned function takes ``(path, label)`` and
            returns ``(image, label)``; otherwise it takes ``path`` and returns
            the image only.
        target_size: size handed to ``tf.image.resize``.
        ext: image format to decode; one of ``'png'``, ``'jpg'`` or ``'jpeg'``.

    Returns:
        The decoding callable described above.

    Raises:
        ValueError: if ``ext`` is not supported. The check runs when the
            decoder is built, so a bad extension fails immediately rather than
            later while a dataset pipeline is being traced.
    """
    if ext not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)

        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        else:  # 'jpg' or 'jpeg' -- anything else was rejected above
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Convert to float32 and divide by 255 before resizing.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        # The label is passed through unchanged.
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Create the random-flip augmentation callable.

    Args:
        with_labels: if true, the returned callable takes ``(img, label)`` and
            returns the augmented image together with the untouched label;
            otherwise it takes and returns the image only.

    Returns:
        A callable applying a random left/right flip followed by a random
        up/down flip to the image.
    """
    def augment(img):
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    if not with_labels:
        return augment

    def augment_with_labels(img, label):
        return augment(img), label

    return augment_with_labels


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Stages, in order: slice the inputs, decode, optionally cache, optionally
    augment, optionally repeat, optionally shuffle, then batch and prefetch.

    Args:
        paths: image paths to slice over.
        labels: optional labels sliced together with ``paths``.
        bsize: batch size.
        cache: whether to add a ``cache`` stage after decoding.
        decode_fn: decoding callable; built with ``build_decoder`` if omitted.
        augment_fn: augmentation callable; built with ``build_augmenter`` if
            omitted.
        augment: whether to apply ``augment_fn``.
        repeat: whether to repeat the dataset.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: argument handed to ``Dataset.cache``; when non-empty and
            ``cache`` is ``True`` the directory is created first.

    Returns:
        The resulting dataset.
    """
    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    has_labels = labels is not None
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    autotune = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    pipeline = tf.data.Dataset.from_tensor_slices(source)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
    if cache:
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
    if repeat:
        pipeline = pipeline.repeat()
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(autotune)

In [None]:
# Name of the Kaggle dataset; used both for the local input directory and to
# look up the dataset's GCS path.
COMPETITION_NAME = 'hpa-768768'
EPOCHS = 20

# The strategy is selected first because the global batch size is derived
# from its replica count (20 samples per replica).
strategy = auto_select_accelerator()
BATCH_SIZE = 20 * strategy.num_replicas_in_sync

GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)
print(GCS_DS_PATH)

In [None]:
# Green-channel label table.
load_dir = f'/kaggle/input/{COMPETITION_NAME}/'
df = pd.read_csv('../input/classification-label-csv-green/df_green.csv')

# Columns at positions 2..20 of the CSV are used as the label columns.
label_cols = df.columns[2:21]
labels = df[label_cols].values

# One image path per row: <GCS bucket>/<ID>.png
paths = GCS_DS_PATH + "/" + df["ID"] + ".png"




In [None]:
# Hold out 20% of the rows for validation; the fixed seed makes the split
# repeatable across runs.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    paths,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Candidate square input resolutions; IMS is the index of the one in use
# (-1 -> the last entry, 720 pixels).
IMSIZE = (224, 240, 260, 300, 380, 456, 512, 528, 600, 720)
IMS = -1

# The image paths built above end in '.png', so request the PNG decoder
# explicitly instead of relying on build_decoder's default of ext='jpg'.
decoder = build_decoder(
    with_labels=True, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='png'
)
test_decoder = build_decoder(
    with_labels=False, target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext='png'
)

# Training pipeline: build_dataset defaults apply (cache, augment, repeat,
# shuffle).
train_dataset = build_dataset(
    train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
)

# Validation pipeline: a single pass in the original order, without
# augmentation.
valid_dataset = build_dataset(
    valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=decoder,
    repeat=False, shuffle=False, augment=False
)



In [None]:
# One output unit per label column; a 1-D label array means a single target.
# `train_labels` is the NumPy array returned by train_test_split, so its rank
# is checked explicitly. The previous bare `except:` would also have hidden
# unrelated failures, e.g. `train_labels` not being defined on a fresh kernel.
n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

# Not used by this (binary cross-entropy) run; the focal-loss run further
# down creates its own instance before compiling.
binary_focal_loss = BinaryFocalLoss(gamma=2)

# Create and compile the model inside the strategy scope.
with strategy.scope():
    model = tf.keras.Sequential([
        # ImageNet-initialised EfficientNetB6 without its classification head.
        efn.EfficientNetB6(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights="imagenet",
            include_top=False
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        # Independent sigmoid per label, paired with binary cross-entropy.
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.AUC(multi_label=True)]
    )

    model.summary()

In [None]:
# Suffix inserted into the checkpoint and history file names below.
colour = '_green'

In [None]:
# Number of whole batches in the training split; needed because the training
# dataset repeats indefinitely.
steps_per_epoch = len(train_paths) // BATCH_SIZE

# Keep only the weights with the lowest validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    f"model{colour}_bce.h5",
    save_best_only=True,
    monitor="val_loss",
    mode="min",
)

# Lower the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    min_lr=1e-6,
    mode="min",
)

In [None]:
# Train for EPOCHS epochs, validating on the held-out split after each one.
fit_kwargs = dict(
    epochs=EPOCHS,
    verbose=1,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_dataset,
)
history = model.fit(train_dataset, **fit_kwargs)

In [None]:
# Save the per-epoch metrics recorded by fit() as a CSV file.
hist_df = pd.DataFrame(data=history.history)
hist_df.to_csv(path_or_buf=f"history{colour}_bce.csv")

In [None]:
# One output unit per label column; a 1-D label array means a single target.
# `train_labels` is the NumPy array returned by train_test_split, so its rank
# is checked explicitly. The previous bare `except:` would also have hidden
# unrelated failures, e.g. `train_labels` not being defined on a fresh kernel.
n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

# Loss for this run: binary focal loss with gamma = 2.
binary_focal_loss = BinaryFocalLoss(gamma=2)

# Same architecture as the binary cross-entropy run; only the loss differs.
with strategy.scope():
    model = tf.keras.Sequential([
        efn.EfficientNetB6(
            input_shape=(IMSIZE[IMS], IMSIZE[IMS], 3),
            weights="imagenet",
            include_top=False
        ),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(n_labels, activation='sigmoid')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=binary_focal_loss,
        metrics=[tf.keras.metrics.AUC(multi_label=True)]
    )

    model.summary()

# Whole batches per epoch; needed because the training dataset repeats.
steps_per_epoch = train_paths.shape[0] // BATCH_SIZE

# Keep only the weights with the lowest validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    f'model{colour}_bfl.h5', save_best_only=True, monitor='val_loss', mode='min'
)

# Lower the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', patience=3, min_lr=1e-6, mode='min'
)

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[checkpoint, lr_reducer],
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_dataset
)

# Save the per-epoch metrics recorded by fit().
hist_df = pd.DataFrame(history.history)

hist_df.to_csv(f'history{colour}_bfl.csv')