# About Notebook

It is basic starter notebook for breast cancer detection, written in `keras` with `tensorflow 2` backend. However, the overall goal in this code example is to demonstrate the best practice of `keras` with its latest APIs.

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import os
import glob
import cv2
import warnings
from packaging.version import parse
from matplotlib import pyplot as plt
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold

warnings.simplefilter(action="ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

**Device Setup**

In [2]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import tensorflow_addons as tfa
from tensorflow.python.client import device_lib
from tensorflow.experimental import numpy as tnp

def set_tpu(mixed_precision=True):
    try: 
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect() 
        
        if mixed_precision:
            keras.mixed_precision.set_global_policy(
                "mixed_bfloat16"
            )
        else:
            keras.mixed_precision.set_global_policy(
                keras.backend.floatx()
            )
            
        tf.config.set_soft_device_placement(True)
        strategy = tf.distribute.TPUStrategy(tpu)
        physical_devices = tf.config.list_logical_devices('TPU')
        return (strategy, physical_devices)
    except:
        return False

In [3]:
def set_cpu_gpus(mixed_precision=True, set_jit=False):
    try: 
        # printed out the detected devices
        list_ld = device_lib.list_local_devices()
        for dev in list_ld: 
            print(dev.name,dev.memory_limit)
        
        # get the lisf of physical devices
        physical_devices = tf.config.list_physical_devices(
            'GPU' if len(list_ld) - 1 else 'CPU'
        )
        
        # For GPU devices, configure related stuff
        if 'GPU' in physical_devices[-1]:
            tf.config.optimizer.set_jit(set_jit)
            
            if mixed_precision:
                keras.mixed_precision.set_global_policy(
                    "mixed_float16"
                )
            else:
                keras.mixed_precision.set_global_policy(
                    keras.backend.floatx()
                )
                
            for pd in physical_devices:
                tf.config.experimental.set_memory_growth(
                    pd, True
                )
                
        strategy = tf.distribute.MirroredStrategy()
        return (strategy, physical_devices)
    except: 
        raise ValueError('No Device Detected!')

In [4]:
mxp = True
jit = True

strategy, physical_devices = set_tpu(mixed_precision=mxp) or set_cpu_gpus(mixed_precision=mxp, set_jit=jit)
physical_devices, tf.__version__

/device:CPU:0 268435456
/device:GPU:0 14400880640
/device:GPU:1 14400880640


([PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
  PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')],
 '2.11.0')

**Utils**

In [5]:
def make_plot(tfdata, figsize=(20, 20)):
    
    plt.figure(figsize=figsize)
    xy = int(np.ceil(tfdata.shape[0] * 0.5))

    for i in range(tfdata.shape[0]):
        plt.subplot(xy, xy, i + 1)
        plt.imshow(tf.cast(tfdata[i], dtype=tf.uint8))
        plt.axis("off")

    plt.tight_layout()
    plt.show()

In [6]:
# True for 'Inference' (turn off the internet)
# False for 'Training' (turn on the internet)
SUBMIT = True

# General
# Supported: [convnext, efficientnet-v2, resnet-rs, densenet]
BACKBONE_MODEL = ['efficientnet-v2', 'convnext', 'resnet-rs', 'densenet'][0] 
INP_SIZE = 1024
SEED = 101
SPLITS = 4
ValidationFold = 0 # < SPLITS

# SetAutoTune
EPOCHS = 5
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
AUTOTUNE = tf.data.AUTOTUNE
BATCHES_PER_STEPS = 10 # 10 BATCH_SIZE # Be aware (keras/issues/16573)
keras.utils.set_random_seed(SEED)

# Data Set

In [7]:
if physical_devices[-1].device_type in ['GPU', 'CPU']:
    DF_PATH = '/kaggle/input/rsna-breast-cancer-detection'
    IMG_PATH = f'/kaggle/input/rsna-breast-cancer-{INP_SIZE}-pngs/output'
else:
    DF_PATH = KaggleDatasets().get_gcs_path('rsna-breast-cancer-detection')
    IMG_PATH = KaggleDatasets().get_gcs_path(f'rsna-breast-cancer-{INP_SIZE}-pngs')
    
df = pd.read_csv(f"{DF_PATH}/train.csv")
df['img_path'] = df.apply(
    lambda i: os.path.join(
        f"{IMG_PATH}", str(i['patient_id']) + "_" + str(i['image_id']) + '.png'
    ), axis=1
)

display(df.head())
print(df.shape)
df.cancer.value_counts()

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,cancer,biopsy,invasive,BIRADS,implant,density,machine_id,difficult_negative_case,img_path
0,2,10006,462822612,L,CC,61.0,0,0,0,,0,,29,False,/kaggle/input/rsna-breast-cancer-1024-pngs/out...
1,2,10006,1459541791,L,MLO,61.0,0,0,0,,0,,29,False,/kaggle/input/rsna-breast-cancer-1024-pngs/out...
2,2,10006,1864590858,R,MLO,61.0,0,0,0,,0,,29,False,/kaggle/input/rsna-breast-cancer-1024-pngs/out...
3,2,10006,1874946579,R,CC,61.0,0,0,0,,0,,29,False,/kaggle/input/rsna-breast-cancer-1024-pngs/out...
4,2,10011,220375232,L,CC,55.0,0,0,0,0.0,0,,21,True,/kaggle/input/rsna-breast-cancer-1024-pngs/out...


(54706, 15)


0    53548
1     1158
Name: cancer, dtype: int64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54706 entries, 0 to 54705
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   site_id                  54706 non-null  int64  
 1   patient_id               54706 non-null  int64  
 2   image_id                 54706 non-null  int64  
 3   laterality               54706 non-null  object 
 4   view                     54706 non-null  object 
 5   age                      54669 non-null  float64
 6   cancer                   54706 non-null  int64  
 7   biopsy                   54706 non-null  int64  
 8   invasive                 54706 non-null  int64  
 9   BIRADS                   26286 non-null  float64
 10  implant                  54706 non-null  int64  
 11  density                  29470 non-null  object 
 12  machine_id               54706 non-null  int64  
 13  difficult_negative_case  54706 non-null  bool   
 14  img_path              

In [9]:
def find_missing_data(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percentage = (data.isnull().sum()/data.isnull().count()).sort_values(ascending = False)
    return pd.concat([total,percentage] , axis = 1 , keys = ['Total' , 'Percent'])
find_missing_data(df)

Unnamed: 0,Total,Percent
BIRADS,28420,0.519504
density,25236,0.461302
age,37,0.000676
site_id,0,0.0
patient_id,0,0.0
image_id,0,0.0
laterality,0,0.0
view,0,0.0
cancer,0,0.0
biopsy,0,0.0


In [10]:
sgkf = StratifiedGroupKFold(
    n_splits=SPLITS, shuffle=True, random_state=SEED
)
df['fold'] = -1

for fold, (_, test_index) in enumerate(
    sgkf.split(df, df.cancer, df.patient_id)
):
    df.loc[test_index, 'fold'] = fold
    
display(df.groupby(['fold', "cancer"]).size())
df.to_csv(f'df_folds_{ValidationFold}.csv', index=False)

fold  cancer
0     0         13356
      1           279
1     0         13519
      1           277
2     0         13347
      1           284
3     0         13326
      1           318
dtype: int64

In [11]:
df = pd.read_csv(f'df_folds_{ValidationFold}.csv')
train_df = df.query(f'fold != {ValidationFold}')
valid_df = df.query(f'fold == {ValidationFold}')
print(train_df.shape, valid_df.shape)

(41071, 16) (13635, 16)


In [12]:
print(train_df.patient_id.nunique())
print(train_df.cancer.value_counts(normalize=True))

print(valid_df.patient_id.nunique())
print(valid_df.cancer.value_counts(normalize=True))

8938
0    0.978598
1    0.021402
Name: cancer, dtype: float64
2975
0    0.979538
1    0.020462
Name: cancer, dtype: float64


# Data Loader

In [13]:
def image_decoder(with_labels):

    def decode(path):
        file_bytes = tf.io.read_file(path)
        img = tf.image.decode_jpeg(file_bytes, channels = 3)
        img = tf.reshape(img, [*[INP_SIZE]*2, 3])
        return img
    
    def decode_with_labels(path, label):
        return decode(path), tf.cast(label, dtype=tf.float32)
    
    return decode_with_labels if with_labels else decode

def create_dataset(
    df, 
    batch_size  = 32, 
    with_labels = False,  
    shuffle     = False,
    repeat      = True
):
    # Image file decoder
    decode_fn = image_decoder(with_labels)

    # Create Dataset
    if with_labels:
        dataset = tf.data.Dataset.from_tensor_slices(
            (df['img_path'].values, df['cancer'].values)
        )
    else:
        dataset = tf.data.Dataset.from_tensor_slices(
            (df['img_path'].values)
        )
        
    dataset = dataset.map(decode_fn, num_parallel_calls = AUTOTUNE)
    dataset = dataset.shuffle(
        8 * BATCH_SIZE, reshuffle_each_iteration = False
    ) if shuffle else dataset
    dataset = dataset.batch(batch_size, drop_remainder=shuffle)
    dataset = dataset.repeat() if repeat else dataset
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [14]:
training_dataset = create_dataset(
    train_df,
    batch_size = BATCH_SIZE, 
    with_labels = True, 
    shuffle = True,
    repeat  = True
)

valid_dataset = create_dataset(
    valid_df,
    batch_size = BATCH_SIZE, 
    with_labels = True, 
    shuffle = False,
    repeat = False
)

# Hyperparameter Settings 

- Loss Functions
- Metrics
- Learning Rate Schedular
- Optimizer

In [15]:
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras import applications

In [16]:
lr_start   = 0.000005
lr_max     = 0.00000125 * BATCH_SIZE
lr_min     = 0.000001
lr_ramp_ep = 5
lr_sus_ep  = 0
lr_decay   = 0.8
wd_decay   = lr_start * 0.04


def get_lr_callback(batch_size=8):
    
    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
            
        elif epoch < lr_ramp_ep + lr_sus_ep:
            lr = lr_max
            
        else:
            lr = (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min
            
        return lr

    lr_callback = callbacks.LearningRateScheduler(lrfn, verbose=True)
    return lr_callback

[**Competition Metrics**](https://www.kaggle.com/code/sohier/probabilistic-f-score) - **stateless**

It will be used inside the callback API, typically after batch end or epoch end, depending on target.

In [17]:
def tf_pfbeta(from_logits=True, beta=1.0, epsilon=1e-07):
    
    def pfbeta(y_true, y_pred):
        y_pred = tf.cond(
            tf.cast(from_logits, dtype=tf.bool),
            lambda: tf.nn.sigmoid(y_pred),
            lambda: y_pred,
        )
        y_true = tf.reshape(y_true, [-1])
        y_pred = tf.reshape(y_pred, [-1])

        ctp = tf.reduce_sum(y_true * y_pred, axis=-1)
        cfp = tf.reduce_sum(y_pred, axis=-1) - ctp

        c_precision = ctp / (ctp + cfp)
        c_recall = ctp / tf.reduce_sum(y_true)
        
        def compute_fractions():
            numerator = c_precision * c_recall
            denominator = beta**2 * c_precision + c_recall + epsilon
            return (1 + beta**2) * tf.math.divide_no_nan(numerator, denominator)
        
        return tf.cond(
            tf.logical_and(
                tf.greater(c_precision, 0.), tf.greater(c_recall, 0.)
            ),
            compute_fractions,
            lambda: tf.constant(0, dtype=tf.float32)
        )
    
    return pfbeta

[**Competition Metrics**](https://www.kaggle.com/code/sohier/probabilistic-f-score) - **stateful**

It will be used in training time `(model.compile)`.

In [18]:
class pFBeta(keras.metrics.Metric):
    def __init__(
        self, from_logits=True, beta=1.0, threshold=None, epsilon=1e-07, name='pFBeta', **kwargs
    ):
        super().__init__(name=name, **kwargs)
        self.beta = beta
        self.epsilon = epsilon
        self.threshold = threshold
        self.from_logits = from_logits
        self.true_positives = self.add_weight(name='tp', initializer='zeros')
        self.false_positives = self.add_weight(name='fp', initializer='zeros')
        self.false_negatives = self.add_weight(name='fn', initializer='zeros')

    @tf.function
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cond(
            tf.cast(self.from_logits, dtype=tf.bool),
            lambda: tf.nn.sigmoid(y_pred),
            lambda: y_pred,
        )
        if self.threshold is not None:
            y_pred = y_pred > self.threshold
            
        y_true = tf.reshape(tf.cast(y_true, dtype=tf.float32), [-1])
        y_pred = tf.reshape(tf.cast(y_pred, dtype=tf.float32), [-1])
        
        self.true_positives.assign_add(tf.reduce_sum(y_true * y_pred, axis=-1))
        self.false_positives.assign_add(
            tf.reduce_sum(y_pred * (1 - y_true))
        )
        self.false_negatives.assign_add(
            tf.reduce_sum((1 - y_pred) * y_true)
        )

    @tf.function
    def result(self):
        precision = tf.math.divide_no_nan(
            self.true_positives, self.true_positives + self.false_positives
        )
        recall = tf.math.divide_no_nan(
            self.true_positives, self.true_positives + self.false_negatives
        )
        numerator = precision * recall
        denominator = self.beta**2 * precision + recall + self.epsilon
        fscore = (1 + self.beta**2) * tf.math.divide_no_nan(numerator, denominator)
        return fscore
    
    def reset_state(self):
        for v in self.variables:
            v.assign(0)
        
    def get_config(self):
        config = {
            "from_logits": self.from_logits,
            "beta": self.beta,
            "epsilon": self.epsilon,
            "threshold": self.threshold,
        }
        base_config = super().get_config()
        return {**base_config, **config}

In [19]:
def tf_auc(from_logits=True):
    auc_fn = metrics.AUC()
    
    def auc(y_true, y_pred):
        y_pred = tf.cond(
            tf.cast(from_logits, dtype=tf.bool),
            lambda: tf.nn.sigmoid(y_pred),
            lambda: y_pred,
        )
        return auc_fn(y_true, y_pred)
    
    return auc

**Weighted Binary Loss**

> A value `pos_weight > 1` decreases the false negative count, hence increasing the recall. Conversely setting `pos_weight < 1` decreases the false positive count and increases the precision. This can be seen from the fact that `pos_weight` is introduced as a multiplicative coefficient for the positive labels term in the loss expression:

In [20]:
def weighted_binary_loss(
    apply_positive_weight, from_logits=True, reduction="mean"
):
    def inverse_sigmoid(sigmoidal):
        return - tf.math.log(1. / sigmoidal - 1.)

    def weighted_loss(labels, predictions):
        predictions = tf.convert_to_tensor(predictions)
        labels = tf.cast(labels, predictions.dtype)
        num_samples = tf.cast(tf.shape(labels)[-1], dtype=labels.dtype)

        logits = tf.cond(
            tf.cast(from_logits, dtype=tf.bool),
            lambda: predictions,
            lambda: inverse_sigmoid(sigmoidal=predictions),
        )
        loss = tf.nn.weighted_cross_entropy_with_logits(
            labels, 
            logits, 
            pos_weight=apply_positive_weight
        )
        
        if reduction.lower() == "mean":
            return tf.reduce_mean(loss)
        elif reduction.lower() == "sum":
            return tf.reduce_sum(loss) / num_samples
        elif reduction.lower() == "none":
            return loss
        else:
            raise ValueError(
                'Reduction type is should be `mean` or `sum` or `none`. ',
                f'But, received {reduction}'
            )
    return weighted_loss

**Binary Focal Crossentropy**

> `focal_factor = (1 - output) ** gamma` for class 1 `focal_factor = output ** gamma` for class 0 where `gamma` is a focusing parameter. When `gamma=0`, this function is equivalent to the binary crossentropy loss.

In [21]:
# ref...tf/keras/losses/BinaryFocalCrossentropy (available from tf 2.9)
def binary_focal_loss(
    alpha=0.25, 
    gamma=2.0, 
    label_smoothing=0, 
    from_logits=False,
    apply_class_balancing=False,
    apply_positive_weight=1,
    reduction="mean"
):
    '''
    alpha: A weight balancing factor for class 1, default is 0.25. 
        The weight for class 0 is 1.0 - alpha.
    
    gamma: A focusing parameter used to compute the focal factor, default is 2.0
    
    apply_class_balancing: A bool, whether to apply weight balancing on the binary 
        classes 0 and 1.
    '''
    
    def smooth_labels(labels):
        return labels * (1.0 - label_smoothing) + 0.5 * label_smoothing
    
    def compute_loss(labels, logits):
        logits = tf.convert_to_tensor(logits)
        labels = tf.cast(labels, logits.dtype)
        labels = tf.cond(
            tf.cast(label_smoothing, dtype=tf.bool),
            lambda: smooth_labels(labels),
            lambda: labels,
        )
        num_samples = tf.cast(tf.shape(labels)[-1], dtype=labels.dtype)
        cross_entropy = weighted_binary_loss(
            apply_positive_weight, from_logits, reduction='none'
        )(labels, logits)
        
        sigmoidal = tf.cond(
            tf.cast(from_logits, dtype=tf.bool),
            lambda: tf.nn.sigmoid(logits),
            lambda: logits,
        )
        pt = labels * sigmoidal + (1.0 - labels) * (1.0 - sigmoidal)
        focal_factor = tf.pow(1.0 - pt, gamma)
        focal_bce = focal_factor * cross_entropy
        
        if apply_class_balancing:
            weight = labels * alpha + (1 - labels) * (1 - alpha)
            focal_bce = weight * focal_bce

        if reduction == 'mean':
            return tf.reduce_mean(focal_bce)
        elif reduction == 'sum':
            return tf.reduce_sum(focal_bce) / num_samples
        else:
            raise ValueError(
                'Reduction type should be `mean` or `sum` ',
                f'But, received {reduction}'
            )
    return compute_loss

**Symbolic Discovery of Optimization Algorithms**

A new optimizer from google (2023). [Code.](https://github.com/google/automl/tree/master/lion)

In [22]:
class Lion(keras.optimizers.legacy.Optimizer):
    def __init__(
        self,
        learning_rate=0.0001,
        beta_1=0.9,
        beta_2=0.99,
        wd=0,
        name='lion', 
        **kwargs
    ):
        super().__init__(name, **kwargs)
        self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
        self._set_hyper('beta_1', beta_1)
        self._set_hyper('beta_2', beta_2)
        self._set_hyper('wd', wd)
    
    def _create_slots(self, var_list):
        # Create slots for the first and second moments.
        # Separate for-loops to respect the ordering of slot variables from v1.
        for var in var_list:
            self.add_slot(var, 'm')
    
    def _prepare_local(self, var_device, var_dtype, apply_state):
        super(Lion, self)._prepare_local(var_device, var_dtype, apply_state)
        beta_1_t = tf.identity(self._get_hyper('beta_1', var_dtype))
        beta_2_t = tf.identity(self._get_hyper('beta_2', var_dtype))
        wd_t = tf.identity(self._get_hyper('wd', var_dtype))
        lr = apply_state[(var_device, var_dtype)]['lr_t']
        apply_state[(var_device, var_dtype)].update(
            dict(
                lr=lr,
                beta_1_t=beta_1_t,
                one_minus_beta_1_t=1 - beta_1_t,
                beta_2_t=beta_2_t,
                one_minus_beta_2_t=1 - beta_2_t,
                wd_t=wd_t
            )
        ) 
    
    @tf.function(jit_compile=True)
    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (
            (apply_state or {}).get(
                (
                    var_device, var_dtype
                )
            ) or self._fallback_apply_state(var_device, var_dtype)
        ) 
        
        m = self.get_slot(var, 'm')
        var_t = var.assign_sub(
            coefficients['lr_t'] * (
                tf.math.sign(
                    m * coefficients['beta_1_t'] + 
                    grad * coefficients['one_minus_beta_1_t']
                ) + var * coefficients['wd_t'])
        )
        
        with tf.control_dependencies([var_t]):
            m.assign(
                m * coefficients['beta_2_t'] + grad * coefficients['one_minus_beta_2_t']
            )
    
    @tf.function(jit_compile=True)
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (
            (apply_state or {}).get(
                (
                    var_device, var_dtype
                )
            ) or self._fallback_apply_state(var_device, var_dtype)
        )

        m = self.get_slot(var, 'm')
        m_t = m.assign(m * coefficients['beta_1_t'])
        m_scaled_g_values = grad * coefficients['one_minus_beta_1_t']
        m_t = m_t.scatter_add(tf.IndexedSlices(m_scaled_g_values, indices))
        var_t = var.assign_sub(
            coefficients['lr'] * (
                tf.math.sign(m_t) + var * coefficients['wd_t'])
        )

        with tf.control_dependencies([var_t]):
            m_t = m_t.scatter_add(tf.IndexedSlices(-m_scaled_g_values, indices))
            m_t = m_t.assign(
                m_t * coefficients['beta_2_t'] / coefficients['beta_1_t']
            )
            m_scaled_g_values = grad * coefficients['one_minus_beta_2_t']
            m_t.scatter_add(tf.IndexedSlices(m_scaled_g_values, indices))
    
    def get_config(self):
        config = super(Lion, self).get_config()
        config.update({
            'learning_rate': self._serialize_hyperparameter('learning_rate'),
            'beta_1': self._serialize_hyperparameter('beta_1'),
            'beta_2': self._serialize_hyperparameter('beta_2'),
            'wd': self._serialize_hyperparameter('wd'),
        })
        return config

## Data Augmentation

We like to insert data augmentation inside the model to get leverage the GPU/TPU speed. Note, the augmentation layers are active during the training time but not testing or inference time. That makes sense but you may want to consider **Test-Time-Augmentation**, see more [details](https://github.com/keras-team/keras/issues/17385). 

In [23]:
from tensorflow.keras.layers import Resizing
from tensorflow.keras.layers import Rescaling
from tensorflow.keras.layers import RandomCrop
from tensorflow.keras.layers import RandomFlip
from tensorflow.keras.layers import RandomZoom
from tensorflow.keras.layers import RandomRotation
from tensorflow.keras.layers import RandomBrightness
from tensorflow.keras.layers import RandomContrast

In [24]:
# Preprocessing
data_preprocessing = keras.Sequential(
    [
        Resizing(
            *[INP_SIZE] * 2, 
            interpolation="bilinear"
        ),
    ], 
    name='PreprocessingLayers'
)

In [25]:
# why this RandomApply? ans. https://stackoverflow.com/a/72558994/9215780
class RandomApply(layers.Layer):
    """RandomApply will randomly apply the transformation layer
    based on the given probability.
    
    Ref. https://stackoverflow.com/a/72558994/9215780
    """

    def __init__(self, layer, probability, **kwargs):
        super().__init__(**kwargs)
        self.layer = layer
        self.probability = probability

    def call(self, inputs, training=True):
        apply_layer = tf.random.uniform([]) < self.probability
        outputs = tf.cond(
            pred=tf.logical_and(apply_layer, training),
            true_fn=lambda: self.layer(inputs),
            false_fn=lambda: inputs,
        )
        return outputs

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "layer": layers.serialize(self.layer),
                "probability": self.probability,
            }
        )
        return config

# Augmentation
data_augmentation = keras.Sequential(
    [
        RandomApply(
            RandomFlip("horizontal"), probability=0.7
        ),
        RandomApply(
            RandomContrast(factor=0.4), probability=0.2
        ),
        RandomApply(
            RandomBrightness(
                factor=0.3, value_range=(0, 255)
            ), probability=0.2
        ),
    ],
    name="augment",
)

In [26]:
if not SUBMIT:
    temp_ds = create_dataset(
        valid_df.sample(20),
        batch_size=10, 
        with_labels=True, 
        shuffle=False
    )
    x, y = next(iter(temp_ds))
    x = data_preprocessing(x)
    aug_x = data_augmentation(x, training=True)

In [27]:
if not SUBMIT:
    make_plot(x)

In [28]:
if not SUBMIT:
    make_plot(aug_x) 

# Model

---

**Might be useful**

- [All Weights variation of official EfficientNet V2](https://www.kaggle.com/competitions/petfinder-pawpularity-score/discussion/285720)
- [Latest EfficientNets-B0-B7 checkpoints](https://www.kaggle.com/competitions/petfinder-pawpularity-score/discussion/275221)
- [Hybrid EfficientNet Swin-Transformer](https://www.kaggle.com/code/ipythonx/tf-hybrid-efficientnet-swin-transformer-gradcam)

In [29]:
def get_backbone_mode(model_name):
    
    if model_name == 'convnext':
        backbone = applications.ConvNeXtTiny(
                include_top=False, 
                pooling='avg', 
                include_preprocessing=True,
                weights='imagenet' if not SUBMIT else None
            )
    elif model_name == 'efficientnet-v2':
        backbone = applications.EfficientNetV2B0(
            include_top=False, 
            pooling='avg', 
            include_preprocessing=True,
            weights='imagenet' if not SUBMIT else None
        )
    elif model_name == 'resnet-rs':
        backbone = applications.ResNetRS50(
            include_top=False, 
            pooling='avg', 
            include_preprocessing=True,
            weights='imagenet' if not SUBMIT else None
        )
    elif model_name == 'densenet':
        backbone = keras.Sequential(
            [
                layers.Rescaling(scale=1/255., offset=0.0),
                applications.DenseNet121(
                    include_top=False, 
                    pooling='avg', 
                    weights='imagenet' if not SUBMIT else None
                )
            ], name=model_name
        )
    else:
        raise ValueError(
            f'Supported Models are [convnext, efficientnet-v2, resnet-rs, densenet] ',
            f'But got {model_name}'
        )
    
    return backbone

In [30]:
def BreastCancerDetect(backbone_model_name='efficientnet-v2'):
    
    backbone_model = get_backbone_mode(
        model_name=backbone_model_name.lower()
    )
    
    model = keras.Sequential(
        [
            keras.layers.InputLayer(input_shape=(INP_SIZE, INP_SIZE, 3)),
            data_preprocessing,
            data_augmentation,
            backbone_model,
            keras.layers.Dense(1, activation=None, dtype='float32')
        ], name='CancerDetect'
    )
    
    return model

In [31]:
with tf.device('/CPU:0'):
    val_gt = tf.Variable(
        tnp.empty((0), dtype=tf.float32), shape=[None], trainable=False
    )
    val_pred = tf.Variable(
        tnp.empty((0), dtype=tf.float32), shape=[None], trainable=False
    )
    

class ExtendedModel(keras.Model):
    def __init__(self, model, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Actual model
        self.model = model
    
    def test_step(self, data):
        x, y = data
        y_pred = self.model(x, training=False)
        self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        self.compiled_metrics.update_state(y, y_pred)

        val_gt.assign(
            tf.concat([val_gt, y], axis=0)
        )
        val_pred.assign(
            tf.concat([val_pred, tf.squeeze(y_pred)], axis=0)
        )
        return {m.name: m.result() for m in self.metrics}
    
    def call(self, inputs):
        return self.model(inputs)
    
    def save_weights(
        self, filepath, overwrite=True, save_format=None, options=None
    ):
        # Overriding this method will allow us to use the `ModelCheckpoint`
        self.model.save_weights(
            filepath=filepath,
            overwrite=overwrite,
            save_format=save_format,
            options=options,
        )
        
    def save(
        self, filepath, overwrite=True, include_optimizer=True, 
        save_format=None, signatures=None, options=None
    ):
        # Overriding this method will allow us to use the `ModelCheckpoint`
        self.model.save(
            filepath=filepath,
            overwrite=overwrite,
            save_format=save_format,
            options=options,
            include_optimizer=include_optimizer,
            signatures=signatures
        )

In [32]:
def get_optimizer(mode='adamw'):
    
    if mode.lower() == 'adamw':
        opt = tfa.optimizers.AdamW(
            learning_rate=0.003, weight_decay=wd_decay
        )
    elif mode.lower() == 'lion':
        opt = Lion(
            learning_rate=0.003, wd=wd_decay
        )
    else:
        opt = keras.optimizers.Adam(
            learning_rate=0.003, weight_decay=wd_decay
        )
        
    return opt

In [33]:
def get_loss_fn(mode='weighted_binary'):
    
    if mode.lower() == 'weighted_binary':
        loss_fn = weighted_binary_loss(
            apply_positive_weight=5, 
            from_logits=True, 
            reduction="mean"
        )
    elif mode.lower() == 'focal':
        loss_fn = binary_focal_loss(
            alpha=0.25, 
            gamma=2.0, 
            label_smoothing=0.05, 
            from_logits=True,
            apply_class_balancing=True,
            apply_positive_weight=1,
            reduction="mean"
        )
    else:
        loss_fn = keras.losses.BinaryCrossentropy(
            from_logits=True,
            label_smoothing=0.01
        )
    
    return loss_fn

In [34]:
def get_metrics():
    metrics_list = [
        pFBeta(beta=1.0, from_logits=True), 
        tf_auc(from_logits=True)
    ]
    return metrics_list

In [35]:
# Resets all state generated by Keras, if.
keras.backend.clear_session()

# Open a strategy scope.
with strategy.scope():
    # build cancer detecting model
    model = BreastCancerDetect(
        backbone_model_name=BACKBONE_MODEL
    )
    
    # Doesn't work on TPU: https://github.com/tensorflow/tensorflow/issues/59511 
    # Known issue: 
    # https://www.tensorflow.org/xla/known_issues#dynamic_tftensorarray_is_not_supported
    if physical_devices[-1].device_type in ['GPU', 'CPU']:
        model = ExtendedModel(model, name=model.name)
    
    # Compile
    model.compile(
        optimizer=get_optimizer(mode='lion'),
        loss=get_loss_fn(mode='weighted_binary'),
        metrics=get_metrics(),
        steps_per_execution=BATCHES_PER_STEPS,
    )

_ = model(tf.ones(shape=(1, INP_SIZE, INP_SIZE, 3)))    
model.summary()

Model: "CancerDetect"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 CancerDetect (Sequential)   (None, 1)                 5920593   
                                                                 
Total params: 5,920,593
Trainable params: 5,859,985
Non-trainable params: 60,608
_________________________________________________________________


In [36]:
for layer in model.layers:
    print(layer.trainable, layer.name)

True CancerDetect


# Callbacks

We will create a custom callback, that will be used to find the optimal value of the target metric (`F1`) with corresponding threshold value. 

In [37]:
class OptimalPFBetaWithThresholdCallback(keras.callbacks.Callback):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cmp_metric = tf_pfbeta(
            from_logits=False, beta=1.0, epsilon=1e-07
        )
    
    def on_epoch_begin(self, epoch, logs=None):
        val_gt.assign(
            tf.Variable(
                tnp.empty((0), dtype=tf.float32), shape=[None]
            )
        )
        val_pred.assign(
            tf.Variable(
                tnp.empty((0), dtype=tf.float32), shape=[None]
            )
        )

    def on_epoch_end(self, epoch, logs=None):
        y_true = val_gt.numpy()
        y_pred = val_pred.numpy()
        y_pred = tf.nn.sigmoid(y_pred)
        max_pfbeta_score, at_threshold = self.tf_pfbeta_opt(y_true, y_pred)
        logs['val_pFBeta_binarize'] = max_pfbeta_score
        logs['val_threshold'] = at_threshold
        
    def tf_pfbeta_opt(self, y_true, y_pred):
        thresholds = tf.range(0, 1, 0.05)
        pfbeta_scores = []
        
        for threshold in thresholds:
            scores = self.cmp_metric(
                y_true, tf.cast(y_pred > threshold, dtype=tf.float32)
            )
            pfbeta_scores.append(scores)
            
        max_pfbeta_score = tf.reduce_max(pfbeta_scores)
        at_threshold = thresholds[tf.argmax(pfbeta_scores)]
        return max_pfbeta_score.numpy(), at_threshold.numpy()

In [38]:
def get_callbacks(monitor, ckpt_id, more_callback=None):
    list_of_callbacks = [
        get_lr_callback(BATCH_SIZE),
        callbacks.ModelCheckpoint(
            filepath=ckpt_id,
            monitor=monitor,
            mode='max',
            save_best_only=True
        ),
        callbacks.CSVLogger(f'history_fold_{ValidationFold}.csv')
    ]
    
    if more_callback:
        for i, cbk in enumerate(more_callback):
            list_of_callbacks.insert(i, cbk)
    
    return list_of_callbacks 

In [39]:
if physical_devices[-1].device_type in ['GPU', 'CPU']:
    exp_callback = OptimalPFBetaWithThresholdCallback()
    monitor = 'val_pFBeta_binarize'
    ckpt_id = 'model.{epoch:02d}-{val_loss:.4f}-{val_pFBeta_binarize:.3f}-{val_threshold:.2f}.h5'
else:
    exp_callback = None
    monitor = 'val_pFBeta'
    ckpt_id = 'model.{epoch:02d}-{val_loss:.4f}-{val_pFBeta:.3f}.h5'

training_callbacks = get_callbacks(monitor, ckpt_id, [exp_callback])

**Training**

In [40]:
if not SUBMIT:
    model.fit(
        training_dataset, 
        validation_data=valid_dataset, 
        epochs=EPOCHS,
        callbacks=training_callbacks,
        steps_per_epoch=len(train_df) // BATCH_SIZE,
    )
    history = pd.read_csv(f'history_fold_{ValidationFold}.csv')
else:
    history = pd.read_csv('/kaggle/input/rsna-breast-cancer/history_fold_0.csv')

In [41]:
try:
    display(
        history.style.highlight_max(
            axis=0, props='background-color:lightblue;', subset=['val_auc','val_pFBeta', monitor]
        ).highlight_min(
            axis=0, props='background-color:lightgreen;', subset=['loss', 'val_loss']
        )
    )
except:
    display(
        history.style.highlight_max(
            axis=0, props='background-color:lightblue;', subset=['val_auc', monitor]
        ).highlight_min(
            axis=0, props='background-color:lightgreen;', subset=['loss', 'val_loss']
        )
    )

Unnamed: 0,epoch,auc,loss,lr,pFBeta,val_@threshold,val_auc,val_loss,val_pFBeta,val_pFBeta_binarize
0,0,0.523794,0.381754,5e-06,0.038641,0.15,0.544239,0.338323,0.038952,0.052239
1,1,0.563267,0.341021,2e-05,0.041049,0.15,0.5805,0.326347,0.044425,0.060475
2,2,0.594839,0.336168,3.5e-05,0.044227,0.15,0.605494,0.321721,0.048065,0.073276
3,3,0.617038,0.329174,5e-05,0.049621,0.3,0.626275,0.312994,0.05821,0.156863
4,4,0.639134,0.307858,6.5e-05,0.071097,0.5,0.652346,0.290719,0.093943,0.266667
5,5,0.666511,0.291106,8e-05,0.089035,0.9,0.677636,0.3116,0.115283,0.273684
6,6,0.691432,0.254368,6.4e-05,0.140848,0.7,0.703022,0.301233,0.126069,0.343434
7,7,0.714365,0.23213,5.2e-05,0.177014,0.7,0.724813,0.297712,0.132327,0.336634
8,8,0.736678,0.196099,4.1e-05,0.246913,0.8,0.746693,0.321248,0.138435,0.299065
9,9,0.756832,0.169233,3.3e-05,0.308679,0.7,0.765704,0.32672,0.13672,0.264151


**Load Best Model**

In [42]:
!ls /kaggle/working/

__notebook_source__.ipynb  df_folds_0.csv


In [43]:
def get_best_weight(weight_list):
    max_pfbeta_bin = round(
        history.val_pFBeta_binarize.max(), 3
    )
    for wg in weight_list:
        if str(max_pfbeta_bin) in str(wg):
            return wg
        
trained_weight_files = get_best_weight(glob.glob('/kaggle/working/*.h5'))
trained_weight_files = trained_weight_files or glob.glob('/kaggle/input/rsna-breast-cancer/*.h5')[0]
trained_weight_files 

'/kaggle/input/rsna-breast-cancer/model.07-0.3012-0.343-0.70.h5'

In [44]:
with strategy.scope():
    model = BreastCancerDetect(backbone_model_name=BACKBONE_MODEL)
    model.load_weights(trained_weight_files)
    model.compile(steps_per_execution=BATCHES_PER_STEPS, jit_compile=True)
    model.trainable = False
model.summary(line_length=80)

Model: "CancerDetect"
________________________________________________________________________________
 Layer (type)                       Output Shape                    Param #     
 PreprocessingLayers (Sequential)   (None, 1024, 1024, 3)           0           
                                                                                
 augment (Sequential)               (None, 1024, 1024, 3)           0           
                                                                                
 efficientnetv2-b0 (Functional)     (None, 1280)                    5919312     
                                                                                
 dense_1 (Dense)                    (None, 1)                       1281        
                                                                                
Total params: 5,920,593
Trainable params: 0
Non-trainable params: 5,920,593
________________________________________________________________________________


**Build Model for TTA**

In [45]:
# Set-up for Test-Time-Augmentation
class Flip(keras.layers.Layer):
    def call(self, inputs):
        x = tf.image.flip_left_right(inputs)
        return x

In [46]:
def insert_layer(model, new_layer):
    initial_input = model.input
    flipped_input = new_layer(initial_input)
    tensor_output = keras.layers.Average(name='tta_avg')(
        [
            model(initial_input), model(flipped_input)
        ]
    )
    new_model = keras.Model(
        inputs=initial_input, outputs=tensor_output, name=model.name
    )
    return new_model


with strategy.scope():
    tta_model = insert_layer(
        model, Flip(name='InputFlipping')
    )
    tta_model.compile(
        steps_per_execution=BATCHES_PER_STEPS, jit_compile=True
    )
    tta_model.trainable = False
tta_model.summary(line_length=100)

Model: "CancerDetect"
____________________________________________________________________________________________________
 Layer (type)                    Output Shape          Param #     Connected to                     
 input_4 (InputLayer)            [(None, 1024, 1024,   0           []                               
                                 3)]                                                                
                                                                                                    
 InputFlipping (Flip)            (None, 1024, 1024, 3  0           ['input_4[0][0]']                
                                 )                                                                  
                                                                                                    
 CancerDetect (Sequential)       (None, 1)             5920593     ['input_4[0][0]',                
                                                                    '

In [47]:
if not SUBMIT:
    val_y_true = val_gt.numpy()
    val_y_pred = val_pred.numpy()
    val_y_pred = tf.nn.sigmoid(val_y_pred)
    val_y_pred_tta = tta_model.predict(valid_dataset)
    val_y_pred_tta = tf.nn.sigmoid(val_y_pred_tta)
else:
    val_y_true = np.array(
        list(
            map(np.float32, valid_df.cancer.tolist())
        )
    )
    val_y_pred = model.predict(valid_dataset)
    val_y_pred = tf.nn.sigmoid(val_y_pred)
    val_y_pred_tta = tta_model.predict(valid_dataset)
    val_y_pred_tta = tf.nn.sigmoid(val_y_pred_tta)
    val_y_pred_tta = tf.cast(val_y_pred_tta, dtype=tf.float32)
    
# return [max_pfbeta, threshold]
print(val_y_true.shape, val_y_true.shape, val_y_pred_tta.shape)
print('w/o tta : ', OptimalPFBetaWithThresholdCallback().tf_pfbeta_opt(val_y_true, val_y_pred))
print('w tta   : ', OptimalPFBetaWithThresholdCallback().tf_pfbeta_opt(val_y_true, val_y_pred_tta))

(13635,) (13635,) (13635, 1)
w/o tta :  (0.37160745, 0.65000004)
w tta   :  (0.38095233, 0.55)


# Inference

In [48]:
from IPython.display import clear_output
!pip install --no-deps ../input/for-pydicom/pylibjpeg-1.4.0-py3-none-any.whl
!pip install --no-deps ../input/for-pydicom/python_gdcm-3.0.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install --no-deps ../input/for-pydicom/pylibjpeg_libjpeg-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install --no-deps ../input/for-pydicom/dicomsdl-0.109.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl
clear_output()

In [49]:
EXTENSION = "png"
TEMP_FOLDER = "/kaggle/tmp/output/"
TEST_DICOM = glob.glob("/kaggle/input/rsna-breast-cancer-detection/test_images/*/*.dcm")
os.makedirs(TEMP_FOLDER, exist_ok=True)

test_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/test.csv")
test_df['cancer'] = 0
test_df['img_path'] = (
    TEMP_FOLDER + test_df["patient_id"].astype(str) + "_" + test_df["image_id"].astype(str) + ".png"
)
test_ds = create_dataset(
    test_df, 
    with_labels = False,
    batch_size=BATCH_SIZE,
    shuffle=False,
    repeat=False
)

The following preprocessing cell is taken from [this](https://www.kaggle.com/code/theoviel/rsna-breast-baseline-inference) code example.

In [50]:
import dicomsdl
from tqdm import tqdm
from joblib import Parallel, delayed

def read_dicom(dicom_file):
    dicom = dicomsdl.open(dicom_file)
    image = dicom.pixelData(storedvalue=False)
    image = (image - image.min()) / (image.max() - image.min())
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        image = 1 - image
    return image
    
def saving(file, size=512, save_folder="", extension="png"):
    image = read_dicom(file)
    image = cv2.resize(image, (size, size))
    image = (image * 255).astype(np.uint8)
    
    patient_id = file.split('/')[-2]
    image_name = file.split('/')[-1][:-4]
    cv2.imwrite(
        save_folder + f"{patient_id}_{image_name}.{extension}", image
    )
    
_ = Parallel(n_jobs=-1)(
    delayed(saving)(
        uid, size=INP_SIZE, save_folder=TEMP_FOLDER, extension=EXTENSION
    ) for uid in tqdm(TEST_DICOM)
)

100%|██████████| 4/4 [00:01<00:00,  2.04it/s]


In [51]:
threshold = 0.70 # from model weight file name
pred = model.predict(test_ds)
pred = tf.nn.sigmoid(pred).numpy()
test_df["cancer"] = (pred > threshold).astype(int)

test_df['prediction_id'] = test_df['patient_id'].astype(str) + "_" + test_df['laterality']
sub = test_df[['prediction_id', 'cancer']].groupby("prediction_id").mean().reset_index()
sub.to_csv('/kaggle/working/submission.csv', index=False)
sub.head()



Unnamed: 0,prediction_id,cancer
0,10008_L,0.0
1,10008_R,0.0
