# `tensorflow.keras` sweep across pre-trained models


This notebook demonstrates the use of (1) well-known CNN architectures, (2) pretrained weights on `RadImageNet` in a multiclassification task, (3) `wandb.ai` sweeps in comparing the pre-trained architectures.

References:

* The pretrained weights published by [Mei et al (2022)](https://pubs.rsna.org/doi/10.1148/ryai.210315) in their github [link](https://github.com/BMEII-AI/RadImageNet).

* The github repository form the BAGLS team contained in this github [link](https://github.com/anki-xyz/bagls/blob/master/Utils/DataGenerator.py#L109)


In [14]:
# dev convenience
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import sys
sys.path.append("..")
import PATHS

import os
import numpy as np

os.environ["CUDA_VISIBLE_DEVICES"] = "2"
os.environ["WANDB_SILENT"] = "True"
os.environ["WANDB_NOTEBOOK_NAME"] = "05-pretrained-sweep.ipynb"

PROJECT_NAME = 'bagls-sh-test'
GROUP_NAME = 'pretrained-models-sweep'
METRICS_TABLE_NAME = 'metrics_table'

In [16]:
import wandb
print("W&B: ", wandb.__version__)
wandb.login()

# # manage logs
# import logging

# logger = logging.getLogger("wandb")
# logger.setLevel(logging.ERROR)

# logging.getLogger('tensorflow').disabled = True

W&B:  0.13.5


True

In [17]:
# tf loader
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow import keras

In [18]:
from tensorflow.python.client import device_lib

print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 9055340085874400362
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10925703168
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 6532517587126003743
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1"
]


2022-11-29 09:34:34.277810: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /device:GPU:0 with 10419 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1


In [19]:
import config
configs = config.nb_configs
configs.update(config.pt_configs)

In [20]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.imagenet_utils import preprocess_input

# initialize data generator
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=configs["validation_split"],
    rescale=configs["rescale"],
    width_shift_range=configs["width_shift_range"],
    height_shift_range=configs["height_shift_range"],
    shear_range=configs["shear_range"],
    zoom_range=configs["zoom_range"],
    fill_mode=configs["fill_mode"],
    horizontal_flip=configs["horizontal_flip"],
    rotation_range=configs["rotation_range"],
)

test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input, 
    rescale=configs["rescale"],
)

In [21]:
train_dir = configs["train_dir"]
test_dir = configs["test_dir"]

batch_size = configs["batch_size"]
class_names = configs["class_names"]
interpol = configs["interpol"]
cmap = configs["cmap"]
label_mode = configs["label_mode"]
labels = configs["labels"]
image_size = configs["image_size"]


train_dataset = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=image_size,
    color_mode=cmap,
    classes=class_names,
    class_mode=label_mode,
    batch_size=batch_size,
    interpolation=interpol,
    subset="training",
)

val_dataset = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=image_size,
    color_mode=cmap,
    classes=class_names,
    class_mode=label_mode,
    batch_size=batch_size,
    interpolation=interpol,
    subset="validation",
)

test_dataset = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=image_size,
    color_mode=cmap,
    classes=class_names,
    class_mode=label_mode,
    batch_size=batch_size,
    interpolation=interpol,
    shuffle=False, # do not shuffle for later evaluation, alphanum sort
)

configs.update({"val_steps": val_dataset.samples // configs["batch_size"]})

Found 526 images belonging to 2 classes.
Found 26 images belonging to 2 classes.
Found 33 images belonging to 2 classes.


In [22]:
from tensorflow.keras import metrics
from tensorflow.keras import optimizers
from tensorflow.keras import losses

In [23]:
def define_model(model_arch, weights):
    conv_base = model_arch(
        weights=weights,
        include_top=configs["include_top"],
        input_shape=configs["input_shape"],
        pooling=configs["pooling"],
    )
    # freeze conv base weights
    conv_base.trainable = False
    x = conv_base.output
    
    # layers at this stage are arbitrary
    # can be subjected to hyperparam tuning
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(units=512, activation='relu')(x)
    x = keras.layers.Dropout(configs["dropout_rate"])(x)
    outputs = keras.layers.Dense(units=2, activation="softmax")(x)
    model = keras.Model(inputs=conv_base.input, outputs=outputs)
    return model

In [24]:
from interpretation import ValLog, GRADCamLogger
import utils

def train(model_name):
    GRADCAM_LAYER_NAME = config.pt_gradcam_layers[model_name]
    model_arch = config.pt_models[model_name]
    weights = config.pt_weights[model_name]
    
    tf.keras.backend.clear_session()
    
    thresh = configs["thresh"]
    metrics_dict = {
        "ACC":  metrics.BinaryAccuracy(name="ACC", threshold=thresh),
        "AUC-ROC": metrics.AUC(name='ROC', curve='ROC'),
        "AUC-PR": metrics.AUC(name='PR', curve='PR'),
        "TP": metrics.TruePositives(name="TP", thresholds=thresh),
        "TN": metrics.TrueNegatives(name="TN", thresholds=thresh),
        "FP": metrics.FalsePositives(name="FP", thresholds=thresh),
        "FN": metrics.FalseNegatives(name="FN", thresholds=thresh),
    }

    # opt = optimizers.Adam(learning_rate=1e-06)
    opt = optimizers.Adam()
    met = list(metrics_dict.values())

    model = define_model(model_arch, weights)
    model.compile(
        loss=losses.CategoricalCrossentropy(),
        optimizer=opt,
        metrics=met,
    )
    # initialize run
    run = wandb.init(
        project=PROJECT_NAME, 
        group=GROUP_NAME,
        name=model_name,
        job_type='train',
        config=configs, 
    )

    wandb_callback = wandb.keras.WandbCallback(
        monitor="val_ROC",
        mode="max",
        save_model=True,
        save_graph=True,
        compute_flops=True,
    )

    callbacks = [
        wandb_callback,
        ValLog(generator=val_dataset, num_log_batches=1),
        GRADCamLogger(generator=test_dataset, 
                      layer_name=GRADCAM_LAYER_NAME, num_log_batches=1)
    ]
    history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        epochs=configs["epochs"], 
        shuffle=True,
        callbacks=callbacks,
    )
    run.finish()
    
def evaluate(model_name):
    run = wandb.init(
        project=PROJECT_NAME, 
        group=GROUP_NAME,
        name=model_name,
        job_type="inference-evaluation", 
        config=configs, 
    )
    model_at = run.use_artifact("model-" + model_name + ":latest")
    model_dir = model_at.download()
    best_model = keras.models.load_model(model_dir)
    
    metrics_results = best_model.evaluate(test_dataset)
    metrics_results = dict(zip(["loss"] + list(metrics_dict.keys()), 
                               metrics_results))
    tp, fp, tn, fn = (metrics_results["TP"], metrics_results["FP"], 
                      metrics_results["TN"], metrics_results["FN"])

    add_metrics = {
        "SENSITIVITY": utils.get_sensitivity(tp, fp, tn, fn),
        "SPECIFICTY": utils.get_specificity(tp, fp, tn, fn),
        "PPV": utils.get_ppv(tp, fp, tn, fn),
        "NPV": utils.get_npv(tp, fp, tn, fn),
        "F1" : utils.get_fbeta(tp, fp, tn, fn, beta=1),
    }
    metrics_results.update(add_metrics)

    print(f"Metrics: \n", metrics_results)

    columns = list(metrics_results.keys())
    metrics_table = wandb.Table(columns=columns)
    metrics_table.add_data(*metrics_results.values())
    wandb.run.log({METRICS_TABLE_NAME : metrics_table})

    # add logging of confusion matrix image from matplotlib

    # get preds
    trained_preds = best_model.predict(test_dataset)
    run.finish()

In [25]:
for model_name in config.pt_models.keys():
    print(model_name)
    train(model_name)
    evaluate(model_name)

ResNet50


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666939412243664, max=1.0)…

2022-11-29 09:35:01.375037: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-11-29 09:35:01.375299: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-11-29 09:35:01.378214: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10419 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1
2022-11-29 09:35:01.393652: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.008ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Epoch 1/5




INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_093439-1njpxc4s/files/model-best/assets




Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669200205554565, max=1.0…

Metrics: 
 {'loss': 0.39585328102111816, 'ACC': 0.9090909361839294, 'AUC-ROC': 0.9752066135406494, 'AUC-PR': 0.9762325286865234, 'TP': 30.0, 'TN': 30.0, 'FP': 3.0, 'FN': 3.0, 'SENSITIVITY': 0.9090909090909091, 'SPECIFICTY': 0.9090909090909091, 'PPV': 0.9090909090909091, 'NPV': 0.9090909090909091, 'F1': 0.9090909090909091}
InceptionV3


2022-11-29 09:38:25.095162: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-11-29 09:38:25.095459: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-11-29 09:38:25.098692: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10419 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1
2022-11-29 09:38:25.124014: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.032ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Epoch 1/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_093814-2ipcgn8k/files/model-best/assets
Epoch 2/5
Epoch 3/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_093814-2ipcgn8k/files/model-best/assets
Epoch 4/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_093814-2ipcgn8k/files/model-best/assets
Epoch 5/5


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669653790692487, max=1.0…

Metrics: 
 {'loss': 0.36867672204971313, 'ACC': 0.8787878751754761, 'AUC-ROC': 0.9706152081489563, 'AUC-PR': 0.9725255966186523, 'TP': 29.0, 'TN': 29.0, 'FP': 4.0, 'FN': 4.0, 'SENSITIVITY': 0.8787878787878788, 'SPECIFICTY': 0.8787878787878788, 'PPV': 0.8787878787878788, 'NPV': 0.8787878787878788, 'F1': 0.8787878787878788}
DenseNet121


2022-11-29 09:44:09.238499: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-11-29 09:44:09.238847: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-11-29 09:44:09.242334: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10419 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1
2022-11-29 09:44:09.297518: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.017ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Epoch 1/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_094358-y5nb8c9u/files/model-best/assets
Epoch 2/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_094358-y5nb8c9u/files/model-best/assets
Epoch 3/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_094358-y5nb8c9u/files/model-best/assets
Epoch 4/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_094358-y5nb8c9u/files/model-best/assets
Epoch 5/5


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666976932125787, max=1.0)…

Metrics: 
 {'loss': 0.372403085231781, 'ACC': 0.8787878751754761, 'AUC-ROC': 0.9678604602813721, 'AUC-PR': 0.9700316190719604, 'TP': 29.0, 'TN': 29.0, 'FP': 4.0, 'FN': 4.0, 'SENSITIVITY': 0.8787878787878788, 'SPECIFICTY': 0.8787878787878788, 'PPV': 0.8787878787878788, 'NPV': 0.8787878787878788, 'F1': 0.8787878787878788}
InceptionResNetV2


2022-11-29 09:52:23.562184: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-11-29 09:52:23.562507: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-11-29 09:52:23.565765: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10419 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:84:00.0, compute capability: 6.1
2022-11-29 09:52:23.647589: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.02ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Epoch 1/5
INFO:tensorflow:Assets written to: /home/mdorosan/2022/bagls-sh-project/notebooks/wandb/run-20221129_095211-3s6fgb9a/files/model-best/assets
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01667098915204406, max=1.0)…

Metrics: 
 {'loss': 0.42621949315071106, 'ACC': 0.8484848737716675, 'AUC-ROC': 0.9403122663497925, 'AUC-PR': 0.9452385306358337, 'TP': 28.0, 'TN': 28.0, 'FP': 5.0, 'FN': 5.0, 'SENSITIVITY': 0.8484848484848485, 'SPECIFICTY': 0.8484848484848485, 'PPV': 0.8484848484848485, 'NPV': 0.8484848484848485, 'F1': 0.8484848484848485}


## End