In [1]:
# Dev convenience: enable IPython's autoreload extension in mode 2 so edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Standard library first. The parent directory is appended to sys.path so the
# project-level modules imported below (PATHS, config) can be resolved.
import os
import sys

sys.path.append("..")

import numpy as np
import pandas as pd

import PATHS
import config

# Notebook-wide settings taken from the project's config module.
configs = config.nb_configs

# Process environment. These are set here, before TensorFlow is imported in a
# later cell.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0,1,2,3",
        "WANDB_NOTEBOOK_NAME": "07-model-evaluation.ipynb",
    }
)
# os.environ["WANDB_SILENT"] = "True"

# ---- Values to edit per experiment --------------------------------------
PROJECT_NAME = 'bagls-sh-test'
RUN_NAME = 'convnet_from_scratch'
METRICS_TABLE_NAME = 'metrics_table'
GRADCAM_LAYER_NAME = "conv2d_3"

In [3]:
import wandb
# Print the installed W&B client version for the record, then log in.
print("W&B: ", wandb.__version__)
wandb.login()

# Optional log management, left disabled. Uncomment to raise the "wandb"
# logger to ERROR level and to disable the "tensorflow" logger.
# import logging

# logger = logging.getLogger("wandb")
# logger.setLevel(logging.ERROR)

# logging.getLogger('tensorflow').disabled = True

W&B:  0.13.5


[34m[1mwandb[0m: Currently logged in as: [33mmiked[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.python.client import device_lib

# Show the devices TensorFlow can see, using the public tf.config API rather
# than device_lib from the private tensorflow.python namespace.
print(tf.config.list_physical_devices())

In [7]:
# Generator used for every test-time dataset in this notebook: no augmentation,
# only the configured rescale factor and the imagenet_utils preprocess_input.
# NOTE(review): Keras appears to run preprocessing_function before multiplying
# by `rescale` -- confirm this matches the preprocessing used at training time.
test_datagen = ImageDataGenerator(
    rescale=configs["rescale"],
    preprocessing_function=preprocess_input,
)

In [27]:
def prepare_df(csv_path):
    """Load a split CSV and add the columns used by `flow_from_dataframe`.

    Reads `csv_path` with pandas and appends two columns:

    * ``class``    -- "healthy" where ``is_healthy`` is truthy, else "unhealthy".
    * ``filename`` -- the ``Image Id`` value rendered as text plus ".png".

    Returns the dataframe with both columns added.
    """
    frame = pd.read_csv(csv_path)

    # Map the truthiness of each `is_healthy` entry onto the two class names.
    frame["class"] = [
        "healthy" if flag else "unhealthy" for flag in frame["is_healthy"]
    ]

    # Image ids may be numeric in the CSV; convert to text before appending
    # the file extension.
    image_ids = frame["Image Id"].astype(str)
    frame["filename"] = image_ids + ".png"
    return frame

## Create bootstraps of the `test` set

In [29]:
# Settings shared by every bootstrap generator, read from the notebook config.
test_dir = "../test/test"
batch_size = configs["batch_size"]
class_names = configs["class_names"]
interpol = configs["interpol"]
cmap = configs["cmap"]
label_mode = configs["label_mode"]
labels = configs["labels"]
image_size = configs["image_size"]

num_bootstrap = 10
# Parallel lists: datasets[k] is the generator built from the k-th bootstrap
# CSV and labels_list[k] is that CSV's `class` column.
datasets, labels_list = [], []

for bootstrap_idx in range(num_bootstrap):
    # The CSV path is indexed by the loop counter. The previous code used
    # `num_bs`, which is not defined in this notebook, so a fresh kernel raised
    # NameError and a stale kernel value would load the same CSV every time.
    # Assumes files are named test-0.csv ... test-{num_bootstrap - 1}.csv --
    # TODO confirm against the code that wrote the bootstrap directory.
    csv_path = os.path.join(PATHS.bootstrap_dir, f"test-{bootstrap_idx}.csv")
    df = prepare_df(csv_path)
    labels_list.append(df["class"])
    test_dataset = test_datagen.flow_from_dataframe(
        df, directory=test_dir,
        x_col='filename', y_col='class',
        target_size=image_size,
        color_mode=cmap,
        classes=class_names,
        class_mode=label_mode,
        batch_size=batch_size,
        interpolation=interpol,
        validate_filenames=True,
        # Keep row order fixed so outputs line up with labels_list entries.
        shuffle=False,
    )
    datasets.append(test_dataset)

Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.
Found 3300 validated image filenames belonging to 2 classes.


In [None]:
def evaluate(model_name, dataset=None): # <=====================
    """Evaluate a logged model artifact and record its metrics in W&B.

    Starts a W&B run of job type "inference-evaluation", downloads the
    artifact ``"model-" + model_name + ":latest"``, loads it with Keras,
    evaluates it, derives additional metrics from the TP/FP/TN/FN counts via
    the ``utils`` helpers, and logs everything as a one-row table under
    ``METRICS_TABLE_NAME``. Predictions are then computed on the same data.

    Args:
        model_name: Used as the W&B run name and to build the artifact name.
        dataset: Data passed to ``evaluate``/``predict``. When ``None`` (the
            default, matching the previous behaviour) the notebook-level
            ``test_dataset`` is used, i.e. whichever generator was assigned
            last. Pass an entry of ``datasets`` to score a specific bootstrap.

    Returns:
        None.

    Notes:
        Relies on notebook-level names: ``PROJECT_NAME``, ``GROUP_NAME``,
        ``configs``, ``metrics_dict``, ``utils``, ``METRICS_TABLE_NAME`` and
        (when ``dataset`` is None) ``test_dataset``.
        NOTE(review): ``GROUP_NAME``, ``metrics_dict`` and ``utils`` are not
        defined in the visible part of this notebook -- confirm they exist
        before calling.
    """
    # Resolve the data source lazily so the global is only needed as fallback.
    eval_dataset = test_dataset if dataset is None else dataset

    # The context manager closes the run even if loading or evaluation raises;
    # previously run.finish() was only reached on the success path.
    with wandb.init(
        project=PROJECT_NAME,
        group=GROUP_NAME,
        name=model_name,
        job_type="inference-evaluation",
        config=configs,
    ) as run:
        model_at = run.use_artifact("model-" + model_name + ":latest")
        model_dir = model_at.download()
        best_model = keras.models.load_model(model_dir)

        metrics_results = best_model.evaluate(eval_dataset)
        # Pair the returned values with names: "loss" first, then the keys of
        # metrics_dict. Assumes metrics_dict preserves the order the model's
        # metrics were compiled in -- TODO confirm.
        metrics_results = dict(zip(["loss"] + list(metrics_dict.keys()),
                                   metrics_results))
        tp, fp, tn, fn = (metrics_results["TP"], metrics_results["FP"],
                          metrics_results["TN"], metrics_results["FN"])

        # NOTE(review): "SPECIFICTY" is misspelled, but it is kept unchanged
        # because it becomes a logged table column name; renaming it would
        # diverge from any tables already logged under that key.
        add_metrics = {
            "SENSITIVITY": utils.get_sensitivity(tp, fp, tn, fn),
            "SPECIFICTY": utils.get_specificity(tp, fp, tn, fn),
            "PPV": utils.get_ppv(tp, fp, tn, fn),
            "NPV": utils.get_npv(tp, fp, tn, fn),
            "F1" : utils.get_fbeta(tp, fp, tn, fn, beta=1),
        }
        metrics_results.update(add_metrics)

        print("Metrics: \n", metrics_results)

        # Log a single-row table whose columns are the metric names.
        columns = list(metrics_results.keys())
        metrics_table = wandb.Table(columns=columns)
        metrics_table.add_data(*metrics_results.values())
        run.log({METRICS_TABLE_NAME : metrics_table})

        # TODO: add logging of confusion matrix image from matplotlib

        # Predictions on the evaluated data; not consumed yet.
        trained_preds = best_model.predict(eval_dataset)