In [1]:
import gc
import random

from matplotlib import pyplot as plt
from sklearn.metrics import roc_curve, auc

import h5py
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
import pandas as pd
import pydicom as dicom
from keras import Model, Input
from keras.src.layers import Dense, Dropout
import tensorflow as tf
import matplotlib.pyplot as plt
from keras.src.callbacks import Callback
from sklearn.metrics import roc_curve, auc, roc_auc_score
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import clip_ops, math_ops
from tensorflow.keras import backend as K
from keras.src import ops
from tqdm import tqdm

2024-07-21 19:30:16.587715: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
train_image_hdf5 = "train-image.hdf5"
train_metadata_csv = "train-metadata.csv"
test_image_hdf5 = "test-image.hdf5"
test_metadata_csv = "test-metadata.csv"
sample_submission_csv = "sample_submission.csv"

base_path = "/kaggle/input/isic-2024-challenge"
model_path = "/kaggle/input/isic2024_densenet/keras/default/1/model_dense.keras"

In [3]:
CONFIG = {
    "SEED": 42,
    "N_FOLDS": 5,
    "N_EPOCHS": 100,
    "BATCH_SIZE": 32,
    "VAL_BATCH_SIZE": 50,
    "LR": 0.001,
    "IMAGE_HEIGHT": 224,
    "IMAGE_WIDTH": 224,
    "IMAGE_CHANNEL": 3,
    "N_CLASSES": 1,
    "PATIENCE": 2,
    "TRAIN_RATIO": 0.8,
    "VAL_RATIO": 0.1,
    "TEST_RATIO": 0.1,
}

In [4]:
df_train = pd.read_csv(f"{base_path}/{train_metadata_csv}")
df_test = pd.read_csv(f"{base_path}/{test_metadata_csv}")

  df_train = pd.read_csv(f"{base_path}/{train_metadata_csv}")


In [22]:
def get_image_and_label(hdf, isic_id, mode = "train"):
    # 画像を取得
    image_data = hdf[isic_id][()]

    # Convert the binary data to a numpy array
    image_data = np.frombuffer(image_data, np.uint8)

    # Decode the image from the numpy array
    image_data = cv2.cvtColor(cv2.imdecode(image_data, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)

    image_data = cv2.resize(image_data, (CONFIG["IMAGE_HEIGHT"], CONFIG["IMAGE_WIDTH"]))

    # 画像の正規化 (0 ~ 1) に変換
    image_data = (image_data - image_data.min()) / (image_data.max() - image_data.min())
    
    # クラスを取得する. これが学習時のラベルになる
    if mode == "train":
        label = df_train[df_train["isic_id"] == isic_id]["target"].values[0]
    elif mode == "test":
        label = 0
    return image_data, label



def normalize_image(image):
    # 画像の正規化 (0 ~ 1) に変換
    image = (image - image.min()) / (image.max() - image.min())
    return image

def augmentation(image):
    
    # 確率で画像をズーム
    if np.random.rand() > 0.5:
        scale = np.random.uniform(0.8, 1.2)
        image = cv2.resize(image, (0, 0), fx=scale, fy=scale)
        image = cv2.resize(image, (CONFIG["IMAGE_HEIGHT"], CONFIG["IMAGE_WIDTH"]))
    
    # 明るさを変える
    alpha = 1.0 + np.random.uniform(-0.01, 0.01)
    beta = np.random.uniform(-0.01, 0.01)
    image = image * alpha + beta
    
    # 画像をぼかす
    k_size = np.random.randint(1, 10) * 2 + 1
    image = cv2.GaussianBlur(image, (k_size, k_size), 0)

    # 確率で付与するノイズを変える
    if np.random.rand() > 0.5:
        # ガウシアンノイズ
        image = image + np.random.normal(0, 0.1, image.shape)
        image = np.clip(image, 0, 1)
        
    # 画像を確率で反転
    if np.random.rand() > 0.5:
        image = cv2.flip(image, 1)

    # 特定の範囲を切り抜く
    n_cut = np.random.randint(1, 5)
    for _ in range(n_cut):
        WINDOW_SIZE = np.random.randint(10, 50)
        x_min = np.random.randint(0, image.shape[0] - WINDOW_SIZE)
        x_max = x_min + WINDOW_SIZE
        y_min = np.random.randint(0, image.shape[1] - WINDOW_SIZE)
        y_max = y_min + WINDOW_SIZE
        image[x_min:x_max, y_min:y_max] = 0

    # 画像を回転
    angle = np.random.randint(0, 360)
    image = cv2.warpAffine(image, cv2.getRotationMatrix2D((image.shape[1] / 2, image.shape[0] / 2), angle, 1.0),
                           (image.shape[1], image.shape[0]))

    return image

def generator(isic_ids,
              batch_size=CONFIG["BATCH_SIZE"],
              mode="train",
              is_augmentation=False,
              is_shuffle=False,
              is_one_epoch=False,
              is_multi_threading=False
              ):
    hdf_path = None
    if mode == "train":
        hdf_path = f"{base_path}/{train_image_hdf5}"
    elif mode == "test":
        hdf_path = f"{base_path}/{test_image_hdf5}"
    else:
        raise ValueError("mode must be 'train' or 'test'")
    
    def process_image_label(isic_id):
        with h5py.File(hdf_path, "r") as hdf:
            image, label = get_image_and_label(hdf, isic_id, mode)
            if is_augmentation:
                image = augmentation(image)
                
            # 画像を正規化
            image = normalize_image(image)
        return image, label

    while True:
        if is_shuffle:
            random.shuffle(isic_ids)

        for i in range(0, len(isic_ids), batch_size):
            end = min(i + batch_size, len(isic_ids))
            batch_isic_ids = isic_ids[i:end]
            if is_multi_threading:
                with ThreadPoolExecutor() as executor:
                    results = list(executor.map(process_image_label, batch_isic_ids))
            else:
                results = [process_image_label(isic_id) for isic_id in batch_isic_ids]

            images, labels = zip(*results)
            images = np.array(images)
            labels = np.array(labels)

            yield images, labels

        if is_one_epoch:
            break

In [23]:
def comp_score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str, min_tpr: float = 0.80):
    v_gt = abs(np.asarray(solution.values) - 1)
    v_pred = np.array([1.0 - x for x in submission.values])
    max_fpr = abs(1 - min_tpr)
    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    # change scale from [0.5, 1.0] to [0.5 * max_fpr**2, max_fpr]
    # https://math.stackexchange.com/questions/914823/shift-numbers-into-a-different-range
    partial_auc = 0.5 * max_fpr ** 2 + (max_fpr - 0.5 * max_fpr ** 2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc


def auroc(y_true, y_pred, min_fpr=0.8):
    v_gt = tf.abs(y_true - 1)
    v_pr = tf.abs(y_pred - 1)
    partial_auc_scaled = tf.py_function(comp_score, [v_gt, v_pr], tf.float64)
    partial_auc = 0.5 * min_fpr ** 2 + (min_fpr - 0.5 * min_fpr ** 2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)
    return partial_auc


class AUCROCMetric(tf.keras.metrics.Metric):
    def __init__(self, name='aucroc', **kwargs):
        super(AUCROCMetric, self).__init__(name=name, **kwargs)
        self.auc_metric = tf.keras.metrics.AUC()

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.auc_metric.update_state(y_true, y_pred, sample_weight)

    def result(self):
        return self.auc_metric.result()

    def reset_states(self):
        self.auc_metric.reset_states()

def binary_crossentropy_balance(target, output):
    # target を output の型にキャスト
    target = tf.dtypes.cast(target, output.dtype)
    epsilon_ = constant_op.constant(0.00001, output.dtype)

    # nan を防ぐためにクリップ
    output = clip_ops.clip_by_value(output, epsilon_, 0.99999)

    # 交差エントロピーの計算
    bce = target * math_ops.log(output + epsilon_) * 3.0
    bce += (1.0 - target) * math_ops.log(1.0 - output + epsilon_)

    bce_sum = -K.sum(bce, axis=-1)
    return bce_sum

def create_model(model_name='DenseNet201'):
    # カスタム入力層
    input_shape = (CONFIG["IMAGE_HEIGHT"], CONFIG["IMAGE_WIDTH"], CONFIG["IMAGE_CHANNEL"])
    inputs = Input(shape=input_shape)

    x = inputs

    # DenseNet201
    base_model = tf.keras.applications.DenseNet201(
        include_top=True,
        weights="imagenet",
        input_tensor=x,
        input_shape=input_shape,
        pooling="avg",
        classes=1000,
        classifier_activation='softmax',
    )

    # ベースモデルの出力
    x = base_model.output
    x = Dense(1000, activation='sigmoid')(x)
    x = Dense(1, activation='sigmoid')(x)

    # モデルの作成
    model = Model(inputs=inputs, outputs=x, name=model_name)

    # optimizer : Adam
    opt = tf.keras.optimizers.AdamW(learning_rate=CONFIG["LR"])

    # モデルのコンパイル
    model.compile(optimizer=opt, loss=binary_crossentropy_balance, metrics=['accuracy'])

    return model

In [24]:
model = create_model()
# モデルの概要を表示
# model.summary()

In [25]:
# モデルを読み込み
model = tf.keras.models.load_model(model_path, custom_objects={'binary_crossentropy_balance': binary_crossentropy_balance})

In [26]:
test_ids = df_test["isic_id"].values
# テストデータの予測
y_pred = model.predict(
    generator(test_ids, 
              mode="test",
              is_augmentation=False,
              is_shuffle=False, 
              is_one_epoch=True,
              batch_size=1,
              ),
    steps=len(test_ids)
)

I0000 00:00:1721558028.862731  346871 service.cc:145] XLA service 0x7da714003dd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1721558028.862770  346871 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 2070 SUPER, Compute Capability 7.5
2024-07-21 19:33:49.135154: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-21 19:33:50.279549: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step


I0000 00:00:1721558037.337685  346871 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In [27]:
# 提出用のデータフレームを作成
submission = pd.DataFrame()
submission["isic_id"] = test_ids.reshape(-1)
submission["target"] = y_pred
submission.to_csv("submission.csv", index=False)

In [28]:
submission

Unnamed: 0,isic_id,target
0,ISIC_0015657,0.185343
1,ISIC_0015729,0.185343
2,ISIC_0015740,0.185343
