In [1]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split


2025-07-07 10:20:54.607885: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751883654.861433      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751883654.930896      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:

# --- Reproducibility ---
SEED = 42  # random_state passed to the patient-level train/val/test splits

# --- Input pipeline ---
IMAGE_SIZE = 224             # images are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE_PER_REPLICA = 32  # multiplied by the replica count to get the global batch size

# --- Locations: inputs ---
DATA_DIR = '/kaggle/input/data'
PREVIOUS_WEIGHTS_FILE = '/kaggle/input/mobilenet-v2-nih-full-dataset-fine-tuning-1/initial_fine_tuning_weights.weights.h5'

# --- Locations: checkpoints written during training ---
NEW_WEIGHTS_FILE = 'best_chest_xray_weights.weights.h5'
NEW_MODEL_FILE = 'best_chest_xray_model.keras'


In [3]:
# Try to attach to a TPU first. A ValueError raised while connecting or while
# building the TPU strategy selects MirroredStrategy instead.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    strategy = tf.distribute.MirroredStrategy()

# Global batch size = number of replicas in sync x per-replica batch size.
REPLICAS = strategy.num_replicas_in_sync
BATCH_SIZE = REPLICAS * BATCH_SIZE_PER_REPLICA


2025-07-07 10:21:11.535767: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
# Load the metadata table that ships with the images.
df = pd.read_csv(os.path.join(DATA_DIR, 'Data_Entry_2017.csv'))

# Index every PNG found (recursively) under DATA_DIR by its file name so that
# metadata rows can be matched to files through the 'Image Index' column.
png_pattern = os.path.join(DATA_DIR, '**', '*.png')
all_image_paths = {os.path.basename(p): p for p in glob.glob(png_pattern, recursive=True)}

# Rows whose file name is not in the index get a missing path and are dropped.
df['path'] = df['Image Index'].map(all_image_paths.get)
df = df.dropna(subset=['path'])

# Fail fast: with no matched image every row is dropped, and the notebook
# would otherwise only break later, inside the train/test split, with an
# error that does not point back at the data directory.
if df.empty:
    raise FileNotFoundError(
        f"No image listed in Data_Entry_2017.csv was found as a .png under {DATA_DIR!r} "
        f"({len(all_image_paths)} .png files discovered)."
    )



In [5]:
# Target classes in sorted order. This order fixes both the order of the label
# columns fed to the model and the row order of `weights` below.
all_labels = sorted(['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax'])

# One 0.0/1.0 indicator column per class.
# NOTE(review): this is a substring test on 'Finding Labels'. It is exact for
# the class names above because none of them is contained in another, but it
# would mis-fire if such a class were ever added.
for label in all_labels:
    df[label] = df['Finding Labels'].map(lambda finding: 1.0 if label in finding else 0.0)

# Split on patient IDs rather than on rows, so that all images of one patient
# land in the same subset: 20% of patients for test, then 10% of the rest for
# validation.
patient_ids = df['Patient ID'].unique()
train_val_ids, test_ids = train_test_split(patient_ids, test_size=0.2, random_state=SEED)
train_ids, val_ids = train_test_split(train_val_ids, test_size=0.1, random_state=SEED)

train_df = df[df['Patient ID'].isin(train_ids)]
val_df = df[df['Patient ID'].isin(val_ids)]
test_df = df[df['Patient ID'].isin(test_ids)]

# Per-class positive/negative counts in the training split only.
pos_counts = train_df[all_labels].sum()
neg_counts = len(train_df) - pos_counts
total_samples = len(train_df)

# A class with no positive (or no negative) training example would get an
# infinite weight, which turns the weighted loss into NaN. Stop here with an
# explicit message instead of training on NaNs.
degenerate_labels = [
    name for name in all_labels
    if pos_counts[name] == 0 or neg_counts[name] == 0
]
if degenerate_labels:
    raise ValueError(
        "Cannot compute class weights: the training split has no positive or "
        f"no negative example for {degenerate_labels}."
    )

# weights[i, 0] is the weight of a negative target for class i and
# weights[i, 1] the weight of a positive target: total / (2 * count).
weights = np.zeros((len(all_labels), 2))
weights[:, 0] = (1 / neg_counts.to_numpy()) * (total_samples / 2.0)
weights[:, 1] = (1 / pos_counts.to_numpy()) * (total_samples / 2.0)


In [6]:
def parse_image(path, label):
    """Read one PNG from disk and turn it into a model-sized float image.

    Args:
        path: scalar string tensor holding the file location.
        label: passed through untouched.

    Returns:
        A tuple (image, label) where image is a float32 tensor with 3 channels,
        resized to IMAGE_SIZE x IMAGE_SIZE.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    # convert_image_dtype rescales integer pixel values when converting to float.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, size=[IMAGE_SIZE, IMAGE_SIZE])
    return resized, label

# Random augmentations used by create_dataset when augment=True:
# horizontal flip, rotation and zoom, each with a factor of 0.1.
data_augmentation_pipeline = keras.Sequential(
    [
        layers.RandomFlip(mode="horizontal"),
        layers.RandomRotation(factor=0.1),
        layers.RandomZoom(height_factor=0.1),
    ],
    name="data_augmentation",
)

def create_dataset(df, augment=False, shuffle=True):
    """Build a batched, prefetched tf.data pipeline of (image, labels) pairs.

    Args:
        df: DataFrame with a 'path' column and one column per entry of
            `all_labels`.
        augment: when True, every decoded image is passed through
            `data_augmentation_pipeline` with training=True.
        shuffle: when True (the default, so existing calls keep shuffling)
            the examples are shuffled. Pass False to keep the row order of
            `df`, e.g. for evaluation.

    Returns:
        A tf.data.Dataset yielding batches of BATCH_SIZE examples.
    """
    AUTOTUNE = tf.data.AUTOTUNE
    dataset = tf.data.Dataset.from_tensor_slices((df['path'].values, df[all_labels].values))
    if shuffle:
        # Shuffle the lightweight (path, labels) records before decoding.
        # The buffer then holds strings instead of decoded float32 images, so
        # it can span the whole table and give a full shuffle.
        # max(..., 1) keeps the call valid for an empty frame.
        dataset = dataset.shuffle(buffer_size=max(len(df), 1))
    dataset = dataset.map(parse_image, num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(lambda x, y: (data_augmentation_pipeline(x, training=True), y), num_parallel_calls=AUTOTUNE)
    return dataset.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

# The model built later in this notebook already applies
# `data_augmentation_pipeline` to its inputs during training, so the training
# dataset is created without the tf.data augmentation pass. Enabling both
# would flip/rotate/zoom every training image twice.
train_ds = create_dataset(train_df, augment=False)
val_ds = create_dataset(val_df)
test_ds = create_dataset(test_df)


In [7]:
# Augmentation block placed at the front of the model below.
# Note: this rebinds `data_augmentation_pipeline`, replacing the object created
# in the input-pipeline cell with a new one built from the same three layers.
data_augmentation_pipeline = tf.keras.Sequential(
    [
        tf.keras.layers.RandomFlip(mode="horizontal"),
        tf.keras.layers.RandomRotation(factor=0.1),
        tf.keras.layers.RandomZoom(height_factor=0.1),
    ],
    name="data_augmentation",
)

# Class-weighted binary cross-entropy
def get_weighted_loss(weights):
    """Create a loss that weights binary cross-entropy per class and per target value.

    Args:
        weights: array-like indexed as [class, 0] for the weight used where
            the target is 0 and [class, 1] for the weight used where it is 1.

    Returns:
        A function `weighted_loss(y_true, y_pred)` returning the mean of the
        weighted element-wise binary cross-entropy.
    """
    class_weights = tf.constant(weights, dtype=tf.float32)
    negative_weight = class_weights[:, 0]
    positive_weight = class_weights[:, 1]

    def weighted_loss(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        # Blend the two weights according to the target value of each element.
        element_weight = (positive_weight * targets) + (negative_weight * (1 - targets))
        element_bce = K.binary_crossentropy(targets, y_pred)
        return K.mean(element_weight * element_bce)

    return weighted_loss

# Build, restore and compile the model inside the distribution strategy scope.
# The statement order below matters: freeze -> build -> load weights ->
# partially unfreeze -> compile.
with strategy.scope():
    # ImageNet-initialised MobileNetV2 without its classification head.
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    # Frozen while the model is assembled and the previous weights are loaded.
    # NOTE(review): presumably mirrors the state in which PREVIOUS_WEIGHTS_FILE
    # was saved — confirm against the notebook that produced it.
    base_model.trainable = False

    # Model architecture: augmentation -> base network -> global average
    # pooling -> dropout -> one sigmoid output per label.
    inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    x = data_augmentation_pipeline(inputs)
    # training=False is fixed here, so the base network is always called in
    # inference mode, also after its layers are unfrozen below.
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    outputs = tf.keras.layers.Dense(len(all_labels), activation='sigmoid')(x)
    model = tf.keras.Model(inputs, outputs)

    
    # Best-effort restore of the earlier fine-tuning stage: a failure is only
    # printed and training then continues from the ImageNet base with a
    # freshly initialised head.
    try:
        model.load_weights(PREVIOUS_WEIGHTS_FILE)
    except Exception as e:
        print(f"Could not load weights. Error: {e}")

    # Unfreeze last 75% of base_model layers: everything is made trainable,
    # then the first 25% of the layers (by index) is frozen again.
    base_model.trainable = True
    total_layers = len(base_model.layers)
    unfreeze_at = int(total_layers * 0.25)
    for layer in base_model.layers[:unfreeze_at]:
        layer.trainable = False

    # Compile model with weighted loss. The metric name 'auc_roc' is what the
    # checkpoint callbacks monitor as 'val_auc_roc'.
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-5, weight_decay=1e-6),
        loss=get_weighted_loss(weights),
        metrics=[tf.keras.metrics.AUC(name='auc_roc', multi_label=True)]
    )


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
# Both checkpoints keep only the epoch with the highest validation AUC;
# they differ in what is written (full model vs. weights only).
_best_val_auc = dict(
    monitor='val_auc_roc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

checkpoint_model_callback = ModelCheckpoint(
    filepath=NEW_MODEL_FILE, save_weights_only=False, **_best_val_auc
)

checkpoint_weights_callback = ModelCheckpoint(
    filepath=NEW_WEIGHTS_FILE, save_weights_only=True, **_best_val_auc
)


In [9]:
# Learning-rate schedule: multiply the rate by 0.2 after 2 epochs without a
# better validation loss, never going below 1e-7.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.2,
    min_lr=1e-7,
    verbose=1,
)

# Stop after 5 epochs without a better validation loss and roll the model
# back to the weights of the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Callbacks in the order they are handed to Keras: both checkpoints first,
# then the learning-rate schedule, then early stopping.
training_callbacks = [
    checkpoint_model_callback,
    checkpoint_weights_callback,
    reduce_lr_callback,
    early_stopping_callback,
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=training_callbacks,
)

# Reload the best full-model checkpoint. The custom loss is not a built-in
# Keras object, so it is supplied under the name of the inner function
# returned by get_weighted_loss.
loaded_model = tf.keras.models.load_model(
    NEW_MODEL_FILE,
    custom_objects={'weighted_loss': get_weighted_loss(weights)}
)

# return_dict=True pairs each value with its own metric name. Zipping the
# result list with `metrics_names` is not reliable: Keras 3 reports the
# compiled metrics there under the single placeholder 'compile_metrics',
# so the AUC would be printed under the wrong label.
results = loaded_model.evaluate(test_ds, return_dict=True)
print("\nTest Set Evaluation Results:")
for name, value in results.items():
    print(f"{name}: {value:.4f}")

Epoch 1/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.5706 - loss: 0.7005
Epoch 1: val_auc_roc improved from -inf to 0.63451, saving model to best_chest_xray_model.keras

Epoch 1: val_auc_roc improved from -inf to 0.63451, saving model to best_chest_xray_weights.weights.h5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5506s[0m 2s/step - auc_roc: 0.5706 - loss: 0.7004 - val_auc_roc: 0.6345 - val_loss: 0.6574 - learning_rate: 1.0000e-05
Epoch 2/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.6296 - loss: 0.6645
Epoch 2: val_auc_roc improved from 0.63451 to 0.66771, saving model to best_chest_xray_model.keras

Epoch 2: val_auc_roc improved from 0.63451 to 0.66771, saving model to best_chest_xray_weights.weights.h5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5814s[0m 2s/step - auc_roc: 0.6296 - loss: 0.6645 - val_auc_roc: 0.6677 - val_loss: 0.6383 - learning_rate: 1.000