In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import chain
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix, precision_recall_curve
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K

print("TensorFlow Version:", tf.__version__)

2025-07-06 16:39:07.934835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751819948.217520      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751819948.298132      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


TensorFlow Version: 2.18.0


In [2]:
IMAGE_SIZE = 224
BATCH_SIZE_PER_REPLICA = 32
SEED = 42
DATA_DIR = '/kaggle/input/data'
WEIGHTS_FILE= '/kaggle/input/mobilenet-v2-nih-full-dataset/initial_training_weights.weights.h5'

In [3]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    strategy = tf.distribute.TPUStrategy(tpu)
    print('Running on TPU')
except ValueError:
    strategy = tf.distribute.MirroredStrategy()
    print('Running on GPU(s)')

print("REPLICAS: ", strategy.num_replicas_in_sync)
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
print(f"Effective batch size: {BATCH_SIZE}")

Running on GPU(s)
REPLICAS:  1
Effective batch size: 32


2025-07-06 16:39:23.848990: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
try:
    df = pd.read_csv(os.path.join(DATA_DIR, 'Data_Entry_2017.csv'))
    print("Metadata loaded successfully.")
except FileNotFoundError:
    print(f"Error: 'Data_Entry_2017.csv' not found in '{DATA_DIR}'.")
    exit()

all_image_paths = {os.path.basename(p): p for p in glob.glob(os.path.join(DATA_DIR, '**', '*.png'), recursive=True)}
df['path'] = df['Image Index'].map(all_image_paths.get)
df = df.dropna(subset=['path'])
print(f"Found {len(df)} images with corresponding metadata.")

all_labels = np.unique(list(chain.from_iterable(df['Finding Labels'].map(lambda x: x.split('|')).tolist())))
all_labels = [label for label in all_labels if label != 'No Finding']
print(f'All Labels ({len(all_labels)}): {all_labels}')

for label in all_labels:
    df[label] = df['Finding Labels'].map(lambda finding: 1.0 if label in finding else 0.0)

Metadata loaded successfully.
Found 112120 images with corresponding metadata.
All Labels (14): ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']


In [5]:
patient_ids = df['Patient ID'].unique()
train_val_ids, test_ids = train_test_split(patient_ids, test_size=0.2, random_state=SEED)
train_ids, val_ids = train_test_split(train_val_ids, test_size=0.1, random_state=SEED)

train_df = df[df['Patient ID'].isin(train_ids)]
val_df = df[df['Patient ID'].isin(val_ids)]
test_df = df[df['Patient ID'].isin(test_ids)]

print(f"Total Patients: {len(patient_ids)}")
print(f"Train Patients: {len(train_ids)}, Validation Patients: {len(val_ids)}, Test Patients: {len(test_ids)}")
print(f"Train Samples: {len(train_df)}, Validation Samples: {len(val_df)}, Test Samples: {len(test_df)}")

Total Patients: 30805
Train Patients: 22179, Validation Patients: 2465, Test Patients: 6161
Train Samples: 80657, Validation Samples: 9169, Test Samples: 22294


In [6]:
pos_counts = train_df[all_labels].sum()
neg_counts = len(train_df) - pos_counts
weights = np.zeros((len(all_labels), 2))
total_samples = len(train_df)
for i, label in enumerate(all_labels):
    pos_weight = (1 / pos_counts[label]) * (total_samples / 2.0)
    neg_weight = (1 / neg_counts[label]) * (total_samples / 2.0)
    weights[i, 1] = pos_weight
    weights[i, 0] = neg_weight
print("Weights calculated successfully.")

Weights calculated successfully.


In [7]:
data_augmentation_pipeline = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomBrightness(0.1)
], name="data_augmentation")

def parse_image(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, size=[IMAGE_SIZE, IMAGE_SIZE])
    return image, label

def create_dataset(df):
    dataset = tf.data.Dataset.from_tensor_slices((df['path'].values, df[all_labels].values))
    AUTOTUNE = tf.data.AUTOTUNE
    dataset = dataset.map(parse_image, num_parallel_calls=AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=1024).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    return dataset

train_ds = create_dataset(train_df)
val_ds = create_dataset(val_df)
test_ds = create_dataset(test_df)
print("tf.data pipelines created successfully.")

tf.data pipelines created successfully.


In [8]:
def get_weighted_loss(weights):
    weights = tf.constant(weights, dtype=tf.float32)
    def weighted_loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        bce = K.binary_crossentropy(y_true, y_pred)
        loss_weights = (weights[:, 1] * y_true) + (weights[:, 0] * (1 - y_true))
        weighted_bce = loss_weights * bce
        return K.mean(weighted_bce)
    return weighted_loss

In [9]:
with strategy.scope():
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False

    inputs = tf.keras.layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    x = data_augmentation_pipeline(inputs)
    x = base_model(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    outputs = tf.keras.layers.Dense(len(all_labels), activation='sigmoid')(x)
    model = tf.keras.Model(inputs, outputs)

    # Load the previously saved weights
    try:
        model.load_weights(WEIGHTS_FILE)
        print("Successfully loaded weights from:", WEIGHTS_FILE)
    except Exception as e:
        print("Could not load weights. Make sure the WEIGHTS_FILE path is correct.", e)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Successfully loaded weights from: /kaggle/input/mobilenet-v2-nih-full-dataset/initial_training_weights.weights.h5


In [10]:
base_model.trainable = True
total_layers = len(base_model.layers)
# Unfreeze the top 50% of the layers
unfreeze_layers = int(total_layers * 0.50)

for layer in base_model.layers[:-unfreeze_layers]:
    layer.trainable = False

with strategy.scope():
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), # Use a lower learning rate for fine-tuning
        loss=get_weighted_loss(weights),
        metrics=[tf.keras.metrics.AUC(name='auc_roc', multi_label=True)]
    )

print(f"\nUnfrozen the top {unfreeze_layers} layers of the base model for fine-tuning.")
model.summary()


Unfrozen the top 77 layers of the base model for fine-tuning.


In [11]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint_weights_callback = ModelCheckpoint(
    filepath='initial_fine_tuning_weights.weights.h5',
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1
)

In [12]:
print("\nStarting fine-tuning...")
history = model.fit(
    train_ds,
    epochs=5,
    validation_data=val_ds,
    callbacks=[checkpoint_weights_callback] 
)
print("Fine-tuning complete.")


Starting fine-tuning...
Epoch 1/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.5181 - loss: 0.8167
Epoch 1: val_loss improved from inf to 0.73562, saving model to initial_fine_tuning_weights.weights.h5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4739s[0m 2s/step - auc_roc: 0.5181 - loss: 0.8167 - val_auc_roc: 0.5357 - val_loss: 0.7356
Epoch 2/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.5284 - loss: 0.7039
Epoch 2: val_loss improved from 0.73562 to 0.68859, saving model to initial_fine_tuning_weights.weights.h5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4649s[0m 2s/step - auc_roc: 0.5284 - loss: 0.7039 - val_auc_roc: 0.5432 - val_loss: 0.6886
Epoch 3/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.5362 - loss: 0.7149
Epoch 3: val_loss did not improve from 0.68859
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0