In [1]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split

2025-07-08 14:37:19.717980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751985439.984512      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751985440.059946      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
IMAGE_SIZE = 224
BATCH_SIZE_PER_REPLICA = 32
SEED = 42
DATA_DIR = '/kaggle/input/data' 
PREVIOUS_MODEL_FILE = '/kaggle/input/mobile-net-v2-nih-full-dataset-fine-tuning-3/best_chest_xray_model_finetuned.keras' 
NEW_MODEL_FILE = 'best_chest_xray_model_finetuned4.keras'
NEW_WEIGHTS_FILE = 'best_chest_xray_weights_finetuned4.weights.h5'

In [3]:
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    strategy = tf.distribute.TPUStrategy(tpu)
    print("Running on TPU")
except ValueError:
    strategy = tf.distribute.MirroredStrategy()
    print("Running on GPU/CPU")

REPLICAS = strategy.num_replicas_in_sync
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * REPLICAS
print(f"Number of replicas: {REPLICAS}")


Running on GPU/CPU
Number of replicas: 1


2025-07-08 14:37:37.253996: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
df = pd.read_csv(os.path.join(DATA_DIR, 'Data_Entry_2017.csv'))
all_image_paths = {os.path.basename(p): p for p in glob.glob(os.path.join(DATA_DIR, '**', '*.png'), recursive=True)}
df['path'] = df['Image Index'].map(all_image_paths.get)
df = df.dropna(subset=['path'])


In [5]:
all_labels = sorted(['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax'])
for label in all_labels:
    df[label] = df['Finding Labels'].map(lambda finding: 1.0 if label in finding else 0.0)

In [6]:
patient_ids = df['Patient ID'].unique()
train_val_ids, test_ids = train_test_split(patient_ids, test_size=0.2, random_state=SEED)
train_ids, val_ids = train_test_split(train_val_ids, test_size=0.1, random_state=SEED)

train_df = df[df['Patient ID'].isin(train_ids)]
val_df = df[df['Patient ID'].isin(val_ids)]
test_df = df[df['Patient ID'].isin(test_ids)]


In [7]:
pos_counts = train_df[all_labels].sum()
neg_counts = len(train_df) - pos_counts
weights = np.zeros((len(all_labels), 2))
total_samples = len(train_df)
for i, label in enumerate(all_labels):
    pos_weight = (1 / pos_counts[label]) * (total_samples / 2.0)
    neg_weight = (1 / neg_counts[label]) * (total_samples / 2.0)
    weights[i, 1] = pos_weight
    weights[i, 0] = neg_weight

print("Data preparation complete.")

Data preparation complete.


In [8]:
# Function to parse and decode images
def parse_image(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, size=[IMAGE_SIZE, IMAGE_SIZE])
    return image, label

# Data augmentation pipeline
data_augmentation_pipeline = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
], name="data_augmentation")

# Function to create a tf.data.Dataset
def create_dataset(df, augment=False):
    dataset = tf.data.Dataset.from_tensor_slices((df['path'].values, df[all_labels].values))
    AUTOTUNE = tf.data.AUTOTUNE
    dataset = dataset.map(parse_image, num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(lambda x, y: (data_augmentation_pipeline(x, training=True), y), num_parallel_calls=AUTOTUNE)
    dataset = dataset.shuffle(buffer_size=2048).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
    return dataset

train_ds = create_dataset(train_df, augment=True)
val_ds = create_dataset(val_df)
test_ds = create_dataset(test_df)

print("Datasets created.")


Datasets created.


In [9]:
def get_weighted_loss(weights):
    weights = tf.constant(weights, dtype=tf.float32)
    def weighted_loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.float32)
        bce = K.binary_crossentropy(y_true, y_pred)
        loss_weights = (weights[:, 1] * y_true) + (weights[:, 0] * (1 - y_true))
        weighted_bce = loss_weights * bce
        return K.mean(weighted_bce)
    return weighted_loss

with strategy.scope():
    # Load the previously saved model
    model = tf.keras.models.load_model(
        PREVIOUS_MODEL_FILE,
        custom_objects={'weighted_loss': get_weighted_loss(weights)}
    )
    print("Previously trained model loaded successfully.")


Previously trained model loaded successfully.


In [10]:
base_model = model.layers[1]
base_model.trainable = True
    
# Unfreeze the top 90% of layers
total_layers = len(base_model.layers)
unfreeze_at = int(total_layers * 0.05) # Freeze only the first 5%
for layer in base_model.layers[:unfreeze_at]:
    layer.trainable = False

print(f"Unfreezing layers from index {unfreeze_at} onwards.")


Unfreezing layers from index 0 onwards.


In [11]:
model.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-6, weight_decay=1e-7), # Slower learning rate
        loss=get_weighted_loss(weights),
        metrics=[tf.keras.metrics.AUC(name='auc_roc', multi_label=True)]
    )
print(" Model re-compiled with a slower learning rate for fine-tuning.")
model.summary()


 Model re-compiled with a slower learning rate for fine-tuning.


In [12]:
HISTORY_FILE = 'training_history.csv'

with strategy.scope():
    # Load the previously saved model
    model = tf.keras.models.load_model(
        PREVIOUS_MODEL_FILE,
        custom_objects={'weighted_loss': get_weighted_loss(weights)}
    )
    print("Previously trained model loaded successfully.")

    # Unfreeze more layers
    base_model = model.layers[1]
    base_model.trainable = True
    total_layers = len(base_model.layers)
    unfreeze_at = int(total_layers * 0.10)
    for layer in base_model.layers[:unfreeze_at]:
        layer.trainable = False
    print(f"Unfreezing layers from index {unfreeze_at} onwards.")

    # Re-compile the model with a slower learning rate
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-6, weight_decay=1e-7),
        loss=get_weighted_loss(weights),
        metrics=[tf.keras.metrics.AUC(name='auc_roc', multi_label=True)]
    )
    print(" Model re-compiled for fine-tuning.")

    checkpoint_model_callback = ModelCheckpoint(
        filepath=NEW_MODEL_FILE,
        save_weights_only=False,
        monitor='val_auc_roc',
        mode='max',
        save_best_only=True,
        verbose=1
    )
    early_stopping_callback = EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
    )

    print("Starting further fine-tuning...")
    history = model.fit(
        train_ds,
        epochs=5,
        validation_data=val_ds,
        callbacks=[
            checkpoint_model_callback,
            early_stopping_callback
        ]
    )

print(f"Saving training history to {HISTORY_FILE}...")
history_df = pd.DataFrame(history.history)
history_df.to_csv(HISTORY_FILE, index=False)
print("History file saved successfully.")

print("Fine-tuning complete. Evaluating model on the test set...")
loaded_model = tf.keras.models.load_model(
    NEW_MODEL_FILE,
    custom_objects={'weighted_loss': get_weighted_loss(weights)}
)
results = loaded_model.evaluate(test_ds)
print("\n--- Test Set Evaluation Results ---")
for name, value in zip(loaded_model.metrics_names, results):
    print(f"{name}: {value:.4f}")

Previously trained model loaded successfully.
Unfreezing layers from index 0 onwards.
 Model re-compiled for fine-tuning.
Starting further fine-tuning...
Epoch 1/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.7082 - loss: 0.6144
Epoch 1: val_auc_roc improved from -inf to 0.71679, saving model to best_chest_xray_model_finetuned4.keras
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5533s[0m 2s/step - auc_roc: 0.7082 - loss: 0.6144 - val_auc_roc: 0.7168 - val_loss: 0.6121
Epoch 2/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - auc_roc: 0.7100 - loss: 0.6134
Epoch 2: val_auc_roc improved from 0.71679 to 0.71790, saving model to best_chest_xray_model_finetuned4.keras
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5536s[0m 2s/step - auc_roc: 0.7100 - loss: 0.6134 - val_auc_roc: 0.7179 - val_loss: 0.6107
Epoch 3/5
[1m2521/2521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step

In [13]:
print("Fine-tuning complete. Evaluating model on the test set...")
# Load the best-performing model saved during training
loaded_model = tf.keras.models.load_model(
    NEW_MODEL_FILE,
    custom_objects={'weighted_loss': get_weighted_loss(weights)}
)

# Evaluate on the test dataset
results = loaded_model.evaluate(test_ds)
print("\n--- Test Set Evaluation Results ---")
for name, value in zip(loaded_model.metrics_names, results):
    print(f"{name}: {value:.4f}")

Fine-tuning complete. Evaluating model on the test set...
[1m697/697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m644s[0m 895ms/step - auc_roc: 0.7199 - loss: 0.6137

--- Test Set Evaluation Results ---
loss: 0.6178
compile_metrics: 0.7243
