In [13]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout, BatchNormalization, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, cohen_kappa_score
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics import roc_auc_score, cohen_kappa_score, confusion_matrix

In [14]:
# Notebook-wide configuration.
DATA_PATH = "../MURA-v1.1"  # dataset root, relative to the notebook's working directory
TRAIN_PATH = f"{DATA_PATH}/train_labeled_studies.csv"
VALID_PATH = f"{DATA_PATH}/valid_labeled_studies.csv"

IMG_SIZE = (320, 320)  # default image size for MURADataGenerator
BATCH_SIZE = 8  # default batch size for MURADataGenerator
EPOCHS = 20  # presumably the training epoch budget; not used by the cells visible here


In [15]:
# Load Data
def load_data(csv_path, base_dir="../", sort_files=False):
    """Collect every PNG image path, with its study label, listed by a study CSV.

    The CSV is read without a header row; its first column is taken as a study
    directory and its second column as that study's label. Every ``.png`` file
    found directly inside a study directory inherits the study's label. Files
    whose names start with ``._`` are skipped (such names are typically macOS
    metadata sidecars rather than real images).

    Args:
        csv_path: Path to the header-less, two-column CSV (study path, label).
        base_dir: Directory the study paths from the CSV are joined onto.
            Defaults to ``"../"``. An absolute study path ignores ``base_dir``
            (standard ``os.path.join`` behaviour).
        sort_files: When True, the files of each study are visited in sorted
            name order, making the returned order independent of the
            filesystem. Defaults to False, which keeps the raw ``os.listdir``
            order.
            NOTE(review): a seeded ``train_test_split`` on the result is only
            reproducible if this order is stable. Switching to True changes
            the order and therefore the split, so do it in the training and
            evaluation notebooks together.

    Returns:
        Tuple ``(image_paths, labels)`` of equal-length NumPy arrays, with one
        entry per image.

    Raises:
        OSError: If a study directory listed in the CSV cannot be listed.
    """
    df = pd.read_csv(csv_path, names=['Path', 'Label'], header=None)
    image_paths, labels = [], []
    # Iterating the two columns directly avoids building a Series per row.
    for study_rel_path, label in zip(df["Path"], df["Label"]):
        study_path = os.path.join(base_dir, study_rel_path)
        file_names = os.listdir(study_path)
        if sort_files:
            file_names = sorted(file_names)
        for image_file in file_names:
            if image_file.endswith(".png") and not image_file.startswith("._"):
                image_paths.append(os.path.join(study_path, image_file))
                labels.append(label)
    return np.array(image_paths), np.array(labels)

In [16]:
# Gather image paths and labels for the two CSV-defined splits.
train_image_paths, train_labels = load_data(TRAIN_PATH)
valid_image_paths, valid_labels = load_data(VALID_PATH)

# Hold out a stratified 10% of the training images as an internal test set.
# NOTE(review): the split is per image, so images from the same study can end
# up on both sides of it - confirm that this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    train_image_paths,
    train_labels,
    test_size=0.1,
    stratify=train_labels,
    random_state=42,
)


In [17]:
# Tally how many training images carry each label.
todf = pd.Series(train_labels)
class_counts = todf.value_counts().to_dict()

# A label that never occurs is counted as zero.
NormalCount = class_counts.get(0, 0)  # label 0 (normal)
AbnormalCount = class_counts.get(1, 0)  # label 1 (abnormal)

# w1 and w2 are the shares of label-0 and label-1 images respectively.
w1, w2 = (
    count / (NormalCount + AbnormalCount)
    for count in (NormalCount, AbnormalCount)
)

# Each class is weighted by the *other* class's share, so the rarer class
# receives the larger weight.
class_weights = {0: w2, 1: w1}
print("Class Weights:", class_weights)

Class Weights: {0: 0.40406976744186046, 1: 0.5959302325581395}


In [18]:
# Random augmentation settings; MURADataGenerator uses this object only when
# it is constructed with augment=True.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,  # random left/right mirroring
    rotation_range=20,  # random rotation range, in degrees
    zoom_range=0.2,  # random zoom range
    width_shift_range=0.2,  # random horizontal shift, as a fraction of the width
    height_shift_range=0.2,  # random vertical shift, as a fraction of the height
)

In [19]:
class MURADataGenerator(Sequence):
    """Keras ``Sequence`` that loads radiograph images from disk batch by batch.

    Every item is a tuple ``(X, y, sample_weights)``:

    * ``X`` - float array of shape ``(batch, height, width, 3)``. Each image is
      read as grayscale, resized, divided by 255 and copied into 3 channels.
    * ``y`` - the labels of the batch.
    * ``sample_weights`` - ``class_weights[label]`` for every label in the batch.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned with ``image_paths``.
        class_weights: Mapping from label to sample weight.
        batch_size: Number of images per batch (the last batch may be smaller).
        img_size: Target ``(height, width)`` of the images. For square sizes
            the order makes no difference.
        augment: When True, each image goes through
            ``train_datagen.random_transform``.
        shuffle: When True, the sample order is reshuffled once at construction
            and again at the end of every epoch.
        **kwargs: Forwarded to the ``Sequence`` base constructor.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, class_weights, batch_size=BATCH_SIZE, img_size=IMG_SIZE, augment=False, shuffle=True, **kwargs):
        # Newer Keras versions warn (via `_warn_if_super_not_called`) when the
        # base constructor is skipped, so call it explicitly.
        super().__init__(**kwargs)
        # Arrays (rather than lists/tuples) allow index-based shuffling below.
        self.image_paths = np.asarray(image_paths)
        self.labels = np.asarray(labels)
        if len(self.image_paths) != len(self.labels):
            raise ValueError(
                f"image_paths ({len(self.image_paths)}) and labels "
                f"({len(self.labels)}) must have the same length"
            )
        self.class_weights = class_weights  # label -> sample weight
        self.batch_size = batch_size
        self.img_size = img_size
        self.augment = augment
        self.shuffle = shuffle
        self.datagen = train_datagen if augment else None
        self.on_epoch_end()  # initial shuffle (no-op when shuffle is False)

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y, sample_weights)`` for batch number ``index``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.image_paths[start:stop]
        batch_labels = self.labels[start:stop]

        X, y = self.__data_generation(batch_paths, batch_labels)

        # One weight per sample, looked up from its label.
        sample_weights = np.array([self.class_weights[label] for label in batch_labels])

        return X, y, sample_weights

    def on_epoch_end(self):
        """Reshuffle paths and labels together when shuffling is enabled."""
        if self.shuffle:
            # A shared permutation keeps paths and labels aligned, keeps both
            # as arrays, and also works for an empty dataset.
            order = np.random.permutation(len(self.image_paths))
            self.image_paths = self.image_paths[order]
            self.labels = self.labels[order]

    def __data_generation(self, batch_paths, batch_labels):
        """Load and preprocess the images of one batch.

        Returns:
            Tuple ``(images, labels)`` where ``images`` has shape
            ``(len(batch_paths), height, width, 3)``.

        Raises:
            OSError: If an image cannot be read by ``cv2.imread``.
        """
        height, width = self.img_size
        images = []
        for path in batch_paths:
            img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError(f"Could not read image: {path}")
            # cv2.resize takes (width, height); the resulting array is (height, width).
            img = cv2.resize(img, (width, height))
            img = img / 255.0  # Normalize
            img = np.stack([img] * 3, axis=-1)  # Convert to 3 channels
            if self.augment:
                img = self.datagen.random_transform(img)
            images.append(img)
        # The reshape keeps a well-formed 4-D array even for an empty batch.
        return np.array(images).reshape(-1, height, width, 3), np.array(batch_labels)


In [20]:
# Batch generators for the three splits. Only the training generator augments
# (and, by default, shuffles).
train_generator = MURADataGenerator(
    image_paths=X_train,
    labels=y_train,
    class_weights=class_weights,
    augment=True,
)
# The evaluation generators keep the input order (shuffle=False), so the
# predictions line up with the label arrays passed in here.
valid_generator = MURADataGenerator(
    image_paths=valid_image_paths,
    labels=valid_labels,
    class_weights=class_weights,
    augment=False,
    shuffle=False,
)
test_generator = MURADataGenerator(
    image_paths=X_test,
    labels=y_test,
    class_weights=class_weights,
    augment=False,
    shuffle=False,
)


## Image-level performance

In [28]:
import numpy as np
def test(pred,y_test,th):# Ensure predictions are flattened
    predictions = pred.flatten()
    predictions = (predictions > th).astype(int)
    # Convert to NumPy arrays
    y_test = np.array(y_test).astype(int)
    predictions = np.array(predictions).astype(int)

    # Count correct and incorrect predictions
    true_positives = np.sum((predictions == 1) & (y_test == 1))
    true_negatives = np.sum((predictions == 0) & (y_test == 0))
    false_positives = np.sum((predictions == 1) & (y_test == 0))
    false_negatives = np.sum((predictions == 0) & (y_test == 1))

    # Print results
    print(f"True Positives: {true_positives}")
    print(f"True Negatives: {true_negatives}")
    print(f"False Positives: {false_positives}")
    print(f"False Negatives: {false_negatives}")

    # Calculate accuracy
    accuracy = (true_positives + true_negatives) / len(y_test)
    print(f"Calculated Accuracy: {accuracy * 100:.2f}%")

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)

    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    f1_score = 2 * (precision * recall) / (precision + recall)
    print(f"F1-Score: {f1_score:.2f}")
    from sklearn.metrics import cohen_kappa_score

    # Compute Cohen's Kappa
    kappa = cohen_kappa_score(y_test, predictions)
    print(f"Cohen's Kappa: {kappa:.2f}")


In [22]:
# Model 1: load the saved checkpoint, score the held-out test generator and
# report image-level metrics at a 0.5 threshold.
model = load_model("model_epoch09_val_loss0.2801.h5")
pred1 = model.predict(test_generator)
test(pred1, y_test, th=0.5)

  self._warn_if_super_not_called()
2025-03-24 00:55:26.919781: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 3.79GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2025-03-24 00:55:26.998325: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.33GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2025-03-24 00:55:27.669718: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.27GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2025-03-24 00:55:28.

[1m461/461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 83ms/step
True Positives: 1122
True Negatives: 1841
False Positives: 353
False Negatives: 365
Calculated Accuracy: 80.49%
Precision: 0.76
Recall: 0.75
F1-Score: 0.76
Cohen's Kappa: 0.59


In [23]:
# Model 2: load the saved model, score the held-out test generator and
# report image-level metrics at a 0.5 threshold.
model = load_model("../denseModel5/model2_dense.h5")
pred2 = model.predict(test_generator)
test(pred2, y_test, th=0.5)



[1m461/461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 78ms/step
True Positives: 1198
True Negatives: 1644
False Positives: 550
False Negatives: 289
Calculated Accuracy: 77.21%
Precision: 0.69
Recall: 0.81
F1-Score: 0.74
Cohen's Kappa: 0.54


In [10]:
# Model 3: load the saved checkpoint, score the held-out test generator and
# report image-level metrics at a 0.5 threshold.
model = load_model("resnet101_model_epoch19_val_loss0.2636.h5")
pred3 = model.predict(test_generator)
test(pred3, y_test, th=0.5)

I0000 00:00:1742770052.555883   22133 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4474 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1660, pci bus id: 0000:01:00.0, compute capability: 7.5
  self._warn_if_super_not_called()
I0000 00:00:1742770057.091696   22179 service.cc:148] XLA service 0x7831f4002110 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1742770057.091712   22179 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1660, Compute Capability 7.5
2025-03-24 00:47:37.165305: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1742770057.886121   22179 cuda_dnn.cc:529] Loaded cuDNN version 90701
2025-03-24 00:47:38.423102: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:557] Omitted potentially buggy algorithm eng14{k25=2} for conv

[1m  2/461[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m41s[0m 89ms/step

I0000 00:00:1742770060.442903   22179 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m460/461[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 84ms/step

2025-03-24 00:48:21.930051: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:557] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[1,64,80,80]{3,2,1,0}, u8[0]{0}) custom-call(f32[1,64,80,80]{3,2,1,0}, f32[64,64,3,3]{3,2,1,0}, f32[64]{0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]}
2025-03-24 00:48:22.062402: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:557] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[1,128,40,40]{3,2,1,0}, u8[0]{0}) custom-call(f32[1,128,40,40]{3,2,1,0}, f32[128,128,3,3]{3,2,1,0}, f32[128]{0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivati

[1m461/461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 94ms/step
True Positives: 1021
True Negatives: 1990
False Positives: 204
False Negatives: 466
Calculated Accuracy: 81.80%
Precision: 0.83
Recall: 0.69
F1-Score: 0.75
Cohen's Kappa: 0.61


In [11]:
# Model 4: load the saved model, score the held-out test generator and
# report image-level metrics at a 0.5 threshold.
model = load_model("resnet101_model2.h5")
pred4 = model.predict(test_generator)
test(pred4, y_test, th=0.5)



[1m461/461[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 92ms/step
True Positives: 1185
True Negatives: 1776
False Positives: 418
False Negatives: 302
Calculated Accuracy: 80.44%
Precision: 0.74
Recall: 0.80
F1-Score: 0.77
Cohen's Kappa: 0.60


In [29]:
# Soft-voting ensemble: the unweighted mean of the four models' scores.
ensemble_pred = (pred1 + pred2 + pred3 + pred4) / 4

# Spot-check a single test sample: each model's score, the ensemble score,
# then the true label.
c = 555
for member_scores in (pred1, pred2, pred3, pred4, ensemble_pred):
    print(member_scores[c])
print(y_test[c])

# Image-level metrics of the ensemble at a 0.5 threshold.
test(ensemble_pred, y_test, th=0.5)


[0.6297519]
[0.6769962]
[0.88547075]
[0.8647019]
[0.7642302]
1
True Positives: 1160
True Negatives: 1870
False Positives: 324
False Negatives: 327
Calculated Accuracy: 82.31%
Precision: 0.78
Recall: 0.78
F1-Score: 0.78
Cohen's Kappa: 0.63


In [35]:
# Re-score the same ensemble predictions with a lower decision threshold (0.45).
test(ensemble_pred, y_test, th=0.45)

True Positives: 1200
True Negatives: 1787
False Positives: 407
False Negatives: 287
Calculated Accuracy: 81.15%
Precision: 0.75
Recall: 0.81
F1-Score: 0.78
Cohen's Kappa: 0.61
