In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import pandas as pd
import tensorflow as tf
import math
import tensorflow_addons as tfa
import random
import re
import csv

from tensorflow.keras.models import Model
from sklearn import metrics
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from mlxtend.evaluate import mcnemar_table
from statsmodels.stats.contingency_tables import mcnemar




from utils import (
    F1Score,
    plot_metrics,
    plot_accuracy,
    study_oriented_transformation,
    write_csv,
    prediction_results,
    plot_confusion_matrix,
    plot_contigency_table,
)


 The versions of TensorFlow you are currently using is 2.10.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# To Activate GPU if there is
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


In [3]:
SEED = 1037

os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)


In [4]:
METRICS = [ 
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
    tfa.metrics.CohenKappa(name="cohen_kappa", num_classes=2),
    F1Score(name="f1_score"),
]

STUDY_TYPES = [
    'XR_ELBOW',
    'XR_FINGER',
    'XR_FOREARM',
    'XR_HAND',
    'XR_HUMERUS',
    'XR_SHOULDER',
    'XR_WRIST',
]

CLASSES = ['NORMAL', 'ABNORMAL']

In [5]:
data_directory = 'dataset'
test_img = pd.read_csv(os.path.join(data_directory, 'MURA-v1.1_mod/test_image_paths.csv'), names=['path'])

In [6]:
test_img['label'] = test_img['path'].map(
    lambda x: '1' if 'positive' in x else '0'
)

test_img['study_type'] = test_img['path'].map(
    lambda x: x.split('/')[2]
)

test_img['study'] = test_img['path'].map(
    lambda x: x.split("/")[4]
)

test_img['study_path'] = test_img['path'].map(
    lambda x: re.sub(r"image\d+.png", "", x)
)
test_img

Unnamed: 0,path,label,study_type,study,study_path
0,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST,study1_positive,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...
1,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST,study1_positive,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...
2,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST,study1_positive,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...
3,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST,study1_positive,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...
4,MURA-v1.1_mod/test/XR_WRIST/patient11186/study...,1,XR_WRIST,study1_positive,MURA-v1.1_mod/test/XR_WRIST/patient11186/study...
...,...,...,...,...,...
3192,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...,0,XR_FINGER,study1_negative,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...
3193,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...,0,XR_FINGER,study1_negative,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...
3194,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...,0,XR_FINGER,study1_negative,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...
3195,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...,0,XR_FINGER,study1_negative,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...


In [7]:
img_height = img_width = 380

def resize_img(img):
    try:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    except:
        print('error in resizing')
        img1 = isinstance(img, type(None))
        print('Does image is none: ', img1)
        print(img.shape)
    return cv2.resize(img, (img_height, img_width))

def canny_cropping(img):
    convert_img = np.array(img, dtype=np.uint8)

    gray = cv2.cvtColor(convert_img, cv2.COLOR_RGB2GRAY)


    ave_brightness = math.floor(np.average(gray))
    min_pixel = min(gray.flatten())

    edges = cv2.Canny(gray, min_pixel, ave_brightness)
    cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

    for c in cnts:
        x, y, w, h = cv2.boundingRect(edges)
        gray = gray[y:y+h, x:x+w]
        break

    return gray

def apply_clahe(img):
    clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
    return clahe.apply(img.astype(np.uint8))

def preprocessing_without_clahe(img):
    cropped = canny_cropping(img)
    return resize_img(cropped)

def preprocessing_with_clahe(img):
    cropped = canny_cropping(img)
    clahe = apply_clahe(cropped)
    return resize_img(clahe)


# Evaluation of Model Without CLAHE

In [8]:
batch = 8
data_path = 'dataset/'

test_batches_without_clahe = ImageDataGenerator(
    preprocessing_function=preprocessing_without_clahe
).flow_from_dataframe(
    target_size=(img_height, img_width),
    dataframe=test_img, 
    directory= data_path,
    class_mode='binary',
    x_col='path',
    y_col='label',
    batch_size=batch, 
    shuffle=False)

Found 3197 validated image filenames belonging to 2 classes.


In [9]:
model_without_clahe = tf.keras.models.load_model(
    'models/without_clahe/efficientnetv2-b1_modified_finetuned_v2.h5',
    custom_objects={'F1Score': F1Score}
)

In [10]:
predictions_without_clahe = model_without_clahe.predict(test_batches_without_clahe, verbose=1)



In [11]:
conv_prediction_without_clahe = test_img.copy()

conv_prediction_without_clahe['label'] = conv_prediction_without_clahe['label'].map(int)
conv_prediction_without_clahe['prediction'] = predictions_without_clahe.ravel()

In [12]:
conv_prediction_without_clahe = pd.DataFrame(
    [*study_oriented_transformation(conv_prediction_without_clahe)],
    columns=['study_type', 'study', 'label', 'prediction'],
)
conv_prediction_without_clahe

Unnamed: 0,study_type,study,label,prediction
0,XR_ELBOW,MURA-v1.1_mod/test/XR_ELBOW/patient11186/study...,1,1
1,XR_ELBOW,MURA-v1.1_mod/test/XR_ELBOW/patient11189/study...,1,1
2,XR_ELBOW,MURA-v1.1_mod/test/XR_ELBOW/patient11204/study...,0,0
3,XR_ELBOW,MURA-v1.1_mod/test/XR_ELBOW/patient11205/study...,0,0
4,XR_ELBOW,MURA-v1.1_mod/test/XR_ELBOW/patient11217/study...,0,1
...,...,...,...,...
1194,XR_WRIST,MURA-v1.1_mod/test/XR_WRIST/patient11387/study...,0,0
1195,XR_WRIST,MURA-v1.1_mod/test/XR_WRIST/patient11388/study...,0,0
1196,XR_WRIST,MURA-v1.1_mod/test/XR_WRIST/patient11389/study...,0,0
1197,XR_WRIST,MURA-v1.1_mod/test/XR_WRIST/patient11390/study...,0,0


## Evaluation for all body parts concerned

In [13]:
results_without_clahe = prediction_results(conv_prediction_without_clahe)

write_csv(
    results_without_clahe, 
    'testing_results/without_clahe/general_prediction_efficientnetV2-b1.csv'
)

print('=' * 52)
print('Prediction for all Musculoskeletal radiographs')
print('-----------------')
for result in results_without_clahe[:-1]:
    print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
print('=' * 52)


Prediction for all Musculoskeletal radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.636321494385752
F1 Score                      : 0.7827868852459017
Accuracy                      : 0.823185988323603
Precision                     : 0.8721461187214612
Recall                        : 0.7100371747211895


In [14]:
cm_general_without_clahe = results_without_clahe[-1]['value']
print(cm_general_without_clahe)
plot_confusion_matrix(
    cm_general_without_clahe, 
    CLASSES, 
    'MURA Testing Confusion Matrix for EfficientnetV2-S Without CLAHE',
    False,
)

[[605  56]
 [156 382]]


<Figure size 432x288 with 0 Axes>

## Evaluation per body types

In [20]:
for body_part in STUDY_TYPES:
    parts = conv_prediction_without_clahe[conv_prediction_without_clahe['study_type'] == body_part]
    results = prediction_results(parts)
    write_csv(
        results, 
        'testing_results/without_clahe/{body_part}_efficientnetv2-b1_results.csv'
    )
    parts_cm = results[-1]['value']
    plot_confusion_matrix(
        parts_cm, 
        CLASSES, 
        f'{body_part} Confusion Matrix for Model Without CLAHE effnet-b1', 
        False
    )
    print('=' * 52)
    print(f'Prediction for {body_part} radiographs')
    print('-----------------')
    for result in results[:-1]:
        print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
    print('=' * 52)


Prediction for XR_ELBOW radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6654243604946637
F1 Score                      : 0.7899159663865547
Accuracy                      : 0.8417721518987342
Precision                     : 0.8867924528301887
Recall                        : 0.7121212121212122
Prediction for XR_FINGER radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.5948806137972088
F1 Score                      : 0.7651006711409396
Accuracy                      : 0.8
Precision                     : 0.8636363636363636
Recall                        : 0.6867469879518072
Prediction for XR_FOREARM radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6356996119607397
F1 Score                      : 0.7894736842105263
Accuracy                      : 0.8195488721804511
Precision                     : 0.9
Recall                        : 0.703125
Prediction for XR_HAND radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.45611463930955876
F1 Sc

<Figure size 432x288 with 0 Axes>

# Evaluation of Model with CLAHE

In [21]:
batch = 8
data_path = 'dataset/'

test_batches_with_clahe = ImageDataGenerator(
    preprocessing_function=preprocessing_with_clahe
).flow_from_dataframe(
    target_size=(img_height, img_width),
    dataframe=test_img, 
    directory= data_path,
    class_mode='binary',
    x_col='path',
    y_col='label',
    batch_size=batch, 
    shuffle=False)

Found 3197 validated image filenames belonging to 2 classes.


In [22]:
model_with_clahe = tf.keras.models.load_model(
    'models/with_clahe/efficientnetv2-b1_modified_finetuned_v2.h5',
    custom_objects={'F1Score': F1Score}
)

In [23]:
predictions_with_clahe = model_with_clahe.predict(test_batches_with_clahe, verbose=1)



In [24]:
conv_prediction_with_clahe = test_img.copy()

conv_prediction_with_clahe['label'] = conv_prediction_with_clahe['label'].map(int)
conv_prediction_with_clahe['prediction'] = predictions_with_clahe.ravel()

In [25]:
conv_prediction_with_clahe = pd.DataFrame(
    [*study_oriented_transformation(conv_prediction_with_clahe)],
    columns=['study_type', 'study', 'label', 'prediction'],
)

In [26]:
results_with_clahe = prediction_results(conv_prediction_with_clahe)

write_csv(
    results_with_clahe, 
    'testing_results/with_clahe/general_prediction_efficientnetv2-b1_finetuned_results.csv'
)

print('=' * 52)
print('Prediction for all Musculoskeletal radiographs')
print('-' * 52)
for result in results_with_clahe[:-1]:
    print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
print('=' * 52)


Prediction for all Musculoskeletal radiographs
----------------------------------------------------
Cohen's kappa Coefficient (κ) : 0.6323729998624548
F1 Score                      : 0.7789256198347108
Accuracy                      : 0.8215179316096747
Precision                     : 0.8767441860465116
Recall                        : 0.7007434944237918


In [27]:
cm_general_with_clahe = results_with_clahe[-1]['value']
print(cm_general_with_clahe)
plot_confusion_matrix(
    cm_general_with_clahe, 
    CLASSES, 
    'MURA Testing Confusion Matrix for EfficientnetV2-B1 With CLAHE',
    True,
    )

[[608  53]
 [161 377]]


<Figure size 432x288 with 0 Axes>

In [28]:
for body_part in STUDY_TYPES:
    parts = conv_prediction_with_clahe[conv_prediction_with_clahe['study_type'] == body_part]
    results = prediction_results(parts)
    write_csv(
        results, 
        f'testing_results/with_clahe/{body_part}_prediction_efficientnetv2-b1_results.csv'
    )
    parts_cm = results[-1]['value']
    plot_confusion_matrix(
        parts_cm, 
        CLASSES, 
        f'{body_part} Confusion Matrix for efficientnetv2-B1 With CLAHE', 
        True
    )
    print('=' * 52)
    print(f'Prediction for {body_part} radiographs')
    print('-----------------')
    for result in results[:-1]:
        print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
    print('=' * 52)

Prediction for XR_ELBOW radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6624508631003247
F1 Score                      : 0.782608695652174
Accuracy                      : 0.8417721518987342
Precision                     : 0.9183673469387755
Recall                        : 0.6818181818181818
Prediction for XR_FINGER radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.5378317707645583
F1 Score                      : 0.7368421052631579
Accuracy                      : 0.7714285714285715
Precision                     : 0.8115942028985508
Recall                        : 0.6746987951807228
Prediction for XR_FOREARM radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6814189574540892
F1 Score                      : 0.8173913043478261
Accuracy                      : 0.8421052631578947
Precision                     : 0.9215686274509803
Recall                        : 0.734375
Prediction for XR_HAND radiographs
-----------------
Cohen's kappa Coefficient (κ

<Figure size 432x288 with 0 Axes>

# Compare the two models using McNemar's Test

In [29]:
comparison = mcnemar_table(
    y_target= np.array(conv_prediction_without_clahe['label']),
    y_model1= np.array(conv_prediction_without_clahe['prediction']),
    y_model2= np.array(conv_prediction_with_clahe['prediction']),
)
print(comparison)
plot_contigency_table(
    comparison, 
    ['Correct', 'Wrong'],
    'Contigency Table for Both Models'
)

[[950  37]
 [ 35 177]]


<Figure size 432x288 with 0 Axes>

In [30]:
mncnemar_without_correction = mcnemar(comparison, exact=False)
mncnemar_with_correction = mcnemar(comparison, exact=False, correction=True)

print('=' * 35)
print('McNemar\'s test without correction')
print(mncnemar_without_correction)
print('=' * 35)
print('McNemar\'s test with correction')
print(mncnemar_with_correction)

McNemar's test without correction
pvalue      0.9061856157549283
statistic   0.013888888888888888
McNemar's test with correction
pvalue      0.9061856157549283
statistic   0.013888888888888888
