In [31]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import pandas as pd
import tensorflow as tf
import math
import tensorflow_addons as tfa
import random
import re
import csv


from sklearn import metrics
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from mlxtend.evaluate import mcnemar_table
from statsmodels.stats.contingency_tables import mcnemar




from utils import (
    F1Score,
    plot_metrics,
    plot_accuracy,
    study_oriented_transformation,
    write_csv,
    prediction_results,
    plot_confusion_matrix,
    plot_contigency_table,
)


In [32]:
# To Activate GPU if there is
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Num GPUs Available:  1


In [33]:
SEED = 1037

os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)
np.random.seed(SEED)


In [34]:
METRICS = [ 
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
    tfa.metrics.CohenKappa(name="cohen_kappa", num_classes=2),
    F1Score(name="f1_score"),
]

STUDY_TYPES = [
    'XR_ELBOW',
    'XR_FINGER',
    'XR_FOREARM',
    'XR_HAND',
    'XR_HUMERUS',
    'XR_SHOULDER',
    'XR_WRIST',
]

CLASSES = ['NORMAL', 'ABNORMAL']

In [35]:
data_directory = 'dataset'
test_img = pd.read_csv(os.path.join(data_directory, 'MURA-v1.1_mod/test_image_paths.csv'), names=['path'])

In [36]:
test_img['label'] = test_img['path'].map(
    lambda x: '1' if 'positive' in x else '0'
)

test_img['study_type'] = test_img['path'].map(
    lambda x: x.split('/')[2]
)

test_img

Unnamed: 0,path,label,study_type
0,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST
1,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST
2,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST
3,MURA-v1.1_mod/test/XR_WRIST/patient11185/study...,1,XR_WRIST
4,MURA-v1.1_mod/test/XR_WRIST/patient11186/study...,1,XR_WRIST
...,...,...,...
3192,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...,0,XR_FINGER
3193,MURA-v1.1_mod/test/XR_FINGER/patient11967/stud...,0,XR_FINGER
3194,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...,0,XR_FINGER
3195,MURA-v1.1_mod/test/XR_FINGER/patient11738/stud...,0,XR_FINGER


In [37]:
img_height = img_width = 300

def resize_img(img):
    try:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    except:
        print('error in resizing')
        img1 = isinstance(img, type(None))
        print('Does image is none: ', img1)
        print(img.shape)
    return cv2.resize(img, (img_height, img_width))

def canny_cropping(img):
    convert_img = np.array(img, dtype=np.uint8)

    gray = cv2.cvtColor(convert_img, cv2.COLOR_RGB2GRAY)


    ave_brightness = math.floor(np.average(gray))
    min_pixel = min(gray.flatten())

    edges = cv2.Canny(gray, min_pixel, ave_brightness)
    cnts = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

    for c in cnts:
        x, y, w, h = cv2.boundingRect(edges)
        gray = gray[y:y+h, x:x+w]
        break

    return gray

def apply_clahe(img):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return clahe.apply(img.astype(np.uint8))

def preprocessing_without_clahe(img):
    cropped = canny_cropping(img)
    return resize_img(cropped)

def preprocessing_with_clahe(img):
    cropped = canny_cropping(img)
    clahe = apply_clahe(cropped)
    return resize_img(clahe)


# Evaluation of Model Without CLAHE

In [38]:
batch = 8
data_path = 'dataset/'

test_batches_without_clahe = ImageDataGenerator(
    preprocessing_function=preprocessing_without_clahe
).flow_from_dataframe(
    target_size=(img_height, img_width),
    dataframe=test_img, 
    directory= data_path,
    class_mode='binary',
    x_col='path',
    y_col='label',
    batch_size=batch, 
    shuffle=False)

Found 3197 validated image filenames belonging to 2 classes.


In [39]:
model_without_clahe = tf.keras.models.load_model(
    'models/without_clahe/efficientnetv2-b1_modified_finetuned_v2.h5',
    custom_objects={'F1Score': F1Score}
)

In [40]:
eval_without_clahe = model_without_clahe.evaluate(
    test_batches_without_clahe, 
    verbose = 1
)




In [41]:
eval_results = []

loss = eval_without_clahe.pop(0)

eval_results.append({'metric': 'test loss', 'value': loss})

# print('=' * 32)
# print(f'test loss   : {loss}')

for metric, score in zip(METRICS, eval_without_clahe):
    eval_results.append({'metric': metric.name, 'value': score})
    # print(f"{metric.name + ' ' * (12 - len(metric.name))}: {score}")

write_csv(
    eval_results, 
    'testing_results/without_clahe/evaluation_results.csv'
)

print('=' * 32)
for result in eval_results:
    print(f"{result['metric'] + ' ' * (12 - len(result['metric']))}: {result['value']}")
print('=' * 32)


test loss   : 0.45068061351776123
binary_accuracy: 0.8088833093643188
precision   : 0.8696701526641846
recall      : 0.7065359354019165
cohen_kappa : 0.6140888929367065
f1_score    : 0.7796609997749329


In [42]:
predictions_without_clahe = model_without_clahe.predict(test_batches_without_clahe, verbose=1)



In [43]:
conv_prediction_without_clahe = test_img.copy()

conv_prediction_without_clahe['label'] = conv_prediction_without_clahe['label'].map(int)
conv_prediction_without_clahe['prediction'] = predictions_without_clahe.ravel()

In [44]:
conv_prediction_without_clahe = pd.DataFrame(
    [*study_oriented_transformation(conv_prediction_without_clahe)],
    columns=['study_type', 'study', 'label', 'prediction'],
)

## Evaluation for all body parts concerned

In [45]:
results_without_clahe = prediction_results(conv_prediction_without_clahe)

write_csv(
    results_without_clahe, 
    'testing_results/without_clahe/general_prediction_results.csv'
)

print('=' * 52)
print('Prediction for all Musculoskeletal radiographs')
print('-----------------')
for result in results_without_clahe[:-1]:
    print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
print('=' * 52)


Prediction for all Musculoskeletal radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6140888211294978
F1 Score                      : 0.7796610169491526
Accuracy                      : 0.8088833281201127
Precision                     : 0.8696701528559936
Recall                        : 0.7065359477124183


In [46]:
cm_general_without_clahe = results_without_clahe[-1]['value']
print(cm_general_without_clahe)
plot_confusion_matrix(
    cm_general_without_clahe, 
    CLASSES, 
    'Confusion Matrix for all Musculoskeletal Parts',
    False,
)

[[1505  162]
 [ 449 1081]]


<Figure size 432x288 with 0 Axes>

## Evaluation per body types

In [47]:
for body_part in STUDY_TYPES:
    parts = conv_prediction_without_clahe[conv_prediction_without_clahe['study_type'] == body_part]
    results = prediction_results(parts)
    write_csv(
        results_without_clahe, 
        f'testing_results/without_clahe/{body_part}_efficientnetv2-b1.csv'
    )
    parts_cm = results[-1]['value']
    plot_confusion_matrix(
        parts_cm, 
        CLASSES, 
        f'Confusion Matrix for {body_part}', 
        False
    )
    print('=' * 52)
    print(f'Prediction for {body_part} radiographs')
    print('-----------------')
    for result in results[:-1]:
        print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
    print('=' * 52)


Prediction for XR_ELBOW radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.7199703525269838
F1 Score                      : 0.8477751756440282
Accuracy                      : 0.8602150537634409
Precision                     : 0.9187817258883249
Recall                        : 0.7869565217391304
Prediction for XR_FINGER radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.5031496794574004
F1 Score                      : 0.7339449541284404
Accuracy                      : 0.7483731019522777
Precision                     : 0.8465608465608465
Recall                        : 0.6477732793522267
Prediction for XR_FOREARM radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6348134912756711
F1 Score                      : 0.7955390334572491
Accuracy                      : 0.8172757475083057
Precision                     : 0.9067796610169492
Recall                        : 0.7086092715231788
Prediction for XR_HAND radiographs
-----------------
Cohen's kappa Coe

<Figure size 432x288 with 0 Axes>

# Evaluation of Model with CLAHE

In [48]:
batch = 8
data_path = 'dataset/'

test_batches_with_clahe = ImageDataGenerator(
    preprocessing_function=preprocessing_with_clahe
).flow_from_dataframe(
    target_size=(img_height, img_width),
    dataframe=test_img, 
    directory= data_path,
    class_mode='binary',
    x_col='path',
    y_col='label',
    batch_size=batch, 
    shuffle=False)

Found 3197 validated image filenames belonging to 2 classes.


In [49]:
model_with_clahe = tf.keras.models.load_model(
    'models/with_clahe/efficientnetv2-b1_modified_finetuned_v2.h5',
    custom_objects={'F1Score': F1Score}
)

In [50]:
eval_with_clahe = model_with_clahe.evaluate(
    test_batches_with_clahe, 
    verbose = 1
)



In [51]:
with_clahe_eval_results = []

loss = eval_with_clahe.pop(0)

with_clahe_eval_results.append({'metric': 'test loss', 'value': loss})


for metric, score in zip(METRICS, eval_with_clahe):
    with_clahe_eval_results.append({'metric': metric.name, 'value': score})

write_csv(
    with_clahe_eval_results, 
    'testing_results/with_clahe/evaluation_results.csv'
)

print('=' * 32)
for result in with_clahe_eval_results:
    print(f"{result['metric'] + ' ' * (12 - len(result['metric']))}: {result['value']}")
print('=' * 32)


test loss   : 0.47785472869873047
binary_accuracy: 0.8076321482658386
precision   : 0.8639618158340454
recall      : 0.7098039388656616
cohen_kappa : 0.6117095947265625
f1_score    : 0.7793326377868652


In [52]:
predictions_with_clahe = model_with_clahe.predict(test_batches_with_clahe, verbose=1)



In [53]:
conv_prediction_with_clahe = test_img.copy()

conv_prediction_with_clahe['label'] = conv_prediction_with_clahe['label'].map(int)
conv_prediction_with_clahe['prediction'] = predictions_with_clahe.ravel()

In [54]:
conv_prediction_with_clahe = pd.DataFrame(
    [*study_oriented_transformation(conv_prediction_with_clahe)],
    columns=['study_type', 'study', 'label', 'prediction'],
)

In [55]:
results_with_clahe = prediction_results(conv_prediction_with_clahe)

write_csv(
    results_with_clahe, 
    'testing_results/with_clahe/general_prediction_results.csv'
)

print('=' * 52)
print('Prediction for all Musculoskeletal radiographs')
print('-----------------')
for result in results_with_clahe[:-1]:
    print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
print('=' * 52)


Prediction for all Musculoskeletal radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.6117095302786406
F1 Score                      : 0.7793326157158235
Accuracy                      : 0.8076321551454488
Precision                     : 0.863961813842482
Recall                        : 0.7098039215686275


In [56]:
cm_general_with_clahe = results_with_clahe[-1]['value']
print(cm_general_with_clahe)
plot_confusion_matrix(
    cm_general_with_clahe, 
    CLASSES, 
    'Confusion Matrix for all Musculoskeletal Parts',
    True,
    )

[[1496  171]
 [ 444 1086]]


<Figure size 432x288 with 0 Axes>

In [57]:
for body_part in STUDY_TYPES:
    parts = conv_prediction_with_clahe[conv_prediction_with_clahe['study_type'] == body_part]
    results = prediction_results(parts)
    write_csv(
        results_with_clahe, 
        f'testing_results/with_clahe/{body_part}_efficientnetv2-b1.csv'
    )
    parts_cm = results[-1]['value']
    plot_confusion_matrix(
        parts_cm, 
        CLASSES, 
        f'Confusion Matrix for {body_part}', 
        True
    )
    print('=' * 52)
    print(f'Prediction for {body_part} radiographs')
    print('-----------------')
    for result in results[:-1]:
        print(f"{result['metric'] + ' ' * (30 - len(result['metric']))}: {result['value']}")
    print('=' * 52)

Prediction for XR_ELBOW radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.7027102154273801
F1 Score                      : 0.8376470588235294
Accuracy                      : 0.8516129032258064
Precision                     : 0.9128205128205128
Recall                        : 0.7739130434782608
Prediction for XR_FINGER radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.5802836466712346
F1 Score                      : 0.7931769722814498
Accuracy                      : 0.789587852494577
Precision                     : 0.8378378378378378
Recall                        : 0.7530364372469636
Prediction for XR_FOREARM radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0.601553192428187
F1 Score                      : 0.782608695652174
Accuracy                      : 0.8006644518272426
Precision                     : 0.864
Recall                        : 0.7152317880794702
Prediction for XR_HAND radiographs
-----------------
Cohen's kappa Coefficient (κ) : 0

<Figure size 432x288 with 0 Axes>

# Compare the two models using McNemar's Test

In [58]:
comparison = mcnemar_table(
    y_target= np.array(conv_prediction_without_clahe['label']),
    y_model1= np.array(conv_prediction_without_clahe['prediction']),
    y_model2= np.array(conv_prediction_with_clahe['prediction']),
)
print(comparison)
plot_contigency_table(
    comparison, 
    ['Correct', 'Wrong'],
    'Contigency Table for Both Models'
)

[[2433  153]
 [ 149  462]]


<Figure size 432x288 with 0 Axes>

In [59]:
mncnemar_without_correction = mcnemar(comparison, exact=False)
mncnemar_with_correction = mcnemar(comparison, exact=False, correction=True)

print('=' * 35)
print('McNemar\'s test without correction')
print(mncnemar_without_correction)
print('=' * 35)
print('McNemar\'s test with correction')
print(mncnemar_with_correction)

McNemar's test without correction
pvalue      0.8629417966554445
statistic   0.029801324503311258
McNemar's test with correction
pvalue      0.8629417966554445
statistic   0.029801324503311258
