In [1]:
import glob
import os
import warnings

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log output. It is set
# here, before `import tensorflow`, so that messages emitted while TensorFlow
# is being loaded are filtered too; assigning it after the import is too late
# for those messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix
from skimage import exposure, feature
import cv2

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import colorit

# Silence Python-level warnings for the rest of the notebook.
warnings.filterwarnings("ignore")
# Use float32 as the default Keras float type.
K.set_floatx('float32')

In [2]:
# Root folder of the X-ray image database. In the visible cells it is only
# referenced by the commented-out file-discovery code.
TEST_DIR = "../../InternsAssignment/xraysdatabase/" # 0-class-1root/"
# Folder searched for trained model checkpoints (*.hdf5).
MODEL_DIR = "./"
# Size the images are resized to; passed as `target_size` to the data generator.
IMG_SHAPE = (230, 130)
# Number of cross-validation folds. Not referenced in the visible cells;
# presumably mirrors the training setup (TODO confirm).
NFOLDS = 5
# Batch size used by the test data generator.
BATCH_SIZE = 32

In [3]:
# List the model checkpoints in MODEL_DIR (same *.hdf5 pattern the prediction cell globs).
!ls -lh {MODEL_DIR}*.hdf5

-rw-r--r-- 1 jupyter jupyter 969K Nov 19 09:37 ./fold_0_best_model.hdf5


In [4]:
# files = glob.glob(f'{TEST_DIR}/*/*.*')
# datafiles_df = pd.DataFrame(files, columns=["filename"])
# datafiles_df["class"] = [file.split("/")[-2] for file in files]
# datafiles_df["class"].value_counts()

In [5]:
# Load the test-set manifest; the first CSV column becomes the index.
# Later cells read its "class" column. The generator presumably relies on the
# Keras default "filename"/"class" column names (the ones the commented-out
# cell above would build) — TODO confirm against test.csv.
datafiles_df = pd.read_csv("./test.csv", index_col=0)

In [6]:
def equalize_hist(img):
    """Histogram-equalize ``img`` with scikit-image using 150 bins.

    Thin wrapper around ``skimage.exposure.equalize_hist``; its result is
    returned as-is.
    """
    return exposure.equalize_hist(img, nbins=150)

def adp_thresh(img, block_size=131, c=0):
    """Binarize the first channel of ``img`` with Gaussian adaptive thresholding.

    Parameters
    ----------
    img : array indexable as ``img[:, :, 0]``
        The first channel is multiplied by 255 and cast to ``uint8``.
        Presumably expected to lie in [0, 1] (it is fed the output of
        ``equalize_hist``); values outside that range would wrap on the cast.
    block_size : int, default 131
        Size of the pixel neighbourhood handed to ``cv2.adaptiveThreshold``
        (OpenCV expects an odd value greater than 1).
    c : float, default 0
        Constant subtracted from the Gaussian-weighted neighbourhood mean.

    Returns
    -------
    The 2-D ``uint8`` result of ``cv2.adaptiveThreshold`` with
    ``THRESH_BINARY`` and a maximum value of 255.
    """
    channel = (img[:, :, 0] * 255).astype(np.uint8)
    return cv2.adaptiveThreshold(
        channel,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        c,
    )

def preprocessing_function(img):
    """Per-image preprocessing hook handed to ``ImageDataGenerator``.

    Applies histogram equalization, then adaptive thresholding, converts the
    result to float32 divided by 255, and re-adds a channel axis at position 2.
    """
    equalized = equalize_hist(img)
    binary = adp_thresh(equalized).astype("float32")
    scaled = binary * 1.0 / 255.0
    return np.expand_dims(scaled, axis=2)

# Test-time image pipeline: no augmentation, only the custom preprocessing
# function plus a 1/255 rescale.
# NOTE(review): per the Keras docs `rescale` is applied after the other
# transformations, i.e. on top of preprocessing_function's output, which is
# already divided by 255. Presumably this mirrors the training pipeline —
# confirm before changing either step.
test_datagen = ImageDataGenerator(rescale=1./255, 
                                  preprocessing_function=preprocessing_function)

# Batches are read from the rows of datafiles_df as single-channel images
# resized to IMG_SHAPE, with one-hot ("categorical") labels.
generator = test_datagen.flow_from_dataframe(
                datafiles_df,
#                 directory="../",
                # shuffle=False keeps batches in dataframe order, so the
                # predictions line up with the rows of datafiles_df.
                shuffle=False,
                target_size=IMG_SHAPE,
                batch_size=BATCH_SIZE,
                color_mode="grayscale",
                class_mode='categorical')

Found 23 validated image filenames belonging to 2 classes.


In [39]:
# Training history; the first CSV column becomes the index.
learning_history_df = pd.read_csv("learning_history_df.csv", index_col=0)
# Lowest validation loss reached by each (seed, fold) run, as a plain list.
# The groupby's own `min` reduction is used instead of passing the Python
# builtin `min` to `agg`, which recent pandas versions deprecate.
losses = learning_history_df.groupby(["seed", "fold"])["val_loss"].min().tolist()

In [48]:
# Ensemble prediction: average the class probabilities of every saved model.

# sorted() makes the model order deterministic (glob order is filesystem-dependent).
model_files = sorted(glob.glob(f"{MODEL_DIR}*.hdf5"))
if not model_files:
    # Without this guard `pred` would stay all-zero and the evaluation cells
    # would silently report metrics for a constant class-0 prediction.
    raise FileNotFoundError(f"No .hdf5 model files found under {MODEL_DIR!r}")

# One row per test sample, one column per class (two classes here).
pred = np.zeros((len(datafiles_df), 2))

with tf.device('/device:cpu:0'):
    # Iterate over the model files directly. Zipping them with another list
    # would silently cap the number of models that run while the average below
    # still divides by len(model_files).
    for model_file in model_files:
        model = keras.models.load_model(model_file)

        # The generator already yields batches; Keras documents that
        # `batch_size` should not be passed for generator/Sequence inputs.
        pred_ = model.predict(generator)
        pred += pred_ / len(model_files)
        



# Evaluation on Test Dataset

In [49]:
# One-hot encode the ground-truth classes; column 1 serves as the binary label.
test = pd.get_dummies(datafiles_df["class"]).to_numpy()
test_y = test[:, 1]
# Predicted class = index of the highest averaged score per sample.
pred_y = np.argmax(pred, axis=1)

# Display names for label 0 and label 1, in that order.
labs = ["1 root", "multi-root"]
cr = classification_report(y_true=test_y, y_pred=pred_y, target_names=labs)
print(cr)

              precision    recall  f1-score   support

      1 root       1.00      0.92      0.96        13
  multi-root       0.91      1.00      0.95        10

    accuracy                           0.96        23
   macro avg       0.95      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [50]:
# Summary line: accuracy (rounded to 2 decimals, plus the raw correct/total
# counts) and the mean categorical cross-entropy between the one-hot labels
# `test` and the averaged predictions `pred` (rounded to 4 decimals).
# colorit.color_back is called with the RGB triple 255, 255, 0 (yellow).
correct_classified = (test_y == pred_y).sum()
total_samples = test_y.shape[0]
print(colorit.color_back(
    f"Accuracy: {np.round(correct_classified/total_samples, 2)}, {correct_classified}/{total_samples}\n" \
    f"Crossentropy {np.round(keras.metrics.categorical_crossentropy(test, pred).numpy().mean(), 4)}",
    255,255,0))


[48;2;255;255;0mAccuracy: 0.96, 22/23
Crossentropy 0.1343[0m


In [51]:
# Confusion matrix with labelled axes: rows are the true classes, columns are
# the predicted classes.
# labels=[0, 1] pins the matrix to 2x2 even when one class is absent from both
# test_y and pred_y, so its shape always matches the two names in `labs`.
# The row-level label is spelled "true" (it was previously misspelled "ture").
cm = pd.DataFrame(confusion_matrix(test_y, pred_y, labels=[0, 1]),
             index=zip(["true"]*len(labs), labs),
             columns=zip(["pred"]*len(labs), labs))
cm

Unnamed: 0,"(pred, 1 root)","(pred, multi-root)"
"(ture, 1 root)",12,1
"(ture, multi-root)",0,10


In [15]:
# keras.utils.plot_model(model, show_shapes=True)