# A Notebook for evaluating trained models on data

The evaluation is done by loading a data set, then applying pre-trained models and visualizing the resulting classes.

The loading, registering, and conversion to an observation array take the most time. For each data set this is done once, and then the results of each model are evaluated. This approach will only work when all models use the same atlas and reference image parameters.

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Directory that is globbed for the TB Portals DICOM files.
TBP_DCM_PATH="/scratch/tb/cxr/"
INDIANA_DCM_PATH="/scratch/tb/NLM_Indiana_CXRs/cxr/" # Indiana University/NLM CXRs (https://openi.nlm.nih.gov)
NIH_CXR_PATH="/scratch/tb/NIH_CXR/images_001/images" # NIH CXRs (https://www.kaggle.com/nih-chest-xrays/data)
# Number of files randomly drawn from each data set.
NUMBER_SAMPLES=500
# Seed passed to random.seed() before each random draw.
RANDOM_SEED=13

# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
SAMPLE_IMAGE_SIZE=64

In [None]:
from glob import glob
import random
import functools
import multiprocessing

from functools import partial
import SimpleITK as sitk

import pandas

import numpy as np
from numpy import linalg as LA


import matplotlib.pyplot as plt

from myshow import myshow

import pickle
import os

In [None]:
from importlib import reload  
import tbpcxr.model
from tbpcxr.model import PCAModel, Model
# Re-execute the module so that edits made to tbpcxr.model while the kernel is
# running are picked up without restarting.
reload(tbpcxr.model)
# Names bound with "from ... import" still point at the pre-reload classes, so
# rebind them to the freshly loaded definitions.
PCAModel = tbpcxr.model.PCAModel
Model = tbpcxr.model.Model

from tbpcxr.utilities import read_dcm, normalize_img
# NOTE(review): tbpcxr.registration is not referenced directly by the visible
# cells -- presumably imported for its side effects or interactive use; confirm.
import tbpcxr.registration 

In [None]:
def tile_with_classification(image_list, class_list, width=10):
    """
    Tile images into one color image in which outliers are tinted red.

    Every image is rescaled to the 0-255 intensity range, cast to 8-bit and
    composed into a three-channel image. Images with a negative class value
    keep the full intensity in the first channel and half intensity in the
    other two; all other images get the same intensity in all three channels.

    :param image_list: iterable of SimpleITK images to tile.
    :param class_list: iterable of class values paired with image_list by
        position; a value below zero marks the image as an outlier.
    :param width: first element of the layout handed to sitk.Tile.
    :return: the tiled SimpleITK image.
    """

    def _colorize(image, label):
        # Bring the image into a displayable 8-bit gray range first.
        gray = sitk.Cast(
            sitk.RescaleIntensity(image, outputMinimum=0, outputMaximum=255),
            sitk.sitkUInt8,
        )

        if label < 0:
            # Outlier: halve the second and third channels.
            dimmed = gray // 2
            return sitk.Compose(gray, dimmed, dimmed)

        return sitk.Compose(gray, gray, gray)

    colored = [_colorize(image, label) for image, label in zip(image_list, class_list)]
    return sitk.Tile(colored, [width, 0])



In [None]:
# Paths of the pickled models (in the "models" subdirectory) to evaluate.
model_name_list = [
    os.path.join("models", name + ".pkl")
    for name in ["pca-2-10c", "pca-2-07c", "pca-2-06c", "pca-2-05c"]
]

# The first model is loaded up front; it is the one used to convert each data
# set into an observation array.
model_name = model_name_list[0]
cxr_model = Model.load_model(model_name)

In [None]:
# List the attributes and methods available on the loaded model.
print(dir(cxr_model))
# Disabled one-off migration, kept for reference; change the condition to run it.
if False:
    # Conversion from v0.1 class to new ABC
    # Move the reference image to the new attribute name and set the crop value.
    cxr_model.image_reference = cxr_model.image_ref
    cxr_model.reference_crop = 6
    del cxr_model.image_ref

    # Write the converted model back out as a pickle file.
    pkl_filename = "pca-002.pkl"
    with open(pkl_filename, 'wb') as file:
        pickle.dump(cxr_model, file)

# TB Portals Data Set

In [None]:
# glob() returns files in arbitrary order, so sort the list to make the seeded
# random draw below select the same files on every run and every machine.
# NOTE: without recursive=True the "**" in the pattern behaves like "*".
tbp_file_list = sorted(glob(TBP_DCM_PATH+"/**.dcm"))

print( "Found {0} DICOM.".format(len(tbp_file_list)))


# Draw a fixed-size random sample of the files.
random.seed(RANDOM_SEED)
tbp_sample_list = random.sample(tbp_file_list, NUMBER_SAMPLES)
# Read the sampled files and convert them into the model's observation array;
# this array is reused for every model evaluated on this data set.
tbp_arr = cxr_model.to_observations(map(read_dcm, tbp_sample_list))



In [None]:
# Tile the sampled TB Portals images with every class set to 1 (none marked as
# an outlier), display the result and save it as a PNG.
img = tile_with_classification(
    [normalize_img(read_dcm(path), sample_size=128) for path in tbp_sample_list],
    [1 for _ in tbp_sample_list],
    width=25,
)
myshow(img)
sitk.WriteImage(img, "tbp-input.png")

In [None]:
# Evaluate every model on the TB Portals observations and save a tiled image
# in which the predicted outliers are tinted red.
for model_name in model_name_list:

    # model_name_list holds complete "models/<name>.pkl" paths, so only add the
    # extension when it is missing instead of loading "<name>.pkl.pkl".
    model_path = model_name if model_name.endswith(".pkl") else model_name + ".pkl"
    # Bare model name (no directory, no extension) for titles and output files.
    model_label = os.path.splitext(os.path.basename(model_path))[0]

    cxr_model = Model.load_model(model_path)

    outlier_results = cxr_model.outlier_predictor(tbp_arr)

    # Scale the outlier fraction by 100 so the printed value really is a percentage.
    outlier_percent = 100.0 * np.count_nonzero(outlier_results == -1) / len(outlier_results)
    print("Outlier {:.1f}% for model {}".format(outlier_percent, model_label))

    # Alternative view: tile cxr_model._arr_to_images(tbp_arr) to show the
    # registered observations instead of the normalized inputs.
    img = tile_with_classification([normalize_img(read_dcm(fn), sample_size=128) for fn in tbp_sample_list], outlier_results, 25)
    sitk.WriteImage(img, "tbp-" + model_label + ".png")

    myshow(img, title=model_label)


In [None]:
# Inspect a single TB Portals sample with the most recently loaded model.
list_idx=25
filename = tbp_sample_list[list_idx]
img = read_dcm(filename)

myshow(normalize_img(img, sample_size=256))
rimg = cxr_model.register_to_atlas_and_resample(normalize_img(img))
# Print the file that was actually read; it comes from the sample list, so
# indexing the full file list here would report an unrelated file.
print(filename)
myshow(rimg)
# Convert the single registered image into an observation array for the model.
arr = cxr_model._images_to_arr([rimg])

print("PCA RMS image residuals: {}".format(cxr_model.residuals(arr)))
print("PCA mahalanobis: {}".format(cxr_model.robust_distance(arr)))
print(cxr_model.outlier_predictor(arr))

In [None]:
# glob() returns files in arbitrary order, so sort the list to make the seeded
# random draw below select the same files on every run and every machine.
indiana_file_list = sorted(glob(INDIANA_DCM_PATH+"/**/*.dcm", recursive=True))
print( "Found {0} DICOM.".format(len(indiana_file_list)))


# Draw a fixed-size random sample of the files.
random.seed(RANDOM_SEED)
indiana_sample_list = random.sample(indiana_file_list, NUMBER_SAMPLES)
# Read the sampled files and convert them into the model's observation array;
# this array is reused for every model evaluated on this data set.
indiana_arr = cxr_model.to_observations(map(read_dcm, indiana_sample_list))


In [None]:
# Evaluate every model on the Indiana observations and save a tiled image in
# which the predicted outliers are tinted red.
for model_name in model_name_list:

    # model_name_list holds complete "models/<name>.pkl" paths, so only add the
    # extension when it is missing instead of loading "<name>.pkl.pkl".
    model_path = model_name if model_name.endswith(".pkl") else model_name + ".pkl"
    # Bare model name (no directory, no extension) for titles and output files.
    model_label = os.path.splitext(os.path.basename(model_path))[0]

    cxr_model = Model.load_model(model_path)

    outlier_results = cxr_model.outlier_predictor(indiana_arr)

    # Scale the outlier fraction by 100 so the printed value really is a percentage.
    outlier_percent = 100.0 * np.count_nonzero(outlier_results == -1) / len(outlier_results)
    print("Outlier {:.1f}% for model {}".format(outlier_percent, model_label))

    # Alternative view: tile cxr_model._arr_to_images(indiana_arr) to show the
    # registered observations instead of the normalized inputs.
    img = tile_with_classification([normalize_img(read_dcm(fn), sample_size=128) for fn in indiana_sample_list], outlier_results, 25)
    sitk.WriteImage(img, "indiana-" + model_label + ".png")

    myshow(img, title=model_label)


# NIH CXR Data Set

In [None]:
# glob() returns files in arbitrary order, so sort the list to make the seeded
# random draw below (and any positional indexing into this list) select the
# same files on every run and every machine.
nih_file_list = sorted(glob(NIH_CXR_PATH+"/**/*.png", recursive=True))
print( "Found {0} PNG.".format(len(nih_file_list)))


# Draw a fixed-size random sample of the files.
random.seed(RANDOM_SEED)
nih_sample_list = random.sample(nih_file_list, NUMBER_SAMPLES)
# Read each sampled PNG as a 32-bit float image and convert the images into the
# model's observation array, reused for every model evaluated on this data set.
nih_arr = cxr_model.to_observations(map(lambda fn: sitk.ReadImage(fn, sitk.sitkFloat32), nih_sample_list))


In [None]:
# Evaluate every model on the NIH observations and save a tiled image in which
# the predicted outliers are tinted red.
for model_name in model_name_list:

    # model_name_list holds complete "models/<name>.pkl" paths, so only add the
    # extension when it is missing instead of loading "<name>.pkl.pkl".
    model_path = model_name if model_name.endswith(".pkl") else model_name + ".pkl"
    # Bare model name (no directory, no extension) for titles and output files.
    model_label = os.path.splitext(os.path.basename(model_path))[0]

    cxr_model = Model.load_model(model_path)

    outlier_results = cxr_model.outlier_predictor(nih_arr)

    # Scale the outlier fraction by 100 so the printed value really is a percentage.
    outlier_percent = 100.0 * np.count_nonzero(outlier_results == -1) / len(outlier_results)
    print("Outlier {:.1f}% for model {}".format(outlier_percent, model_label))

    # Alternative view: tile cxr_model._arr_to_images(nih_arr) to show the
    # registered observations instead of the normalized inputs.
    img = tile_with_classification([normalize_img(sitk.ReadImage(fn, sitk.sitkFloat32), sample_size=128) for fn in nih_sample_list], outlier_results, 25)
    sitk.WriteImage(img, "nih-" + model_label + ".png")

    myshow(img, title=model_label)
    

In [None]:
# Display one NIH image at full resolution.
# NOTE(review): this indexes the complete file list, not nih_sample_list, so it
# does not correspond to position 91 in the tiled results -- confirm intent.
myshow(sitk.ReadImage(nih_file_list[91]))

# Natural Images

These images are not medical and should not look anything like a CXR image.

In [None]:
# Collect the natural (non-medical) test images.
image_list = glob("images/*")
# Build the observation array first: it is needed by the second display below,
# and using it before assignment raises a NameError on a fresh kernel.
image_arr = cxr_model.to_observations(map(lambda fn: sitk.ReadImage(fn, sitk.sitkFloat32), image_list))

# Show the normalized inputs, then the images reconstructed from the
# observation array, each tiled five per row.
sitk.Show(sitk.Tile([normalize_img(sitk.ReadImage(fn,sitk.sitkFloat32), sample_size=128) for fn in image_list], [5,0]))
sitk.Show(sitk.Tile(cxr_model._arr_to_images(image_arr), [5,0]))


In [None]:
# Evaluate every model on the natural images and save a tiled image in which
# the predicted outliers are tinted red.
for model_name in model_name_list:

    # model_name_list holds complete "models/<name>.pkl" paths, so only add the
    # extension when it is missing instead of loading "<name>.pkl.pkl".
    model_path = model_name if model_name.endswith(".pkl") else model_name + ".pkl"
    # Bare model name (no directory, no extension) for titles and output files.
    model_label = os.path.splitext(os.path.basename(model_path))[0]

    cxr_model = Model.load_model(model_path)

    outlier_results = cxr_model.outlier_predictor(image_arr)
    img = tile_with_classification([normalize_img(sitk.ReadImage(fn,sitk.sitkFloat32), sample_size=128) for fn in image_list], outlier_results, 5)
    sitk.WriteImage(img, "image-" + model_label + ".png")

    myshow(img, title=model_label)

# CXR Images Flipped and Rotated

In [None]:
def permute_image(img):
    """
    Return eight variants of a 2D image built from axis flips and a transpose.

    The first four entries are the input image followed by its flips along the
    second axis, the first axis, and both axes. The last four are the same
    sequence applied to the image with its two axes swapped.

    :param img: a SimpleITK image.
    :return: list of eight SimpleITK images.
    """
    flip_settings = ([False, True], [True, False], [True, True])

    def _with_flips(base):
        # The unflipped image comes first, then one flip per setting.
        return [base] + [sitk.Flip(base, axes) for axes in flip_settings]

    variants = _with_flips(img)
    variants.extend(_with_flips(sitk.PermuteAxes(img, [1, 0])))
    return variants

# Build the flipped/transposed variants for every 23rd NIH file from index 1
# up to (but excluding) index 100.
permuted_images = []
for fn in nih_file_list[1:100:23]:
    permuted_images.extend(permute_image(sitk.ReadImage(fn, sitk.sitkFloat32)))

# Eight tiles per row puts all variants of one source image on the same row.
myshow(
    sitk.Tile(
        [normalize_img(variant, 128) for variant in permuted_images],
        [8, 0],
    )
)

# Observation array shared by all models evaluated on the permuted images.
permuted_images_arr = cxr_model.to_observations(permuted_images)

In [None]:
# Evaluate every model on the flipped/transposed images and save a tiled image
# in which the predicted outliers are tinted red.
for model_name in model_name_list:

    # model_name_list holds complete "models/<name>.pkl" paths, so only add the
    # extension when it is missing instead of loading "<name>.pkl.pkl".
    model_path = model_name if model_name.endswith(".pkl") else model_name + ".pkl"
    # Bare model name (no directory, no extension) for titles and output files.
    model_label = os.path.splitext(os.path.basename(model_path))[0]

    cxr_model = Model.load_model(model_path)

    outlier_results = cxr_model.outlier_predictor(permuted_images_arr)
    img = tile_with_classification([normalize_img(img, sample_size=128) for img in permuted_images], outlier_results, 8)
    sitk.WriteImage(img, "permuted-" + model_label + ".png")

    myshow(img, title=model_label)