# Do region-based explanations align with HPO terms labeled by clinicians?

In [1]:
import sys
import pathlib
import random
import time
import numpy as np
import pandas as pd
import pingouin as pg
import cv2
import torch
import torchmetrics

sys.path.insert(0,'../')
import datasets

from zennit.attribution import Gradient, SmoothGrad
from zennit.core import Stabilizer
from zennit.composites import EpsilonGammaBox, EpsilonPlusFlat
from zennit.composites import SpecialFirstLayerMapComposite, NameMapComposite
from zennit.image import imgify, imsave
from zennit.rules import Epsilon, ZPlus, ZBox, Norm, Pass, Flat
from zennit.types import Convolution, Activation, AvgPool, Linear as AnyLinear
from zennit.types import BatchNorm, MaxPool
from zennit.torchvision import VGGCanonizer, ResNetCanonizer
from zennit.attribution import Gradient, SmoothGrad, IntegratedGradients
from zennit.composites import GuidedBackprop, ExcitationBackprop, DeconvNet, EpsilonPlus, EpsilonPlusFlat, EpsilonAlpha2Beta1, EpsilonAlpha2Beta1Flat, EpsilonGammaBox
from zennit.image import imgify, imsave
from captum.attr import Occlusion
from captum.attr import DeepLift, GuidedGradCam
from captum.attr import LayerDeepLift, LayerGradCam, LayerAttribution, LayerGradientShap
from pytorch_grad_cam.utils.image import show_cam_on_image # TODO: later remove dependency
from skimage.measure import label

from loguru import logger
import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_regions(segmap, filename):
    # (1)
    # There is some noisy predictions in segmentation: use only the largest connected component's region for each segment.
    # Try to get rid of those unreliable parts and create region bounding boxes around the most reliable segment.
    def get_largest_connected_components(segmentation):
        labels = label(segmentation)
        assert( np.unique(label(segmap)).size>1)
        largestCC = labels == np.argmax(np.bincount(labels.flat)[1:])+1
        return largestCC

    group_indices = [[2,4,3,5], [6], [7,9]] # 8,
    group_names = ['eye', 'nose', 'mouth']
    bboxes =  []
    for indices in group_indices:
        bb = []
        for i in indices:
            # Get bounding boxes of all segments, i.e., left-eye, left-eyebrow, nose, and so on...
            if np.sum(segmap==i)!=0:
                img_vis = get_largest_connected_components(label(segmap==i)).astype(np.uint8)
                x1, y1 = list(np.min(np.argwhere(img_vis==1),axis=0)) 
                x2, y2 = list(np.max(np.argwhere(img_vis==1),axis=0))
                bb.append([x1, y1, x2, y2])
            else:
                print('Check segmentation maps!!! ', filename)
        bboxes.append(bb)

    # (2) Combine parts of region: 
    # eye region
    # nose region
    # mouth region
    eye_region =[[np.array(bboxes[0])[0:2,0].min(), np.array(bboxes[0])[0:2,1].min(), np.array(bboxes[0])[0:2,2].max(), np.array(bboxes[0])[0:2,3].max()],
                 [np.array(bboxes[0])[2:,0].min(), np.array(bboxes[0])[2:,1].min(), np.array(bboxes[0])[2:,2].max(), np.array(bboxes[0])[2:,3].max()],
                 [np.array(bboxes[0])[[0,2],0].min(), np.array(bboxes[0])[[0,2],1].min(), np.array(bboxes[0])[[0,2],2].max(), np.array(bboxes[0])[[0,2],3].max()]]
    mouth_region = [[np.array(bboxes[2])[:,0].min(), np.array(bboxes[2])[:,1].min(), np.array(bboxes[2])[:,2].max(), np.array(bboxes[2])[:,3].max()]]
    nose_region = bboxes[1]
    maps = np.zeros(shape=(224,224,3), dtype=np.uint8)
    for index, region_bb in enumerate([eye_region, nose_region, mouth_region]):
        for bb in region_bb:
            x1,y1,x2,y2 = bb
            maps[x1:x2, y1:y2, index]=1 
    return maps

# https://github.com/jacobgil/pytorch-grad-cam/blob/2183a9cbc1bd5fc1d8e134b4f3318c3b6db5671f/pytorch_grad_cam/utils/image.py#L33
def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET,
                      image_weight: float = 0.5) -> np.ndarray:
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should np.float32 in the range [0, 1]")

    if image_weight < 0 or image_weight > 1:
        raise Exception(
            f"image_weight should be in the range [0, 1].\
                Got: {image_weight}")

    cam = (1 - image_weight) * heatmap + image_weight * img
    cam = cam / np.max(cam)
    return np.uint8(255 * cam)
    
# https://zennit.readthedocs.io/en/latest/reference/zennit.torchvision.html#zennit.torchvision.ResNetBasicBlockCanonizer
#from zennit.torchvision import VGGCanonizer, ResNetCanonizer
canonizer = ResNetCanonizer()

In [3]:
def normalize_relevance(attribution):
    # absolute sum over the channels and min-max [0, 1]
    relevance = attribution.abs().sum(1).detach().cpu()
    relevance = ((relevance - relevance.min()) / (relevance.max() - relevance.min())).cpu().numpy().squeeze()
    return relevance

def get_relevance(model, input, pred_label_idx, method, num_classes=12):

    if method=='Gradient':
        # Gradient
        attributor = Gradient(model)
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='SmoothGrad':
        # SmoothGrad
        attributor = SmoothGrad(model, noise_level=0.1, n_iter=20)
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='IntegratedGradients':
        # IntegratedGradients
        attributor = IntegratedGradients(model, n_iter=20)
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='GuidedBackprop':
        # GuidedBackprop
        attributor = Gradient(model=model, composite=GuidedBackprop())  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='ExcitationBackprop':
        # ExcitationBackprop
        attributor = Gradient(model=model, composite=ExcitationBackprop())  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='DeconvNet':
        # DeconvNet
        attributor = Gradient(model=model, composite=DeconvNet())  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='LRP-EpsilonPlus':
        # LRP-EpsilonPlus
        attributor = Gradient(model=model, composite=EpsilonPlus(canonizers=[canonizer]))  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='LRP-EpsilonPlusFlat':
        # LRP-EpsilonPlusFlat
        attributor = Gradient(model=model, composite=EpsilonPlusFlat(canonizers=[canonizer]))  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='LRP-EpsilonAlpha2Beta1':
        # LRP-EpsilonAlpha2Beta1
        attributor = Gradient(model=model, composite=EpsilonAlpha2Beta1(canonizers=[canonizer]))  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='LRP-EpsilonAlpha2Beta1Flat':
        # LRP-EpsilonAlpha2Beta1Flat
        attributor = Gradient(model=model, composite=EpsilonAlpha2Beta1Flat(canonizers=[canonizer]))  
        target = torch.eye(num_classes).to(device)[[pred_label_idx]] 
        output, attribution = attributor(input, target)
    elif method=='DeepLIFT':
        # DeepLIFT
        attributor = DeepLift(model)
        attribution = attributor.attribute(input, target=int(pred_label_idx))
    elif method=='GuidedGradCam':
        # GuidedGradCam
        attributor = GuidedGradCam(model, model.layer4)
        attribution = attributor.attribute(input, target=int(pred_label_idx))
    elif method=='LayerDeepLIFT':
        # GuidedGradCam
        attributor = LayerDeepLift(model, model.layer4)
        attribution = attributor.attribute(input, target=int(pred_label_idx))
        attribution = LayerAttribution.interpolate(attribution, (224, 224), interpolate_mode='bicubic')
    elif method=='LayerGradCam':
        # LayerGradCam
        attributor = LayerGradCam(model, model.layer4)
        attribution = attributor.attribute(input, target=int(pred_label_idx))
        attribution = LayerAttribution.interpolate(attribution, (224, 224), interpolate_mode='bicubic')
    elif method=='Occlusion':
        # Occlusion
        attributor = Occlusion(model)
        attribution = attributor.attribute(input,
                                           strides = (3, 8, 8),
                                           target=int(pred_label_idx),
                                           sliding_window_shapes=(3,15, 15),
                                           baselines=0)        
    else:
        raise ValueError('XAI saliency map not implemented!')
        
    return normalize_relevance(attribution)
    

In [4]:
region_groups = {}
region_groups['overall'] = ['Abnormal facial shape',
                            'Long face',
                            'Microcephaly', 
                            'Hypopigmentation of the skin', 
                            'Elfin facies',
                            'Narrow face',
                            'Coarse facial features',
                            'Triangular face',
                            'Midface retrusion',
                            'Webbed neck']
region_groups['hair'] = ['Fair hair']
region_groups['eye'] = ['Epicanthus',
                        'Upslanted palpebral fissure',
                        'Abnormal eyelid morphology',
                        'Ptosis',
                        'Telecanthus',
                        'Strabismus',
                        'Iris hypopigmentation',
                        'Blepharophimosis',
                        'Downslanted palpebral fissures',
                        'Hypertelorism',
                        'Proptosis',
                        'Highly arched eyebrow',
                        'Sparse lateral eyebrow',
                        'Long eyelashes',
                        'Eversion of lateral third of lower eyelids']
region_groups['nose'] = ['Prominent nasal bridge',
                         'Wide nasal bridge',
                         'Bulbous nose',
                         'Short nose',
                         'Short columella']
region_groups['ears'] = ['Low-set ears',
                         'Overfolded helix',
                         'Small earlobe',
                         'Low-set posteriorly rotated ears',
                         'Protruding ear',
                         'Thickened helices',
                         'Macrotia']
region_groups['mouth'] = ['Long philtrum',
                          'Wide mouth',
                          'Protruding tongue',
                          'Thick lower lip vermilion',
                          'Everted lower lip vermilion',
                          'Open bite',
                          'Widely spaced teeth',
                          'Microdontia',
                          'Abnormality of the dentition']
region_groups['chin'] = ['Pointed chin']
region_groups['forehead'] = ['High forehead',
                             'Broad forehead']
hpo_terms = []
for group in region_groups:
    hpo_terms += region_groups[group]
hpo_terms = list(set(hpo_terms))

dataset_folder = '/media/omersumer/DATA/databases/NIH-Faces'
hpo_terms_from_csv_files = []
image_names = []
for syndrome in ['22q11DS', 'Angelman', 'KS', 'NS', 'WS']:
    df = pd.read_csv(pathlib.Path(dataset_folder, 'metadata', 'hpo-annotations', '%s.csv'%syndrome))
    hpo_terms_from_csv_files += df.keys()[2:-1].to_list()
    image_names += df['image_name'].to_list()
assert(len([i for i in set(hpo_terms_from_csv_files) if i not in hpo_terms])==0)

df = pd.read_csv(pathlib.Path(dataset_folder, 'metadata', 'partitions.csv'))

print('I do not have these images in the database:')
missing_images = [i for i in image_names if i not in df['image_name'].to_list()]
print(missing_images)

hpo_occurrence = np.zeros(shape=(len(image_names), len(hpo_terms)), dtype=np.float32)
for i in range(0, len(image_names)):
    syndrome = image_names[i].split('Slide')[0]
    df = pd.read_csv(pathlib.Path(dataset_folder, 'metadata', 'hpo-annotations', '%s.csv'%syndrome))
    index = np.argwhere(df['image_name'].to_numpy()==image_names[i]).squeeze()
    #print(image_names[i], syndrome, index)
    
    for key in list(df.iloc[index][2:-1].keys()):
        #print(key)
        j = np.argwhere(np.array(hpo_terms) == key).squeeze()
        assert(j.size==1)

        # Annotations: 0, X, 2X, 3X
        if df.iloc[index][key] is np.nan:
            val = 0 
        elif df.iloc[index][key]=='1X':
            val = 0.333
        elif df.iloc[index][key]=='2X':
            val = 0.666
        elif df.iloc[index][key]=='3X':
            val = 1.000
        else:
            raise Exception('Check values: %s, %s, %s'%(syndrome, image_names[i], key))

        hpo_occurrence[i, j] = val
        
df = pd.DataFrame({'image_name':image_names, 'category':[i.split('Slide')[0]  for i in image_names]})
df = pd.concat([df, pd.DataFrame(hpo_occurrence)], axis=1)
df = df.drop([i for i in range(0, df.shape[0]) if df.iloc[i]['image_name'] in missing_images]).reset_index(drop=True)

I do not have these images in the database:
['22q11DSSlide150.png', 'KSSlide133.png', 'NSSlide6.png', 'WSSlide316.png']


In [5]:
seed = 42
device = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.enabled = False
torch.use_deterministic_algorithms = True
device = torch.device("cuda:%d"%device if torch.cuda.is_available() and device>=0 else "cpu")

In [6]:
def evaluate_xai_maps(fold, xai_method):
    
    dataset_folder = '/media/omersumer/DATA/databases/NIH-Faces'
    project_root = './'
    mean_bgr = {'fold-1':[112, 123, 147],
                'fold-2':[112, 123, 147], 
                'fold-3':[112, 122, 147], 
                'fold-4':[112, 123, 147], 
                'fold-5':[112, 123, 147]}
    num_classes = 12
    categories = ['22q11DS', 'Angelman', 'BWS', 'CdLS', 'Down', 'KS', 'NS', 'PWS', 'RSTS1', 'Unaffected', 'WHS', 'WS']

    # Test dataset
    image_size = 224 # we use only VGG-Face2 pretrained ResNet50
    test_dataset = datasets.NIHFacesDataset(root_dir=dataset_folder,
                                            metadata_file='../metadata/partitions.csv',
                                            fold=fold, 
                                            split='test', 
                                            mean_bgr=mean_bgr[fold],
                                            image_size=image_size,
                                            flip=False)
    num_samples = len(test_dataset)

    # Load the models
    val_acc = np.array([float(s.as_posix().split('-test_accuracy-')[-1].replace('.pt','')) for s in list(pathlib.Path('../results/%s/%s'%('VGGFace2_ResNet50', fold)).glob('epoch-*.pt'))])
    model_path = list(pathlib.Path('../results/%s/%s'%('VGGFace2_ResNet50', fold)).glob('epoch-*.pt'))[np.argmax(val_acc)].as_posix()
    #logger.info('model_path: %s'%model_path) 
    model = torch.load(model_path)
    model.to(device)
    eval_mode = model.eval()
    
    results = pd.DataFrame(columns=['backbone', 'fold', 'image_name', 'method', 'eye_xai', 'nose_xai', 'mouth_xai', 'eye_hpo', 'nose_hpo', 'mouth_hpo'] + ['label','predicted']+['p_%s'%i for i in categories])
    n = 0
    
    for index in range(0, num_samples):

        # load images and labels
        image, gt_label, segmap, filename, landmark = test_dataset.__getitem__(index)
        input = image.unsqueeze(0).to(device)
        input.requires_grad = True

        output = model(input)
        prob_output = torch.nn.functional.softmax(output, dim=1)
        prob_output = prob_output.detach().cpu().numpy().squeeze()

        pred_label_idx = np.argmax(prob_output)
        predicted_label = categories[pred_label_idx]

        # get 3-channel RGB map for eye, nose and mouth regions
        def get_largest_connected_components(segmentation):
            labels = label(segmentation)
            assert( np.unique(label(segmap)).size>1)
            largestCC = labels == np.argmax(np.bincount(labels.flat)[1:])+1
            return largestCC

        segmap = cv2.resize(segmap, (224, 224))
        right_eye = [list(np.argwhere(get_largest_connected_components(segmap==i)==True).min(axis=0)) + list(np.argwhere(get_largest_connected_components(segmap==i)==True).max(axis=0)) for i in [2,4]]
        left_eye  = [list(np.argwhere(get_largest_connected_components(segmap==i)==True).min(axis=0)) + list(np.argwhere(get_largest_connected_components(segmap==i)==True).max(axis=0)) for i in [3,5]]
        right_eye, left_eye = np.array(right_eye), np.array(left_eye)

        reye_x1, reye_y1, reye_x2, reye_y2 = right_eye[:,1].min(), right_eye[:,0].min(), right_eye[:,3].max(), right_eye[:,2].max()
        leye_x1, leye_y1, leye_x2, leye_y2 = left_eye[:,1].min(),  left_eye[:,0].min(),  left_eye[:,3].max(),  left_eye[:,2].max()

        reye_w,  reye_h = reye_x2-reye_x1, reye_y2-reye_y1
        reye_x1, reye_y1, reye_x2, reye_y2 = reye_x1-int(0.2*reye_w), reye_y1-int(0.2*reye_h), reye_x2+int(0.1*reye_w), reye_y2+int(0.2*reye_h)
        leye_w,  leye_h = leye_x2-leye_x1, leye_y2-leye_y1
        leye_x1, leye_y1, leye_x2, leye_y2 = leye_x1-int(0.1*leye_w), leye_y1-int(0.2*leye_h), leye_x2+int(0.2*leye_w), leye_y2+int(0.2*leye_h) 

        nose_x1, nose_y1, nose_x2, nose_y2  = list(np.argwhere(get_largest_connected_components(segmap==6)==True).min(axis=0))[::-1] + list(np.argwhere(get_largest_connected_components(segmap==6)==True).max(axis=0))[::-1]  
        nose_y1 = min(reye_y1, leye_y1) + int(0.3 * min(reye_h, leye_h))

        mouth = np.array([list(np.argwhere(get_largest_connected_components(segmap==i)==True).min(axis=0)) + list(np.argwhere(get_largest_connected_components(segmap==i)==True).max(axis=0)) for i in [7,9]])
        mouth_x1, mouth_y1, mouth_x2, mouth_y2  = mouth[:,1].min(), mouth[:,0].min(), mouth[:,3].max(), mouth[:,2].max()
        mouth_y2 += int(0.5 * (mouth_y1 - nose_y2))
        mouth_y1, nose_y2 = 2*[int(0.5 * (mouth_y1 + nose_y2))]
        mouth_x1, mouth_x2 = mouth_x1-int(0.2*(mouth_x2-mouth_x1)), mouth_x2+int(0.2*(mouth_x2-mouth_x1))
        mouth_y2 = min(mouth_y2, 224)

        region_map = np.zeros(shape=(224,224,3), dtype=np.uint8)
        region_map[reye_y1:reye_y2, reye_x1:reye_x2, 0] = 1
        region_map[leye_y1:leye_y2, leye_x1:leye_x2, 0] = 1
        region_map[nose_y1:nose_y2, nose_x1:nose_x2, 1] = 1
        region_map[mouth_y1:mouth_y2, mouth_x1:mouth_x2, 2] = 1
        for i in range(0, 224):
            for j in range(0, 224):
                region_map[i,j,:] = [1,0,0]  if (region_map[i,j,:]==[1,1,0]).all() else region_map[i,j,:]


        # get saliency map
        relevance_map = get_relevance(model, input, pred_label_idx, xai_method)
        relevance_map = cv2.resize(relevance_map, (224, 224))
        relevance_map = region_map[:,:,0] * relevance_map + \
                        region_map[:,:,1] * relevance_map + \
                        region_map[:,:,2] * relevance_map

        # saliency region coefficients
        eye_xai   = np.sum(relevance_map * region_map[:,:,0]) / np.sum(region_map[:,:,0]!=0)
        nose_xai  = np.sum(relevance_map * region_map[:,:,1]) / np.sum(region_map[:,:,1]!=0)
        mouth_xai = np.sum(relevance_map * region_map[:,:,2]) / np.sum(region_map[:,:,2]!=0)

        # read HPO Annotations and calculate HPO-base coefficients
        ii = [idx for idx, val in enumerate(df['image_name']) if val.replace('.png','')==filename]
        ii = ii[0] if len(ii)==1 else None
        t = ['%2.3f'%(i) for i in df.iloc[ii][2:].to_list()]
        t_per_image =[]
        for i in range(0, 50):
            if t[i]!='0.000':
                t_per_image.append('%s'%(hpo_terms[i]))
        labeled_terms = [(key, t_per_image[ii]) for key in region_groups.keys() for ii in range(0, len(t_per_image)) if t_per_image[ii] in region_groups[key] ]

        eye_hpo = np.sum(np.array(df.iloc[ii][2:], dtype=np.float32)[ [idx for idx, val in enumerate(hpo_terms) if val in region_groups['eye']] ])
        nose_hpo = np.sum(np.array(df.iloc[ii][2:], dtype=np.float32)[ [idx for idx, val in enumerate(hpo_terms) if val in region_groups['nose']] ])
        mouth_hpo = np.sum(np.array(df.iloc[ii][2:], dtype=np.float32)[ [idx for idx, val in enumerate(hpo_terms) if val in region_groups['mouth']] ])

        results.loc[n] = ['VGGFace2_ResNet50'] + [fold] + [filename] + [xai_method] + [eye_xai, nose_xai, mouth_xai, eye_hpo, nose_hpo, mouth_hpo] + [test_dataset.categories[pred_label_idx]] + [test_dataset.categories[int(gt_label)]] + ['%2.3f'%i for i in list(prob_output)]
        n += 1

        
    results_stats = pd.DataFrame(columns=['method', 'fold', 'region', 'r', 'CI95%', 'p-val', 'power'] )
    results_stats.loc[0] = [xai_method] + [fold] + ['eye'] + [round(float(pg.corr( results['eye_xai'],results['eye_hpo'], method='spearman')['r']), 5)] +\
                                           [str(list(pg.corr( results['eye_xai'],results['eye_hpo'], method='spearman')['CI95%'][0])).replace(',',';')] +\
                                           [float(pg.corr( results['eye_xai'],results['eye_hpo'], method='spearman')['p-val'])] + \
                                           [round(float(pg.corr( results['eye_xai'],results['eye_hpo'], method='spearman')['power']), 5)]

    results_stats.loc[1] = [xai_method] + [fold] + ['nose'] + [round(float(pg.corr( results['nose_xai'],results['nose_hpo'], method='spearman')['r']), 5)] +\
                                             [str(list(pg.corr( results['nose_xai'],results['nose_hpo'], method='spearman')['CI95%'][0])).replace(',',';')] +\
                                             [float(pg.corr( results['nose_xai'],results['nose_hpo'], method='spearman')['p-val'])] + \
                                             [round(float(pg.corr( results['nose_xai'],results['nose_hpo'], method='spearman')['power']), 5)]

    results_stats.loc[2] = [xai_method] + [fold] + ['mouth'] + [round(float(pg.corr( results['mouth_xai'],results['mouth_hpo'], method='spearman')['r']), 5)] +\
                                             [str(list(pg.corr( results['mouth_xai'],results['mouth_hpo'], method='spearman')['CI95%'][0])).replace(',',';')] +\
                                             [float(pg.corr( results['mouth_xai'],results['mouth_hpo'], method='spearman')['p-val'])] + \
                                             [round(float(pg.corr( results['mouth_xai'],results['mouth_hpo'], method='spearman')['power']), 5)]
    #display(results_stats)

    return results, results_stats




xai_methods = ['Gradient', 'SmoothGrad', 'IntegratedGradients', 'GuidedBackprop', 'ExcitationBackprop', 'DeconvNet',
               'LRP-EpsilonPlus', 'LRP-EpsilonPlusFlat', 'LRP-EpsilonAlpha2Beta1', 'LRP-EpsilonAlpha2Beta1Flat',
               'DeepLIFT', 'GuidedGradCam', 'LayerDeepLIFT', 'LayerGradCam', 'Occlusion']


rq2_correlation_analysis = pd.DataFrame(columns=['method', 'eye', 'nose', 'mouth'] )


for xai_method in xai_methods:
    print('Processing (%s)'%xai_method)
    
    corr_res = pd.DataFrame(columns=['method', 'fold', 'region', 'r', 'CI95%', 'p-val', 'power'] ) 
    for fold in ['fold-1', 'fold-2', 'fold-3', 'fold-4', 'fold-5']:
        _, stats = evaluate_xai_maps(fold, xai_method)
        corr_res = pd.concat([corr_res, stats])

    result = []
    result.append(xai_method)
    for region in ['eye', 'nose', 'mouth']:
        result.append('%2.3f +/- %2.3f'%(corr_res[corr_res['region']==region]['r'].mean(), 
                                         corr_res[corr_res['region']==region]['r'].std()))
    #display(pd.DataFrame([result], columns=['method', 'eye', 'nose', 'mouth' ])) 
    
    rq2_correlation_analysis = pd.concat([rq2_correlation_analysis, pd.DataFrame([result], columns=['method', 'eye', 'nose', 'mouth' ])])

Processing (Gradient)
Processing (SmoothGrad)
Processing (IntegratedGradients)
Processing (GuidedBackprop)
Processing (ExcitationBackprop)
Processing (DeconvNet)
Processing (LRP-EpsilonPlus)
Processing (LRP-EpsilonPlusFlat)
Processing (LRP-EpsilonAlpha2Beta1)
Processing (LRP-EpsilonAlpha2Beta1Flat)
Processing (DeepLIFT)
Processing (GuidedGradCam)
Processing (LayerDeepLIFT)
Processing (LayerGradCam)
Processing (Occlusion)


In [7]:
display(rq2_correlation_analysis)

Unnamed: 0,method,eye,nose,mouth
0,Gradient,0.146 +/- 0.042,0.078 +/- 0.061,0.335 +/- 0.192
0,SmoothGrad,0.044 +/- 0.149,0.034 +/- 0.070,0.485 +/- 0.136
0,IntegratedGradients,-0.039 +/- 0.145,0.023 +/- 0.040,0.425 +/- 0.161
0,GuidedBackprop,0.054 +/- 0.091,-0.046 +/- 0.065,0.331 +/- 0.112
0,ExcitationBackprop,0.021 +/- 0.202,-0.006 +/- 0.072,0.345 +/- 0.078
0,DeconvNet,-0.102 +/- 0.066,-0.033 +/- 0.090,-0.414 +/- 0.097
0,LRP-EpsilonPlus,-0.009 +/- 0.055,-0.015 +/- 0.061,0.403 +/- 0.075
0,LRP-EpsilonPlusFlat,0.048 +/- 0.079,-0.011 +/- 0.088,0.422 +/- 0.090
0,LRP-EpsilonAlpha2Beta1,0.059 +/- 0.152,0.017 +/- 0.148,0.380 +/- 0.091
0,LRP-EpsilonAlpha2Beta1Flat,-0.016 +/- 0.124,-0.008 +/- 0.153,0.429 +/- 0.086
