In [None]:
import os
import sys
from pathlib import Path

# Switch the working directory to the parent of the current one and put it on
# sys.path (presumably so that the `src` package imported below resolves).
# NOTE(review): this is not idempotent -- every re-run of this cell moves the
# working directory up one more level; restart the kernel before re-running.
os.chdir(Path(os.getcwd()).parents[0])
sys.path.append(os.getcwd())

import glob
import numpy as np
import torch
import pandas as pd

from scipy.stats import pearsonr
from scipy.stats import sem

from src.utils.plot_utils import *


def NormalizeData(data, min, max):
    """Rescale ``data`` linearly so that ``min`` maps to 0 and ``max`` to (almost) 1.

    Args:
        data: Scalar or array of values to rescale.
        min: Value that is mapped to 0.
        max: Value that is mapped to (just below) 1.

    Returns:
        ``(data - min) / (max - min + 1e-11)``; the small constant keeps the
        division defined when ``max == min``.
    """
    value_range = (max - min) + 0.00000000001
    shifted = data - min
    return shifted / value_range

### Import Evaluation Scores

In [None]:
# File loading per dataset

file_image_inet = "/image/eval_scores_imagenet.npz"
file_image_oct = "/image/eval_scores_oct.npz"
file_image_r45 = "/image/eval_scores_resisc45.npz"

file_volume_adr = "/volume/eval_scores_adrenalmnist3d.npz"
file_volume_org = "/volume/eval_scores_organmnist3d.npz"
file_volume_ves = "/volume/eval_scores_vesselmnist3d.npz"

file_pc_coma = "/point_cloud/eval_scores_coma.npz"
file_pc_m40 = "/point_cloud/eval_scores_modelnet40.npz"
file_pc_shpn = "/point_cloud/eval_scores_shapenet.npz"

file_loc = os.getcwd() + "/data/evaluation_scores"


def _load_eval_scores(rel_path):
    """Read the arrays ``arr_0``, ``arr_1`` and ``arr_2`` from one ``.npz`` archive.

    Args:
        rel_path: Archive path relative to ``file_loc`` (with a leading slash).

    Returns:
        A list ``[arr_0, arr_1, arr_2]``. Later cells index this list by model
        and each array as (xai method, evaluation metric, observation).
    """
    # The archive is opened in a `with` block so its file handle is closed as
    # soon as the three arrays have been read into memory.
    # NOTE(review): allow_pickle=True is kept from the original call; it can run
    # arbitrary code on load, so only use it with trusted score files.
    with np.load(file_loc + rel_path, allow_pickle=True) as archive:
        return [archive["arr_0"], archive["arr_1"], archive["arr_2"]]


arr_image_inet = _load_eval_scores(file_image_inet)
arr_image_oct = _load_eval_scores(file_image_oct)
arr_image_r45 = _load_eval_scores(file_image_r45)

arr_volume_adr = _load_eval_scores(file_volume_adr)
arr_volume_org = _load_eval_scores(file_volume_org)
arr_volume_ves = _load_eval_scores(file_volume_ves)

arr_pc_coma = _load_eval_scores(file_pc_coma)
arr_pc_m40 = _load_eval_scores(file_pc_m40)
arr_pc_shpn = _load_eval_scores(file_pc_shpn)

### Score Distribution Plots

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10

# Work on copies: the normalisation below writes into the arrays in place, and
# `arr_pc_shpn` is read again by the ranking cells further down. Copying also
# makes this cell safe to re-run.
data = [np.array(scores, copy=True) for scores in arr_pc_shpn]

# The last three rows of data[2] are plotted as the attention methods.
n_attention = 3
n_attribution = data[0].shape[0]
attention_rows = slice(data[2].shape[0] - n_attention, data[2].shape[0])

# Subplot i shows metric i of the score arrays under titles[i].
# NOTE(review): `bup_order` and the "New Table" cells treat metric index 12 as
# higher-is-better and index 16 as a faithfulness metric, which does not match
# the arrows / [F]/[R] tags at those positions here -- confirm that this list
# follows the metric order of the score arrays.
titles = [
    "<b>[F]</b> Faithfulness Correlation \u2191",
    "[F] Faithfulness Estimate \u2191",
    "[F] Monotonicity Correlation \u2191",
    "[F] Pixel Flipping (AUC) \u2193",
    "[F] Region Perturbation (AUC) \u2191",
    "[F] Insertion \u2191",
    "[F] Deletion \u2193",
    "[F] IROF (AOC) \u2191",
    "[F] ROAD (AUC) \u2193",
    "[F] Sufficiency \u2191",
    "[F] Infidelity \u2193",
    "<b>[R]</b> Local Lipschitz Estimate \u2193",
    "[R] Max Sensitivity \u2193",
    "[R] Continuity (PCC) \u2191",
    "[R] Relative Input Stability \u2193",
    "[R] Relative Output Stability \u2193",
    "[R] Relative Repr. Stability \u2193",
    "<b>[C]</b> Sparseness \u2191",
    "[C] Complexity \u2193",
    "[C] Effective Complexity \u2193",
]

all_methods = [
    "OC",
    "LI",
    "KS",
    "SA",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
    "RA",
    "RoA",
    "LA",
]
# Datasets with fewer than 14 attribution rows use the list without GC/SC/C+.
if n_attribution >= 14:
    methods = all_methods
else:
    methods = [name for name in all_methods if name not in ("GC", "SC", "C+")]

fig = make_subplots(
    rows=5,
    cols=4,
    vertical_spacing=0.05,
    horizontal_spacing=0.03,
    subplot_titles=titles,
)

# Metric i goes to the i-th cell of the 5 x 4 grid, filled row by row.
plot_row = [i // 4 + 1 for i in range(20)]
plot_col = [i % 4 + 1 for i in range(20)]
# Group label per method, derived from the method list so both always agree
# (the original branched on the metric axis, which always has 20 entries).
plot_x = ["attr"] * (len(methods) - n_attention) + ["atten"] * n_attention

# Fraction of each subplot's x-domain taken by the attribution boxes; it is
# derived from the data instead of the fixed 14/17, which only fits 17 methods.
attr_fraction = n_attribution / (n_attribution + n_attention)

for i in range(20):  # per eval
    # Pool every score shown in this subplot to get common clipping bounds.
    pooled = np.vstack(
        [
            data[0][:, i, :],
            data[1][:, i, :],
            data[2][attention_rows, i, :],
        ]
    ).flatten()
    q_h = np.quantile(pooled, 0.9)
    q_l = np.quantile(pooled, 0.1)
    pooled = np.clip(pooled, q_l, q_h)
    d_max = pooled.max()
    d_min = pooled.min()

    # Clip to the 10%-90% quantile range, then rescale to [0, 1].
    data[0][:, i, :] = NormalizeData(np.clip(data[0][:, i, :], q_l, q_h), d_min, d_max)
    data[1][:, i, :] = NormalizeData(np.clip(data[1][:, i, :], q_l, q_h), d_min, d_max)
    data[2][attention_rows, i, :] = NormalizeData(
        np.clip(data[2][attention_rows, i, :], q_l, q_h),
        d_min,
        d_max,
    )

    for j in range(n_attribution):  # per attribution
        fig.add_trace(
            go.Box(
                # the first two models are pooled into one box per method
                y=np.concatenate((data[0][j, i, :], data[1][j, i, :])),
                name=methods[j],
                marker_color=colors[0],
                showlegend=False,
            ),  # model, explain, eval, n
            row=plot_row[i],
            col=plot_col[i],
        )
    for j in range(attention_rows.start, attention_rows.stop):  # per attention
        fig.add_trace(
            go.Box(
                y=data[2][j, i, :],
                name=methods[j],
                marker_color=colors[2],
                showlegend=False,
            ),  # model, explain, eval, n
            row=plot_row[i],
            col=plot_col[i],
        )

    # Dotted median line of each group, drawn only across that group's boxes.
    fig.add_hline(
        y=np.median(np.concatenate((data[0][:, i, :], data[1][:, i, :]))),
        x0=0,
        x1=attr_fraction,
        line_dash="dot",
        row=plot_row[i],
        col=plot_col[i],
        line_color="#000000",
        line_width=2,
    )
    fig.add_hline(
        y=np.median(data[2][attention_rows, i, :]),
        x0=attr_fraction,
        x1=1,
        line_dash="dot",
        row=plot_row[i],
        col=plot_col[i],
        line_color="#000000",
        line_width=2,
    )

fig.update_layout(
    height=1000,
    width=2000,
    margin=dict(t=60, b=10, r=10, l=10),
    font=dict(
        family="Helvetica",
        color="#000000",
    ),
    title_font=dict(
        family="Helvetica",
        color="#000000",
    ),
    title={
        "text": "Evaluation Score Distributions for ShapeNet Dataset per XAI Method and grouped into Attribution and Attention",
        # 'y':0.9,
        "x": 0.012,
    },
)

fig = left_align_facet_plot_titles(fig)
fig.update_annotations(font_size=12)
# fig.write_image(os.getcwd() + "/data/figures/eval_distr/shapenet.png", scale=2)
fig.show()

### Ranking Computation

In [None]:
# Full Ranking
arr_image = [arr_image_inet, arr_image_oct, arr_image_r45]
arr_volume = [arr_volume_adr, arr_volume_org, arr_volume_ves]
arr_pc = [arr_pc_coma, arr_pc_m40, arr_pc_shpn]
arr_modalities = [arr_image, arr_volume, arr_pc]

# Rank of every XAI method per metric; entries for methods a model does not
# have stay NaN.
arr_ranking = np.full(
    [3, 3, 3, 17, 20], np.nan
)  # modality, dataset, model, xai, eval

# Metric indices for which a larger score is better.
bup_order = [0, 1, 2, 4, 5, 7, 9, 12, 17]

for modality in range(3):
    for dataset in range(3):
        for model in range(3):
            scores = arr_modalities[modality][dataset][model]
            n_xai = scores.shape[0]
            for metric in range(20):
                # Order the methods by their median observed score. The order
                # does not depend on the method being ranked, so it is computed
                # once per metric rather than once per method.
                order = np.median(scores[:, metric, :], -1).argsort()
                if metric in bup_order:
                    order = order[::-1]  # larger is better: best method first

                # argsort of the order gives each method's position; +1 so the
                # ranking starts at 1 and not 0.
                arr_ranking[modality, dataset, model, :n_xai, metric] = (
                    order.argsort() + 1
                )

# Build one table column per (metric, modality, dataset) from the ranks of the
# third model (index 2), with one row per XAI method.
arr_table = []
for metric in range(20):
    for modality in range(3):
        for dataset in range(3):
            arr_col_val = []
            for model in [2]:
                for xai in range(17):
                    if modality == 2 and xai == 6:
                        # Modality 2 has no entries for rows 6-8 (GC, SC, C+);
                        # fill them with the mean rank of these methods over
                        # the other two modalities so the rows stay aligned.
                        arr_col_val = arr_col_val + [
                            np.round(np.mean(arr_ranking[(0, 1), :, :, k, metric]))
                            for k in (6, 7, 8)
                        ]
                    if modality == 2 and xai == 14:
                        # Modality 2 has 14 methods. Stopping at 11, as the
                        # original did, dropped its last three (attention)
                        # rows; 14 matches the other table cells.
                        break
                    # Missing entries are NaN and stay NaN after rounding.
                    arr_col_val.append(
                        np.round(arr_ranking[modality, dataset, model, xai, metric])
                    )
                arr_table.append(arr_col_val)

# Rows: XAI methods; columns: metric-major, nine (modality, dataset) columns
# per metric.
df_table = pd.DataFrame(arr_table).transpose()
df_table.index = [
    "OC",
    "LIME",
    "KS",
    "VG",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
    "RA",
    "RoA",
    "LA",
]

In [None]:
# Ranking across models and datasets
arr_image = [arr_image_inet, arr_image_oct, arr_image_r45]
arr_volume = [arr_volume_adr, arr_volume_org, arr_volume_ves]
arr_pc = [arr_pc_coma, arr_pc_m40, arr_pc_shpn]
arr_modalities = [arr_image, arr_volume, arr_pc]

# One rank per method and metric for each modality; rows of methods that a
# modality does not have stay NaN.
arr_ranking = np.full([3, 17, 20], np.nan)  # modality, xai, eval

# Metric indices for which a larger score is better.
bup_order = [0, 1, 2, 4, 5, 7, 9, 12, 17]

for modality in range(3):
    for metric in range(20):
        # Per model: (n_xai, 3) array of median normalised scores, one column
        # per dataset.
        per_model = []
        for model in range(3):
            dataset_medians = []
            for dataset in range(3):
                scores = arr_modalities[modality][dataset][model][:, metric, :]
                # Clip to the central 95% and rescale to [0, 1] so scores of
                # different datasets and models can be averaged.
                q_h = np.quantile(scores, 0.975)
                q_l = np.quantile(scores, 0.025)
                scores = np.clip(scores, q_l, q_h)
                scores = NormalizeData(scores, scores.min(), scores.max())
                dataset_medians.append(np.median(scores, 1))
            per_model.append(np.column_stack(dataset_medians))
        model_1, model_2, model_3 = per_model

        # All rows of the first two models are averaged together with all but
        # the last three rows of the third model; the last three rows
        # (attention methods) exist only for the third model and are averaged
        # on their own.
        ranking = np.concatenate(
            [
                np.mean(np.hstack([model_1, model_2, model_3[:-3]]), -1),
                np.mean(model_3[-3:], -1),
            ]
        ).argsort()
        if metric in bup_order:
            ranking = ranking[::-1]  # larger is better: best method first

        # argsort of the order gives each method's position; +1 so the ranking
        # starts at 1 and not 0. Computed once instead of once per method.
        arr_ranking[modality, : ranking.shape[0], metric] = ranking.argsort() + 1

#### New Table as in Paper

In [None]:
# Metric indices reported in the faithfulness table.
faith_metrics = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16]

# Indexing with an integer, a slice and a list puts the metric axis first, so
# the result is transposed to (xai method, metric).
# Assumes arr_ranking is the (modality, xai, eval) array of the previous cell.
img_faith, vol_faith, pc_faith = (
    arr_ranking[modality, :, faith_metrics].T.round(1) for modality in range(3)
)

# Table layout: mean rank, standard deviation, then the per-metric ranks.
img_faith_table, vol_faith_table, pc_faith_table = (
    np.column_stack((ranks.mean(1).round(1), ranks.std(1).round(1), ranks))
    for ranks in (img_faith, vol_faith, pc_faith)
)

for csv_path, table in (
    ("./img_faith.csv", img_faith_table),
    ("./vol_faith.csv", vol_faith_table),
    ("./pc_faith.csv", pc_faith_table),
):
    np.savetxt(csv_path, table, delimiter=",")

In [None]:
# Metric indices reported in the robustness table.
rob_metrics = [10, 11, 12, 13, 14, 15]

# Indexing with an integer, a slice and a list puts the metric axis first, so
# the result is transposed to (xai method, metric).
img_rob, vol_rob, pc_rob = (
    arr_ranking[modality, :, rob_metrics].T.round(1) for modality in range(3)
)

# Table layout: mean rank, standard deviation, then the per-metric ranks.
img_rob_table, vol_rob_table, pc_rob_table = (
    np.column_stack((ranks.mean(1).round(1), ranks.std(1).round(1), ranks))
    for ranks in (img_rob, vol_rob, pc_rob)
)

for csv_path, table in (
    ("./img_rob.csv", img_rob_table),
    ("./vol_rob.csv", vol_rob_table),
    ("./pc_rob.csv", pc_rob_table),
):
    np.savetxt(csv_path, table, delimiter=",")

In [None]:
# Metric indices reported in the complexity table.
comp_metrics = [17, 18, 19]

# Indexing with an integer, a slice and a list puts the metric axis first, so
# the result is transposed to (xai method, metric).
img_comp, vol_comp, pc_comp = (
    arr_ranking[modality, :, comp_metrics].T.round(1) for modality in range(3)
)

# Table layout: mean rank, standard deviation, then the per-metric ranks.
img_comp_table, vol_comp_table, pc_comp_table = (
    np.column_stack((ranks.mean(1).round(1), ranks.std(1).round(1), ranks))
    for ranks in (img_comp, vol_comp, pc_comp)
)

for csv_path, table in (
    ("./img_comp.csv", img_comp_table),
    ("./vol_comp.csv", vol_comp_table),
    ("./pc_comp.csv", pc_comp_table),
):
    np.savetxt(csv_path, table, delimiter=",")

#### Old Table

In [None]:
# Build a table of integer mean ranks with one column per (metric group,
# modality) and one row per XAI method; "-" marks entries without a rank.
# NOTE(review): arr_ranking is indexed with four indices below, but the
# preceding "Ranking across models and datasets" cell creates a 3-D array
# ([3, 17, 20]); on a fresh top-to-bottom run this raises an IndexError.
# Confirm which arr_ranking layout this cell is meant to consume.
arr_table = []
# `eval` is a (start, stop) range of metric indices forming one metric group.
for eval in [(0, 10), (10, 17), (17, 20)]:
    for modality in range(3):
        arr_col_val = []
        arr_col_std = []
        for xai in range(17):
            if modality == 2 and xai == 6:
                # Modality 2 has no entries for rows 6-8 (GC, SC, C+); insert
                # placeholders so the following methods keep their rows.
                arr_col_val = arr_col_val + ["-", "-", "-"]
                arr_col_std = arr_col_std + [np.nan, np.nan, np.nan]
            if modality == 2 and xai == 14:
                # stop after the 14 methods of modality 2
                break
            x = arr_ranking[modality, :, xai, eval[0] : eval[1]]
            # mean / std over the non-NaN ranks of this method in this group
            val = np.round(np.mean(x[~np.isnan(x)]))
            std = np.round(np.std(x[~np.isnan(x)]), 2)
            if not np.isnan(val):
                val = int(val)
            else:
                val = "-"
                std = "-"
            arr_col_val.append(val)
            # std is computed but not stored: the std column is disabled.
            # arr_col_std.append(std)
        arr_table.append(arr_col_val)
        # arr_table.append(arr_col_std)

# Rows: XAI methods; columns: one per (metric group, modality).
df_table = pd.DataFrame(arr_table).transpose()
df_table.index = [
    "OC",
    "LI",
    "KS",
    "VG",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
    "RA",
    "RoA",
    "LA",
]

#### Attention vs. Attribution Plots

In [None]:
from sklearn.manifold import MDS, TSNE

# Embed the rows of df_table (one per XAI method) into two dimensions.
# NOTE(review): fit_transform needs a purely numeric table without NaN --
# confirm which df_table this cell is meant to run on.
mds = MDS(n_components=2, random_state=4)
# mds = TSNE(perplexity= 10)
X_transformed = mds.fit_transform(df_table)

import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10

fig = go.Figure()

# Manual shift of the point in row 4 (IxG) along the first dimension.
# NOTE(review): presumably done to avoid overlapping labels -- confirm.
X_transformed[4, 0] -= 4

# (legend name, rows of the embedding, marker colour, point labels)
method_groups = [
    (
        "Attribution",
        slice(None, -3),
        colors[0],
        [
            "OC",
            "LIME",
            "KS",
            "VG",
            "IxG",
            "GB",
            "GC",
            "SC",
            "C+",
            "IG",
            "EG",
            "DL",
            "DLS",
            "LRP",
        ],
    ),
    ("Attention", slice(-3, None), colors[2], ["RA", "RoA", "LA"]),
]
for group_name, rows, group_color, labels in method_groups:
    fig.add_trace(
        go.Scatter(
            x=X_transformed[rows, 0],
            y=X_transformed[rows, 1],
            mode="markers+text",
            text=labels,
            textposition="top right",
            name=group_name,
            marker=dict(color=group_color, size=8),
        )
    )

# Fixed axis ranges are currently disabled; both calls are no-ops.
fig.update_xaxes(
    # range=[-60.5, 60.5], tickvals=[-60, -40, -20, 0, 20, 40, 60], zerolinewidth=3
)
fig.update_yaxes(
    # range=[-75.5, 75.5], tickvals=[-75, -50, -25, 0, 25, 50, 75], zerolinewidth=3
)

layout_options = dict(
    height=500,
    width=550,
    xaxis=dict(title="Dim 1", titlefont_size=16, tickfont_size=14),
    yaxis=dict(title="Dim 2", titlefont_size=16, tickfont_size=14),
    font=dict(family="Helvetica", color="#000000", size=14),
    legend_title=dict(
        text="XAI Methods", font=dict(family="Helvetica", size=16, color="#000000")
    ),
    template="plotly_white",
)
fig.update_layout(**layout_options)

# fig.write_image(os.getcwd() + "/data/figures/mds_plot.png", scale=2)
fig.show()

### Avg. Rank per XAI Method

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# Two-level x axis: method group on the outer level, method on the inner one.
x = [
    ["Attribution"] * 14 + ["Attention"] * 3,
    [
        "OC",
        "LI",
        "KS",
        "VG",
        "IxG",
        "GB",
        "GC",
        "SC",
        "C+",
        "IG",
        "EG",
        "DL",
        "DLS",
        "LRP",
        "RA",
        "RoA",
        "LA",
    ],
]

# (legend name, bar colour, df_table columns of that criterion)
# NOTE(review): the column bounds assume the 180-column table of the
# "Full Ranking" cell (nine columns per metric) -- confirm df_table is that one.
criteria = [
    ("Faithfullness", colors[0], slice(None, 90)),
    ("Robustness", colors[4], slice(90, 153)),
    ("Complexity", colors[5], slice(153, 180)),
]
for criterion, bar_color, cols in criteria:
    ranks = df_table.iloc[:, cols]
    # average rank per method; also used as the text inside the bar
    avg_rank = np.round(np.mean(ranks, axis=1), 1)
    fig.add_trace(
        go.Bar(
            x=x,
            y=avg_rank,
            name=criterion,
            marker_color=bar_color,
            textposition="inside",
            insidetextanchor="start",
            text=avg_rank,
            # error bars: standard error of the mean per method
            error_y=dict(type="data", array=np.round(sem(ranks, axis=1), 2)),
        )
    )

fig.update_yaxes(zerolinewidth=4)

layout_options = dict(
    xaxis=dict(title="XAI Methods", titlefont_size=16, tickfont_size=14),
    yaxis=dict(title="Average Rank", titlefont_size=16, tickfont_size=14),
    font=dict(family="Helvetica", color="#000000", size=14),
    legend_title=dict(
        text="Evaluation Criteria",
        font=dict(family="Helvetica", size=16, color="#000000"),
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=1500,
)
fig.update_layout(**layout_options)

# fig.write_image(os.getcwd() + "/data/figures/aa_full_plot.png", scale=2)
fig.show()

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10


# Pairwise correlation between the rows (XAI methods) of df_table.
df_corr = np.corrcoef(df_table)

# Show only the lower triangle (diagonal included). The original filled a
# boolean mask with NaN, which just yields True, so the upper triangle was
# multiplied by False and drawn as 0 instead of being left blank.
mask = np.tril(np.ones_like(df_corr, dtype=bool))
corr_lower = np.where(mask, df_corr, np.nan)


heat = go.Heatmap(
    z=corr_lower,
    x=df_table.index,
    y=df_table.index,
    text=corr_lower,
    zmin=-1,  # Sets the lower bound of the color domain
    zmax=1,
    xgap=1,  # Sets the horizontal gap (in pixels) between bricks
    ygap=1,
    colorscale=[
        [0, "#B5545C"],
        [0.1, "#B5545C"],
        [0.5, "rgb(245, 245, 245)"],
        [0.9, "#76BB40"],
        [1, "#76BB40"],
    ],
    # NOTE(review): on a trace, `coloraxis` is a reference to a layout color
    # axis, so `coloraxis_colorbar` may be rejected by plotly; `colorbar=...`
    # is the trace-level property -- confirm which one is intended.
    coloraxis_colorbar=dict(
        thicknessmode="pixels",
        thickness=10,
        lenmode="pixels",
        len=10,
    ),
)

layout = go.Layout(
    width=600,
    height=600,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange="reversed",
    xaxis=dict(
        title="XAI Methods",
        titlefont_size=18,
        tickfont_size=16,
    ),
    yaxis=dict(
        title="XAI Methods",
        titlefont_size=18,
        tickfont_size=16,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
)

fig = go.Figure(data=[heat], layout=layout)

# Build the path with os.path.join: the original concatenated
# `os.getcwd().split("src")[0]` with "data/figures/..." and thereby lost the
# path separator whenever the working directory does not contain "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "corr_total.png"), scale=2
)
fig.show()

### Avg rank per Model architecture

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

x = [
    "OC",
    "LI",
    "KS",
    "VG",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
]

# Needs the 5-D arr_ranking (modality, dataset, model, xai, eval) of the
# "Full Ranking" cell.
# NOTE(review): modality index 2 is plotted here, but the 14 labels above
# include GC/SC/C+ and the output file is named "..._vol.png" -- confirm which
# modality this figure is meant to show.
for model in range(3):
    # ranks of the first 14 methods on the first 10 metrics: dataset, xai, eval
    ranks = arr_ranking[2, :, model, :14, :10]
    # average rank per method; also used as the text inside the bar
    avg_rank = np.round(np.mean(ranks, axis=(0, 2)), 1)
    fig.add_trace(
        go.Bar(
            x=x,
            y=avg_rank,
            name=["CNN<sub>1</sub>", "CNN<sub>2</sub>", "Transformer"][model],
            marker_color=[colors[i] for i in [0, 5, 2]][model],
            textposition="inside",
            insidetextanchor="start",
            text=avg_rank,
            # error bars: standard error over all datasets and metrics
            error_y=dict(
                type="data",
                array=[
                    np.round(sem(ranks[:, i, :].flatten()), 2) for i in range(14)
                ],
            ),
        )
    )

fig.update_yaxes(zerolinewidth=4)

fig.update_layout(
    xaxis=dict(
        title="XAI Methods",
        titlefont_size=18,
        tickfont_size=16,
    ),
    yaxis=dict(
        title="Average Rank",
        titlefont_size=18,
        tickfont_size=16,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
    legend_title=dict(
        text="Model Architecture",
        font=dict(family="Helvetica", size=16, color="#000000"),
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=1500,
)

# Build the path with os.path.join: the original concatenated
# `os.getcwd().split("src")[0]` with "data/figures/..." and thereby lost the
# path separator whenever the working directory does not contain "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "bar_models_faith_vol.png"),
    scale=2,
)
fig.show()

### Avg. Rank per Evaluation Criteria

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# df_table columns per evaluation criterion.
# NOTE(review): the bounds assume the 180-column table of the "Full Ranking"
# cell (nine columns per metric) -- confirm df_table is that one.
criteria_names = ["Faithfullness", "Robustness", "Complexity"]
criteria_cols = [slice(None, 90), slice(90, 153), slice(153, 180)]

# (legend name, rows of df_table, bar colour); the last three rows are the
# attention methods, all rows before them the attribution methods.
method_groups = [
    ("Attribution", slice(None, -3), colors[0]),
    ("Attention", slice(-3, None), colors[2]),
]

for group_name, rows, bar_color in method_groups:
    avg_rank = [
        np.round(np.nanmean(df_table.iloc[rows, cols]), 1) for cols in criteria_cols
    ]
    fig.add_trace(
        go.Bar(
            x=criteria_names,
            y=avg_rank,
            name=group_name,
            marker_color=bar_color,
            textposition="inside",
            insidetextanchor="start",
            text=avg_rank,
            # The standard error uses exactly the rows that are averaged. The
            # original took rows 15: for Attention and so left one of the
            # three attention methods out of the error bar.
            error_y=dict(
                type="data",
                array=[
                    sem(df_table.iloc[rows, cols], axis=None)
                    for cols in criteria_cols
                ],
            ),
        )
    )

fig.update_yaxes(zerolinewidth=4)

fig.update_layout(
    xaxis=dict(
        title="Evaluation Criteria",
        titlefont_size=16,
        tickfont_size=14,
    ),
    yaxis=dict(
        title="Average Rank",
        titlefont_size=16,
        tickfont_size=14,
    ),
    font=dict(family="Helvetica", color="#000000", size=14),
    legend_title=dict(
        text="XAI Methods", font=dict(family="Helvetica", size=16, color="#000000")
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=700,
)

from scipy import stats

# Welch's t-test (unequal variances) between attribution and attention ranks,
# one p-value per criterion.
t1, t2, t3 = (
    stats.ttest_ind(
        a=df_table.iloc[:-3, cols].to_numpy().flatten(),
        b=df_table.iloc[-3:, cols].to_numpy().flatten(),
        equal_var=False,
    )[1]
    for cols in criteria_cols
)

# Annotate each criterion's pair of bars with its p-value.
for i, p_value in enumerate([t1, t2, t3]):
    fig = add_p_value_annotation(
        fig,
        array_columns=[[-0.25 + i, 0.25 + i]],
        p_value=[p_value],
        _format=dict(interline=0.06, text_height=1.08, color="black"),
    )

# fig.write_image(os.getcwd() + "/data/figures/aa_aggr_plot.png", scale=2)
fig.show()

### Modality Plots

In [None]:
# Build one table column per (modality, metric, dataset) with one row per XAI
# method.
# NOTE(review): arr_ranking is indexed as (modality, dataset, xai, eval) here,
# a 4-D layout that none of the ranking cells above creates -- confirm which
# array this cell is meant to consume.
arr_table = []
for modality in range(3):
    for metric in range(20):
        for dataset in range(3):
            if modality == 2:
                # Modality 2 has 14 methods and none for rows 6-8 (GC, SC,
                # C+); pad those rows with NaN so later methods stay aligned.
                ranks = [
                    arr_ranking[modality, dataset, xai, metric] for xai in range(14)
                ]
                column = ranks[:6] + [np.nan, np.nan, np.nan] + ranks[6:]
            else:
                column = [
                    arr_ranking[modality, dataset, xai, metric] for xai in range(17)
                ]
            arr_table.append(column)

# Rows: XAI methods; columns: modality-major, 60 columns per modality.
method_names = ["OC", "LI", "KS", "SA", "IxG", "GB", "GC", "SC", "C+"]
method_names += ["IG", "EG", "DL", "DLS", "LRP", "RA", "RoA", "LA"]
df_table = pd.DataFrame(arr_table).transpose()
df_table.index = method_names

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# Two-level x axis: evaluation criterion on the outer level, method inside.
x1 = ["Faithfullness"] * 4 + ["Robustness"] * 4 + ["Complexity"] * 4
x2 = ["LI", "GC", "EG", "LA"] * 3
x = [x1, x2]

# Rows of df_table holding the four methods named in x2.
method_rows = [1, 6, 10, 16]

# df_table has 60 columns per modality (20 metrics x 3 datasets): the first 30
# are grouped as faithfulness, the next 21 as robustness and the last 9 as
# complexity.
modalities = [("Image", colors[0]), ("Volume", colors[3]), ("Point Cloud", colors[5])]
for k, (modality_name, bar_color) in enumerate(modalities):
    first = 60 * k
    criterion_blocks = [
        df_table.iloc[method_rows, first : first + 30],
        df_table.iloc[method_rows, first + 30 : first + 51],
        df_table.iloc[method_rows, first + 51 : first + 60],
    ]
    # average rank per method and criterion; also used as the text in the bar
    avg_rank = np.concatenate(
        [np.round(np.mean(block, axis=1), 1) for block in criterion_blocks],
        axis=None,
    )
    # error bars: standard error of the mean per method and criterion
    std_err = np.concatenate(
        [sem(block, axis=1) for block in criterion_blocks],
        axis=None,
    )
    fig.add_trace(
        go.Bar(
            x=x,
            y=avg_rank,
            name=modality_name,
            marker_color=bar_color,
            textposition="inside",
            insidetextanchor="start",
            text=avg_rank,
            error_y=dict(type="data", array=std_err),
        )
    )

fig.update_yaxes(zerolinewidth=4)

layout_options = dict(
    xaxis=dict(
        title="XAI Methods per       Evaluation Criteria",
        titlefont_size=16,
        tickfont_size=14,
    ),
    yaxis=dict(title="Average Rank", titlefont_size=16, tickfont_size=14),
    font=dict(family="Helvetica", color="#000000", size=14),
    legend_title=dict(
        text="Modality", font=dict(family="Helvetica", size=16, color="#000000")
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=1500,
)
fig.update_layout(**layout_options)

# fig.write_image(os.getcwd() + "/data/figures/mod_aggr_plot.png", scale=2)
fig.show()

### CNN vs. Transformer Attribution

#### Correlation Matrix

In [None]:
# Ranking across models
arr_image = [arr_image_inet, arr_image_oct, arr_image_r45]
arr_volume = [arr_volume_adr, arr_volume_org, arr_volume_ves]
arr_pc = [arr_pc_coma, arr_pc_m40, arr_pc_shpn]
arr_modalities = [arr_image, arr_volume, arr_pc]

# Rank of every method per metric; entries for methods a model does not have
# stay NaN.
arr_ranking_transf = np.full(
    [3, 3, 3, 14, 20], np.nan
)  # modality, dataset, model, xai, eval

# Metric indices for which a larger score is better.
bup_order = [0, 1, 2, 4, 5, 7, 9, 12, 17]


for modality in range(3):
    for dataset in range(3):
        for model in range(3):
            scores = arr_modalities[modality][dataset][model]
            if model == 2:
                # The last three rows of the third model are left out so it
                # is ranked on the same methods as the other two models.
                scores = scores[:-3]
            for metric in range(20):
                # compute ranking based on median obs score
                order = np.median(scores[:, metric, :], -1).argsort()
                if metric in bup_order:
                    order = order[::-1]  # larger is better: best method first

                # argsort of the order gives each method's position; +1 so the
                # ranking starts at 1 and not 0. Computed once per metric
                # instead of once per method.
                arr_ranking_transf[
                    modality, dataset, model, : order.shape[0], metric
                ] = (order.argsort() + 1)

In [None]:
# Per-dataset, per-model performance values that the following cells correlate
# with the mean rank of each attribution method.
# NOTE(review): presumably test accuracy of the three models -- confirm the metric.

# Only two rows: the image correlation cell pairs row `dataset` with ranking
# index `dataset + 1`, i.e. the first image dataset has no entry here.
perf_img = np.array([[0.999, 0.9969, 0.999], [0.9535, 0.9549, 0.9567]])

perf_vol = np.array(
    [[0.8, 0.802, 0.7907], [0.9226, 0.8914, 0.84], [0.9358, 0.9162, 0.886]]
)  # dataset, model

# Same layout as perf_vol (rows indexed by dataset in the point-cloud correlation cell).
perf_pc = np.array(
    [[0.98, 0.9423, 0.9662], [0.8438, 0.8864, 0.8796], [0.9668, 0.9745, 0.9716]]
)

In [None]:
# Pearson correlation (across the three models) between the mean rank of an
# attribution method and the model performance, for the image modality.
img_corr = np.empty([3, 2, 14])  # criteria, dataset, xai

# metric columns forming each criterion, in the row order of img_corr
criteria_cols = (slice(None, 10), slice(10, 17), slice(17, None))

for dataset in range(2):
    for criterion, cols in enumerate(criteria_cols):
        # perf_img row `dataset` belongs to ranking index `dataset + 1`;
        # averaging over the criterion's metrics leaves (model, xai)
        mean_rank = np.mean(arr_ranking_transf[0, dataset + 1, :, :, cols], axis=2)
        for xai in range(14):
            r = pearsonr(mean_rank[:, xai], perf_img[dataset])[0]
            img_corr[criterion, dataset, xai] = np.round(r, 2)

In [None]:
# Pearson correlation (across the three models) between the mean rank of an
# attribution method and the model performance, for the volume modality.
vol_corr = np.empty([3, 3, 14])  # criteria, dataset, xai

# metric columns forming each criterion, in the row order of vol_corr
criteria_cols = (slice(None, 10), slice(10, 17), slice(17, None))

for dataset in range(3):
    for criterion, cols in enumerate(criteria_cols):
        # averaging over the criterion's metrics leaves (model, xai)
        mean_rank = np.mean(arr_ranking_transf[1, dataset, :, :, cols], axis=2)
        for xai in range(14):
            r = pearsonr(mean_rank[:, xai], perf_vol[dataset])[0]
            vol_corr[criterion, dataset, xai] = np.round(r, 2)

In [None]:
# Pearson correlation (across the three models) between the mean rank of an
# attribution method and the model performance, for the point-cloud modality.
#
# Only the first 11 method slots are filled below. The array is therefore
# initialised with NaN rather than np.empty: uninitialised memory in slots
# 11..13 would otherwise leak into the np.nanmean aggregation further down.
pc_corr = np.full([3, 3, 14], np.nan)  # criteria, dataset, xai

# metric columns forming each criterion, in the row order of pc_corr
criteria_cols = (slice(None, 10), slice(10, 17), slice(17, None))

for dataset in range(3):
    for criterion, cols in enumerate(criteria_cols):
        # averaging over the criterion's metrics leaves (model, xai) for the
        # 11 ranked point-cloud methods
        mean_rank = np.mean(arr_ranking_transf[2, dataset, :, :11, cols], axis=2)
        for xai in range(11):
            r = pearsonr(mean_rank[:, xai], perf_pc[dataset])[0]
            pc_corr[criterion, dataset, xai] = np.round(r, 2)

In [None]:
# One row per modality, one column per criterion: correlation averaged over
# datasets and attribution methods, ignoring NaNs.
np.stack([np.nanmean(corr, axis=(1, 2)) for corr in (img_corr, vol_corr, pc_corr)])

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# rows: modality, columns: evaluation criterion; every cell is the correlation
# averaged over datasets and attribution methods (NaNs ignored)
corr_matrix = np.round(
    np.stack(
        [np.nanmean(corr, axis=(1, 2)) for corr in (img_corr, vol_corr, pc_corr)]
    ),
    3,
)

fig.add_trace(
    go.Heatmap(
        z=corr_matrix,
        x=[
            "Faithfulness",
            "Robustness",
            "Complexity",
        ],
        y=["Image", "Volume", "Point Cloud"],
        texttemplate="%{z}",
        colorscale="plasma",
        zmin=-0.15,
        zmax=0.15,
        colorbar=dict(ticks="outside", thickness=9, len=1.1, tickvals=[-0.15, 0, 0.15]),
    )
)

# Axis titles use `title=dict(text=..., font=...)`: the older `titlefont`
# shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    yaxis=dict(
        title=dict(text="Modality", font=dict(size=16)),
        scaleanchor="x",  # keep the heatmap cells square
        tickfont_size=14,
    ),
    xaxis=dict(
        title=dict(text="Evaluation Criteria", font=dict(size=16)),
        tickfont_size=14,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
    template="plotly_white",
    height=420,
    width=415,
)

# The notebook changes into the project root in its first cell and reads its
# inputs from <cwd>/data, so the figure path is built from the cwd as well.
# Splitting the cwd on "src" and appending "data/..." drops the path separator
# whenever the cwd contains no "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "heatmap_correlation.png"), scale=2
)
fig.show()

In [None]:
from scipy.stats import kendalltau

# Kendall's tau between the method rankings of two models, for metrics 10..16.
# axes: modality, dataset, model combo, eval
arr_corr_transf = np.full([3, 3, 3, 7], np.nan, dtype=float)

# model pairs in combo order: 0 -> (0, 1), 1 -> (0, 2), 2 -> (1, 2)
model_pairs = ((0, 1), (0, 2), (1, 2))

for modality in range(3):
    for dataset in range(3):
        ranks = arr_ranking_transf[modality, dataset]  # model, xai, eval
        for eval in range(7):
            col = 10 + eval
            for combo, (first, second) in enumerate(model_pairs):
                # NaN ranks (method slots that were never ranked) are dropped pairwise
                tau = kendalltau(
                    ranks[first, :, col],
                    ranks[second, :, col],
                    nan_policy="omit",
                )[0]
                arr_corr_transf[modality, dataset, combo, eval] = tau

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# rows: modality, columns: compared model pair; Kendall's tau averaged over
# datasets (axis 1) and metrics (axis 3)
fig.add_trace(
    go.Heatmap(
        z=np.round(np.mean(arr_corr_transf, axis=(1, 3)), 2),
        x=[
            "CNN<sub>1</sub> vs. CNN<sub>2</sub>",
            "CNN<sub>1</sub> vs. Transformer",
            "CNN<sub>2</sub> vs. Transformer",
        ],
        y=["Image", "Volume", "Point Cloud"],
        texttemplate="%{z}",
        colorscale="plasma",
        zmin=0,
        zmax=0.6,
        colorbar=dict(ticks="outside", thickness=9, len=1.1, tickvals=[0, 0.3, 0.6]),
    )
)

# Axis titles use `title=dict(text=..., font=...)`: the older `titlefont`
# shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    yaxis=dict(
        title=dict(text="Modality", font=dict(size=16)),
        scaleanchor="x",  # keep the heatmap cells square
        tickfont_size=14,
    ),
    xaxis=dict(
        title=dict(text="Compared Models", font=dict(size=16)),
        tickfont_size=14,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
    template="plotly_white",
    height=420,
    width=415,
)

# The notebook changes into the project root in its first cell and reads its
# inputs from <cwd>/data, so the figure path is built from the cwd as well.
# Splitting the cwd on "src" and appending "data/..." drops the path separator
# whenever the cwd contains no "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "heatmap_rob.png"), scale=2
)
fig.show()

#### Difference Barchart

In [None]:
# Rank the attribution methods once for the two CNNs pooled together and once
# for the transformer, per (modality, dataset, evaluation metric).
arr_image = [arr_image_inet, arr_image_oct, arr_image_r45]
arr_volume = [arr_volume_adr, arr_volume_org, arr_volume_ves]
arr_pc = [arr_pc_coma, arr_pc_m40, arr_pc_shpn]
arr_modalities = [arr_image, arr_volume, arr_pc]

# axes: modality, dataset, model group, xai, eval
# model group 0 = both CNNs pooled, 1 = transformer; group 2 is never written
# and stays NaN
arr_ranking = np.full([3, 3, 3, 14, 20], np.nan, dtype=float)

# indices of the evaluation metrics for which a larger score is better
bup_order = [0, 1, 2, 4, 5, 7, 9, 12, 17]

for modality in range(3):
    for dataset in range(3):
        for metric in range(20):
            arr_models = []
            for model in range(3):
                scores = arr_modalities[modality][dataset][model][:, metric, :]
                # clip to the central 95% of all scores, then min-max normalise,
                # so that the scores of the two CNNs can be pooled
                clipped = np.clip(
                    scores, np.quantile(scores, 0.025), np.quantile(scores, 0.975)
                )
                arr_models.append(
                    NormalizeData(clipped, clipped.min(), clipped.max())
                )

            # method indices ordered by median normalised score; the CNN
            # observations are concatenated along the observation axis
            order_cnn = np.median(
                np.hstack([arr_models[0], arr_models[1]]), -1
            ).argsort()
            # the transformer array carries three trailing rows that are not ranked
            order_transf = np.median(arr_models[2][:-3], -1).argsort()

            if metric in bup_order:
                # reverse so that the largest median receives rank 1
                order_cnn = order_cnn[::-1]
                order_transf = order_transf[::-1]

            # invert each ordering once: positions[i] is the rank of method i
            # (+1 so that ranks start at 1 and not 0)
            pos_cnn = order_cnn.argsort() + 1
            pos_transf = order_transf.argsort() + 1
            arr_ranking[modality, dataset, 0, : pos_cnn.shape[0], metric] = pos_cnn
            arr_ranking[
                modality, dataset, 1, : pos_transf.shape[0], metric
            ] = pos_transf

In [None]:
# Flatten the rankings into a table with one row per attribution method and one
# column per (model slot, metric, modality, dataset), model slot varying slowest.
arr_table = []
for model in range(3):
    for eval in range(10):
        for modality in range(1):
            for dataset in range(3):
                ranks = arr_ranking[modality, dataset, model, :, eval]
                if modality == 2:
                    # point clouds have 11 methods stored in slots 0..10; pad
                    # the three missing methods (rows 6..8) with NaN so that
                    # the rows line up with the 14 labels below
                    column = [*ranks[:6], np.nan, np.nan, np.nan, *ranks[6:11]]
                else:
                    column = list(ranks[:14])
                arr_table.append(column)

df_table = pd.DataFrame(arr_table).T
df_table.index = (
    ["OC", "LI", "KS", "VG", "IxG", "GB", "GC"]
    + ["SC", "C+", "IG", "EG", "DL", "DLS", "LRP"]
)

In [None]:
# Signed rank difference (transformer - CNN) per attribution method.
#
# df_table is built model-major over three model slots of equal width, so its
# columns form thirds: [CNN | transformer | unused slot]. The width of a slot
# is derived from the table instead of being hard-coded, which keeps both
# operands the same length regardless of how many metrics, modalities and
# datasets the table contains.
cols_per_model = df_table.shape[1] // 3
cnn_ranks = df_table.iloc[:, :cols_per_model].to_numpy()
transf_ranks = df_table.iloc[:, cols_per_model : 2 * cols_per_model].to_numpy()

diff_mean = []
diff_se = []
for i in range(14):
    diff_1 = transf_ranks[i] - cnn_ranks[i]
    diff_mean.append(np.nanmean(diff_1))
    # standard error is taken over the absolute differences
    diff_se.append(sem(np.abs(diff_1), nan_policy="omit"))

In [None]:
import plotly.graph_objects as go
import plotly.express as px
from scipy import stats

colors = px.colors.qualitative.G10
fig = go.Figure()

x = [
    "OC",
    "LI",
    "KS",
    "VG",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
]

fig.add_trace(
    go.Bar(
        x=x,
        y=np.round(diff_mean, 2),
        name="Faithfulness",
        marker_color=colors[0],
        error_y=dict(type="data", array=diff_se),
    )
)

fig.update_yaxes(zerolinewidth=4, range=[-2.5, 2.5])

# Axis titles use `title=dict(text=..., font=...)`: the older `titlefont`
# shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    yaxis=dict(
        title=dict(text="Avg. Difference in Rank", font=dict(size=16)),
        tickfont_size=14,
    ),
    xaxis=dict(
        title=dict(text="Attribution Methods", font=dict(size=16)),
        tickfont_size=14,
    ),
    font=dict(family="Helvetica", color="#000000", size=14),
    legend_title=dict(
        text="Evaluation Criteria",
        font=dict(family="Helvetica", size=16, color="#000000"),
    ),
    template="plotly_white",
    height=500,
    width=900,
)

# df_table is built model-major over three model slots of equal width, so its
# columns form thirds: [CNN | transformer | unused slot]. Deriving the slot
# width from the table keeps both operands of the difference the same length.
cols_per_model = df_table.shape[1] // 3
cnn_ranks = df_table.iloc[:, :cols_per_model].to_numpy()
transf_ranks = df_table.iloc[:, cols_per_model : 2 * cols_per_model].to_numpy()

for i in range(14):
    # one-sample t-test of the (transformer - CNN) rank differences of method i
    # NOTE(review): the null mean is -0.1 rather than 0 -- confirm this is intended.
    t1 = stats.ttest_1samp(
        a=transf_ranks[i] - cnn_ranks[i],
        popmean=-0.1,
        nan_policy="omit",
    )[1]
    fig = add_p_value_annotation(
        fig,
        array_columns=[[i, i]],
        p_value=[t1],
        _format=dict(interline=0.05, text_height=1.05, color="black"),
    )

# fig.write_image(os.getcwd() + "/data/figures/diff_plot.png", scale=2)
fig.show()

In [None]:
# Full Ranking: rank the attribution methods for every
# (modality, dataset, model, evaluation metric) combination.
arr_image = [arr_image_inet, arr_image_oct, arr_image_r45]
arr_volume = [arr_volume_adr, arr_volume_org, arr_volume_ves]
arr_pc = [arr_pc_coma, arr_pc_m40, arr_pc_shpn]
arr_modalities = [arr_image, arr_volume, arr_pc]

# axes: modality, dataset, model, xai, eval -- unranked slots stay NaN
arr_ranking = np.full([3, 3, 3, 14, 20], np.nan, dtype=float)

# indices of the evaluation metrics for which a larger score is better
bup_order = [0, 1, 2, 4, 5, 7, 9, 12, 17]

for modality in range(3):
    # the point-cloud modality ranks 11 methods, the others 14
    n_methods = 11 if modality == 2 else 14
    for dataset in range(3):
        for model in range(3):
            scores = arr_modalities[modality][dataset][model][:n_methods]
            for metric in range(20):
                # The ranking depends only on the metric, so it is computed once
                # here rather than once per method.
                # method indices ordered by their median score over observations
                order = np.median(scores[:, metric, :], -1).argsort()
                if metric in bup_order:
                    # reverse so that the largest median receives rank 1
                    order = order[::-1]

                # order.argsort()[i] is the rank position of method i
                # (+1 so that ranks start at 1 and not 0)
                arr_ranking[modality, dataset, model, :n_methods, metric] = (
                    order.argsort() + 1
                )

In [None]:
# Absolute rank distance between two models for every
# (modality, dataset, xai, eval) entry; NaN wherever either rank is NaN.
# Selecting a model on axis 2 of arr_ranking leaves a [3, 3, 14, 20] array.
ranks_model_0 = arr_ranking[:, :, 0]
ranks_model_1 = arr_ranking[:, :, 1]
ranks_model_2 = arr_ranking[:, :, 2]

arr_diff_01 = np.abs(ranks_model_0 - ranks_model_1)
arr_diff_02 = np.abs(ranks_model_2 - ranks_model_0)
arr_diff_12 = np.abs(ranks_model_2 - ranks_model_1)

In [None]:
def _mean_over_modalities(arr_diff):
    """Average a rank-distance array over the modalities, method by method.

    `arr_diff` has axes (modality, dataset, xai, eval). The third modality
    stores only 11 methods in slots 0..10 (the methods of rows 6..8 are
    missing), so its slots 6..10 line up with rows 9..13 of the other two.

    Returns an array with axes (dataset, xai, eval) and 14 method rows:
    rows 0..5 and 9..13 are averaged over three modalities, rows 6..8 over
    the first two only.
    """
    return np.concatenate(
        [
            (arr_diff[0, :, :6, :] + arr_diff[1, :, :6, :] + arr_diff[2, :, :6, :])
            / 3,
            (arr_diff[0, :, 6:9, :] + arr_diff[1, :, 6:9, :]) / 2,
            (arr_diff[0, :, 9:, :] + arr_diff[1, :, 9:, :] + arr_diff[2, :, 6:11, :])
            / 3,
        ],
        axis=1,
    )


arr_sum_01 = _mean_over_modalities(arr_diff_01)
arr_sum_02 = _mean_over_modalities(arr_diff_02)
# All three chunks now come from arr_diff_12; the first six method rows were
# previously taken from arr_diff_01.
arr_sum_12 = _mean_over_modalities(arr_diff_12)

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

x = [
    "OC",
    "LI",
    "KS",
    "VG",
    "IxG",
    "GB",
    "GC",
    "SC",
    "C+",
    "IG",
    "EG",
    "DL",
    "DLS",
    "LRP",
]

# (rank-distance array, legend label, bar colour) for each compared model pair
bar_specs = [
    (arr_sum_01, "|CNN<sub>1</sub> - CNN<sub>2</sub>|", colors[0]),
    (arr_sum_02, "|Transformer - CNN<sub>1</sub>|", colors[2]),
    (arr_sum_12, "|Transformer - CNN<sub>2</sub>|", colors[5]),
]

for arr_sum, label, color in bar_specs:
    # mean distance per method over datasets and metrics; the same NaN-aware
    # value is used for the bar height and its label so the two cannot disagree
    mean_dist = np.round(np.nanmean(arr_sum, axis=(0, 2)), 1)
    fig.add_trace(
        go.Bar(
            x=x,
            y=mean_dist,
            name=label,
            marker_color=color,
            textposition="inside",
            insidetextanchor="start",
            text=mean_dist,
            error_y=dict(
                type="data",
                array=[np.round(sem(arr_sum[:, i, :].flatten()), 2) for i in range(14)],
            ),
        )
    )

fig.update_yaxes(zerolinewidth=4)

# Axis titles use `title=dict(text=..., font=...)`: the older `titlefont`
# shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    xaxis=dict(
        title=dict(text="XAI Methods", font=dict(size=18)),
        tickfont_size=16,
    ),
    yaxis=dict(
        title=dict(text="Average Rank Distance", font=dict(size=18)),
        tickfont_size=16,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
    legend_title=dict(
        text="Difference",
        font=dict(family="Helvetica", size=16, color="#000000"),
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=1500,
)

# The notebook changes into the project root in its first cell and reads its
# inputs from <cwd>/data, so the figure path is built from the cwd as well.
# Splitting the cwd on "src" and appending "data/..." drops the path separator
# whenever the cwd contains no "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "bar_archdiff_total.png"), scale=2
)
fig.show()

In [None]:
import plotly.graph_objects as go
import plotly.express as px

colors = px.colors.qualitative.G10
fig = go.Figure()

# two-level category axis: modality (outer) and attribution method (inner)
x = [
    [
        "Image",
        "Image",
        "Image",
        "Volume",
        "Volume",
        "Volume",
        "Image",
        "Volume",
        "Point Cloud",
    ],
    [
        "GC",
        "SC",
        "C+",
        "GC",
        "SC",
        "C+",
        "LRP",
        "LRP",
        "LRP",
    ],
]

# (modality index, method slot) behind each bar, in the order of `x`.
# The last label sits in slot 13 for image/volume and slot 10 for point clouds.
bar_slots = [(0, 6), (0, 7), (0, 8), (1, 6), (1, 7), (1, 8), (0, 13), (1, 13), (2, 10)]


def _bar_stats(arr_diff):
    """Return (means, standard errors) of the rank distance for every bar.

    `arr_diff` has axes (modality, dataset, xai, eval); each statistic is
    taken over all datasets and metrics of one (modality, method slot) pair.
    """
    means = [float(np.mean(arr_diff[mod, :, slot, :])) for mod, slot in bar_slots]
    sems = [sem(arr_diff[mod, :, slot, :].flatten()) for mod, slot in bar_slots]
    return means, sems


trace_01, trace_01_sem = _bar_stats(arr_diff_01)
trace_02, trace_02_sem = _bar_stats(arr_diff_02)
trace_12, trace_12_sem = _bar_stats(arr_diff_12)

# (bar heights, standard errors, legend label, bar colour) per model pair
bar_specs = [
    (trace_01, trace_01_sem, "|CNN<sub>1</sub> - CNN<sub>2</sub>|", colors[0]),
    (trace_02, trace_02_sem, "|Transformer - CNN<sub>1</sub>|", colors[2]),
    # label spelled like the other two (no stray space before the closing bar)
    (trace_12, trace_12_sem, "|Transformer - CNN<sub>2</sub>|", colors[5]),
]

for heights, errors, label, color in bar_specs:
    fig.add_trace(
        go.Bar(
            x=x,
            y=heights,
            name=label,
            marker_color=color,
            textposition="inside",
            insidetextanchor="start",
            text=[round(i, 1) for i in heights],
            error_y=dict(type="data", array=errors),
        )
    )

# Dashed reference lines: overall mean rank distance of a modality across all
# three model pairs, drawn over that modality's share of the x axis
# (4/9 image, 4/9 volume, 1/9 point cloud).
fig.add_hline(
    y=np.mean(np.stack((arr_diff_01[0], arr_diff_02[0], arr_diff_12[0]), 0)),
    x0=0,
    x1=4 / 9,
    line_dash="dash",
    line_color="#000000",
    line_width=3,
)

fig.add_hline(
    y=np.mean(np.stack((arr_diff_01[1], arr_diff_02[1], arr_diff_12[1]), 0)),
    x0=4 / 9,
    x1=8 / 9,
    line_dash="dash",
    line_color="#000000",
    line_width=3,
)

# nanmean: the point-cloud slice contains NaN in its unused method slots
fig.add_hline(
    y=np.nanmean(np.stack((arr_diff_01[2], arr_diff_02[2], arr_diff_12[2]), 0)),
    x0=8 / 9,
    x1=9 / 9,
    line_dash="dash",
    line_color="#000000",
    line_width=3,
)

fig.update_yaxes(zerolinewidth=4)

# Axis titles use `title=dict(text=..., font=...)`: the older `titlefont`
# shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    xaxis=dict(
        title=dict(text="XAI Methods", font=dict(size=18)),
        tickfont_size=16,
    ),
    yaxis=dict(
        title=dict(text="Average Rank Distance", font=dict(size=18)),
        tickfont_size=16,
    ),
    font=dict(family="Helvetica", color="#000000", size=16),
    legend_title=dict(
        text="Model Architectures",
        font=dict(family="Helvetica", size=16, color="#000000"),
    ),
    barmode="group",
    template="plotly_white",
    bargap=0.15,  # gap between bars of adjacent location coordinates.
    bargroupgap=0.05,  # gap between bars of the same location coordinate.
    height=500,
    width=1500,
)

# The notebook changes into the project root in its first cell and reads its
# inputs from <cwd>/data, so the figure path is built from the cwd as well.
# Splitting the cwd on "src" and appending "data/..." drops the path separator
# whenever the cwd contains no "src".
fig.write_image(
    os.path.join(os.getcwd(), "data", "figures", "bar_archdiff.png"), scale=2
)
fig.show()