In [9]:
import pandas as pd
import json
import re
import matplotlib.pyplot as plt
import numpy as np
import statistics
from collections import Counter
import plotly
import plotly.graph_objects as go
import plotly.express as px
import colorlover as cl
from itertools import cycle
import os


In [109]:
#Loading Data
results_filepath = "db_06_07_21_resp.json"

# Read the export as plain text first: the "_image0" ... "_image3" suffixes are
# stripped from the raw JSON before parsing (the original author notes that
# they otherwise prevent proper merging of the entries into one frame).
with open(results_filepath) as f:
    raw_text = f.read()

data = json.loads(re.sub(r'_image[0-3]', "", raw_text))

# Flatten the nested records into columns and discard the two bookkeeping
# columns "__v" and "_id.$oid".
df = pd.json_normalize(data).drop(columns=["__v", "_id.$oid"])

# Keep only the part of every column name that follows the last "_." so the
# prefix added during flattening disappears.
col_rename = {column: column.split("_.")[-1] for column in df.columns}
df = df.rename(columns=col_rename)

# Show the first response as a one-row frame.
df.loc[[0]]


Unnamed: 0,id,userProfiling_age,userProfiling_position,userProfiling_useOfAI,userProfiling_useOfDP,userProfiling_mlFamiliarity,saliencyMaps_globalSaliency_understandability,saliencyMaps_globalSaliency_usability,saliencyMaps_globalSaliency_informativeness,saliencyMaps_globalSaliency_value,...,userProfiling_useOfAI_details,saliencyMaps_globalSaliency_comments,saliencyMaps_localSaliency_comments,conceptAttribution_textAttributes_comments,trustScores_borderlineCases_comments,userProfiling_aiFamiliarity,userProfiling_comments,counterfactuals_twoAxisCounterfactuals_comments,counterfactuals_prototypeInterpolation_comments,userProfiling_position-Comment
0,1623319000000.0,30-40,Assisting physician (Assistenzarzt) for pathol...,in routine diagnostics,in routine diagnostics,1,5,5,6,6,...,,,,,,,,,,


In [119]:
# Columns describing the participant rather than an explanation method.
user_profile_columns = [
    'userProfiling_age',
    'userProfiling_position',
    'userProfiling_useOfDP',
    'userProfiling_useOfAI',
    'userProfiling_useOfAI_details',
    'userProfiling_mlFamiliarity',
]
user_df = df[user_profile_columns]

# Rating dimensions collected for every explanation instance, in the column
# order used for each per-instance frame below.
attributes = ["understandability","usability","value","informativeness"]
explanation_instances = [
    "counterfactuals_prototypeInterpolation",
    "counterfactuals_twoAxisCounterfactuals",
    "saliencyMaps_localSaliency",
    "saliencyMaps_globalSaliency",
    "conceptAttribution_textAttributes",
    "prototypes_prototypes",
    "trustScores_borderlineCases"
]

# One frame per explanation instance: select its "<instance>_<attribute>"
# columns and rename them to the bare attribute names.
dfs = {
    instance: df[[f'{instance}_{attribute}' for attribute in attributes]].rename(
        columns={f'{instance}_{attribute}': attribute for attribute in attributes}
    )
    for instance in explanation_instances
}

dfs


{'counterfactuals_prototypeInterpolation':     understandability  usability  value  informativeness
 0                   6          6      6                7
 1                   7          7      7                7
 2                   4          6      5                4
 3                   6          4      5                5
 4                   3          2      1                2
 5                   7          7      7                7
 6                   2          2      2                2
 7                   7          4      5                6
 8                   5          6      5                5
 9                   6          6      5                5
 10                  5          5      5                5
 11                  5          4      2                4
 12                  7          7      7                7
 13                  6          3      3                3
 14                  4          5      1                2
 15                  5        

In [122]:

def stackedBarChartDF(sub_df: pd.DataFrame, title: str,labels: list, palette, save_fig: bool = False, save_dir: str = "images"):
    """Show a diverging stacked bar chart of 7-point ratings and optionally save it.

    The first four numeric columns of ``sub_df`` are read by position as
    understandability, usability, value and informativeness. For each of them
    the share of every rating 1..7 is drawn as a horizontal bar segment:
    ratings 1-3 extend to the left of zero, ratings 5-7 to the right, and the
    middle rating 4 is split in half across both sides.

    Args:
        sub_df: Ratings frame; non-numeric columns are ignored.
        title: Figure title. Also used as the file name when saving.
        labels: y-axis labels, one per bar (four are plotted).
        palette: Indexable collection of seven colours, one per rating.
        save_fig: When True, write the figure to ``<save_dir>/<title>.png``.
        save_dir: Target directory; created (including parents) if missing.
    """
    #throw out non-numeric data
    int_df = sub_df.select_dtypes(include='number')
    data = int_df.values

    #get the occurrences of each rating in the respective column
    understandability = Counter(data[:,0])
    usability = Counter(data[:,1])
    value = Counter(data[:,2])
    informativeness = Counter(data[:,3])

    # Every segment is normalised by the number of entries in the first column.
    total_count = sum(understandability.values())
    num_order = [1,2,3,4,5,6,7]

    fig = go.Figure() #type: ignore
    category_order = ["Strongly disagree","Disagree","Slightly disagree","Neutral","Slightly agree","Agree","Strongly agree"]

    def add_bar_trace(num, x_format):
        """Add the segment for rating ``num_order[num]``; ``x_format`` sets sign/scale."""
        rating = num_order[num]
        # NOTE(review): bars are emitted as understandability, value,
        # informativeness, usability -- not the column order. Confirm that
        # `labels` is supplied in this same order.
        counts = [understandability[rating],value[rating],informativeness[rating],usability[rating]]

        xvals = [x_format(count)/total_count for count in counts]

        fig.add_trace(
            go.Bar(
                x=xvals,
                y=labels,
                orientation='h',
                name=category_order[num],
                width=0.8,
                marker_color=palette[num]
                    )) #type: ignore

    #negative side: half of the neutral rating first, then ratings 3, 2, 1 outwards
    add_bar_trace(3, lambda x: x * -0.5)
    for num in reversed(range(0,3)):
        add_bar_trace(num, lambda x: x *-1)

    #positive side: other half of the neutral rating, then ratings 5, 6, 7 outwards
    add_bar_trace(3, lambda x: x * 0.5)
    for num in range(4,7):
        add_bar_trace(num, lambda x: x)

    fig.update_layout(barmode='relative', 
                    yaxis_autorange='reversed',
                    legend_y=0.5,
                    title = title,
                    title_x = 0.45,
                    xaxis = {
                        "tick0" : 0,
                        "title": "Frequency",
                        "tickformat" : '%',
                        "range" : [-1,1]
                        },
                    showlegend=False
    )
    fig.show()

    if save_fig:
        # makedirs handles nested paths and an already existing directory.
        os.makedirs(save_dir, exist_ok=True)

        # Name the file after the figure title; the previous code read an
        # undeclared global `name`, which only existed inside the caller's loop.
        fig.write_image(os.path.join(save_dir, f"{title}.png"))


#boxPlotDF(trust_df,"Trust Based Methods")

# y-axis labels for the four bars of every chart.
labels = ["Intuitiveness ","Relevance ","Trustworthiness ","Value "]
# Seven consecutive colours (indices 2..8) of the diverging RdBu scale, one per rating.
palette = plotly.colors.diverging.RdBu[2:9] #type: ignore

# Images go into a directory named after the results file without its extension.
image_dir = os.path.splitext(results_filepath)[0]

# The loop variable is deliberately not called `df`: rebinding `df` here would
# replace the full response frame loaded earlier with the last per-instance
# frame and break every later cell that still expects the complete data.
for name, instance_df in dfs.items():
    stackedBarChartDF(instance_df,name,labels, palette, save_fig=True, save_dir=image_dir)

#function to get most relevant statistical measures for single column
def descriptiveStatistics(column, frame=None):
    """Print the ``describe()`` summary of a single column.

    Args:
        column: Label of the column to summarise.
        frame: DataFrame to read the column from. When omitted, the
            notebook-level ``df`` is used, exactly as before. Passing the frame
            explicitly avoids depending on whatever ``df`` happens to be bound
            to when the function is called.

    Returns:
        None; the summary is printed.
    """
    if frame is None:
        frame = df
    print(frame[column].describe())

#descriptiveStatistics("user_profiling_useOfDP")

Unnamed: 0,counterfactuals_prototypeInterpolation,counterfactuals_twoAxisCounterfactuals,saliencyMaps_localSaliency,saliencyMaps_globalSaliency,conceptAttribution_textAttributes,prototypes_prototypes,trustScores_borderlineCases
0,6.25,4.25,4.5,5.5,6.0,6.0,5.5
1,7.0,6.0,5.0,7.0,7.0,7.0,3.5
2,4.75,3.5,4.0,4.5,4.75,5.5,5.0
3,5.0,6.5,4.5,5.25,5.75,6.25,4.75
4,2.0,1.75,4.25,4.75,2.0,2.75,4.0
5,7.0,6.5,5.25,5.5,6.25,6.5,6.0
6,2.0,1.0,6.0,5.0,3.25,4.75,5.0
7,5.5,4.0,1.75,1.0,2.75,4.0,2.5
8,5.25,4.0,4.0,2.5,3.0,5.0,6.0
9,5.5,5.25,7.0,5.5,5.0,5.5,4.25


In [136]:
#for all df
def boxPlotDF(df,title, labels):
    """Show one box plot per column of ``df`` in a single figure.

    Every column is plotted as given (no filtering of non-numeric data takes
    place) and coloured with the next entry of the Viridis scale, starting
    over when the scale is exhausted. ``labels`` is accepted but not used.
    """
    colours = cycle(plotly.colors.sequential.Viridis)#type: ignore

    fig = go.Figure()#type: ignore
    # Iterating a DataFrame yields its column labels; zip stops after the last one.
    for column, colour in zip(df, colours):
        box = go.Box(y=df[column], name=column, marker_color=colour) #type: ignore
        fig.add_trace(box)

    # Fixed 1..7 rating axis with a tick at every integer.
    layout_options = {
        "title": title,
        "yaxis": dict(dtick = 1),
        "yaxis_range": [1,7],
        "yaxis_title": "Average Rating",
        "showlegend": False,
    }
    fig.update_layout(**layout_options)

    fig.show()

# Mean rating per respondent (row) for every explanation instance, built in
# one step. The comprehension keeps its loop variables local, so the
# notebook-level `df` (the full response frame) is no longer overwritten with
# the last per-instance frame.
aggregated_df = pd.DataFrame(
    {instance: ratings.mean(axis=1) for instance, ratings in dfs.items()}
)

# Order the instances from highest to lowest median of the per-respondent means.
sorted_index = aggregated_df.median().sort_values(ascending=False).index

boxPlotDF(aggregated_df[sorted_index],"Comparison of Annotation Average",labels)
