In [169]:
import pandas as pd
import json
import re
import matplotlib.pyplot as plt
import numpy as np
import statistics
from collections import Counter
import plotly
import plotly.graph_objects as go
import plotly.express as px
import colorlover as cl
from itertools import cycle

In [150]:
#Loading Data
f = open("db_dump_greater_thursday.json").read()

#remove image0/image1 vars since it prevents proper df merging
g = re.sub(r'_image[0-2]',"",f)

data = json.loads(g)
df = pd.json_normalize(data)

#hacky solution to remove the panda-appended user_. entry
renamed= [name.split("_.")[-1] for name in df.columns]
col_rename = {i:j for i,j in zip(df.columns,renamed)}
df = df.rename(columns=col_rename)

In [151]:
def createSubDFs(df):
    profiling_df = df[['user_profiling_age', 'user_profiling_position',
        'user_profiling_useOfDP', 'user_profiling_useOfAI',
        'user_profiling_useOfAI_details', 'user_profiling_mlFamiliarity']]

    cf_df = df[[ 'counterfactuals_twoAxisCounterfactuals_understandability',
        'counterfactuals_twoAxisCounterfactuals_usability',
        'counterfactuals_twoAxisCounterfactuals_informativeness',
        'counterfactuals_twoAxisCounterfactuals_value',
        'counterfactuals_prototypeInterpolation_understandability',
        'counterfactuals_prototypeInterpolation_usability',
        'counterfactuals_prototypeInterpolation_informativeness',
        'counterfactuals_prototypeInterpolation_value']]

    ca_df = df[['conceptAttribution_textAttributes_understandability',
        'conceptAttribution_textAttributes_usability',
        'conceptAttribution_textAttributes_informativeness',
        'conceptAttribution_textAttributes_value',
        'conceptAttribution_textAttributes_comments']]

    prototypes_df = df[['prototypes_prototypes_understandability',
        'prototypes_prototypes_usability',
        'prototypes_prototypes_informativeness',
        'prototypes_prototypes_value']]

    trust_df = df[['trustScores_borderlineCases_understandability',
        'trustScores_borderlineCases_usability',
        'trustScores_borderlineCases_value',
        'trustScores_borderlineCases_informativeness',
        'trustScores_borderlineCases_comments']]

    return profiling_df,cf_df,ca_df,prototypes_df,trust_df

In [152]:
#remove NaN columns, careful since missing comments are read as NaN which can lead to very empty dfs
#df = df.dropna()

#get column values while dropping individual NaNs for debugging purposes
# test_data = df["user_profiling_useOfDP"]
# test_data = test_data.dropna().unique().tolist()

#select subdfs grouped by column for easier handling
profiling_df,cf_df,ca_df,prototypes_df,trust_df = createSubDFs(df)


In [159]:

labels = ["Intuitiveness ","Relevance ","Trustworthiness ","Value "]

def stackedBarChartDF(sub_df,title,labels):
    #throw out non-numeric data
    int_df = sub_df.select_dtypes(include="int64")
    data = int_df.values

    #get the occurences in the respective column
    understandability = Counter(data[:,0])
    usability = Counter(data[:,1])
    value = Counter(data[:,2])
    informativeness = Counter(data[:,3])

    fig = go.Figure()
    #throw the arrays together for iterating over them
    eval_arr = [understandability,value,informativeness,usability]
    category_order = ["Strongly disagree","Disagree","Lightly disagree","Neutral","Lightly agree","Agree","Strongly agree"]
    num_order = [0,1,2,3,4,5,6]
    palette = cycle(plotly.colors.sequential.Viridis)
    centerpoint = [x[3]/2 for x in eval_arr]
    print(centerpoint)

    #iterate reversed so we get right ordering of answers
    for num in reversed(num_order[0:3]):
        #xvals are number of responses eg. strongly disagree
        xvals = [understandability[num],value[num],informativeness[num],usability[num]]
        yvals = ["Intuitiveness ","Relevance ","Trustworthiness ","Value "]
        # #if we are at the centerpoint add an offset
        # if (num == 3):
        #     offset = centerpoint
        # else:
        #     offset = [0,0,0,0]
        print([(x*-1)-centerpoint[ind] for ind,x in enumerate(xvals)])
        fig.add_trace(
            go.Bar(
                #-centerpoint[ind]
                x=[(x*-1) for ind,x in enumerate(xvals)],
                y=yvals,
                orientation='h',
                name=category_order[num],
                customdata=xvals,
                hovertemplate = "%{y}: %{customdata}",
                width=0.8,
                marker_color=next(palette)
                    ))
    #regular forward iteration, now xvalues start from zero
    for num in num_order[3:]:
        xvals = [understandability[num],value[num],informativeness[num],usability[num]]
        yvals = ["Intuitiveness ","Relevance ","Trustworthiness ","Value "]
        fig.add_trace(
            go.Bar(
                #
                x=xvals,
                y=yvals,
                orientation='h',
                name=category_order[num],
                customdata=xvals,
                hovertemplate = "%{y}: %{customdata}",
                width=0.8,
                marker_color=next(palette)
                        ))

    fig.update_layout(barmode='relative', 
                    yaxis_autorange='reversed',
                    bargap=0.01,
                    legend_y=0.5,
                    title = "Evaluation of Trust-based Methods",
                    title_x = 0.45,
                    xaxis = dict(
                        tick0 = 0,
                        dtick = 1
                    ),
                    xaxis_title="Responses"
    )
    fig.show()

#boxPlotDF(trust_df,"Trust Based Methods")
stackedBarChartDF(cf_df,"Likert Data",labels)
plt.savefig("StackedBarChart")

#function to get most relevant statistical measures for single column
def descriptiveStatistics(column):
    print(df[column].describe())

#descriptiveStatistics("user_profiling_useOfDP")


[0.0, 1.0, 1.5, 1.5]
[-3.0, -2.0, -2.5, -3.5]
[-2.0, -4.0, -4.5, -3.5]
[0.0, -1.0, -1.5, -1.5]


<Figure size 432x288 with 0 Axes>

In [198]:
#compare all approaches 

#1. step: create average,var score of each rating question per subdf
df_list = [cf_df,ca_df,prototypes_df,trust_df]
def applyFunctionToSubDF(df_list,func):
    avg_container = []
    #for each df, get an average of the ratings for understand,usab,value and info (or any other function you want to apply to the seletced subdf)
    for sub_df in df_list:
        understand_avg = round(func(sub_df.filter(regex=(".*_understandability")).values),2)
        usability_avg = round(func(sub_df.filter(regex=(".*_usability")).values),2)
        value_avg = round(func(sub_df.filter(regex=(".*_value")).values),2)
        info_avg = round(func(sub_df.filter(regex=(".*_informativeness")).values),2)

        sub_df_avg = [info_avg,understand_avg,usability_avg,value_avg]
        avg_container.append(sub_df_avg)

    #make sure that the input ordering in the df_list is the same as in the method call since I only rely on ordering, not on names
    return avg_container[0],avg_container[1],avg_container[2],avg_container[3]

cf_avg,ca_avg,prototypes_avg,trust_avg = applyFunctionToSubDF(df_list,np.mean)
cf_var,ca_var,prototypes_var,trust_var = applyFunctionToSubDF(df_list,np.var)

#2. step: compare them in a table with avg+sdv
comparison_df = pd.DataFrame(index=["Understandability","Usability","Value","Informativeness"])
comparison_df["TrustScores"] = trust_avg
comparison_df["CounterFactuals"] = cf_avg
comparison_df["ConceptAttribution"] = ca_avg
comparison_df["Prototypes"] = prototypes_avg
print(comparison_df)

#3. step: visualize them with boxplots
#for all df
def boxPlotDF(comparison_df,title,labels):
    #throw out non-numeric data
    int_df = comparison_df.select_dtypes(include="float64")
    #set the color palette
    palette = cycle(plotly.colors.sequential.Viridis)

    #iterate over columns and show average spread
    fig = go.Figure()
    for column in int_df:
        fig.add_trace(go.Box(
            y=int_df[column],
            name=column,
            marker_color=next(palette)
            ))
    
    fig.update_layout(
                    title = title,
                    yaxis = dict(
                        dtick = 1
                    ),
                    yaxis_range=[0,7],
                    yaxis_title="Average Rating"
    )
    
    fig.show()
    

boxPlotDF(comparison_df,"Comparison of Annotation Average",labels)


                   TrustScores  CounterFactuals  ConceptAttribution  \
Understandability         4.62             4.43                4.29   
Usability                 4.67             4.93                4.90   
Value                     4.19             4.67                4.81   
Informativeness           4.48             4.33                4.76   

                   Prototypes  
Understandability        4.76  
Usability                5.90  
Value                    4.62  
Informativeness          4.86  
