In [2]:
import json
import pandas as pd
import requests
import io
from urllib.request import urlopen
import json
import logging
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from pandas.io.json import json_normalize
from pprint import pprint

In [3]:
# load JSON
def load_data(filename):
    df = pd.read_json(filename)
    data_sorted = df['formAnswers'].apply(pd.Series).join(df[['swymerId', 'workDurationInSeconds']])
    return data_sorted.fillna("")

In [4]:
from google.colab import files

#load json files
v1_q = load_data('drive/MyDrive/MA_XAI/answers-v1-basic-group-2-qualification.json')
v1_m = load_data('drive/MyDrive/MA_XAI/answers-v1-basic-group-2-main.json')

v2_q_p1 = load_data('drive/MyDrive/MA_XAI/answers-v2-salient-group-2-qualification.json')
v2_m_p1 = load_data('drive/MyDrive/MA_XAI/answers-v2-salient-group-2-main.json')

v3_q = load_data('drive/MyDrive/MA_XAI/answers-v3-explanations-group-2-qualification.json')
v3_m = load_data('drive/MyDrive/MA_XAI/answers-v3-explanations-group-2-main.json')

# missing data from second group
v2_q_p2 = load_data('drive/MyDrive/MA_XAI/answers-v2-salient-group-2-qualification-part2.json')
v2_m_p2 = load_data('drive/MyDrive/MA_XAI/answers-v2-salient-group-2-main-part2.json')

v2_q = pd.concat([v2_q_p1, v2_q_p2])
v2_m = pd.concat([v2_m_p1, v2_m_p2])

#load journalist
journos = load_data('drive/MyDrive/MA_XAI/answers-journalists_all_versions.json')


In [5]:
#confert to pandas data frame
df_v1_q = pd.DataFrame(v1_q)
df_v1_m = pd.DataFrame(v1_m)

df_v2_q = pd.DataFrame(v2_q)
df_v2_m = pd.DataFrame(v2_m)

df_v3_q = pd.DataFrame(v3_q)
df_v3_m = pd.DataFrame(v3_m)

df_journos = pd.DataFrame(journos)

In [6]:
# remove columns from df_XX_q which are redundant or not needed for the analysis
reduntant_columns = [
  "METADATA.FEATURE",
  "METADATA.GROUP",
  "confirm.0",
]

df_v1_q = df_v1_q.drop(columns=reduntant_columns)
df_v2_q = df_v2_q.drop(columns=reduntant_columns)
df_v3_q = df_v3_q.drop(columns=reduntant_columns)


# get a list of all column names starting with "tutorial."
tutorial_columns = [col for col in df_v1_q.columns if col.startswith("tutorial.")]
tutorial_columns.append("understand-task")

# remove tutorial columns from both dataframes
df_v1_q = df_v1_q.drop(columns=tutorial_columns)
df_v1_m = df_v1_m.drop(columns=tutorial_columns)

df_v2_q = df_v2_q.drop(columns=tutorial_columns)
df_v2_m = df_v2_m.drop(columns=tutorial_columns)

df_v3_q = df_v3_q.drop(columns=tutorial_columns)
df_v3_m = df_v3_m.drop(columns=tutorial_columns)

df_journos = df_journos.drop(columns=tutorial_columns)


# remove "METADATA.PART" columns from both dataframes
df_v1_q = df_v1_q.drop(columns=["METADATA.PART"])
df_v1_m = df_v1_m.drop(columns=["METADATA.PART"])

df_v2_q = df_v2_q.drop(columns=["METADATA.PART"])
df_v2_m = df_v2_m.drop(columns=["METADATA.PART"])

df_v3_q = df_v3_q.drop(columns=["METADATA.PART"])
df_v3_m = df_v3_m.drop(columns=["METADATA.PART"])

df_journos = df_journos.drop(columns=["METADATA.PART"])

In [7]:
# merge df_v1_q and df_v1_m on swymerId,
# only keep rows where aliasId is in both dataframes (inner join)
# keep all columns from both and suffix them with .qualification and .main (e.g. for POINTS)
df_v1 = pd.merge(df_v1_q, df_v1_m, on='swymerId', how='inner', suffixes=('.qualification', '.main'))


In [8]:
# dataframe of participants who did the qualification task for v1 and the main task for v2
pd.merge(df_v1_q, df_v2_m, on='swymerId', how='inner', suffixes=('.qualification', '.main'))


Unnamed: 0,information-literacy.identify-misinformation,information-seeking.sources.online-newspaper,expectations.sceptical-about-ai,information-seeking.sources.personal-social-networks,personal-code,information-literacy.efficiently-use-subscribed-and-openaccess,information-seeking.purpose.keep-up-to-date,POINTS.qualification,information-seeking.truthfulness.facebook,information-seeking.purpose.general-awareness,...,ai-system-evaluation.understand-why-system-provided-decision,ai-system-evaluation.system-criteria-acceptable,newsitem.7.rating-after-xai,ai-system-evaluation.benefit-of-doubt,newsitem.8.rating-before-xai,ai-system-evaluation.system-decides-consistently,ai-system-evaluation.everyday-work-helpful,ai-system-evaluation.system-has-functionality-for-work,ai-system-evaluation.why-not-helpful-for-everyday-work,workDurationInSeconds.main
0,7,6,7,7,X,7,7,2,7,7,...,7,1,91,6,85,1,,,,493
1,7,6,5,5,X,6,6,0,6,7,...,7,3,82,5,92,4,4.0,2.0,,558
2,6,4,3,6,LMB54,5,6,2,5,6,...,7,6,78,7,52,6,,,,2062
3,6,6,5,6,SDO8,6,6,2,5,7,...,4,3,90,3,55,3,,,,802
4,7,6,5,7,X,6,7,2,6,6,...,2,1,79,1,91,2,,,,970
5,3,1,2,6,Csr54,6,1,2,5,1,...,4,4,100,7,88,4,,,,423
6,5,7,4,6,YAU38,5,6,2,4,6,...,6,6,80,5,76,6,,,,1299
7,6,5,3,1,DRS48,6,6,2,4,6,...,7,7,85,3,22,7,,,,1198
8,5,4,3,4,ldi91,7,5,2,2,6,...,6,3,48,2,36,5,,,,889
9,6,7,4,7,nsp92,6,6,2,3,7,...,7,6,90,6,70,7,,,,2805


In [9]:
# dataframe of participants who are in the main task of v2, but not in either qualification task
df_v2_m[~df_v2_m.swymerId.isin(df_v1_q.swymerId) & ~df_v2_m.swymerId.isin(df_v2_q.swymerId)]

Unnamed: 0,news-dashboard-evaluation.assist-decisions,ai-system-evaluation.most-useful-explanation-feature,newsitem.7.system-evaluation.understand-what-system-does,ai-system-evaluation.easy-to-follow-what-system-does,newsitem.11.system-evaluation.classified-correctly,ai-system-evaluation.tend-to-trust-system-even-without-knowledge,news-dashboard-evaluation.useful-assess-news-articles,newsitem.8.system-evaluation.classified-correctly,newsitem.11.system-evaluation.explanations-comprehensible-and-help-assess,newsitem.8.system-evaluation.explanations-comprehensible-and-help-assess,...,ai-system-evaluation.system-criteria-acceptable,newsitem.7.rating-after-xai,ai-system-evaluation.benefit-of-doubt,newsitem.8.rating-before-xai,ai-system-evaluation.system-decides-consistently,ai-system-evaluation.everyday-work-helpful,ai-system-evaluation.system-has-functionality-for-work,ai-system-evaluation.why-not-helpful-for-everyday-work,swymerId,workDurationInSeconds
1,2,readability,5,4,2,2,5,4,3,4,...,2,30,3,52,1,2.0,3.0,Yes,CX_761234431695,276
59,3,truthfulness-assessment,5,3,2,4,3,3,5,6,...,3,64,5,55,4,5.0,4.0,,CX_961238893699,291
99,3,readability,5,3,6,2,3,5,6,5,...,4,84,5,83,4,,,,CX_381241034216,682
117,3,truthfulness-assessment,3,3,5,1,4,2,1,2,...,2,81,3,49,3,,,,CX_861245307212,2978
0,7,truthfulness-assessment,6,7,6,6,6,6,6,6,...,6,90,6,34,6,5.0,5.0,,CX_431259207049,1386
39,5,publishing-date,4,5,7,5,5,7,5,5,...,5,90,6,35,4,6.0,5.0,,CX_931183560242,1731


In [10]:
# merge df_v2_q and df_v2_m on swymerId,
# only keep rows where aliasId is in both dataframes (inner join)
# keep all columns from both and suffix them with .qualification and .main (e.g. for POINTS)
df_v2 = pd.merge(df_v2_q, df_v2_m, on='swymerId', how='inner', suffixes=('.qualification', '.main'))


In [11]:
df_v3 = pd.merge(df_v3_q, df_v3_m, on='swymerId', how='inner', suffixes=('.qualification', '.main'))


In [12]:
columns_to_rename = {
  "swymerId": "METADATA.swymerId",
  "workDurationInSeconds.main": "METADATA.workDurationInSeconds.main",
  "workDurationInSeconds.qualification": "METADATA.workDurationInSeconds.qualification",
  "personal-code": "METADATA.personal-code"
}

df_v1 = df_v1.rename(columns=columns_to_rename)
df_v2 = df_v2.rename(columns=columns_to_rename)
df_v3 = df_v3.rename(columns=columns_to_rename)
df_journos = df_journos.rename(columns=columns_to_rename)

In [13]:
# order columns alphabetically
df_v1 = df_v1.reindex(sorted(df_v1.columns), axis=1)
df_v2 = df_v2.reindex(sorted(df_v2.columns), axis=1)
df_v3 = df_v3.reindex(sorted(df_v3.columns), axis=1)
df_journos = df_journos.reindex(sorted(df_journos.columns), axis=1)

In [14]:
from pprint import pprint

columns = list(df_v1.columns)
pprint(columns)

['METADATA.FEATURE',
 'METADATA.GROUP',
 'METADATA.personal-code',
 'METADATA.swymerId',
 'METADATA.workDurationInSeconds.main',
 'METADATA.workDurationInSeconds.qualification',
 'POINTS.main',
 'POINTS.qualification',
 'ai-system-evaluation.additional-functionality',
 'ai-system-evaluation.benefit-of-doubt',
 'ai-system-evaluation.classification-comprehensible',
 'ai-system-evaluation.criteria-to-judge-reliability',
 'ai-system-evaluation.easy-to-follow-what-system-does',
 'ai-system-evaluation.everyday-work-helpful',
 'ai-system-evaluation.know-what-will-happen-next-time',
 'ai-system-evaluation.most-useful-explanation-feature',
 'ai-system-evaluation.other-information-wish',
 'ai-system-evaluation.rely-on-ai-system',
 'ai-system-evaluation.sceptical-about-ai-system',
 'ai-system-evaluation.system-able-to-classify-news-articles',
 'ai-system-evaluation.system-able-to-detect-fake-news',
 'ai-system-evaluation.system-can-correctly-classify-news-articles',
 'ai-system-evaluation.system-

In [15]:
# determine which columns only contain values which can be parsed to numbers
def is_number(s):
    if s == "":
        return True # empty cells are treated as possible numbers
    try:
        float(s)
        return True
    except ValueError:
        return False

In [16]:
# determine which columns only contain values which can be parsed to integers or are empty
# change data type of numeric columns to int
numeric_columns = df_v1.columns[df_v1.applymap(is_number).all()]
df_v1[numeric_columns] = df_v1[numeric_columns].apply(pd.to_numeric, downcast='integer')

numeric_columns = df_v2.columns[df_v2.applymap(is_number).all()]
df_v2[numeric_columns] = df_v2[numeric_columns].apply(pd.to_numeric, downcast='integer')

numeric_columns = df_v3.columns[df_v3.applymap(is_number).all()]
df_v3[numeric_columns] = df_v3[numeric_columns].apply(pd.to_numeric, downcast='integer')

numeric_columns = df_journos.columns[df_journos.applymap(is_number).all()]
df_journos[numeric_columns] = df_journos[numeric_columns].apply(pd.to_numeric, downcast='integer')

In [17]:
rating_columns = [col for col in df_v1.columns if ".rating-" in col]

In [18]:
# replace all value in rating columns which are < 0 with 0
df_v1[rating_columns] = df_v1[rating_columns].mask(df_v1[rating_columns] < 0, 0)
df_v2[rating_columns] = df_v2[rating_columns].mask(df_v2[rating_columns] < 0, 0)
df_v3[rating_columns] = df_v3[rating_columns].mask(df_v3[rating_columns] < 0, 0)
df_journos[rating_columns] = df_journos[rating_columns].mask(df_journos[rating_columns] < 0, 0)

# replace all value in rating columns which are > 100 with 100
df_v1[rating_columns] = df_v1[rating_columns].mask(df_v1[rating_columns] > 100, 100)
df_v2[rating_columns] = df_v2[rating_columns].mask(df_v2[rating_columns] > 100, 100)
df_v3[rating_columns] = df_v3[rating_columns].mask(df_v3[rating_columns] > 100, 100)
df_journos[rating_columns] = df_journos[rating_columns].mask(df_journos[rating_columns] > 100, 100)


In [19]:
#save to excel
df = pd.concat([df_v1, df_v2, df_v3], axis=0, ignore_index=True)
df.to_csv("drive/MyDrive/MA_XAI/df_cleaned.csv", index=False)

df_v1.to_csv("drive/MyDrive/MA_XAI/v1_cleaned_laymen.csv", index=False)
df_v2.to_csv("drive/MyDrive/MA_XAI/v2_cleaned_laymen.csv", index=False)
df_v3.to_csv("drive/MyDrive/MA_XAI/v3_cleaned_laymen.csv", index=False)
df_journos.to_csv("drive/MyDrive/MA_XAI/cleaned_journos.csv", index=False)

#Stats


In [20]:
#df = df[df['POINTS.qualification'] == 2]
#df = df[df["POINTS.main"] >= 5]
#df["METADATA.FEATURE"].value_counts()

In [21]:
df['METADATA.FEATURE'].value_counts()


salient         145
basic           142
explanations    141
Name: METADATA.FEATURE, dtype: int64

In [22]:
#df with cheater
df.shape[0]
df_journos.shape[0]

27

In [23]:
#df = df[df['POINTS.qualification'] == 2]
#df = df[df["POINTS.main"] >= 5]
df_journos["METADATA.FEATURE"].value_counts()
#df.shape[0]

explanations    10
salient         10
basic            7
Name: METADATA.FEATURE, dtype: int64