In [217]:
import pandas as pd
import numpy as np

import statistics

import pingouin as pt
from scipy.stats import f_oneway

import time
import math

import datetime

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import warnings
warnings.filterwarnings('ignore')

# Data structuring

#### Education

In [187]:
data_raw_education = pd.read_csv("education_survey_data_083021.csv")
data_education = data_raw_education.copy()
data_education.shape

(208, 79)

In [188]:
# Drop unnecessary header rows
data_education = data_education[2:]
data_education = data_education.reset_index(drop=True)

In [189]:
cutoff_education = datetime.datetime.strptime('2021-08-23 22:50:00', "%Y-%m-%d %H:%M:%S")
data_education['taken_in_appropiate_time_window'] = data_education['RecordedDate'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S") > cutoff_education)
data_education = data_education[data_education['taken_in_appropiate_time_window'] == True]
data_education.shape

(178, 80)

#### Housing

In [190]:
data_raw_housing = pd.read_csv("housing_survey_data_083021.csv")
data_housing = data_raw_housing.copy()
data_housing.shape

(96, 79)

In [191]:
# Drop unnecessary header rows
data_housing = data_housing[2:]
data_housing = data_housing.reset_index(drop=True)

In [192]:
cutoff_housing = datetime.datetime.strptime('2021-08-25 00:03:00', "%Y-%m-%d %H:%M:%S")
data_housing['taken_in_appropiate_time_window'] = data_housing['RecordedDate'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S") > cutoff_housing)
data_housing = data_housing[data_housing['taken_in_appropiate_time_window'] == True]
data_housing.shape

(88, 80)

### Combine data

In [193]:
# Stack data
data = data_education.append(data_housing, ignore_index=True)
data = data.reset_index(drop=True)
data.shape

(266, 80)

In [194]:
column_mapper = {
    "Q3.1": "prolific_id",
    "Duration (in seconds)": "duration",
    
    "Q5.7": "dt_q1",
    "Q5.13": "dt_q2",
    "Q5.14": "understood_system_did_well_dt",
    "Q5.15": "system_confusing_complicated_dt",
    "Q5.16": "system_confusing_confusing_dt",
    "Q5.17": "understood_system_easy_to_understand_dt",
    "Q5.18": "system_confusing_hard_to_remember_dt",
    "Q5.19": "understood_system_predictable_dt",
    "Q5.20": "understood_system_how_it_works_dt",
    "Q5.21": "understood_system_inputs_and_outputs_dt",
    
    "Q6.7": "lr_q1",
    "Q6.13": "lr_q2",
    "Q6.14": "understood_system_did_well_lr",
    "Q6.15": "system_confusing_complicated_lr",
    "Q6.16": "system_confusing_confusing_lr",
    "Q6.17": "understood_system_easy_to_understand_lr",
    "Q6.18": "system_confusing_hard_to_remember_lr",
    "Q6.19": "understood_system_predictable_lr",
    "Q6.20": "understood_system_how_it_works_lr",
    "Q6.21": "understood_system_inputs_and_outputs_lr",
    

    "Q7.7": "bb_no_shap_q1",
    "Q7.13": "bb_no_shap_q2",    
    "Q7.14": "understood_system_did_well_bb_no_shap",
    "Q7.15": "system_confusing_complicated_bb_no_shap",
    "Q7.16": "system_confusing_confusing_bb_no_shap",
    "Q7.17": "understood_system_easy_to_understand_bb_no_shap",
    "Q7.18": "system_confusing_hard_to_remember_bb_no_shap",
    "Q7.19": "understood_system_predictable_bb_no_shap",
    "Q7.20": "understood_system_how_it_works_bb_no_shap",
    "Q7.21": "understood_system_inputs_and_outputs_bb_no_shap",    
    
    "Q8.12": "bb_shap_q1",
    "Q8.18": "bb_shap_q2",
    "Q8.19": "understood_system_did_well_bb_shap",
    "Q8.20": "system_confusing_complicated_bb_shap",
    "Q8.21": "system_confusing_confusing_bb_shap",
    "Q8.22": "understood_system_easy_to_understand_bb_shap",
    "Q8.23": "system_confusing_hard_to_remember_bb_shap",
    "Q8.24": "understood_system_predictable_bb_shap",
    "Q8.25": "understood_system_how_it_works_bb_shap",
    "Q8.26": "understood_system_inputs_and_outputs_bb_shap"
}

In [195]:
data = data.rename(columns=column_mapper)

In [196]:
# Filter to only those with a valid value for prolific_id
data = data[(data.prolific_id.notnull()) & (data.prolific_id != "test")]
data.Finished.value_counts()

True    266
Name: Finished, dtype: int64

In [197]:
# Filter to only those who finished the survey
data = data[data.Finished=="True"]
data.shape

(266, 80)

In [198]:
# Filter only those that took > 4 minutes and were recorded after 2021-08-23 22:50:00
data.duration = pd.to_numeric(data.duration)
data = data[data.duration > 240]
data.shape
#time.strftime('%H:%M:%S', time.gmtime(math.ceil(data.duration.mean())))

(243, 80)

In [199]:
data.head(10)

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,Strongly agree,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Strongly agree,Strongly agree,Strongly agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Fail,First year grade,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,Somewhat agree,Somewhat disagree,Strongly disagree,Strongly agree,Somewhat agree,Neither agree nor disagree,Strongly agree,Strongly agree,,,,,,,,,,,Fail,First year grade,Somewhat agree,Strongly disagree,Strongly disagree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,Strongly agree,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Strongly agree,Somewhat agree,Somewhat agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Somewhat disagree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True
5,2021-08-24 00:26:55,2021-08-24 00:33:58,IP Address,108.49.32.202,100,422,True,2021-08-24 00:33:58,R_32JgaURTMECz9Om,,,,,42.56390380859375,-70.94629669189453,anonymous,EN,1.0,F_Z1rUb0sFZUCzSHD,signature.png,12292,image/png,6108bae61196a3bd6fc3a24b,Fail,First year grade,Neither agree nor disagree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,,,,,,,,,,,Pass,First year grade,Strongly agree,Strongly disagree,Strongly disagree,Strongly agree,Strongly disagree,Somewhat agree,Strongly agree,Strongly agree,2,Moderately knowledgeable,No,No,No,No,No,6108bae61196a3bd6fc3a24b,education,4,2,1,2,int,shap,dt,True
6,2021-08-24 00:36:46,2021-08-24 00:48:20,IP Address,173.172.230.160,100,694,True,2021-08-24 00:48:21,R_3e4dH3tBn60smlx,,,,,32.76849365234375,-96.68090057373047,anonymous,EN,0.899999976158142,F_2yf6Sza7e7nLgVp,signature.png,15810,image/png,60fd0dd1b8b6c7057d178986,,,,,,,,,,,Fail,First year grade,Somewhat agree,Strongly agree,Strongly agree,Somewhat disagree,Strongly agree,Strongly agree,Somewhat agree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Strongly agree,Somewhat disagree,Strongly disagree,Somewhat agree,Strongly disagree,Somewhat agree,Strongly agree,Neither agree nor disagree,3,Slightly knowledgeable,Yes,No,Yes,No,No,60fd0dd1b8b6c7057d178986,education,1,2,3,1,int,shap,lr,True
7,2021-08-24 01:14:37,2021-08-24 01:22:58,IP Address,104.9.232.139,100,500,True,2021-08-24 01:22:58,R_ulbijuqy9FMsS1r,,,,,32.804595947265625,-97.1937026977539,anonymous,EN,1.0,F_02iLelub1F8Xqw1,signature.png,8586,image/png,610dd9b10888c8a8801619d0,Pass,First year grade,Strongly agree,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Strongly agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,,,,,,,,,,,,,,,,,,,,,Pass,First year grade,Strongly agree,Somewhat agree,Somewhat agree,Somewhat disagree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,4,Slightly knowledgeable,Yes,Yes,Yes,Yes,No,610dd9b10888c8a8801619d0,education,1,4,2,1,int,shap,dt,True
9,2021-08-24 02:39:19,2021-08-24 02:47:14,IP Address,174.103.0.57,100,475,True,2021-08-24 02:47:15,R_31GXhz4W0MZkX6a,,,,,37.75379943847656,-87.0779037475586,anonymous,EN,1.0,F_3n7z0I8VYhhRBnw,signature.png,14343,image/png,60fe250b3984cfdf32c05860,Fail,First year grade,Somewhat agree,Strongly agree,Strongly agree,Strongly disagree,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Neither agree nor disagree,,,,,,,,,,,,,,,,,,,,,Pass,Number of absences,Somewhat agree,Strongly agree,Strongly agree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Neither agree nor disagree,20,Moderately knowledgeable,No,No,No,No,No,60fe250b3984cfdf32c05860,education,2,1,1,2,int,shap,dt,True
10,2021-08-24 03:16:28,2021-08-24 03:21:33,IP Address,69.18.251.234,100,305,True,2021-08-24 03:21:34,R_1nNj4NgSb6jflfd,,,,,38.3572998046875,-97.02010345458984,anonymous,EN,0.899999976158142,F_eA0fmy2Ewx2kJGN,signature.png,15837,image/png,60fd4b01d7a25b34ca38a180,,,,,,,,,,,Fail,First year grade,Strongly agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Pass,First year grade,Strongly agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,,,,,,,,,,,14,Not knowledgeable at all,No,No,No,No,No,60fd4b01d7a25b34ca38a180,education,4,1,1,3,bb,no_shap,lr,True


In [200]:
data.reset_index(inplace=True,drop=True)

In [201]:
# Average time to complete the survey
data.duration = pd.to_numeric(data.duration)
time.strftime('%H:%M:%S', time.gmtime(math.ceil(data.duration.mean())))

'00:09:17'

In [202]:
data.head()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,Strongly agree,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Strongly agree,Strongly agree,Strongly agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Fail,First year grade,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,Somewhat agree,Somewhat disagree,Strongly disagree,Strongly agree,Somewhat agree,Neither agree nor disagree,Strongly agree,Strongly agree,,,,,,,,,,,Fail,First year grade,Somewhat agree,Strongly disagree,Strongly disagree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,Strongly agree,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Strongly agree,Somewhat agree,Somewhat agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Somewhat disagree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True


## Add correct answers

In [203]:
data.bb_profile_1 = pd.to_numeric(data.bb_profile_1)
data.bb_profile_2 = pd.to_numeric(data.bb_profile_2)
data.int_profile_1 = pd.to_numeric(data.int_profile_1)
data.int_profile_1 = pd.to_numeric(data.int_profile_1)

In [204]:
correct_answers_raw = pd.read_csv("survey_analysis_profile_correct_answers.csv")
correct_answers = correct_answers_raw.copy()
correct_answers

Unnamed: 0,dataset_name,model,profile_number,q1_correct_answer,q2_correct_answer
0,education,dt,1,Fail,First year grade
1,education,dt,2,Pass,First year grade
2,education,dt,3,Fail,First year grade
3,education,dt,4,Pass,First year grade
4,education,lr,1,Fail,First year grade
5,education,lr,2,Pass,First year grade
6,education,lr,3,Fail,First year grade
7,education,lr,4,Pass,First year grade
8,education,shap,1,Fail,First year grade
9,education,shap,2,Pass,First year grade


In [205]:
# Join interpretable
temp = correct_answers.rename(columns={"model": "int_model",
                                       "profile_number":"int_profile_1",
                                       "q1_correct_answer": "int_q1_correct_answer",
                                       "q2_correct_answer": "int_q2_correct_answer"})
data = pd.merge(data,temp,on=['dataset_name','int_model','int_profile_1'],how='left')

# Join bb
# Join interpretable
temp = correct_answers.rename(columns={"model": "bb_model",
                                       "profile_number":"bb_profile_1",
                                       "q1_correct_answer":"bb_q1_correct_answer",
                                       "q2_correct_answer":"bb_q2_correct_answer"})
data = pd.merge(data,temp,on=['dataset_name','bb_model','bb_profile_1'],how='left')
data.head()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window,int_q1_correct_answer,int_q2_correct_answer,bb_q1_correct_answer,bb_q2_correct_answer
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,Strongly agree,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Strongly agree,Strongly agree,Strongly agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True,Fail,First year grade,Pass,First year grade
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Fail,First year grade,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True,Pass,First year grade,Fail,First year grade
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,Somewhat agree,Somewhat disagree,Strongly disagree,Strongly agree,Somewhat agree,Neither agree nor disagree,Strongly agree,Strongly agree,,,,,,,,,,,Fail,First year grade,Somewhat agree,Strongly disagree,Strongly disagree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,Strongly agree,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True,Pass,First year grade,Fail,First year grade
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Strongly agree,Somewhat agree,Somewhat agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True,Fail,First year grade,Fail,First year grade
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Somewhat disagree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True,Pass,First year grade,Pass,First year grade


In [206]:
data.tail()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window,int_q1_correct_answer,int_q2_correct_answer,bb_q1_correct_answer,bb_q2_correct_answer
238,2021-08-29 09:28:00,2021-08-29 09:35:31,IP Address,98.219.82.47,100,450,True,2021-08-29 09:35:32,R_3ltzQqqtrS0Q5vs,,,,,25.701095581054688,-80.41580200195312,anonymous,EN,1,F_URTWMm3VawpWWnT,signature.png,12992,image/png,61008822fe73d98313d44903,,,,,,,,,,,Will have a high sales price,House grade,Somewhat agree,Somewhat disagree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Will have a high sales price,Size of living area,Somewhat agree,Somewhat disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,,,,,,,,,,,18,Not knowledgeable at all,No,No,No,No,No,61008822fe73d98313d44903,housing,2,4,4,1,int,no_shap,lr,True,Will NOT have a high sales price,House grade,Will NOT have a high sales price,House grade
239,2021-08-29 15:07:26,2021-08-29 15:14:05,IP Address,73.183.210.184,100,398,True,2021-08-29 15:14:06,R_28FL3kZGEhez92F,,,,,29.830398559570312,-95.37860107421876,anonymous,EN,1,F_SN7dHyodvBsR9PX,signature.png,13299,image/png,6108b2cdc9fed60bc04bede4,,,,,,,,,,,Will have a high sales price,House grade,Strongly disagree,Strongly agree,Strongly agree,Somewhat disagree,Strongly agree,Somewhat disagree,Strongly disagree,Somewhat disagree,,,,,,,,,,,Will have a high sales price,House grade,Strongly agree,Strongly disagree,Strongly disagree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,Somewhat agree,1,Slightly knowledgeable,No,No,Yes,No,No,6108b2cdc9fed60bc04bede4,housing,3,1,1,4,bb,shap,lr,True,Will have a high sales price,House grade,Will have a high sales price,House grade
240,2021-08-29 15:20:11,2021-08-29 15:28:13,IP Address,68.3.129.214,100,481,True,2021-08-29 15:28:13,R_1kZizTo4ygU1PNx,,,,,33.43060302734375,-111.92559814453124,anonymous,EN,1,F_3KDwJ3zoW4Qcxis,signature.png,9920,image/png,611ff50de3f4642b22228331,Will have a high sales price,Size of house above ground,Strongly agree,Strongly agree,Strongly disagree,Strongly disagree,Strongly agree,Strongly disagree,Strongly agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Will have a high sales price,House grade,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,0,Not knowledgeable at all,No,No,No,No,No,611ff50de3f4642b22228331,housing,3,4,3,2,bb,shap,dt,True,Will have a high sales price,Size of living area,Will have a high sales price,House grade
241,2021-08-30 06:41:27,2021-08-30 06:50:40,IP Address,108.188.49.211,100,553,True,2021-08-30 06:50:41,R_1r0mxvhQku8zabN,,,,,28.39390563964844,-81.42160034179686,anonymous,EN,1,F_uxnQp38gs0Nl2ZX,signature.png,10917,image/png,612648079fa0866dd2bce10b,Will NOT have a high sales price,Size of house above ground,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Strongly agree,Somewhat agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Will NOT have a high sales price,House grade,Strongly agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,8,Moderately knowledgeable,Yes,Yes,Yes,No,No,612648079fa0866dd2bce10b,housing,2,3,2,1,bb,shap,dt,True,Will NOT have a high sales price,Size of living area,Will NOT have a high sales price,House grade
242,2021-08-30 08:23:07,2021-08-30 08:33:08,IP Address,96.19.58.128,100,601,True,2021-08-30 08:33:09,R_3G33628RJN6h9B8,,,,,33.46690368652344,-94.07740020751952,anonymous,EN,1,F_1Q9KXJYJhNsZE4L,signature.png,12010,image/png,6111a1562121911e80e5f3cc,Will have a high sales price,House grade,Somewhat agree,Strongly agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,,,,,,,,,,,,,,,,,,,,,Will NOT have a high sales price,House grade,Strongly agree,Strongly disagree,Strongly disagree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Strongly agree,0,Not knowledgeable at all,No,No,No,No,No,6111a1562121911e80e5f3cc,housing,4,1,1,2,int,shap,dt,True,Will have a high sales price,Size of living area,Will NOT have a high sales price,House grade


### Profile questions

In [207]:
def q1_is_correct(row,model_type):
    if model_type == 'bb_model':
        col_name = 'bb_' + row[model_type] + '_q1'
        correct_answer_col_name = 'bb_q1_correct_answer'
    elif model_type == 'int_model':
        col_name = row[model_type] + '_q1'
        correct_answer_col_name = 'int_q1_correct_answer'
    a = row[col_name]
    b = row[correct_answer_col_name]
    return (a==b)
        
def q2_is_correct(row,model_type):
    if model_type == 'bb_model':
        col_name = 'bb_' + row[model_type] + '_q2'
        correct_answer_col_name = 'bb_q2_correct_answer'
    elif model_type == 'int_model':
        col_name = row[model_type] + '_q2'
        correct_answer_col_name = 'int_q2_correct_answer'
    a = row[col_name].split(",")
    a.sort()
    b = row[correct_answer_col_name].split(",")
    b.sort()
    return (a==b)


data['q1_is_correct_int'] = data.apply(q1_is_correct,model_type='int_model',axis=1)
data['q1_is_correct_bb'] = data.apply(q1_is_correct,model_type='bb_model',axis=1)
data['q2_is_correct_int'] = data.apply(q2_is_correct,model_type='int_model',axis=1)
data['q2_is_correct_bb'] = data.apply(q2_is_correct,model_type='bb_model',axis=1)

data.head()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window,int_q1_correct_answer,int_q2_correct_answer,bb_q1_correct_answer,bb_q2_correct_answer,q1_is_correct_int,q1_is_correct_bb,q2_is_correct_int,q2_is_correct_bb
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,Strongly agree,Strongly agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Strongly agree,Strongly agree,Strongly agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True,Fail,First year grade,Pass,First year grade,True,True,True,True
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat disagree,Somewhat disagree,Fail,First year grade,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat agree,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True,Pass,First year grade,Fail,First year grade,True,True,False,True
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,Somewhat agree,Somewhat disagree,Strongly disagree,Strongly agree,Somewhat agree,Neither agree nor disagree,Strongly agree,Strongly agree,,,,,,,,,,,Fail,First year grade,Somewhat agree,Strongly disagree,Strongly disagree,Strongly agree,Somewhat disagree,Somewhat agree,Strongly agree,Strongly agree,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True,Pass,First year grade,Fail,First year grade,True,True,True,True
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Strongly agree,Somewhat agree,Somewhat agree,Strongly agree,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,Somewhat agree,Neither agree nor disagree,Neither agree nor disagree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True,Fail,First year grade,Fail,First year grade,True,True,True,True
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,Somewhat disagree,Somewhat agree,Somewhat agree,Somewhat disagree,Somewhat agree,Neither agree nor disagree,Somewhat disagree,Somewhat agree,,,,,,,,,,,Pass,First year grade,Somewhat agree,Somewhat agree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Neither agree nor disagree,Somewhat agree,Somewhat agree,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True,Pass,First year grade,Pass,First year grade,True,True,True,True


### Likert questions

#### Recode likert scale questions

In [208]:
# Recode likert scale questions
likert_mapper = {
    "Strongly agree": 5,
    "Somewhat agree": 4,
    "Neither agree nor disagree": 3,
    "Somewhat disagree": 2,
    "Strongly disagree": 1
}

likert_reverse_mapper = {
    "Strongly agree": 1,
    "Somewhat agree": 2,
    "Neither agree nor disagree": 3,
    "Somewhat disagree": 4,
    "Strongly disagree": 5
}

In [209]:
system_understanding_likert_mappers = {
"understood_system_did_well": likert_mapper,
"system_confusing_complicated": likert_reverse_mapper,
"system_confusing_confusing": likert_reverse_mapper,
"understood_system_easy_to_understand": likert_mapper,
"system_confusing_hard_to_remember": likert_reverse_mapper,
"understood_system_predictable": likert_mapper,
"understood_system_how_it_works": likert_mapper,
"understood_system_inputs_and_outputs": likert_mapper
}

for q in system_understanding_likert_mappers:
    for name in data.columns:
        if q in name:
            data[name] = data[name].map(system_understanding_likert_mappers[q])
            # Convert to numeric
            #data[name] = pd.to_numeric(data[name])

#system_understanding_bb_columns = [col for col in data.columns if ('system_understanding' in col and '_bb' in col)]
#system_understanding_int_columns = [col for col in data.columns if ('system_understanding' in col and ('_dt' in col or '_lr' in col))]

In [210]:
data.head(5)

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window,int_q1_correct_answer,int_q2_correct_answer,bb_q1_correct_answer,bb_q2_correct_answer,q1_is_correct_int,q1_is_correct_bb,q2_is_correct_int,q2_is_correct_bb
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,5.0,1.0,2.0,2.0,4.0,4.0,4.0,4.0,,,,,,,,,,,Pass,First year grade,5.0,1.0,1.0,2.0,2.0,4.0,4.0,4.0,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True,Fail,First year grade,Pass,First year grade,True,True,True,True
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,2.0,2.0,2.0,4.0,2.0,2.0,2.0,2.0,Fail,First year grade,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True,Pass,First year grade,Fail,First year grade,True,True,False,True
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,4.0,4.0,5.0,5.0,2.0,3.0,5.0,5.0,,,,,,,,,,,Fail,First year grade,4.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True,Pass,First year grade,Fail,First year grade,True,True,True,True
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,4.0,2.0,3.0,4.0,1.0,4.0,4.0,5.0,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,4.0,3.0,3.0,4.0,2.0,3.0,4.0,4.0,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True,Fail,First year grade,Fail,First year grade,True,True,True,True
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,2.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,,,,,,,,,,,Pass,First year grade,4.0,2.0,2.0,3.0,2.0,3.0,4.0,4.0,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True,Pass,First year grade,Pass,First year grade,True,True,True,True


#### Get likert scale questions mean and standard devation

In [211]:
#likert_question_columns = [col for col in data.columns if 'system_understanding' in col]
#likert_questions = data.filter(items=likert_question_columns)
likert_questions = data.filter(regex='understood_system|system_confusing')
likert_questions

Unnamed: 0,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap
0,,,,,,,,,5.0,1.0,2.0,2.0,4.0,4.0,4.0,4.0,,,,,,,,,5.0,1.0,1.0,2.0,2.0,4.0,4.0,4.0
1,,,,,,,,,2.0,2.0,2.0,4.0,2.0,2.0,2.0,2.0,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,,,,,,,,
2,4.0,4.0,5.0,5.0,2.0,3.0,5.0,5.0,,,,,,,,,4.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,,,,,,,,
3,4.0,2.0,3.0,4.0,1.0,4.0,4.0,5.0,,,,,,,,,,,,,,,,,4.0,3.0,3.0,4.0,2.0,3.0,4.0,4.0
4,,,,,,,,,2.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,,,,,,,,,4.0,2.0,2.0,3.0,2.0,3.0,4.0,4.0
5,3.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,,,,,,,,,,,,,,,,,5.0,5.0,5.0,5.0,5.0,4.0,5.0,5.0
6,,,,,,,,,4.0,1.0,1.0,2.0,1.0,5.0,4.0,4.0,,,,,,,,,5.0,4.0,5.0,4.0,5.0,4.0,5.0,3.0
7,5.0,2.0,3.0,3.0,1.0,3.0,4.0,4.0,,,,,,,,,,,,,,,,,5.0,2.0,2.0,2.0,3.0,4.0,3.0,4.0
8,4.0,1.0,1.0,1.0,2.0,3.0,3.0,3.0,,,,,,,,,,,,,,,,,4.0,1.0,1.0,2.0,2.0,3.0,3.0,3.0
9,,,,,,,,,5.0,2.0,2.0,3.0,2.0,3.0,4.0,4.0,5.0,2.0,2.0,3.0,2.0,3.0,4.0,4.0,,,,,,,,


In [212]:
likert_questions.stack().std()

1.2066439916298013

In [213]:
likert_questions.stack().mean()

3.297735460627895

#### Cronbach's alpha

In [224]:
# Understood system constuct
understood_system_likert_questions = pd.DataFrame()

#understood_system_did_well_cols = [col for col in data.columns if ('understood_system_did_well' in col)]
understood_system_items = ['understood_system_did_well',
                           'understood_system_easy_to_understand',
                           'understood_system_predictable',
                           'understood_system_how_it_works',
                           'understood_system_inputs_and_outputs']
for i in understood_system_items:
    temp = data.filter(regex=i)
    understood_system_likert_questions[i] = temp.melt()['value']
    
pt.cronbach_alpha(data=understood_system_likert_questions)

(0.8274695808415724, array([0.81 , 0.844]))

In [225]:
# Understood system constuct
system_confusing_likert_questions = pd.DataFrame()

#understood_system_did_well_cols = [col for col in data.columns if ('understood_system_did_well' in col)]
system_confusing_items = ['system_confusing_complicated',
                           'system_confusing_confusing',
                           'system_confusing_hard_to_remember']

for i in system_confusing_items:
    temp = data.filter(regex=i)
    system_confusing_likert_questions[i] = temp.melt()['value']
    
pt.cronbach_alpha(data=system_confusing_likert_questions)

(0.7871773693163944, array([0.763, 0.809]))

#### Calculate system understanding

In [214]:
def calculate_mean_understood_system(x, model_type):
    if model_type == 'int_model':
        columns = [col for col in data.columns if ('understood_system' in col and ('_' + x['int_model']) in col)]
    elif model_type == 'bb_model':
        columns = [col for col in data.columns if ('understood_system' in col and ('_bb_' + x['bb_model']) in col)]
    total = 0
    n = 0
    for col in columns:
        total += x[col]
        n += 1
        
    mean = total/n    
    return mean

def calculate_mean_system_confusing(x, model_type):
    if model_type == 'int_model':
        columns = [col for col in data.columns if ('system_confusing' in col and ('_' + x['int_model']) in col)]
    elif model_type == 'bb_model':
        columns = [col for col in data.columns if ('system_confusing' in col and ('_bb_' + x['bb_model']) in col)]
    total = 0
    n = 0
    for col in columns:
        total += x[col]
        n += 1
        
    mean = total/n    
    return mean

In [216]:
data['understood_system_bb'] = data.apply(calculate_mean_understood_system,model_type='bb_model',axis=1)
data['understood_system_int'] = data.apply(calculate_mean_understood_system,model_type='int_model',axis=1)

data['system_confusing_bb'] = data.apply(calculate_mean_system_confusing,model_type='bb_model',axis=1)
data['system_confusing_int'] = data.apply(calculate_mean_system_confusing,model_type='int_model',axis=1)
data.head()

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,duration,Finished,RecordedDate,ResponseId,RecipientLastName,RecipientFirstName,RecipientEmail,ExternalReference,LocationLatitude,LocationLongitude,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q2.1_Id,Q2.1_Name,Q2.1_Size,Q2.1_Type,prolific_id,dt_q1,dt_q2,understood_system_did_well_dt,system_confusing_complicated_dt,system_confusing_confusing_dt,understood_system_easy_to_understand_dt,system_confusing_hard_to_remember_dt,understood_system_predictable_dt,understood_system_how_it_works_dt,understood_system_inputs_and_outputs_dt,lr_q1,lr_q2,understood_system_did_well_lr,system_confusing_complicated_lr,system_confusing_confusing_lr,understood_system_easy_to_understand_lr,system_confusing_hard_to_remember_lr,understood_system_predictable_lr,understood_system_how_it_works_lr,understood_system_inputs_and_outputs_lr,bb_no_shap_q1,bb_no_shap_q2,understood_system_did_well_bb_no_shap,system_confusing_complicated_bb_no_shap,system_confusing_confusing_bb_no_shap,understood_system_easy_to_understand_bb_no_shap,system_confusing_hard_to_remember_bb_no_shap,understood_system_predictable_bb_no_shap,understood_system_how_it_works_bb_no_shap,understood_system_inputs_and_outputs_bb_no_shap,bb_shap_q1,bb_shap_q2,understood_system_did_well_bb_shap,system_confusing_complicated_bb_shap,system_confusing_confusing_bb_shap,understood_system_easy_to_understand_bb_shap,system_confusing_hard_to_remember_bb_shap,understood_system_predictable_bb_shap,understood_system_how_it_works_bb_shap,understood_system_inputs_and_outputs_bb_shap,Q9.1,Q9.2,Q9.3_1,Q9.3_2,Q9.3_3,Q9.3_4,Q9.4,PROLIFIC_PID,dataset_name,bb_profile_1,bb_profile_2,int_profile_1,int_profile_2,first_model,bb_model,int_model,taken_in_appropiate_time_window,int_q1_correct_answer,int_q2_correct_answer,bb_q1_correct_answer,bb_q2_correct_answer,q1_is_correct_int,q1_is_correct_bb,q2_is_correct_int,q2_is_correct_bb,system_understanding_bb,system_understanding_int,understood_system_bb,understood_system_int,system_confusing_bb,system_confusing_int
0,2021-08-23 22:54:23,2021-08-23 23:00:14,IP Address,104.35.20.232,100,351,True,2021-08-23 23:00:15,R_yqnE1JHcvGdFz7H,,,,,33.92999267578125,-117.94770050048828,anonymous,EN,1.0,F_z6iKgOtuJxyuhMZ,signature.png,14093,image/png,6102d3c05c31cb5c38158b34,,,,,,,,,,,Fail,First year grade,5.0,1.0,2.0,2.0,4.0,4.0,4.0,4.0,,,,,,,,,,,Pass,First year grade,5.0,1.0,1.0,2.0,2.0,4.0,4.0,4.0,8,Not knowledgeable at all,No,No,Yes,No,No,6102d3c05c31cb5c38158b34,education,2,4,1,4,int,shap,lr,True,Fail,First year grade,Pass,First year grade,True,True,True,True,1.333333,2.333333,3.8,3.8,1.333333,2.333333
1,2021-08-23 23:24:31,2021-08-23 23:31:44,IP Address,47.147.135.146,100,433,True,2021-08-23 23:31:45,R_2sErBZc7mlGv79v,,,,,33.79029846191406,-118.12149810791016,anonymous,EN,1.0,F_1P8lvw788zdBWFh,signature.png,14986,image/png,610b88a14eed14f2b8b17b86,,,,,,,,,,,Pass,Number of absences,2.0,2.0,2.0,4.0,2.0,2.0,2.0,2.0,Fail,First year grade,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,,,,,,,,,,,16,Not knowledgeable at all,No,No,No,No,No,610b88a14eed14f2b8b17b86,education,1,3,4,1,bb,no_shap,lr,True,Pass,First year grade,Fail,First year grade,True,True,False,True,3.333333,2.0,4.0,2.4,3.333333,2.0
2,2021-08-23 23:55:52,2021-08-24 00:01:24,IP Address,68.172.164.31,100,332,True,2021-08-24 00:01:26,R_BEeWM93HGxSRK8N,,,,,43.21229553222656,-77.69950103759764,anonymous,EN,0.899999976158142,F_1cTaUoQcmSH3xbZ,signature.png,12886,image/png,610c24152939c927ad421fa9,Pass,First year grade,4.0,4.0,5.0,5.0,2.0,3.0,5.0,5.0,,,,,,,,,,,Fail,First year grade,4.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,,,,,,,,,,,5,Slightly knowledgeable,No,No,Yes,No,No,610c24152939c927ad421fa9,education,3,2,4,3,int,no_shap,dt,True,Pass,First year grade,Fail,First year grade,True,True,True,True,4.666667,3.666667,4.6,4.4,4.666667,3.666667
3,2021-08-24 00:12:19,2021-08-24 00:21:13,IP Address,24.165.25.108,100,534,True,2021-08-24 00:21:14,R_PAMuZ9Y8gMdN4S5,,,,,21.28169250488281,-157.8249969482422,anonymous,EN,0.899999976158142,F_3k1cfNl8IpZWqxi,signature.png,15188,image/png,60fd5178af4a40ee405f57cf,Fail,First year grade,4.0,2.0,3.0,4.0,1.0,4.0,4.0,5.0,,,,,,,,,,,,,,,,,,,,,Fail,First year grade,4.0,3.0,3.0,4.0,2.0,3.0,4.0,4.0,0,Slightly knowledgeable,No,No,No,No,No,60fd5178af4a40ee405f57cf,education,1,2,1,4,bb,shap,dt,True,Fail,First year grade,Fail,First year grade,True,True,True,True,2.666667,2.0,3.8,4.2,2.666667,2.0
4,2021-08-24 00:19:30,2021-08-24 00:28:18,IP Address,107.133.143.192,100,527,True,2021-08-24 00:28:19,R_7UR8fT0mMTamY1P,,,,,34.16239929199219,-118.12750244140624,anonymous,EN,1.0,F_3lFk7l2JISgLF69,signature.png,18484,image/png,60fc576e3a4df1ba622a76a1,,,,,,,,,,,Pass,First year grade,2.0,2.0,2.0,2.0,2.0,3.0,2.0,4.0,,,,,,,,,,,Pass,First year grade,4.0,2.0,2.0,3.0,2.0,3.0,4.0,4.0,2,Not knowledgeable at all,No,No,No,No,No,60fc576e3a4df1ba622a76a1,education,2,4,4,1,int,shap,lr,True,Pass,First year grade,Pass,First year grade,True,True,True,True,2.0,2.0,3.6,2.6,2.0,2.0


# Analysis

In [233]:
def generate_stats(data):
    data['q1_is_correct_int'] = data['q1_is_correct_int'].apply(lambda x: 1 if x == True else 0)
    data['q1_is_correct_bb'] = data['q1_is_correct_bb'].apply(lambda x: 1 if x == True else 0)
    data['q2_is_correct_int'] = data['q2_is_correct_int'].apply(lambda x: 1 if x == True else 0)
    data['q2_is_correct_bb'] = data['q2_is_correct_bb'].apply(lambda x: 1 if x == True else 0)

    print("Profile Task 1:")
    print("  q1_is_correct_int_mean ", data.q1_is_correct_int.mean())
    print("  q1_is_correct_bb_mean ", data.q1_is_correct_bb.mean())
    print(pt.ttest(data.q1_is_correct_int, data.q1_is_correct_bb, paired=True))
    print("\n--------------------\n")
    print("Profile Task 2:")
    print("  q2_is_correct_int_mean ", data.q2_is_correct_int.mean())
    print("  q2_is_correct_bb_mean ", data.q2_is_correct_bb.mean())
    print(pt.ttest(data.q2_is_correct_int, data.q2_is_correct_bb, paired=True))
    print("\n--------------------\n")
    print("Understood System Construct:")
    print("  undersood_system_int_mean", data.understood_system_int.mean())
    print("  undersood_system_bb_mean ", data.understood_system_bb.mean())
    print(pt.ttest(data.understood_system_int, data.understood_system_bb, paired=True))
    print("\n--------------------\n")
    print("System Confusing Construct:")
    print("  system_confusing_int_mean", data.system_confusing_int.mean())
    print("  system_confusing_bb_mean ", data.system_confusing_bb.mean())
    print(pt.ttest(data.system_confusing_int, data.system_confusing_bb, paired=True))

In [235]:
'''
print("--------------------")

data_no_shap = data[data['bb_model'] == 'no_shap']
data_shap = data[data['bb_model'] == 'shap']

print("q1_is_correct_bb_no_shap_mean ", data_no_shap.q1_is_correct_bb.mean())
print("q1_is_correct_bb_shap_mean ", data_shap.q1_is_correct_bb.mean())
print(pt.ttest(data_no_shap.q1_is_correct_bb, data_shap.q1_is_correct_bb, paired=True))
print("--------------------")
print("q2_is_correct_bb_no_shap_mean ", data_no_shap.q2_is_correct_bb.mean())
print("q2_is_correct_bb_shap_mean ", data_shap.q2_is_correct_bb.mean())
print(pt.ttest(data_no_shap.q2_is_correct_bb, data_shap.q2_is_correct_bb, paired=True))
print("--------------------")
print("system_understanding_bb_no_shap_mean", data_no_shap.system_understanding_bb.mean())
print("system_understanding_bb_shap_mean ", data_shap.system_understanding_bb.mean())
print(pt.ttest(data_no_shap.system_understanding_bb, data_shap.system_understanding_bb, paired=True))
'''

'\nprint("--------------------")\n\ndata_no_shap = data[data[\'bb_model\'] == \'no_shap\']\ndata_shap = data[data[\'bb_model\'] == \'shap\']\n\nprint("q1_is_correct_bb_no_shap_mean ", data_no_shap.q1_is_correct_bb.mean())\nprint("q1_is_correct_bb_shap_mean ", data_shap.q1_is_correct_bb.mean())\nprint(pt.ttest(data_no_shap.q1_is_correct_bb, data_shap.q1_is_correct_bb, paired=True))\nprint("--------------------")\nprint("q2_is_correct_bb_no_shap_mean ", data_no_shap.q2_is_correct_bb.mean())\nprint("q2_is_correct_bb_shap_mean ", data_shap.q2_is_correct_bb.mean())\nprint(pt.ttest(data_no_shap.q2_is_correct_bb, data_shap.q2_is_correct_bb, paired=True))\nprint("--------------------")\nprint("system_understanding_bb_no_shap_mean", data_no_shap.system_understanding_bb.mean())\nprint("system_understanding_bb_shap_mean ", data_shap.system_understanding_bb.mean())\nprint(pt.ttest(data_no_shap.system_understanding_bb, data_shap.system_understanding_bb, paired=True))\n'

In [236]:
def generate_system_level_stats(data):
    print("Decision Tree")
    print("Profile Task 1 (q1_is_correct_int_mean) ", data[data['int_model']=='dt'].q1_is_correct_int.mean())
    print("Profile Task 2 (q2_is_correct_int_mean) ", data[data['int_model']=='dt'].q2_is_correct_int.mean())
    print("understood_system_int_mean ", data[data['int_model']=='dt'].understood_system_int.mean())
    print("system_confusing_int_mean ", data[data['int_model']=='dt'].system_confusing_int.mean())

    print("\n--------------------\n")

    print("Linear Model")
    print("Profile Task 1 (q1_is_correct_int_mean) ", data[data['int_model']=='lr'].q1_is_correct_int.mean())
    print("Profile Task 2 (q2_is_correct_int_mean) ", data[data['int_model']=='lr'].q2_is_correct_int.mean())
    print("understood_system_int_mean ", data[data['int_model']=='lr'].understood_system_int.mean())
    print("system_confusing_int_mean ", data[data['int_model']=='lr'].system_confusing_int.mean())


    print("\n--------------------\n")

    print("BB (no SHAP) Model")
    print("Profile Task 1 (q1_is_correct_bb_mean) ", data[data['bb_model']=='no_shap'].q1_is_correct_bb.mean())
    print("Profile Task 2 (q2_is_correct_bb_mean) ", data[data['bb_model']=='no_shap'].q2_is_correct_bb.mean())
    print("understood_system_bb_mean ", data[data['bb_model']=='no_shap'].understood_system_bb.mean())
    print("system_confusing_bb_mean ", data[data['bb_model']=='no_shap'].system_confusing_bb.mean())

    print("\n--------------------\n")

    print("BB (w/ SHAP) Model")
    print("Profile Task 1 (q1_is_correct_bb_mean) ", data[data['bb_model']=='shap'].q1_is_correct_bb.mean())
    print("Profile Task 2 (q2_is_correct_bb_mean) ", data[data['bb_model']=='shap'].q2_is_correct_bb.mean())
    print("understood_system_bb_mean ", data[data['bb_model']=='shap'].understood_system_bb.mean())
    print("system_confusing_bb_mean ", data[data['bb_model']=='shap'].system_confusing_bb.mean())
    
    print("\n--------------------\n")
    
    print("Profile Task 1:")
    print(f_oneway(data[data['int_model']=='dt'].q1_is_correct_int,
                   data[data['int_model']=='lr'].q1_is_correct_int,
                   data[data['bb_model']=='shap'].q1_is_correct_bb,
                   data[data['bb_model']=='no_shap'].q1_is_correct_bb))
    
    print("Profile Task 2:")
    print(f_oneway(data[data['int_model']=='dt'].q2_is_correct_int,
                   data[data['int_model']=='lr'].q2_is_correct_int,
                   data[data['bb_model']=='shap'].q2_is_correct_bb,
                   data[data['bb_model']=='no_shap'].q2_is_correct_bb))
    
    print("Understood System Construct:")
    print(f_oneway(data[data['int_model']=='dt'].understood_system_int,
                   data[data['int_model']=='lr'].understood_system_int,
                   data[data['bb_model']=='shap'].understood_system_bb,
                   data[data['bb_model']=='no_shap'].understood_system_bb))
    
    print("System Confusing Construct:")
    print(f_oneway(data[data['int_model']=='dt'].system_confusing_int,
                   data[data['int_model']=='lr'].system_confusing_int,
                   data[data['bb_model']=='shap'].system_confusing_bb,
                   data[data['bb_model']=='no_shap'].system_confusing_bb))

In [251]:
temp = pd.DataFrame()
temp['dt'] = data[data['int_model']=='dt'].system_confusing_int
temp['lr'] = data[data['int_model']=='lr'].system_confusing_int
temp['shap'] = data[data['bb_model']=='shap'].system_confusing_bb
temp['no_shap'] = data[data['bb_model']=='no_shap'].system_confusing_bb

print(temp.head())

pt.rm_anova(data=temp)

         dt  lr      shap   no_shap
2  3.666667 NaN       NaN  4.666667
3  2.000000 NaN  2.666667       NaN
5  4.000000 NaN  5.000000       NaN
7  2.000000 NaN  2.333333       NaN
8  1.333333 NaN  1.333333       NaN


ValueError: DV and data must be specified

## Whole sample (all datasets)

In [None]:
generate_stats(data)

## Datasets by domain

#### Education

In [237]:
education_data = data[data['dataset_name'] == 'education']
generate_stats(education_data)

Profile Task 1:
  q1_is_correct_int_mean  0.8260869565217391
  q1_is_correct_bb_mean  0.8695652173913043
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -1.220394  160   two-sided  0.224111  [-0.11, 0.03]  0.120891  0.182   

           power  
T-test  0.331941  

--------------------

Profile Task 2:
  q2_is_correct_int_mean  0.8571428571428571
  q2_is_correct_bb_mean  0.9192546583850931
               T  dof alternative     p-val         CI95%   cohen-d   BF10  \
T-test -1.905194  160   two-sided  0.058549  [-0.13, 0.0]  0.197452  0.513   

           power  
T-test  0.702074  

--------------------

Understood System Construct:
  undersood_system_int_mean 3.6434782608695655
  undersood_system_bb_mean  3.7652173913043483
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -1.725313  160   two-sided  0.086402  [-0.26, 0.02]  0.160991  0.374   

           power  
T-test  0.528142  

--------------------

System Co

In [238]:
generate_system_level_stats(education_data)

Decision Tree
Profile Task 1 (q1_is_correct_int_mean)  0.8690476190476191
Profile Task 2 (q2_is_correct_int_mean)  0.7857142857142857
understood_system_int_mean  3.797619047619047
system_confusing_int_mean  2.979919678714859

--------------------

Linear Model
Profile Task 1 (q1_is_correct_int_mean)  0.7792207792207793
Profile Task 2 (q2_is_correct_int_mean)  0.935064935064935
understood_system_int_mean  3.475324675324675
system_confusing_int_mean  2.5238095238095237

--------------------

BB (no SHAP) Model
Profile Task 1 (q1_is_correct_bb_mean)  0.9571428571428572
Profile Task 2 (q2_is_correct_bb_mean)  0.8714285714285714
understood_system_bb_mean  3.991428571428571
system_confusing_bb_mean  3.3857142857142857

--------------------

BB (w/ SHAP) Model
Profile Task 1 (q1_is_correct_bb_mean)  0.8021978021978022
Profile Task 2 (q2_is_correct_bb_mean)  0.9560439560439561
understood_system_bb_mean  3.5912087912087913
system_confusing_bb_mean  2.769230769230769

--------------------

Profi

#### Housing

In [164]:
housing_data = data[data['dataset_name'] == 'housing']
generate_stats(housing_data)

q1_is_correct_int_mean  0.6951219512195121
q1_is_correct_bb_mean  0.7560975609756098
               T  dof alternative     p-val          CI95%   cohen-d  BF10  \
T-test -0.961811   81   two-sided  0.339007  [-0.19, 0.07]  0.136136  0.19   

           power  
T-test  0.229848  
--------------------
q2_is_correct_int_mean  0.6219512195121951
q2_is_correct_bb_mean  0.7073170731707317
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -1.222207   81   two-sided  0.225174  [-0.22, 0.05]  0.180448  0.249   

           power  
T-test  0.365107  
--------------------
system_understanding_int_mean 2.9074074074074074
system_understanding_bb_mean  3.288109756097561
               T  dof alternative     p-val          CI95%   cohen-d    BF10  \
T-test -3.737051   80   two-sided  0.000348  [-0.6, -0.18]  0.504099  62.914   

           power  
T-test  0.994163  
--------------------
q1_is_correct_bb_no_shap_mean  0.45454545454545453
q1_is_correct_bb_shap_mean  

#### Healthcare

In [110]:
healthcare_data = data[data['dataset_name'] == 'healthcare']
generate_stats(healthcare_data)

q1_is_correct_int_mean  nan
q1_is_correct_bb_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)
--------------------
q2_is_correct_int_mean  nan
q2_is_correct_bb_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)
--------------------
system_understanding_int_mean nan
system_understanding_bb_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)
--------------------
q1_is_correct_bb_no_shap_mean  nan
q1_is_correct_bb_shap_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)
--------------------
q2_is_correct_bb_no_shap_mean  nan
q2_is_correct_bb_shap_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)
--------------------
system_understanding_bb_no_shap_mean nan
system_understanding_bb_shap_mean  nan
Ttest_indResult(statistic=nan, pvalue=nan)


## Comparison across datasets

In [111]:
print("education_q1_is_correct_int_mean ", education_data.q1_is_correct_int.mean())
print("housing_q1_is_correct_int_mean ", housing_data.q1_is_correct_int.mean())
print("healtcare_q1_is_correct_int_mean ", healthcare_data.q1_is_correct_int.mean())
print(f_oneway(education_data.q1_is_correct_int, housing_data.q1_is_correct_int,healthcare_data.q1_is_correct_int))
print("--------------------")
print("education_q2_is_correct_int_mean ", education_data.q2_is_correct_int.mean())
print("housing_q2_is_correct_int_mean ", housing_data.q2_is_correct_int.mean())
print("healtcare_q2_is_correct_int_mean ", healthcare_data.q2_is_correct_int.mean())
print(f_oneway(education_data.q2_is_correct_int, housing_data.q2_is_correct_int,healthcare_data.q2_is_correct_int))
print("--------------------")
print("education_system_understanding_int_mean", education_data.system_understanding_int.mean())
print("housing_system_understanding_int_mean", housing_data.system_understanding_int.mean())
print("healthcare_system_understanding_int_mean", healthcare_data.system_understanding_int.mean())
print(f_oneway(education_data.system_understanding_int,housing_data.system_understanding_int,healthcare_data.system_understanding_int))

education_q1_is_correct_int_mean  0.8260869565217391
housing_q1_is_correct_int_mean  0.6951219512195121
healtcare_q1_is_correct_int_mean  nan
F_onewayResult(statistic=nan, pvalue=nan)
--------------------
education_q2_is_correct_int_mean  0.8571428571428571
housing_q2_is_correct_int_mean  0.6219512195121951
healtcare_q2_is_correct_int_mean  nan
F_onewayResult(statistic=nan, pvalue=nan)
--------------------
education_system_understanding_int_mean 3.3140625
housing_system_understanding_int_mean 2.9074074074074074
healthcare_system_understanding_int_mean nan
F_onewayResult(statistic=nan, pvalue=nan)


In [112]:
print("education_q1_is_correct_bb_mean ", education_data.q1_is_correct_bb.mean())
print("housing_q1_is_correct_bb_mean ", housing_data.q1_is_correct_bb.mean())
print("healtcare_q1_is_correct_bb_mean ", healthcare_data.q1_is_correct_bb.mean())
print(f_oneway(education_data.q1_is_correct_bb, housing_data.q1_is_correct_bb,healthcare_data.q1_is_correct_bb))
print("--------------------")
print("education_q2_is_correct_bb_mean ", education_data.q2_is_correct_bb.mean())
print("housing_q2_is_correct_bb_mean ", housing_data.q2_is_correct_bb.mean())
print("healtcare_q2_is_correct_bb_mean ", healthcare_data.q2_is_correct_bb.mean())
print(f_oneway(education_data.q2_is_correct_bb, housing_data.q2_is_correct_bb,healthcare_data.q2_is_correct_bb))
print("--------------------")
print("education_system_understanding_bb_mean", education_data.system_understanding_bb.mean())
print("housing_system_understanding_bb_mean", housing_data.system_understanding_bb.mean())
print("healthcare_system_understanding_bb_mean", healthcare_data.system_understanding_bb.mean())
print(f_oneway(education_data.system_understanding_bb,housing_data.system_understanding_bb,healthcare_data.system_understanding_bb))

education_q1_is_correct_bb_mean  0.8695652173913043
housing_q1_is_correct_bb_mean  0.7560975609756098
healtcare_q1_is_correct_bb_mean  nan
F_onewayResult(statistic=nan, pvalue=nan)
--------------------
education_q2_is_correct_bb_mean  0.9192546583850931
housing_q2_is_correct_bb_mean  0.7073170731707317
healtcare_q2_is_correct_bb_mean  nan
F_onewayResult(statistic=nan, pvalue=nan)
--------------------
education_system_understanding_bb_mean 3.4922360248447206
housing_system_understanding_bb_mean 3.288109756097561
healthcare_system_understanding_bb_mean nan
F_onewayResult(statistic=nan, pvalue=nan)


## Subgroups

In [None]:
knowledge_mapper = {
    "Very knowledgeable": 5,
    "Knowledgeable": 4,
    "Moderately knowledgeable": 3,
    "Slightly knowledgeable": 2,
    "Not knowledgeable at all": 1
}

data['Q9.2']
#print(data['Q9.2'].value_counts())
data['background_knowledge'] = data['Q9.2'].map(knowledge_mapper)
data['background_knowledge'].value_counts()

generate_stats(data[data['background_knowledge']>=3])

data['12_min_cutoff_flag'] = data['duration'].apply(lambda x: x >= 600)
print(data['12_min_cutoff_flag'].value_counts())
generate_stats(data[data['12_min_cutoff_flag'] == 1])