In [1]:
# Libraries Import

In [2]:
import pandas as pd
import numpy as np
from scipy.io import arff
import ipywidgets as widgets
from IPython.display import SVG
from datetime import datetime
import random
import urllib.request, json 
import gspread

In [3]:
# Dataset Load
# scipy's loadarff returns a (data, meta) pair; only the data records
# (index 0) are turned into the DataFrame used by the rest of the notebook.
data = arff.loadarff("./datasets/WINE.txt.arff")
dataset = pd.DataFrame(data[0])

In [4]:
# Download the JSON document that is later handed to
# gspread.service_account_from_dict() when the answers are submitted.
# NOTE(review): this is a plain, unauthenticated GET, so the object appears to
# hold service-account credentials reachable by anyone with the URL - confirm
# the bucket policy / consider loading the credentials from a protected source.
with urllib.request.urlopen("https://xaisurvey.s3.amazonaws.com/survey_bot.json") as url:
    survey_bot = json.loads(url.read().decode())

In [5]:
def labelConversion(label):
    """Convert a class label read from the ARFF file into a plain string.

    The ARFF loader yields the nominal class as bytes (``b'1'``, ``b'2'``,
    ``b'3'``). Labels that are already strings are accepted too, so applying
    the conversion a second time (e.g. when the cell is re-run on the already
    converted column) no longer turns every label into ``'3'``.

    Parameters
    ----------
    label : bytes or str
        Raw class label.

    Returns
    -------
    str
        ``'1'`` or ``'2'`` for those classes; ``'3'`` for every other value
        (same fallback as before).
    """
    if isinstance(label, bytes):
        # errors='replace' keeps the old "anything unknown is class 3" fallback
        # instead of raising on undecodable bytes
        label = label.decode(errors='replace')
    if isinstance(label, str) and label in ('1', '2'):
        return label
    return '3'

# Replace the raw class labels with their string form
dataset['Class'] = dataset['Class'].apply(labelConversion)

In [6]:
# "Ingredients" for the preparation of the survey:
#   N                        - number of task-based questions
#   questions_numbers        - queue of question indices still to be shown
#   questions_datetime       - timestamps appended as question pages are shown
#   questions_representation - which tree image was shown for each question
N = 5
questions_numbers = list(range(N))
questions_datetime = list()
questions_representation = list()

In [7]:
# Timestamps bracketing the comprehension test: one is appended when the test
# page is shown and one when the following instruction page is shown.
comprehension_test_datetime = []

In [8]:
# Definition of the buttons in the questionnaire
next_button = widgets.Button(description='Next')
submit_button = widgets.Button(description='Submit')

# Output area into which every page function renders its content
output = widgets.Output()

In [9]:
# Comprehension Test Section elements definition (radio buttons, dropdowns, etc)

# Warm-up question 1: predicted class (nothing pre-selected)
example_selection_1 = widgets.RadioButtons(
    options=[
        ('Class 1', '1'),
        ('Class 2', '2'),
        ('Class 3', '3'),
    ],
    value=None,
    disabled=False,
)

# Warm-up question 2: features used for the classification
example_selection_2 = widgets.RadioButtons(
    options=[
        ('Proline, OD280/OD315, and Flavanoids', '1'),
        ('Proline, OD280/OD315, and Hue', '2'),
        ('Proline, Flavanoids, and Magnesium', '3'),
        ('Proline, Flavanoids, and Hue', '4'),
    ],
    value=None,
    disabled=False,
)

In [10]:
# Preparation of the Questions section of the survey

# One radio-button group per question; nothing is pre-selected (value=None)
questions_selection = [
    widgets.RadioButtons(
        options=[('Class 1', '1'), ('Class 2', '2'), ('Class 3', '3')],
        value=None,
        disabled=False
    )
    for _ in range(N)
]

# Dataset rows used as questions, presented in a random order
questions_samples = [dataset.iloc[0],dataset.iloc[58],dataset.iloc[164],dataset.iloc[177],dataset.iloc[156]]
random.shuffle(questions_samples)

correct_classifications = []  # last entry of each row, kept as the expected answer
questions_order = []          # dataset row label of each question, in display order

for n, sample in enumerate(questions_samples):
    # .iloc makes the positional access explicit: the rows are indexed by
    # column name, and plain sample[-1] relies on the deprecated positional
    # fallback of Series.__getitem__.
    correct_classifications.append(sample.iloc[-1])
    # Hide the last entry (the expected answer) from what is shown to the participant
    questions_samples[n] = sample.iloc[:-1]
    questions_order.append(sample.name)

In [11]:
# Preparation of the Participant information section
# Every group starts with no option selected (value=None).

gender_selection = widgets.RadioButtons(
    options=[
        ('Male', '1'),
        ('Female', '2'),
        ('Other', '3'),
        ('Prefer not to say', '4'),
    ],
    value=None,
    disabled=False,
)

age_selection = widgets.RadioButtons(
    options=[
        ('18-20', '1'),
        ('21-29', '2'),
        ('30-39', '3'),
        ('40-49', '4'),
        ('50-59', '5'),
        ('60 or older', '6'),
    ],
    value=None,
    disabled=False,
)

education_selection = widgets.RadioButtons(
    options=[
        ('Less than high school degree', '1'),
        ('High school degree or equivalent', '2'),
        ('Undergraduate', '3'),
        ('Graduate', '4'),
    ],
    value=None,
    disabled=False,
)

english_level_selection = widgets.RadioButtons(
    options=[
        ('Beginner (A1)', '1'),
        ('Elementary (A2)', '2'),
        ('Lower Intermidiate (B1)', '3'),
        ('Upper Intermidiate (B2)', '4'),
        ('Advanced (C1)', '5'),
        ('Proficient (C2)', '6'),
    ],
    value=None,
    disabled=False,
)

In [12]:
# Preparation of the Additional comment section
# Free-text box, initially empty, for optional remarks from the participant

participant_suggestion_text_area = widgets.Textarea(value='', disabled=False)

In [13]:
def welcomeWebPage():
    """Render the welcome/consent page and show the survey output area.

    Clears ``output``, displays the title, the study description, the
    consent statement and ``next_button`` inside it, and finally displays
    ``output`` itself so the survey becomes visible in the notebook.
    """
    output.clear_output()
    with output:
        display(widgets.HTML(value = '''<h1>Survey of the interpretability of decision trees</h1>'''))
        # The paragraph must end with a closing </p>, not a second opening <p>
        display(widgets.HTML(value = '''<p>Welcome! This questionnaire aims to evaluate the interpretability of decision trees. In this survey, you will be asked to evaluate explanations in terms of decision tree interpretability following the instructions outlined in the next screens.</p>'''))
        display(widgets.HTML(value = '''<p>This study is carried out by researchers from Free University of Bolzano. The information that we collect is in agreement with European Union's General Data Protection Regulation (<a href="https://eur-lex.europa.eu/eli/reg/2016/679/oj">GDPR</a>). In addition, this research has been approved by the related Ethics Committee. 
        It is meant for research purposes only and based on non-personal or anonymous data which is provided during your voluntary participation.</p>'''))
        display(widgets.HTML(value = '''<p>It is expected to take you about 10 minutes to complete the survey. During that time, please, focus only on the survey and avoid any unnecessary interruptions until it is completed. If any break is needed, take it between tasks.<br>
        Should you have any questions regarding this survey, please address them to Marco Zenere (Marco.Zenere@stud-inf.unibz.it), the survey and data manager, before starting.</p>'''))
        display(widgets.HTML(value = '''<p>By clicking the next button, you partecipate to the questionnaire and confirm that:<br>
        <ul>
            <li> You have reached the age of majority </li>
            <li> You acknowledge that your participation is completely voluntary </li>
            <li> You acknowledge that your anonymous responses may be used for research purposes in accordance with General Data Protection Regulation </li>
        </ul>
        </p>'''))
        display(next_button)
    # The output widget itself is displayed once, here; later pages only
    # clear and refill it.
    display(output)

In [14]:
def introductionWebPage():
    """Render the introduction page into ``output``.

    Shows the description of the wine data and its 13 constituents, the
    3-layer decision tree image, the explanation of how to read the tree
    and ``next_button``.
    """
    html = widgets.HTML  # local shorthand for the HTML widget class

    output.clear_output()
    with output:
        # Dataset description
        display(html(value = '''<h2>Introduction</h2>'''))
        display(html(value = '''<p>The data used in this questionnaire is part of the wine dataset provided by UCI. These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.
        <ul>
            <li> Alcohol </li>
            <li> Malic acid </li>
            <li> Ash </li>
            <li> Alcalinity of ash </li>
            <li> Magnesium </li>
            <li> Total phenols </li>
            <li> Flavanoids </li>
            <li> Nonflavanoid phenols </li>
            <li> Proanthocyanins </li>
            <li> Color intensity </li>
            <li> Hue </li>
            <li> OD280/OD315 of diluted wines </li>
            <li> Proline </li>
        </ul>
        This questionnaire considers decision tree representation in the form depicted below.</p>'''))

        # Reference tree image followed by the reading guide
        display(SVG(filename='./Images/Decision_Tree_3_layers.svg'))
        display(html(value = '''<h3>How to interpret the decision tree?</h3>'''))
        display(html(value = '''<p>A decision tree is a tree-like model that checks one attribute/feature at a time of a given instance to classify it in one of the classes of the domain of interest. 
                         A decision tree is made up of several levels and nodes. Each level is composed of one or more nodes, and the nodes correspond to the features of the domain of interest. 
                         Typically, a decision tree considers part of the available features, which correspond to those most relevant to correctly classifying an instance of the domain.  
                         Each node evaluates whether the value of the considered attribute is above a specific threshold. If the attribute value exceeds the threshold, the decision tree will test the attribute on the right path. Otherwise, it will test the one on the left path. 
                         In the decision tree representation of the following survey, the threshold of each node is represented by the black triangle in the histogram, and each node is a histogram. To classify an instance of the domain of interest, the decision tree must reach one of the leaf nodes starting from the root node. A leaf node could be impure, so there could be the case where the leaf node contains multiple classes. 
                         The decision tree representation in the following survey highlights the composition of each leaf node and indicates the dominant class.</p>'''))
        display(html(value = '''<p>Let's take a look at an example.</p>'''))

        display(next_button)

In [15]:
def exampleWebPage():
    """Render the worked-example page into ``output``.

    Shows the features of dataset row 1 (last column dropped) as a one-row
    table, the 3-layer decision tree image, the step-by-step walk through
    the tree and ``next_button``.
    """
    html = widgets.HTML  # local shorthand for the HTML widget class

    output.clear_output()
    with output:
        display(html(value = '''<h2>Example</h2>'''))
        display(html(value = '''<p>Considering we have a wine instance with the following features:</p>'''))

        # Example 1: turn the row into a single column with an empty header,
        # then transpose it so the features are laid out horizontally
        example = dataset.iloc[1][:-1].to_frame()
        example = example.rename(columns = {example.columns[0]:''})
        display(example.transpose())

        # SVG Image
        display(SVG(filename='./Images/Decision_Tree_3_layers.svg'))

        # Explanation
        display(html(value = '''<p>Considering the above decision tree representation and starting from the top level of the decision tree, the first feature to consider is the Proline. 
        The value of the example is above the threshold of 760.0, so we need to take the right path of the decision tree and continue to go through the tree. 
        The next feature we need to consider is Flavanoids, and the example has a value above the threshold of 2.17 and as before we need to take the right path of the decision tree and continue to go through the tree.
        The last feature to consider before the classification of the example is Magnesium. Our example has a value under the threshold of 135.50, and by taking the left path of the decision tree, we can conclude that the wine is of Class 1.</p>'''))
        display(html(value = """<p>Before starting the survey, let's do a quick comprehension test to understand whether or not the concept is clear.</p>"""))

        display(next_button)

In [16]:
def comprehensionTestWebPage():
    """Render the comprehension test page and record when it was shown.

    Clears ``output`` and displays the features of dataset row 96 (last
    column dropped), the 3-layer decision tree image, the two warm-up
    questions with their radio buttons and ``next_button``. Afterwards the
    current time is appended to ``comprehension_test_datetime``.
    """
    output.clear_output()
    with output:
        display(widgets.HTML(value = '''<h2>Comprehension Test</h2>'''))
        
        display(widgets.HTML(value = '''<p>Considering we have a wine with the following features:</p>'''))
        
        # Example 96: single column with an empty header, transposed into one row
        example = dataset.iloc[96][:-1].to_frame()
        example = example.rename(columns = {example.columns[0]:''})
        display(example.transpose())
        # SVG Image
        display(widgets.HTML(value = '''<p>Let's consider the decision tree representation below.</p>'''))
        display(SVG(filename='./Images/Decision_Tree_3_layers.svg'))
        # Warm-Up Questions (each heading is closed with </h3> to match its opening tag)
        display(widgets.HTML(value = '''<h3>Which class correspond the wine with the following features?</h3>'''))
        display(example_selection_1)
        display(widgets.HTML(value = '''<h3>Which of the following features/attributes did you consider for the classification?</h3>'''))
        display(example_selection_2)
        
        display(next_button)
        
    # Datetime question displayed to the participant
    comprehension_test_datetime.append(datetime.now())

In [17]:
def questionnaireInstructionWebPage():
    """Render the survey instructions and timestamp the end of the comprehension test.

    Clears ``output``, shows the description of how the questions are
    structured together with ``next_button``, then appends the current time
    to ``comprehension_test_datetime``.
    """
    html = widgets.HTML  # local shorthand for the HTML widget class

    output.clear_output()
    with output:
        display(html(value = '''<h2>Survey Instruction</h2>'''))
        display(html(value = '''<p>The questionnaire is composed by five task-based questions, and each of them is made of the following elements:
            <ul>
            <li> The features of a wine instance. </li>
            <li> A decision tree representation of the the domain of interest. </li>
            <li> A question asking for the correct classification of the wine instance, considering the decision tree representation of the domain of interest. </li>
            </ul>
            Once the questions are complete, we will allow you to leave comments or suggestions on the questionnaire via the appropriate section. To conclude the questionnaire, we will ask you to provide us with some information about yourself to do a demographic analysis of the participants of this questionnaire. <br>
            You can start the questionnaire</p>'''))

        display(next_button)

    # Reaching this page means the comprehension test was completed
    completed_at = datetime.now()
    comprehension_test_datetime.append(completed_at)

In [18]:
def questionWebPage():
    """Render the next task-based question and record when it was shown.

    Pops the next question number from ``questions_numbers`` and the next
    sample from ``questions_samples``, then displays the sample's features,
    one of the two decision tree images chosen at random, and the radio
    buttons of that question. The chosen representation is appended to
    ``questions_representation`` ('1' for the 3-layer image, '2' for the
    5-layer image) and the current time to ``questions_datetime``.
    """
    output.clear_output()
    with output:
        question_number = questions_numbers.pop(0)
        display(widgets.HTML(value = '''<h2>Question %s</h2>'''%(str(question_number + 1))))
        
        # Input: single column with an empty header, transposed into one row
        display(widgets.HTML(value='''<p>Considering a wine instance with the following features:</p>'''))
        sample_DataFrame = questions_samples.pop(0).to_frame()
        sample_DataFrame = sample_DataFrame.rename(columns = {sample_DataFrame.columns[0]:''})
        display(sample_DataFrame.transpose())
        
        # Explanation
        display(widgets.HTML(value = '''<p>Let's consider the following decision tree representation:</p>'''))
        # Random selection of a decision tree representation.
        # random.choice gives both images the same probability, whereas
        # randint(0, 100) % 2 spans 101 values (51 even, 50 odd).
        representation, image_path = random.choice([
            ('1', './Images/Decision_Tree_3_layers.svg'),
            ('2', './Images/Decision_Tree_5_layers.svg'),
        ])
        questions_representation.append(representation)
        display(SVG(filename=image_path))
        
        # Radio Button/Question
        display(widgets.HTML(value = '''<h3>Which class correspond the wine with the following features?</h3>'''))
        display(questions_selection[question_number])
        display(next_button)

    # Datetime question displayed to the participant
    questions_datetime.append(datetime.now())

In [19]:
def participantCommentWebPage():
    """Render the optional comments page and timestamp the end of the last question.

    The current time is appended to ``questions_datetime`` first; then
    ``output`` is cleared and filled with the instructions, the free-text
    area and ``next_button``.
    """
    # Showing this page marks the moment the last question stopped being displayed
    last_question_finished_at = datetime.now()
    questions_datetime.append(last_question_finished_at)

    html = widgets.HTML  # local shorthand for the HTML widget class

    output.clear_output()
    with output:
        display(html(value = '''<h2>Additional Comments/Suggestions</h2>'''))
        display(html(value = '''<p>If you have any comments or suggestions regarding the questionnaire you want to tell us, please fill the text box below.<br>
        NB: It is optional, so you haven't any, you can leave the box empty and click the button next.</p>'''))
        display(participant_suggestion_text_area)

        display(next_button)

In [20]:
def participatInfoWebPage():
    """Render the participant information page into ``output``.

    Shows a heading followed by its radio-button group for gender, age,
    education and English level, then ``submit_button`` and a note about
    the sending time.
    """
    # (heading markup, selection widget) pairs, in display order
    sections = (
        ('''<h3>Gender</h3>''', gender_selection),
        ('''<h3>Age</h3>''', age_selection),
        ('''<h3>Education</h3>''', education_selection),
        ('''<h3>English Level</h3>''', english_level_selection),
    )

    output.clear_output()
    with output:
        display(widgets.HTML(value = '''<h2>Information about the survey participant</h2>'''))
        display(widgets.HTML(value = '''<p>We would like to collect some information about you to do a demographic analysis of the participants in our questionnaire.</p>'''))
        for heading, selection in sections:
            display(widgets.HTML(value = heading))
            display(selection)
        display(submit_button)
        display(widgets.HTML(value = '''<p>NB: The sending of the answers could take a few seconds.</p>'''))

In [21]:
def endquestionnaireWebPage():
    """Render the closing page: confirmation of the submission and a thank-you note."""
    html = widgets.HTML  # local shorthand for the HTML widget class

    output.clear_output()
    with output:
        display(html(value = '''<h2>The questionnaire is ended</h2>'''))
        display(html(value = '''<p>The questionnaire was submitted correctly and you can now close the browser tab. <br>
        Thank you very much for the partecipation.</p>'''))

In [22]:
# Page functions still to be shown, in order (the welcome page is rendered
# directly and is therefore not part of this queue)
web_pages_order = [
    introductionWebPage,
    exampleWebPage,
    comprehensionTestWebPage,
    questionnaireInstructionWebPage,
    participantCommentWebPage,
    participatInfoWebPage,
    endquestionnaireWebPage,
]

# Insert the N question pages right after the instruction page
index_element = web_pages_order.index(questionnaireInstructionWebPage)
web_pages_order[index_element + 1:index_element + 1] = [questionWebPage] * N

In [23]:
# Names of the pages, starting with the one currently on screen; the click
# handlers read web_pages[0] to decide which validation to apply
web_pages = [
    'welcomeWebPage',
    'introductionWebPage',
    'exampleWebPage',
    'comprehensionTestWebPage',
    'questionnaireInstructionWebPage',
    'participantCommentWebPage',
    'participatInfoWebPage',
    'endquestionnaireWebPage',
]

# Insert the N question pages right after the instruction page
index_element = web_pages.index('questionnaireInstructionWebPage')
web_pages[index_element + 1:index_element + 1] = ['questionWebPage'] * N

In [24]:
def answerCreation():
    """Assemble all the participant's answers into one flat row.

    Layout of the returned list:

    * comprehension test: ``"<answer 1>, <answer 2>"`` and the seconds spent
      on it (float);
    * for each of the ``N`` questions: ``"<dataset row>, <given answer>,
      <correct class>, <representation>"`` and the seconds spent on it
      (as a string);
    * the free-text comment;
    * gender, age, education and English level selections (as strings).

    Returns
    -------
    list
        The row passed to the worksheet's ``append_row``.
    """
    row = []

    # Section Warm_Up
    comprehension_seconds = (comprehension_test_datetime[1] - comprehension_test_datetime[0]).total_seconds()
    row += [str(example_selection_1.value) + ', ' + str(example_selection_2.value), comprehension_seconds]

    # Section Questions (N instead of a hard-coded 5 keeps this in sync with
    # the number of question pages that were shown)
    for n in range(N):
        # Time taken by the participant to answer the ith question: difference
        # between the display time of the next page and of this question
        time_taken = (questions_datetime[n + 1] - questions_datetime[n]).total_seconds()

        answer = ', '.join([
            str(questions_order[n]),
            str(questions_selection[n].value),
            str(correct_classifications[n]),
            questions_representation[n],
        ])
        row += [answer, str(time_taken)]

    # Section Additional Comment
    row += [str(participant_suggestion_text_area.value)]

    # Section Participant Information
    row += [
        str(gender_selection.value),
        str(age_selection.value),
        str(education_selection.value),
        str(english_level_selection.value),
    ]

    return row

In [25]:
# True while the "answer all the questions" warning is already shown on the
# current page, so the click handlers do not display it a second time.
error_message = False

In [26]:
def next_clicked(b):
    """Handle a click on ``next_button``: validate the current page, then advance.

    On the comprehension test page and on question pages the participant
    must have selected an answer; otherwise a warning is shown (once per
    page) and the page does not change. When validation passes, the current
    page name is removed from ``web_pages`` and the next function in
    ``web_pages_order`` is popped and called.

    Parameters
    ----------
    b : widgets.Button
        The clicked button (supplied by ipywidgets, unused).
    """
    global error_message
    current_web_page = web_pages[0]

    if current_web_page == 'comprehensionTestWebPage':
        unanswered = example_selection_1.value is None or example_selection_2.value is None
    elif current_web_page == 'questionWebPage':
        # questionWebPage pops the number of the question it displays, so the
        # question on screen is the last one removed from questions_numbers.
        # Deriving its index from the list lengths avoids hard-coding the
        # index of the final question.
        current_question = len(questions_selection) - len(questions_numbers) - 1
        unanswered = questions_selection[current_question].value is None
    else:
        unanswered = False

    if unanswered:
        if not error_message:
            error_message = True
            with output:
                display(widgets.HTML(value = '''<p>Answer all the questions before continuing!</p>'''))
        return

    # The warning (if any) disappears with the page, so the flag is cleared
    error_message = False

    web_pages.pop(0)
    web_page = web_pages_order.pop(0)
    web_page()

def submit_clicked(b):
    """Handle a click on ``submit_button``: validate, upload the answers, show the end page.

    All four participant-information groups must have a selection;
    otherwise a warning is printed (once) and nothing is sent. On success
    the row built by ``answerCreation`` is appended to the first sheet of
    the 'SurveyAnswers' spreadsheet and the next page is shown.

    Clicks received while the participant-information page is not the
    current page are ignored. This covers a second click queued while the
    first submission was still being sent, which would otherwise append a
    duplicate row and then fail on the empty page queue.

    Parameters
    ----------
    b : widgets.Button
        The clicked button (supplied by ipywidgets, unused).
    """
    global error_message

    if not web_pages or web_pages[0] != 'participatInfoWebPage':
        return

    if gender_selection.value is None or age_selection.value is None or education_selection.value is None or english_level_selection.value is None:
        if not error_message:
            error_message = True
            with output:
                print('Answer all the questions before continuing!')
        return

    error_message = False

    # Sending can take a few seconds: block further clicks meanwhile
    submit_button.disabled = True
    try:
        gc = gspread.service_account_from_dict(survey_bot)
        sh = gc.open('SurveyAnswers')
        worksheet = sh.sheet1
        worksheet.append_row(answerCreation())
    except Exception:
        # Let the participant retry if the upload failed
        submit_button.disabled = False
        raise

    web_pages.pop(0)
    web_page = web_pages_order.pop(0)
    web_page()
    
# Register the click handlers on their buttons
next_button.on_click(next_clicked)
submit_button.on_click(submit_clicked)

In [27]:
# Entry point: render the first page and display the survey output area
welcomeWebPage()

Output()