# User Study Evaluation

In [None]:
import os
import pandas as pd

from pathlib import Path
from dotenv import load_dotenv
import matplotlib.pyplot as plt

# 2-sample t-test
from scipy.stats import ttest_ind, ttest_1samp

# Load variables from a .env file into the process environment.
load_dotenv()

# Root directory of the study files that are read and written below.
_data_path_raw = os.getenv("DATA_PATH")
if _data_path_raw is None:
    # Path(None) would fail with an opaque TypeError; name the missing variable instead.
    raise RuntimeError(
        "Environment variable DATA_PATH is not set; "
        "export it or add it to the .env file before running this notebook."
    )
DATA_PATH = Path(_data_path_raw)

## Load results from file

In [None]:
# Location of the questionnaire export inside the data directory.
results_path = DATA_PATH.joinpath("study", "study-results.csv")

# Read the raw study answers; uncomment the last line to inspect the table.
df = pd.read_csv(results_path)
# df

## Add column indicating which version was used first

In [None]:
# Chatbot version that was used first, one entry per row of df in row order
# (presumably one participant per row — confirm against the results file).
# The extraction step further down treats every value other than "real"
# (here: "generated") as the fake version.
used_first = ["real", "generated", "generated", "real", "real", "generated"]

# The list length must match the number of rows in df, otherwise pandas raises.
df["first version"] = used_first

df

## Demographics

In [None]:
# Column headers of the demographic part of the questionnaire.
demo_questions = [
    "How do you identify?",
    "How old are you?",
    "What is your job/background?",
    "Have you designed websites before?",
    "Have you worked with AI technologies before?",
]

# Independent copy of the demographic columns, so edits here never touch df.
demo_df = df.loc[:, demo_questions].copy()

demo_df

## Gender 

In [None]:
import matplotlib.patches as mpatches

fig1, ax1 = plt.subplots()

# Legend entries for every answer option of the gender question; only the
# first two have a matching slice in the pie below.
blue_patch = mpatches.Patch(color="tab:blue", label="Male")
red_patch = mpatches.Patch(color="tab:red", label="Female")
orange_patch = mpatches.Patch(color="tab:orange", label="Non-binary")
green_patch = mpatches.Patch(color="tab:green", label="Prefer not to say")

# The shares are entered by hand (percent), not computed from demo_df.
ax1.pie(
    x=[66.7, 33.3],
    autopct="%1.1f%%",
    startangle=90,
    colors=["tab:blue", "tab:red"],
    explode=(0.00, 0.005),
)
# Equal aspect ratio ensures that the pie is drawn as a circle.
ax1.axis("equal")

ax1.legend(
    handles=[blue_patch, red_patch, orange_patch, green_patch],
    loc=(0.85, 0.68),
)

# labels=["Male", "Female", "Non-binary", "Prefer not to say"]
plt.show()

## Age

In [None]:
# Age histogram; the ages and their counts are entered by hand rather than
# derived from demo_df.
# demo_df.plot(x="How old are you?", y="How old are you?", backend="matplotlib")
ages = [21, 22, 23, 24, 25, 26]
participants_per_age = [2, 3, 0, 0, 0, 1]

plt.bar(x=ages, height=participants_per_age)
plt.xlabel("Age")
plt.ylabel("Count")
# Leave headroom above the tallest bar.
plt.ylim(0, 4)

plt.show()

## Questions

In [None]:
# CSI statements exactly as they appear as column headers for the first
# questionnaire.
csi_questions_first = [
    "I would be happy to use this chatbot on a regular basis.",
    "I enjoyed using the chatbot.",
    "It was easy for me to explore many different ideas, options, designs, or outcomes, using this chatbot.",
    "The chatbot was helpful in allowing me to track different ideas, outcomes, or possibilities.",
    "I was able to be very creative while doing the activity inside this chatbot.",
    "The chatbot allowed me to be very expressive.",
    "My attention was fully tuned to the activity, and I forgot about the chatbot that I was using.",
    "I became so absorbed in the activity that I forgot about the chatbot that I was using.",
    "I was satisfied with what I got out of the chatbot.",
    "What I was able to produce was worth the effort I had to exert to produce it.",
]

# Study-specific statements, again as column headers of the first questionnaire.
custom_questions_first = [
    "Using the chatbot, I felt like I was stealing work from others.",
    "I would use the chatbot over traditional sources of inspiration (e. g. Awwwards, Dribbble, ...).",
    "I would use the chatbot complementary with traditional sources of inspiration.",
    "I was able to interpret something new into the presented designs.",
    "The layout of the presented designs was helpful for my work.",
]

# The second questionnaire repeats the same statements. Its column headers are
# the originals plus a ".1" suffix, which is the suffix pandas.read_csv gives to
# a repeated column name.
csi_questions_second = [f"{question}.1" for question in csi_questions_first]

custom_questions_second = [f"{question}.1" for question in custom_questions_first]

## Extract answers depending on version: 'real' or 'fake'

In [None]:
# Answers per row of df (keyed by row position), split by the chatbot version
# they refer to rather than by the order in which the questionnaires were filled in.
# csi
real_csi_dict = {}
fake_csi_dict = {}

# custom
real_custom_dict = {}
fake_custom_dict = {}

# Walk over every row that is actually in df instead of a fixed count of six,
# so no row is silently skipped and a shorter table does not raise an IndexError.
for index in range(len(df)):
    answers = df.iloc[index]

    # The unsuffixed columns belong to the first questionnaire and the ".1"
    # columns to the second, so which set describes the "real" version depends
    # on the version that was used first.
    if answers["first version"] == "real":
        real_csi_cols, fake_csi_cols = csi_questions_first, csi_questions_second
        real_custom_cols, fake_custom_cols = custom_questions_first, custom_questions_second
    else:
        real_csi_cols, fake_csi_cols = csi_questions_second, csi_questions_first
        real_custom_cols, fake_custom_cols = custom_questions_second, custom_questions_first

    # csi
    real_csi_dict[index] = answers[real_csi_cols].values
    fake_csi_dict[index] = answers[fake_csi_cols].values

    # custom
    real_custom_dict[index] = answers[real_custom_cols].values
    fake_custom_dict[index] = answers[fake_custom_cols].values

# create dfs: one row per entry above, one column per question. All four tables
# are labelled with the unsuffixed question texts to avoid ".1" in the headers.
real_csi_df = pd.DataFrame.from_dict(
    real_csi_dict, orient="index", columns=csi_questions_first
)
fake_csi_df = pd.DataFrame.from_dict(
    fake_csi_dict, orient="index", columns=csi_questions_first
)
real_custom_df = pd.DataFrame.from_dict(
    real_custom_dict, orient="index", columns=custom_questions_first
)
fake_custom_df = pd.DataFrame.from_dict(
    fake_custom_dict, orient="index", columns=custom_questions_first
)


In [None]:
# Display the custom-question answers that refer to the "real" version.
real_custom_df

## Real vs. Fake CSI

In [None]:
# Per-question mean of the answers, rounded to one decimal.
real_csi = real_csi_df.mean().round(1).values
fake_csi = fake_csi_df.mean().round(1).values

# Per-question standard deviation (pandas default), rounded to one decimal.
real_csi_std = real_csi_df.std().round(1).values
fake_csi_std = fake_csi_df.std().round(1).values

# Fake minus real, computed from the already rounded means.
csi_difference = ((real_csi - fake_csi) * (-1)).round(1)

# Summary table: one row per CSI question, columns in reporting order.
real_fake_csi_df = pd.DataFrame(
    {
        "real csi (mean)": real_csi,
        "fake csi (mean)": fake_csi,
        "difference (mean)": csi_difference,
        "real csi (std)": real_csi_std,
        "fake csi (std)": fake_csi_std,
    }
)

# Label the rows with the question texts.
real_fake_csi_df.index = pd.Index(csi_questions_first, name="Questions")

# Persist the table next to the other study files, then display it.
real_fake_csi_df.to_csv(DATA_PATH / "study" / "csi-results.csv")
real_fake_csi_df


## CSI calculation

In [None]:
# Column-wise totals of the CSI summary table.
# NOTE(review): this also adds up the std and difference columns — confirm that
# only the totals of the two mean columns are meant to be read from this output.
real_fake_csi_df.sum()

## Real vs Fake Custom

In [None]:
# Per-question mean of the answers, rounded to one decimal.
real_custom = real_custom_df.mean().round(1).values
fake_custom = fake_custom_df.mean().round(1).values

# Per-question standard deviation (pandas default), rounded to one decimal.
real_custom_std = real_custom_df.std().round(1).values
fake_custom_std = fake_custom_df.std().round(1).values

# Fake minus real. Rounded like the CSI table so that floating-point noise
# (e.g. 0.30000000000000004) does not end up in the exported CSV.
custom_difference = ((real_custom - fake_custom) * (-1)).round(1)

# Summary table: one row per custom question, columns in reporting order.
real_fake_custom_df = pd.DataFrame.from_dict(
    {
        "real custom (mean)": real_custom,
        "fake custom (mean)": fake_custom,
        "difference (mean)": custom_difference,
        "real custom (std)": real_custom_std,
        "fake custom (std)": fake_custom_std,
    }
)

# Label the rows with the question texts.
real_fake_custom_df.index = custom_questions_first

# Persist the table next to the other study files, then display it.
real_fake_custom_df.to_csv(DATA_PATH / "study" / "custom-results.csv")
real_fake_custom_df

## Bar plots with mean and std for CSI

In [None]:
# questions with line break 
csi_questions_first_wrap = [
    'I would be happy to use this\nchatbot on a regular basis.',
    'I enjoyed using the chatbot.',
    'It was easy for me to explore\nmany different ideas, options, designs,\nor outcomes, using this chatbot.',
    'The chatbot was helpful in\nallowing me to track different ideas,\noutcomes, or possibilities.',
    'I was able to be very creative\nwhile doing the activity inside\nthis chatbot.',
    'The chatbot allowed me to be\nvery expressive.',
    'My attention was fully tuned\nto the activity, and I forgot about\nthe chatbot that I was using.',
    'I became so absorbed in the\nactivity that I forgot about the\nchatbot that I was using.',
    'I was satisfied with what\nI got out of the chatbot.',
    'What I was able to produce\nwas worth the effort I had to exert\nto produce it.'
]

csi_cats = ["Enjoyment", "Exploration", "Expressiveness", "Immersion", "Results Worth Effort"]

In [None]:
# Draws two questions per figure (one figure per CSI category) and writes each
# completed figure to DATA_PATH / "study" as a JPG named after its category.
likert_tick_labels = ["Strongly\ndisagree (1)", 2, 3, 4, 5, 6, "Strongly\nagree (7)"]

i = 0  # running question number, selects title and category
x = 0  # subplot position inside the current figure (0 = left, 1 = right)
for index, row in real_fake_csi_df.iterrows():
    # A left-hand position means the previous figure is complete: open a new one.
    if x == 0:
        fig, axes = plt.subplots(1, 2, figsize=(8, 3))

    ax = axes[x]
    bar_heights = row[["real csi (mean)", "fake csi (mean)"]].values
    error_bars = row[["real csi (std)", "fake csi (std)"]].values

    # Mean as bar height, standard deviation as error bar.
    ax.bar(
        x=["real (mean)", "fake (mean)"],
        height=bar_heights,
        color=["green", "red"],
        yerr=error_bars,
        ecolor="black",
        capsize=10,
    )

    ax.set_title(csi_questions_first_wrap[i])
    ax.set_ylim([0, 8])
    ax.set_yticks(range(1, 8))
    ax.set_yticklabels(likert_tick_labels)

    # save fig once its right-hand subplot has been drawn
    if x == 1:
        fig.tight_layout()
        # Two questions share one category, hence the integer division.
        category_slug = csi_cats[i // 2].replace(" ", "-").lower()
        fig_path = DATA_PATH / "study" / f"{category_slug}.jpg"

        fig.savefig(fig_path, format="jpg", backend="Agg")

    # advance position counters: alternate left/right, move to the next question
    x = 0 if x == 1 else 1
    i += 1

## Bar plots with mean and std for custom

In [None]:
# questions with line wrap
custom_questions_wrap = [
    'Using the chatbot, I felt like\nI was stealing work from others.',
    'I would use the chatbot over\ntraditional sources of inspiration\n(e. g. Awwwards, Dribbble, ...).',
    'I would use the chatbot complementary\nwith traditional sources of inspiration.',
    'I was able to interpret something\nnew into the presented designs.',
    'The layout of the presented designs\nwas helpful for my work.'
]

custom_cats = ["stealing", "traditional-sources", "interpretable", "layout-helpful"]

In [None]:
# Draws the custom questions into two figures: the first three questions share
# a three-panel figure, the remaining ones go into a two-panel figure. Each
# figure is written to DATA_PATH / "study" once its last panel is drawn.
likert_tick_labels = ["Strongly\ndisagree (1)", 2, 3, 4, 5, 6, "Strongly\nagree (7)"]

i = 0  # running question number, selects the title
x = 0  # subplot position inside the current figure

save_first = True  # True while the three-panel figure is being filled

for index, row in real_fake_custom_df.iterrows():
    if x == 3:
        # The three-panel figure is full: restart the position counter and
        # continue in a new two-panel figure.
        x = 0
        save_first = False
        fig, axes = plt.subplots(1, 2, figsize=(8, 3))
    elif x == 0:
        fig, axes = plt.subplots(1, 3, figsize=(12, 3))

    ax = axes[x]
    bar_heights = row[["real custom (mean)", "fake custom (mean)"]].values
    error_bars = row[["real custom (std)", "fake custom (std)"]].values

    # Mean as bar height, standard deviation as error bar.
    ax.bar(
        x=["real (mean)", "fake (mean)"],
        height=bar_heights,
        color=["green", "red"],
        yerr=error_bars,
        ecolor="black",
        capsize=10,
    )

    ax.set_title(custom_questions_wrap[i])
    ax.set_ylim([0, 8])
    ax.set_yticks(range(1, 8))
    ax.set_yticklabels(likert_tick_labels)

    # save fig as soon as its last panel has been drawn
    first_figure_done = save_first and x == 2
    second_figure_done = (not save_first) and x == 1
    if first_figure_done or second_figure_done:
        fig.tight_layout()
        if save_first:
            file_name = "stealing-traditional-sources.jpg"
        else:
            file_name = "interpretable-helpful.jpg"
        fig_path = DATA_PATH / "study" / file_name
        fig.savefig(fig_path, format="jpg", backend="Agg")

    # advance position counters
    x += 1
    i += 1

## Statistical significance using t-test

In [None]:
# Two small hand-entered samples used to try out the independent two-sample t-test.
noidea_a = [4, 4, 4, 4, 4, 2, 3, 4, 3, 4]
noidea_b = [4, 3, 5, 4, 4, 3, 4, 5, 4, 3]

# Run the test in both argument orders to see how the order affects the result.
t_a, t_b = ttest_ind(noidea_a, noidea_b), ttest_ind(noidea_b, noidea_a)

print(t_a, t_b, sep="\n")


In [None]:
# 2-sample t-test
csi_ttest = {}
for quest in csi_questions_first:
    csi_ttest[quest] = ttest_ind(real_csi_df[quest].values, fake_csi_df[quest].values)

csi_ttest_df = pd.DataFrame.from_dict(csi_ttest, orient="index", columns=["t-statistic", "p-value"])

csi_ttest = ttest_ind(real_csi_df.values, fake_csi_df.values)
print(csi_ttest)

csi_ttest_df

In [None]:
# Independent 2-sample t-test per custom question: "real" answers vs. "fake" answers.
# NOTE(review): both samples are built from the same rows of df, so a paired
# test such as scipy.stats.ttest_rel may be the appropriate choice — confirm
# against the study design.
custom_ttest = {}
for quest in custom_questions_first:
    real_answers = real_custom_df[quest].values
    fake_answers = fake_custom_df[quest].values
    custom_ttest[quest] = ttest_ind(real_answers, fake_answers)

# One row per question, holding the statistic and the two-sided p-value.
custom_ttest_df = pd.DataFrame.from_dict(
    custom_ttest,
    orient="index",
    columns=["t-statistic", "p-value (two-sided)"],
)

# one-sided p-value
# NOTE(review): halving the two-sided value is only valid when the t-statistic
# has the sign predicted by the hypothesis — confirm per question.
two_sided = custom_ttest_df["p-value (two-sided)"]
custom_ttest_df["p-value (one-sided)"] = two_sided / 2

# The name custom_ttest is reused for the test over the two complete answer
# tables; the result is not displayed in this cell.
custom_ttest = ttest_ind(real_custom_df.values, fake_custom_df.values)
custom_ttest_df