# Abandonment Analysis

In [1]:
from IPython.display import display, HTML
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Colours of matplotlib's active property cycle, extracted as a plain list.
# NOTE(review): neither name is referenced in the cells visible here.
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']

In [2]:
# Maps the raw task identifiers found in the data files to readable CAPTCHA
# names (applied by load_data_from_file via DataFrame.replace).
# Entries are sorted according to preference score; the insertion order is
# load-bearing because it defines the row order of the abandonment tables.
captcha_type = {
    "task: 5": "reCAPTCHA (easy)",
    "task: 6": "Geetest (slide)",
    "task: 3": "Arkose (selection)",
    "task: 2": "Arkose (rotation)",
    "task: 9": "Distorted text (simple)",
    "task: 7": "Distorted text (moving)",
    "task: 4": "reCAPTCHA (difficult)",
    "task: 8": "Distorted text (masked)",
    "task: 1": "hCAPTCHA (easy)",
    "task: 0": "hCAPTCHA (difficult)",
}

# Labels for the ten positions in a participant's CAPTCHA sequence.
# NOTE(review): not referenced by any cell visible here.
action_order = ['captcha_1', 'captcha_2', 'captcha_3', 'captcha_4', 'captcha_5',
                'captcha_6', 'captcha_7', 'captcha_8', 'captcha_9', 'captcha_10']

# Import data
def load_data_from_file(file_name):
    """Read one study export from ``../data/`` and prepare it for analysis.

    Prints the number of distinct participants (unique ``UID`` values),
    swaps every raw task identifier in the frame for its readable CAPTCHA
    name from ``captcha_type``, and orders the rows by participant and time.

    Args:
        file_name: Name of a JSON file inside ``../data/``.

    Returns:
        The loaded DataFrame, renamed and sorted by ``['UID', 'time']``.
    """
    records = pd.read_json("../data/" + file_name)

    participant_count = records['UID'].nunique()
    print(f"Number of total participants: {participant_count}")

    # The replacement is applied to the whole frame, not to a single column.
    renamed = records.replace(
        to_replace=captcha_type.keys(),
        value=captcha_type.values(),
    )
    return renamed.sort_values(by=['UID', 'time'])

# Remove tasks
def remove_tasks(df):
    """Return a new DataFrame without the rows whose ``type`` is ``'tasks'``.

    Rows are filtered with a boolean mask rather than dropped by index
    label. A label-based ``drop`` would also delete unrelated rows that
    happen to share an index label with a ``'tasks'`` row (for example after
    concatenating frames without resetting the index).

    Args:
        df: DataFrame with a ``type`` column. It is not modified.

    Returns:
        The rows of ``df`` whose ``type`` is not ``'tasks'``, with their
        original index labels and order preserved.
    """
    return df[df['type'] != 'tasks']

## Plotting

In [3]:
# At what point in the task did the participant abandon?

def plot_captcha_type_vs_task_number(df):
    """Display a table of where participants abandoned the CAPTCHA sequence.

    A participant counts as having abandoned when they have fewer recorded
    actions than there are CAPTCHA types in ``captcha_type``. Their last
    recorded action is the CAPTCHA they abandoned on (table row) and its
    position in their sequence is the task number (table column, 1-based).
    A ``Total`` column and a ``Total`` row are appended, and zero cells are
    rendered faded.

    Args:
        df: DataFrame with a ``UID`` column and a ``type`` column holding the
            readable CAPTCHA names. Each participant's rows must already be
            in chronological order (``load_data_from_file`` sorts them).
            Rows with a missing ``UID`` are ignored.

    Returns:
        None. The styled table is rendered with ``display``.

    Raises:
        ValueError: If an abandoning participant's last action is not one of
            the names in ``captcha_type``.
    """
    expected = list(captcha_type.values())
    n_types = len(expected)
    counts = np.zeros((n_types, n_types), dtype=int)

    # One pass over the frame; groupby keeps each participant's rows in their
    # existing order, so the last element is still the last recorded action.
    for _, actions in df.groupby('UID', sort=False)['type']:
        action_sequence = actions.to_list()
        if len(action_sequence) >= n_types:
            continue  # participant went through every CAPTCHA
        abandoned_action = action_sequence[-1]
        # Use the position of the last action itself: list.index() would
        # report the first occurrence if a CAPTCHA type appeared twice.
        counts[expected.index(abandoned_action), len(action_sequence) - 1] += 1

    col_label = [str(position) for position in range(1, n_types + 1)]
    dd = pd.DataFrame(counts, index=expected, columns=col_label)
    dd['Total'] = dd.sum(axis=1)
    dd.loc['Total'] = dd.sum(axis=0)

    # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the new
    # name when it exists so no deprecation warning is emitted.
    styler = dd.style
    style_cells = getattr(styler, 'map', None) or styler.applymap
    display(style_cells(lambda v: 'opacity: 20%;' if (v == 0) else None))

### Unbiased (75 cents)

In [4]:
# Unbiased study, 75-cent payment: loaded, CAPTCHA names mapped, sorted by UID/time.
unbiased_75_df = load_data_from_file("abandonment_unbiased_75_cents.json")

Number of total participants: 183


In [5]:
# Rows: CAPTCHA the participant quit on; columns: its 1-based position in their sequence.
plot_captcha_type_vs_task_number(unbiased_75_df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,Total
reCAPTCHA (easy),5,0,0,0,2,0,0,0,0,0,7
Geetest (slide),3,1,2,1,3,0,0,1,1,0,12
Arkose (selection),8,2,0,1,1,0,0,0,0,0,12
Arkose (rotation),2,1,1,0,1,1,0,0,0,0,6
Distorted text (simple),2,1,0,0,0,2,1,0,0,0,6
Distorted text (moving),0,1,2,1,1,0,1,0,1,0,7
reCAPTCHA (difficult),5,0,1,1,0,0,0,0,0,0,7
Distorted text (masked),4,2,1,0,0,0,0,0,0,0,7
hCAPTCHA (easy),2,2,2,0,1,0,0,0,0,0,7
hCAPTCHA (difficult),4,1,2,1,0,0,1,0,0,0,9


### Biased (30 cents)

In [6]:
# Biased study, 30-cent payment. Rows of type 'tasks' are dropped before analysis
# (presumably only the biased exports contain them; TODO confirm).
biased_30_df = remove_tasks(load_data_from_file("abandonment_biased_30_cents.json"))

Number of total participants: 125


In [7]:
# Rows: CAPTCHA the participant quit on; columns: its 1-based position in their sequence.
plot_captcha_type_vs_task_number(biased_30_df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,Total
reCAPTCHA (easy),0,0,0,1,0,0,0,0,0,0,1
Geetest (slide),1,1,0,0,1,0,1,2,0,0,6
Arkose (selection),2,1,1,0,0,1,0,0,0,0,5
Arkose (rotation),0,0,0,0,0,1,0,0,0,0,1
Distorted text (simple),0,0,0,0,0,0,0,0,0,0,0
Distorted text (moving),0,0,0,0,0,1,1,0,0,0,2
reCAPTCHA (difficult),0,0,0,1,0,0,0,0,0,0,1
Distorted text (masked),0,0,0,0,0,0,1,0,0,0,1
hCAPTCHA (easy),1,1,0,1,0,0,0,0,0,0,3
hCAPTCHA (difficult),1,0,0,1,0,0,0,0,0,0,2


### Unbiased (150 cents)

In [8]:
# Unbiased study, 150-cent payment: loaded, CAPTCHA names mapped, sorted by UID/time.
unbiased_150_df = load_data_from_file("abandonment_unbiased_150_cents.json")

Number of total participants: 140


In [9]:
# Rows: CAPTCHA the participant quit on; columns: its 1-based position in their sequence.
plot_captcha_type_vs_task_number(unbiased_150_df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,Total
reCAPTCHA (easy),2,1,0,0,0,0,0,0,0,0,3
Geetest (slide),4,0,0,0,0,1,0,1,2,0,8
Arkose (selection),1,2,0,0,0,1,0,0,0,0,4
Arkose (rotation),4,0,1,0,0,0,0,1,0,0,6
Distorted text (simple),2,0,0,1,0,0,0,0,0,0,3
Distorted text (moving),1,1,1,0,0,1,1,0,0,0,5
reCAPTCHA (difficult),2,1,0,0,0,0,0,0,0,0,3
Distorted text (masked),1,2,0,0,0,0,0,0,0,0,3
hCAPTCHA (easy),1,1,0,0,0,0,0,0,0,0,2
hCAPTCHA (difficult),0,0,1,0,0,0,0,0,1,0,2


### Biased (60 cents)

In [10]:
# Biased study, 60-cent payment. Rows of type 'tasks' are dropped before analysis
# (presumably only the biased exports contain them; TODO confirm).
biased_60_df = remove_tasks(load_data_from_file("abandonment_biased_60_cents.json"))

Number of total participants: 134


In [11]:
# Rows: CAPTCHA the participant quit on; columns: its 1-based position in their sequence.
plot_captcha_type_vs_task_number(biased_60_df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,Total
reCAPTCHA (easy),0,0,0,0,0,0,0,0,0,0,0
Geetest (slide),4,3,2,0,3,5,0,0,2,0,19
Arkose (selection),0,0,1,0,0,0,0,0,0,0,1
Arkose (rotation),1,0,0,2,1,0,0,0,0,0,4
Distorted text (simple),0,0,0,0,0,0,0,0,0,0,0
Distorted text (moving),1,0,0,0,0,0,0,0,1,0,2
reCAPTCHA (difficult),0,0,0,0,0,0,0,0,1,0,1
Distorted text (masked),2,0,0,0,0,0,0,0,0,0,2
hCAPTCHA (easy),0,1,0,1,0,0,0,0,0,0,2
hCAPTCHA (difficult),0,0,0,0,1,0,0,0,0,0,1


### Combined

In [12]:
# Pool all four studies into one frame; ignore_index=True gives the rows a fresh 0..n-1 index.
# NOTE(review): assumes UIDs are unique across studies. A UID present in two
# files would have its action sequences merged by the per-UID analysis; confirm.
total_df = pd.concat([unbiased_75_df, unbiased_150_df, biased_30_df, biased_60_df], ignore_index=True)

In [13]:
# Combined abandonment table across all four studies (same row/column meaning as the per-study tables).
plot_captcha_type_vs_task_number(total_df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,Total
reCAPTCHA (easy),7,1,0,1,2,0,0,0,0,0,11
Geetest (slide),12,5,4,1,7,6,1,4,5,0,45
Arkose (selection),11,5,2,1,1,2,0,0,0,0,22
Arkose (rotation),7,1,2,2,2,2,0,1,0,0,17
Distorted text (simple),4,1,0,1,0,2,1,0,0,0,9
Distorted text (moving),2,2,3,1,1,2,3,0,2,0,16
reCAPTCHA (difficult),7,1,1,2,0,0,0,0,1,0,12
Distorted text (masked),7,4,1,0,0,0,1,0,0,0,13
hCAPTCHA (easy),4,5,2,2,1,0,0,0,0,0,14
hCAPTCHA (difficult),5,1,3,2,1,0,1,0,1,0,14
