# Mann-Whitney U test: Compare Bot Performance

In [19]:
from scipy.stats import mannwhitneyu
import pandas as pd
from scripts.utils import rank_biserial
import seaborn as sns
import matplotlib.pyplot as plt

## Compare: Bots across team compositions

In [20]:
# Load one CSV per team composition.
df_6bots = pd.read_csv('../csv_data/6bots.csv')
df_1bot_5hum = pd.read_csv('../csv_data/nonsem_1bot_5hum.csv')
df_3bots_3hum = pd.read_csv('../csv_data/nonsem_3bots_3hum.csv')

# Boolean masks selecting the bot rows (isRobot == 1) of the mixed-team files.
# The 6-bot file is used unfiltered -- presumably every row in it is a bot
# (TODO confirm against the data).
is_bot_1bot = df_1bot_5hum['isRobot'] == 1
is_bot_3bots = df_3bots_3hum['isRobot'] == 1

# 'ItemsFound' for bot rows only (isRobot == 1).
# A single .loc[mask, column] lookup replaces chained indexing df[mask][column].
items_found_6bots = df_6bots['ItemsFound']
items_found_1bot = df_1bot_5hum.loc[is_bot_1bot, 'ItemsFound']
items_found_3bots = df_3bots_3hum.loc[is_bot_3bots, 'ItemsFound']

# 'Score' for bot rows only (isRobot == 1).
score_6bots = df_6bots['Score']
score_1bot = df_1bot_5hum.loc[is_bot_1bot, 'Score']
score_3bots = df_3bots_3hum.loc[is_bot_3bots, 'Score']

#### Compare Items and Score: 3 bots, 3 humans vs 6 bots

In [21]:
# ItemsFound -- one-sided Mann-Whitney U test. With alternative='greater' the
# alternative hypothesis is that the first sample (bots from the 3-bot/3-human
# file) is stochastically greater than the second (the 6-bot file).
stat_items_3bots_vs_6bots, p_items_3bots_vs_6bots = mannwhitneyu(
    items_found_3bots,
    items_found_6bots,
    alternative='greater',
)
# Effect size from the project helper scripts.utils.rank_biserial
# (implementation not shown in this notebook).
r_items_3bots_vs_6bots = rank_biserial(
    items_found_3bots,
    items_found_6bots,
    alternative='greater',
)
print(
    "Items 3 bots vs 6 bots: "
    f"U-statistic = {stat_items_3bots_vs_6bots}, "
    f"p-value = {p_items_3bots_vs_6bots}, "
    f"Rank-Biserial = {r_items_3bots_vs_6bots}"
)

# Score -- same one-sided test and effect size, applied to the 'Score' samples.
stat_score_3bots_vs_6bots, p_score_3bots_vs_6bots = mannwhitneyu(
    score_3bots,
    score_6bots,
    alternative='greater',
)
r_score_3bots_vs_6bots = rank_biserial(
    score_3bots,
    score_6bots,
    alternative='greater',
)
print(
    "Score 3 bots vs 6 bots: "
    f"U-statistic = {stat_score_3bots_vs_6bots}, "
    f"p-value = {p_score_3bots_vs_6bots}, "
    f"Rank-Biserial = {r_score_3bots_vs_6bots}"
)

Items 3 bots vs 6 bots: U-statistic = 30876.0, p-value = 3.585527140652884e-11, Rank-Biserial = 0.4014161220043573
Score 3 bots vs 6 bots: U-statistic = 30495.0, p-value = 2.4998805020900417e-10, Rank-Biserial = 0.3841230936819171


#### Compare Items and Score: 1 bot, 5 humans vs 6 bots

In [22]:
# ItemsFound -- one-sided Mann-Whitney U test. With alternative='greater' the
# alternative hypothesis is that the first sample (bots from the 1-bot/5-human
# file) is stochastically greater than the second (the 6-bot file).
stat_items_1bot_vs_6bots, p_items_1bot_vs_6bots = mannwhitneyu(
    items_found_1bot,
    items_found_6bots,
    alternative='greater',
)
# Effect size from the project helper scripts.utils.rank_biserial
# (implementation not shown in this notebook).
r_items_1bot_vs_6bots = rank_biserial(
    items_found_1bot,
    items_found_6bots,
    alternative='greater',
)
print(
    "Items 1 bot vs 6 bots: "
    f"U-statistic = {stat_items_1bot_vs_6bots}, "
    f"p-value = {p_items_1bot_vs_6bots}, "
    f"Rank-Biserial = {r_items_1bot_vs_6bots}"
)

# Score -- same one-sided test and effect size, applied to the 'Score' samples.
stat_score_1bot_vs_6bots, p_score_1bot_vs_6bots = mannwhitneyu(
    score_1bot,
    score_6bots,
    alternative='greater',
)
r_score_1bot_vs_6bots = rank_biserial(
    score_1bot,
    score_6bots,
    alternative='greater',
)
print(
    "Score 1 bot vs 6 bots: "
    f"U-statistic = {stat_score_1bot_vs_6bots}, "
    f"p-value = {p_score_1bot_vs_6bots}, "
    f"Rank-Biserial = {r_score_1bot_vs_6bots}"
)

Items 1 bot vs 6 bots: U-statistic = 4987.5, p-value = 0.09876937944294217, Rank-Biserial = 0.1642156862745099
Score 1 bot vs 6 bots: U-statistic = 4874.0, p-value = 0.1404542584301403, Rank-Biserial = 0.13772175536881415


#### Compare: 3 bots, 3 humans vs 1 bot, 5 humans

In [23]:
# ItemsFound -- one-sided Mann-Whitney U test. With alternative='greater' the
# alternative hypothesis is that the first sample (bots from the 3-bot/3-human
# file) is stochastically greater than the second (bots from the
# 1-bot/5-human file).
stat_items_3bots_vs_1bot, p_items_3bots_vs_1bot = mannwhitneyu(
    items_found_3bots,
    items_found_1bot,
    alternative='greater',
)
# Effect size from the project helper scripts.utils.rank_biserial
# (implementation not shown in this notebook).
r_items_3bots_vs_1bot = rank_biserial(
    items_found_3bots,
    items_found_1bot,
    alternative='greater',
)
print(
    "Items 3 bots vs 1 bot: "
    f"U-statistic = {stat_items_3bots_vs_1bot}, "
    f"p-value = {p_items_3bots_vs_1bot}, "
    f"Rank-Biserial = {r_items_3bots_vs_1bot}"
)

# Score -- same one-sided test and effect size, applied to the 'Score' samples.
stat_score_3bots_vs_1bot, p_score_3bots_vs_1bot = mannwhitneyu(
    score_3bots,
    score_1bot,
    alternative='greater',
)
r_score_3bots_vs_1bot = rank_biserial(
    score_3bots,
    score_1bot,
    alternative='greater',
)
print(
    "Score 3 bots vs 1 bot: "
    f"U-statistic = {stat_score_3bots_vs_1bot}, "
    f"p-value = {p_score_3bots_vs_1bot}, "
    f"Rank-Biserial = {r_score_3bots_vs_1bot}"
)

Items 3 bots vs 1 bot: U-statistic = 1297.5, p-value = 0.13755862323911805, Rank-Biserial = 0.14417989417989419
Score 3 bots vs 1 bot: U-statistic = 1327.5, p-value = 0.10000594502202814, Rank-Biserial = 0.1706349206349207
