### Investigate if participants allocate more cubes for an NVC-Robot.

In [8]:
import pandas as pd
import numpy as np
from pathlib import Path
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns

DATA_FOLDER = "/data/cubes/"
DATA_PATH = os.path.abspath(os.getcwd()) + DATA_FOLDER

In [9]:
# Iterate over files and find csv files that have allocation of cubes for each participant
dfs = []

for filename in os.listdir(FILES):
    if filename.endswith(".csv"): 
        participant_id = int(re.search(r'\d+', filename).group())
        temp = pd.read_csv(DATA_PATH + filename)
        temp["scene"] = temp["scene"].str[-1:]
        temp["scene"] = temp["scene"].astype('int')
        temp["participant"] = participant_id
        nvc = "A"
        if int(participant_id) % 2 == 0:
            nvc = "B"
    
        temp["NVC"] = nvc
        dfs.append(temp)


raw_data = pd.concat(dfs, axis=0, ignore_index=True)

In [10]:
raw_data.head()

Unnamed: 0,scene,robot_a,robot_b,participant,NVC
0,0,2,2,3,A
1,1,1,2,3,A
2,2,3,3,3,A
3,3,4,3,3,A
4,4,4,3,3,A


In [None]:
# Only from scene 2 to 4
filtered_data = raw_data[raw_data["scene"] > 1]
filtered_data.head()

In [None]:
# Find the total amount of cubes that participants allocated
cubes_per_participant = filtered_data.groupby("participant").sum()
cubes_per_participant = cubes_per_participant.drop(columns=['scene'])
cubes_per_participant['total_cubes'] = cubes_per_participant['robot_a'] + cubes_per_participant['robot_b'] 
cubes_per_participant.head()

In [None]:
plt.scatter(cubes_per_participant.index, cubes_per_participant["total_cubes"], label="participant")
plt.title("The distribution of cubes for each participant. Only from scene 2 to 4")
plt.xlabel("Participant ID")
plt.ylabel("Distributed cubes")
plt.xticks(range(1, len(dfs) + 1))

In [None]:
plt.scatter(cubes_per_participant.index, cubes_per_participant["robot_a"], label="A")
plt.scatter(cubes_per_participant.index, cubes_per_participant["robot_b"], label="B")
plt.title("The distribution of cubes for each participant and robot. Only from scene 2 to 4")
plt.xlabel("Participant ID")
plt.ylabel("Distributed cubes")
plt.xticks(range(1, len(dfs) + 1))
plt.legend()

In [None]:
scene2_data = raw_data[raw_data["scene"] == 2]
scene2_data

In [None]:
# Only scene 2 data merged with total number of cubes
cubes_per_participant = cubes_per_participant.reset_index()
eval_data = pd.merge(scene2_data, cubes_per_participant, on='participant')
eval_data = eval_data.drop(columns=['robot_a_y', 'robot_b_y'])
eval_data

In [None]:
# Only scene 2 data merged with total number of cubes. Extended with NVC or non-NVC.
eval_data['robot_a_significance'] = eval_data['robot_a_x'] / eval_data['total_cubes'] 
eval_data['robot_b_significance'] = eval_data['robot_b_x'] / eval_data['total_cubes']
eval_data['NVC_A'] = eval_data['NVC'] == "A"
eval_data['NVC_B'] = eval_data['NVC'] == "B"

eval_data

In [None]:
plt.scatter(eval_data["participant"], eval_data["robot_a_significance"], label="A")
plt.scatter(eval_data["participant"], eval_data["robot_b_significance"], label="B")
plt.title("The significance of cubes for each participant and robot. Only scene 2")
plt.xlabel("Participant ID")
plt.ylabel("Significance")
plt.xticks(range(1, len(dfs) + 1))
plt.legend()

In [None]:
fig, ax = plt.subplots()

colors = {True:'red', False:'blue'}
ax.scatter(eval_data['participant'], eval_data['robot_a_significance'], label=eval_data['NVC_A'], c=eval_data['NVC_A'].apply(lambda x: colors[x]))
ax.scatter(eval_data['participant'], eval_data['robot_b_significance'], label=eval_data['NVC_B'], c=eval_data['NVC_B'].apply(lambda x: colors[x]))
ax.set_xlabel("Participant")
ax.set_ylabel("Significance")

ax.set_title("Robot preference values: red is NVC, and blue is non-NVC")