# Compare VR(Tracy) and Cyprus Data (with fixation outlier)

- Distribution of looked-at categories (fixation amount and total fixation time)


In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import PercentFormatter

In [None]:
# Regex patterns for VR object names, grouped by the coarse category each one
# is collapsed into. Group order and pattern order are significant: the
# flattened dict below keeps this insertion order.
# NOTE(review): short patterns such as ".*[Ss]a.*" and ".*[Cc]ar.*" match any
# name that merely contains those letters — confirm against the real names.
_vr_patterns_by_category = {
    "Street": [
        r".*[Pp]avement.*",
        r".*[Rr]oad.*",
    ],
    "Building": [
        r".*[Bb]uilding.*",
        r".*[Ll]andmark.*",
        r".*[Ww]all.*",
        r".*[Gg]raffiti.*",
        r".*[Cc]afe.*",
        r".*[Hh]ouse.*",
        r".*[Cc]hurch.*",
    ],
    "Vegetation": [
        r".*[Hh]edge.*",
        r".*[Cc]ypress.*",
        r".*[Tt]errain.*",
        r".*[Tt]ree.*",
        r".*[Gg]rass.*",
        r".*[Ll]eaf.*",
    ],
    "Person": [
        r".*[Aa]gent.*",
        r".*[Cc]ma.*",  # active agent
        r".*[Ss]a.*",  # passive agent
    ],
    "Signs": [
        r".*[Ll]amppost.*",
        r".*[Pp]ile[oO]f[Cc]lay.*",
    ],
    "Fence": [
        r".*[Ff]ence.*",
    ],
    "Vehicle": [
        r".*[Cc]ar.*",
    ],
}

# Flat {pattern: category} mapping, as consumed by Series.replace(regex=True).
replacement_dict_vr = {
    pattern: category
    for category, patterns in _vr_patterns_by_category.items()
    for pattern in patterns
}

# Cyprus labels that are folded into one of the shared categories.
replacement_dict_cy = {
    "Rider": "Person",
    **dict.fromkeys(("Train", "Sky"), "Background"),
    **dict.fromkeys(("Motorcycle", "Bicycle"), "Vehicle"),
}

# Disabled alternative: read the same mappings from a JSON file instead of
# hard-coding them (would additionally need `import json`).
# with open('/label_mapping.json', 'r') as json_file:
#     data = json.load(json_file)
#     categories = data['categories']
#     category_mapping = data['category_mapping']
#     level3_mapping = data['level3_mapping']
#     replacement_dict_vr = data['replacement_dict_vr']
#     replacement_dict_cy = data['replacement_dict_cy']

In [None]:
# Load the two tables compared in this notebook: Cyprus and VR.
folder_path = "path/to/your/data/folder"
# Join with Path so a separator always sits between the folder and "data/...".
# Plain string concatenation produced ".../folderdata/fix_only_label_total.csv"
# whenever folder_path had no trailing slash.
df_cy = pd.read_csv(Path(folder_path) / "data" / "fix_only_label_total.csv")
df_vr = pd.read_csv(
    "/VR-DataAnalysis/Complete_Data_AngularV_NO_Outliers.csv"
)

# Figure settings shared by the plotting cells (figsize is derived from WIDTH).
WIDTH = 6
dpi = 100

In [None]:
# Display the column names of the loaded Cyprus table (cell output only).
df_cy.columns

In [None]:
# Fixation label = the pXc label, except that rows whose pXc annotation level
# equals 4 are marked "Ambiguous".
is_level_4 = df_cy["level_pXc_annotation"] == 4
df_cy["fixation_label"] = df_cy["fixation_pXc_label"].mask(is_level_4, "Ambiguous")

# Disabled column renames (bracketed unit suffixes -> plain names):
# df_cy["fixation_x"] = df_cy["fixation_x_[px]"]
# df_cy["fixation_y"] = df_cy["fixation_y_[px]"]
# df_cy["start_timestamp_ns"] = df_cy["start_timestamp_[ns]"]
# df_cy["end_timestamp_ns"] = df_cy["end_timestamp_[ns]"]
# df_cy["duration_ms"] = df_cy["duration_[ms]"]

# Keep only the fixation-level columns and collapse rows that are identical
# across all of them.
fixation_columns = [
    "session",
    "fixation_id",
    "start_timestamp_ns",
    "end_timestamp_ns",
    "duration_ms",
    "fixation_x",
    "fixation_y",
    "fixation_label",
]
df_cy = df_cy.loc[:, fixation_columns].drop_duplicates()

In [None]:
# Keep only the VR columns that are relevant for this comparison.
vr_columns = [
    "SubjectID",
    "hitObjectColliderName",
    "Collider_Categorical",
    "Bitmask_flag",
    "Interpolated_collider",
    "counter",
    "Time_of_Gaze",
    "combined_vel",
    "isFix",
    "corrected_vel",
    "length",
    "distance",
    "avg_dist",
    "names",
    "Collider_CategoricalN",
    "Median_Standard_Duration",
    "gaze_mad_z",
    "Outlier_Gaze_Length",
]
# .copy() makes df_vr an independent frame, so adding "duration" below is a
# plain column assignment and does not raise pandas' SettingWithCopyWarning.
df_vr = df_vr[vr_columns].copy()

# "length" scaled by 1000 — presumably seconds -> milliseconds (the plots label
# this axis in ms); TODO confirm the unit of "length".
df_vr["duration"] = df_vr["length"] * 1000

In [None]:
# Map the raw labels of both data sets onto the shared categories.

# VR: apply the regex patterns to "names" first, then send every value that
# is not exactly one of the seven target categories to "Background".
vr_labels = df_vr["names"].replace(to_replace=replacement_dict_vr, regex=True)
df_vr["mapped_fixation_label"] = vr_labels.replace(
    to_replace=r"^(?!Person$|Building$|Vegetation$|Fence$|Signs$|Street$|Vehicle$).*$",
    value="Background",
    regex=True,
)

# Cyprus: capitalize the label, apply the Cyprus replacements, and finally
# count "Ambiguous" fixations as "Background".
df_cy["mapped_fixation_label"] = (
    df_cy["fixation_label"]
    .str.capitalize()
    .replace(to_replace=replacement_dict_cy, regex=True)
    .replace("Ambiguous", value="Background")
)

In [None]:
# Kernel density estimate of the VR fixation durations.
ax = plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi).gca()
sns.kdeplot(df_vr["duration"], fill=True, ax=ax)

ax.set_xlabel("Fixation Duration [ms]")
ax.set_ylabel("Density")

# Disabled: vertical marker for the outlier threshold.
# threshold = 577
# plt.axvline(x=threshold, color="r", linestyle="--")
# name the line outlier threshold
# plt.text(590, 0.003, "Outlier Threshold = " + str(threshold), rotation=90)

# Fixed density ticks; the same values serve as positions and as labels.
density_ticks = [0, 0.001, 0.002, 0.003, 0.004, 0.005]
plt.yticks(ticks=density_ticks, labels=density_ticks)
plt.tight_layout()
plt.show()

In [None]:
# Fixation duration distributions of the Cyprus and VR data in one figure.
plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
# Label each density so the two overlaid curves can be told apart; the
# "RW"/"VR" names are the condition names used for the bar plot.
sns.kdeplot(df_cy["duration_ms"], fill=True, label="RW")
sns.kdeplot(df_vr["duration"], fill=True, label="VR")

plt.xlabel("Fixation Duration [ms]")
plt.ylabel("Density")

# Only show durations up to 1000 ms.
plt.xlim(0, 1000)
plt.yticks(
    ticks=[0, 0.001, 0.002, 0.003, 0.004, 0.005],
    labels=[0, 0.001, 0.002, 0.003, 0.004, 0.005],
)
plt.legend(title="Condition")
plt.tight_layout()
plt.show()

In [None]:
# Display the summed "duration_ms" per mapped category (cell output only).
df_cy.groupby("mapped_fixation_label")["duration_ms"].sum()

In [None]:
# Normalisation constants for the per-category shares.
# NOTE(review): df_vr_grouped is not created in any cell visible above; it must
# provide "mapped_fixation_label" and "Sum" columns — confirm where it is built.
vr_mean_per_label = df_vr_grouped.groupby("mapped_fixation_label")["Sum"].mean()
total_vr = vr_mean_per_label.sum()

# The misspelled name "toltal_cy" is kept because the plotting cell reads it.
cy_sum_per_label = df_cy.groupby("mapped_fixation_label")["duration_ms"].sum()
toltal_cy = cy_sum_per_label.sum()

In [None]:
# NOTE(review): color_vr / color_cy are not used by the plot below. Integer
# arguments index the colormap's lookup table, so 700 and 1000 presumably both
# fall past its end and give the same colour — confirm the intended shades.
color_vr = plt.cm.Blues(700)
color_cy = plt.cm.Blues(1000)

# Share of the total per category (each series sums to 1):
# - Cyprus: summed "duration_ms" per category / overall sum.
# - VR: mean of "Sum" per category / sum of those means.
Count_cy = df_cy.groupby("mapped_fixation_label")["duration_ms"].sum() / toltal_cy
Count_vr = df_vr_grouped.groupby("mapped_fixation_label")["Sum"].mean() / total_vr

# Long-format frames, one row per (category, condition).
df_cy_cnt = pd.DataFrame(
    {"Fixation Label": Count_cy.index, "Percentage": Count_cy.values, "Condition": "RW"}
)
df_vr_cnt = pd.DataFrame(
    {"Fixation Label": Count_vr.index, "Percentage": Count_vr.values, "Condition": "VR"}
)

# Combine the dataframes; ignore_index avoids duplicate index labels.
combined_df = pd.concat([df_cy_cnt, df_vr_cnt], ignore_index=True)

# Plotting
plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.set_context("talk")
sns.set_palette("colorblind")
ax = sns.barplot(
    data=combined_df,
    x="Fixation Label",
    y="Percentage",
    hue="Condition",
    palette=["C0", "C1"],
    order=sorted(combined_df["Fixation Label"].unique()),
)

ax.grid(False)
plt.xticks(rotation=45)
# Format the y axis as whole percentages with a formatter instead of
# overwriting the tick labels: fixed label strings on auto-located ticks can
# drift from the real tick positions and trigger a matplotlib warning.
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=0))
ax.legend(title="Condition")
ax.set_ylabel("Average Fixation Count")
ax.set_xlabel("Category")
plt.show()