### Investigation of Real-World Data From Cyprus


- total distribution of Length
  - separated by session (1, 2, 3)
  - by category
- investigation of outliers
  -  long fixations are distributed across categories
  - ultra long fixations 1 sec or 1.5 sec


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re

# Show up to 1000 rows when a DataFrame is displayed.
pd.set_option("display.max_rows", 1000)
# Base figure size and resolution; plotting cells use figsize=(WIDTH * 1.3, WIDTH), dpi=dpi.
WIDTH = 6
dpi  = 100
# Column holding the category label used for grouping throughout the notebook.
heuristic1 = "fixation_pXc_label"
# Column holding the level annotation used in the "per level" cells.
heuristic1_level = "level_pXc_annotation"

In [None]:
def shorten_session_name(sentence):
    """Shorten a recording name of the form ``Expl_<a>_ET_<b>...`` to ``"<a>.<b>"``.

    Everything from ``Expl_`` to the end of the line is replaced; text in
    front of the match is kept, and input without a match is returned as is.
    """
    return re.sub(r"Expl_(\d+)_ET_(\d+).*", r"\1.\2", sentence)

In [None]:
# Root folder of the recordings; note that it already ends with a slash.
folder_path = "path/to/data/"

# One entry per recording, named Expl_<a>_ET_<b>_<date>_<time>_ET.
session_names = [
    "Expl_1_ET_1_2023-09-05_11-56-16_ET",
    "Expl_1_ET_2_2023-09-05_12-34-24_ET",
    "Expl_1_ET_3_2023-09-05_13-10-01_ET",
    "Expl_2_ET_1_2023-09-06_10-36-37_ET",
    "Expl_2_ET_2_2023-09-06_11-08-36_ET",
    "Expl_2_ET_3_2023-09-06_11-39-21_ET",
    "Expl_3_ET_1_2023-09-06_13-24-43_ET",
    "Expl_3_ET_2_2023-09-06_13-57-57_ET",
    "Expl_3_ET_3_2023-09-06_14-28-39_ET",
    "Expl_4_ET_1_2023-09-06_18-31-33_ET",
    "Expl_4_ET_2_2023-09-06_18-57-24_ET",
    "Expl_5_ET_1_2023-09-07_18-17-19_ET",
    "Expl_5_ET_2_2023-09-07_18-48-26_ET",
]

# Per-session CSV paths.
# NOTE(review): the first list has no "/data/" segment while the second one
# does (and yields a double slash) - confirm which layout is the real one.
fixation_and_labels = [
    f"{folder_path}{session}/fixation_and_labels_leveled.csv"
    for session in session_names
]
fix_path = [
    f"{folder_path}/data/{session}/fixations.csv" for session in session_names
]



In [None]:
# Load the start/end frame table and the two pre-aggregated frames that cover all sessions.
start_path = folder_path + "/data/Cyprus_start_end_frames.csv"
start_end_df = pd.read_csv(start_path)

print("Try loading dataframes..")
try:
    df = pd.read_csv(folder_path + "/data/fixation_and_labels_total.csv")
    df_fix = pd.read_csv(folder_path + "/data/fix_only_label_total.csv")
except FileNotFoundError:
    # Keep the hint, but stop here: continuing would only fail later with a
    # confusing NameError on `df_fix`.
    print("..fail, Run 5.0 again")
    raise
print(".. done!")

# Short session id "<a>.<b>" derived from the full recording name.
df_fix["session_short"] = df_fix["session"].apply(shorten_session_name)
# "day" is the integer part of the short id, i.e. <a> in Expl_<a>_ET_<b>.
df_fix["day"] = df_fix["session_short"].apply(lambda x: int(float(x)))

### Fixation per category

In [None]:
# Count fixations per category.
Category = (
    df_fix.groupby([heuristic1])["fixation_id"]
    .count()
    .reset_index()
    .rename(columns={"fixation_id": "Count"})
)

plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.barplot(x=heuristic1, y="Count", data=Category, width=0.75)
plt.xticks(rotation=45)
plt.xlabel("Category")
plt.ylabel("Fixation Count")

# Write each count above its bar; iterate over the rows actually present
# instead of assuming a fixed number of categories.
for i, count in enumerate(Category["Count"]):
    plt.text(i, count + 100, count, ha="center", va="bottom")

# Share of all fixations per category, in percent.
Category["p"] = Category["Count"] / Category["Count"].sum() * 100
Category

In [None]:
# Summary statistics of fixation duration per category, longest mean first.
(
    df_fix.groupby([heuristic1])["duration_ms"]
    .describe()
    .sort_values(by="mean", ascending=False)
)

### Fixation  per Category per session


In [None]:
# Spread of the per-session fixation counts within each category,
# including the coefficient of variation (std / mean) and the variance.
CategoryPerDayMeta = (
    df_fix.groupby(["session_short", heuristic1])["fixation_id"].count().to_frame()
)

# describe() yields MultiIndex columns of the form ("fixation_id", <statistic>).
CategoryPerDayMeta = CategoryPerDayMeta.groupby([heuristic1]).describe().reset_index()

count_std = CategoryPerDayMeta[("fixation_id", "std")]
count_mean = CategoryPerDayMeta[("fixation_id", "mean")]
CategoryPerDayMeta["cv"] = count_std / count_mean
CategoryPerDayMeta["variance"] = count_std ** 2
CategoryPerDayMeta.sort_values(by=("fixation_id", "std"), ascending=False)

In [None]:
# Fixation count per (session, category); unstack/stack with fill_value=0
# inserts an explicit 0 for every pair that never occurs.
pair_counts = df_fix.groupby(["session_short", heuristic1])["fixation_id"].count()
CategoryPerDay = (
    pair_counts.to_frame()
    .unstack(fill_value=0)
    .stack()
    .reset_index()
    .rename(columns={"fixation_id": "Count"})
)

categories = list(CategoryPerDayMeta[heuristic1].unique())
sessions = list(CategoryPerDay["session_short"].unique())

# One list of counts per category, in the row order of CategoryPerDay.
data = []
for category in categories:
    in_category = CategoryPerDay[heuristic1] == category
    data.append(list(CategoryPerDay[in_category]["Count"].values))

In [None]:
# Summary statistics of CategoryPerDay, grouped by category.
CategoryPerDay.groupby([heuristic1]).describe()

In [None]:
# Variance of the fixation counts across categories, computed per session.
CategoryPerDay.groupby([ "session_short"])["Count"].var()

In [None]:
# Array form of `data` (the per-category count lists built in an earlier cell).
table = np.array(data)

In [None]:
# Wide table: one row per session, one column per category.
# Built from `categories` / `data` pairwise so it adapts to however many
# categories exist instead of indexing a fixed 0..12.
data_titels = {"session": sessions, **dict(zip(categories, data))}

# Convert the data to a DataFrame.
# NOTE(review): this rebinds `df`, which the load cell used for the full
# fixation_and_labels frame - consider a dedicated name.
df = pd.DataFrame(data_titels)
df

In [None]:
# Count fixations per (session, category) pair.
# NOTE(review): unlike the earlier zero-filled CategoryPerDay, pairs that never
# occur are simply absent here (no 0 rows) - confirm this is intended, since
# the cells below reuse this frame.
CategoryPerDay = (
    df_fix.groupby(["session_short", heuristic1])["fixation_id"]
    .count()
    .reset_index()
    .rename(columns={"fixation_id": "Count"})
)

# Plot 1: grouped bars, one bar per session within each category.
plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.barplot(x=heuristic1, y="Count", data=CategoryPerDay, hue="session_short", width=0.75)
plt.xticks(rotation=45)
plt.xlabel("Category")
plt.ylabel("Fixation Count")
plt.legend(title="Session")
plt.show()

# Plot 2: box plot of the per-session counts with the single sessions overlaid.
plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.swarmplot(data=CategoryPerDay, x=heuristic1, y="Count", hue="session_short", color=".25")
sns.boxplot(data=CategoryPerDay, x=heuristic1, y="Count")
# Empty legend call hides the per-session legend of the swarm layer.
plt.legend([], [], frameon=False)
plt.xticks(rotation=45)
plt.xlabel("Category")
plt.ylabel("Fixation Count")
plt.show()

In [None]:
# Total fixation duration per (day, category).
Collider_Count = (
    df_fix.groupby(["day", heuristic1])["duration_ms"]
    .sum()
    .reset_index()
    .rename(columns={"duration_ms": "Count"})
)
# Per-day total, merged back in as "Count_sum" so each row can be normalised.
sumcount = Collider_Count.groupby("day")["Count"].sum()
Collider_Count = Collider_Count.merge(sumcount, on="day", suffixes=("", "_sum"))
# CountN: share of the day's total fixation duration spent on the category.
Collider_Count["CountN"] = Collider_Count["Count"] / Collider_Count["Count_sum"]
# Count: absolute duration, converted from ms to s.
Collider_Count["Count"] = Collider_Count["Count"] / 1e3
print("The ammount of fixations per category per session is:")

plt.figure(figsize=(10, 5))
sns.barplot(data=Collider_Count, x=heuristic1, y="CountN", hue="day")
plt.xticks(rotation=45)
xlabel = plt.xlabel("Label")
# The plotted quantity is the normalised share (CountN), not seconds.
ylabel = plt.ylabel("Relative Fixation Duration")
# NOTE(review): the hue is the "day" column; confirm "Session" is the wanted legend title.
plt.legend(title="Session")
plt.show()

In [None]:
# Summary statistics of the per-session fixation counts, one row per category.
Meta_CategoryPerDay = CategoryPerDay.groupby(heuristic1)["Count"].describe()

In [None]:
# Shapiro-Wilk normality check of the per-session counts, one test per category.
from scipy.stats import shapiro

alpha = 0.05  # significance level, same for every category
for category in Category[heuristic1]:
    # Use a dedicated name: `data` is the list-of-lists that feeds `table`.
    counts = CategoryPerDay.loc[CategoryPerDay[heuristic1] == category, "Count"]
    if len(counts) < 3:
        # The test is not defined for fewer than three observations.
        print(f"{category}: only {len(counts)} value(s), Shapiro-Wilk test skipped")
        continue
    stat, p = shapiro(counts)
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    if p > alpha:
        print(f'{category} looks Gaussian (fail to reject H0)')
    else:
        print(f'{category}  does not look Gaussian (reject H0)')

### Fixation per Level

In [None]:
# Fixation count per level, computed once and reused for plot, labels and shares.
level_counts = df_fix.groupby(heuristic1_level)["fixation_id"].count()
n_levels = len(level_counts)

level_counts.plot(kind="bar", figsize=(WIDTH * 1.3, WIDTH), width=0.75)

# Print the value above each bar.
for i, count in enumerate(level_counts.values):
    plt.text(i, count + 100, count, ha="center", va="bottom")

plt.xticks(rotation=0)
# Replace the raw level values by "Level_1" ... "Level_n".
plt.xticks(np.arange(n_levels), [f"Level_{i + 1}" for i in range(n_levels)])
# Coarser y ticks.
plt.yticks(np.arange(0, 20001, 5000))
xlabel = plt.xlabel("")
ylabel = plt.ylabel("Number of fixations")

# Share of all fixations per level.
level_counts / level_counts.sum()

In [None]:
# Fixation count per (level, session).
LevelCountPerDay = (
    df_fix.groupby([heuristic1_level, "session_short"])["fixation_id"]
    .count()
    .reset_index()
    .rename(columns={"fixation_id": "Count"})
)

# Per-level mean/std broadcast back onto every row. transform() keeps the
# row index; assigning the plain groupby result would align on the level
# values instead of the row labels and produce NaN or misplaced numbers.
counts_by_level = LevelCountPerDay.groupby(heuristic1_level)["Count"]
LevelCountPerDay["mean"] = counts_by_level.transform("mean")
LevelCountPerDay["std"] = counts_by_level.transform("std")

sns.barplot(data=LevelCountPerDay, x=heuristic1_level, y="Count")


### Fixation distribution over Space

In [None]:
# Integer pixel coordinates of every fixation (truncated, column-wise cast
# instead of a row-by-row apply). These columns are reused by the per-session
# heatmaps further down.
df_fix["y"] = df_fix["fixation_y"].astype(int)
df_fix["x"] = df_fix["fixation_x"].astype(int)

# 2-D histogram (heatmap) of all fixations.
plt.figure(figsize=(10, 5))
sns.histplot(
    data=df_fix,
    x="x",
    y="y",
    bins=50,
    cbar=True,
    cbar_kws={"label": "Fixation Count"},
    cmap="viridis",
)

# Fix the visible area to 1600 x 1200.
plt.ylim(0, 1200)
plt.xlim(0, 1600)

# Hide ticks and axis labels.
plt.xticks([])
plt.yticks([])
plt.xlabel("")
plt.ylabel("")

# Same scale on both axes.
plt.gca().set_aspect("equal", adjustable="box")

plt.show()

In [None]:
# One fixation heatmap per session. Relies on the integer "x"/"y" columns
# that the overall-heatmap cell adds to df_fix.
for group_name, group_df in df_fix.groupby("session_short"):
    plt.figure(figsize=(10, 5))
    sns.histplot(
        data=group_df,
        x="x",
        y="y",
        bins=50,
        cbar=True,
        cbar_kws={"label": "Fixation Count"},
        cmap="viridis",
    )

    # Fix the visible area to 1600 x 1200.
    plt.ylim(0, 1200)
    plt.xlim(0, 1600)

    # Hide ticks and axis labels.
    plt.xticks([])
    plt.yticks([])
    plt.xlabel("")
    plt.ylabel("")

    # Identify the session, then render before the next figure is opened.
    plt.title(f"Session {group_name}")
    plt.show()

### Fixation duration

In [None]:
# Bars: summed fixation duration per session.
Fixation_Count = (
    df_fix.groupby(["session_short"])["duration_ms"].sum().reset_index()
)
Fixation_Count.plot(
    x="session_short", y="duration_ms", kind="bar", legend=False, width=0.75
)

# Red line on a secondary y axis: session length in minutes.
# Grouped by the same key as the bars so both series share one order.
# NOTE(review): the / 1e9 suggests session_duration is in nanoseconds - confirm.
session_minutes = df_fix.groupby("session_short")["session_duration"].max() / 1e9 / 60
session_minutes.plot(secondary_y=True, color="red", legend=False)

plt.ylabel(
    "Session Duration [min]",
    loc="center",
)
plt.ylim(0, 13)
plt.xticks(rotation=0)
# One tick per plotted session rather than a fixed count.
plt.xticks(np.arange(len(Fixation_Count)), Fixation_Count["session_short"])
plt.xlabel("Session")
plt.title("")
plt.show()

In [None]:
# Violin plot of fixation duration for each value of the "day" column.
fig, ax = plt.subplots(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.violinplot(x="day", y="duration_ms", data=df_fix, ax=ax)

In [None]:
# Box plot of fixation duration per category with the single fixations overlaid.
# `categories` must already exist from an earlier cell; it fixes the x order.
plt.figure(figsize=(WIDTH*1.3, WIDTH), dpi=dpi)
sns.boxplot(data=df_fix, x=heuristic1, y="duration_ms", fliersize=0.5, linecolor="blue", order=categories)
# Jittered raw points; pass the same order as the box plot so both layers
# put every category at the same x position.
sns.stripplot(
    x=heuristic1,
    y="duration_ms",
    data=df_fix,
    order=categories,
    color="grey",
    dodge=True,
    jitter=0.4,
    size=0.3,
)
plt.ylabel("Duration of Fixation [ms]")
plt.xlabel("Category")
plt.xticks(rotation=45)

plt.show()

In [None]:
# Violin plot with a box plot drawn on top, categories in sorted order.
categories = df_fix[heuristic1].unique()
categories.sort()

fig, ax = plt.subplots(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.violinplot(x=heuristic1, y="duration_ms", data=df_fix, order=categories, linecolor="blue", ax=ax)
sns.boxplot(x=heuristic1, y="duration_ms", data=df_fix, order=categories, fliersize=0.5, linecolor="blue", ax=ax)
ax.set_ylabel("Duration of Fixation [ms]")
ax.set_xlabel("Category")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Bar plot of fixation duration for each value of the "day" column.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x="day", y="duration_ms", data=df_fix, ax=ax)
plt.xticks(rotation=45)
xlabel = ax.set_xlabel("Session Day")
ylabel = ax.set_ylabel("Fixation Duration [ms]")
plt.show()

In [None]:
# Bar plot of fixation duration per category, x axis ordered by `categories`.
fig, ax = plt.subplots(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.barplot(x="fixation_pXc_label", y="duration_ms", data=df_fix, order=categories, ax=ax)
plt.xticks(rotation=45)
xlabel = ax.set_xlabel("Category")
ylabel = ax.set_ylabel("Fixation Duration [ms]")
plt.show()

### Fixation duration, duration per level, per session, per category


In [None]:
# Duration distribution, stacked by the "fixation_sum_label" column.
# displot is figure-level and always creates its own figure, so the size is
# given through height/aspect; a preceding plt.figure() would only leave an
# empty extra figure behind.
sns.displot(
    df_fix,
    x="duration_ms",
    hue="fixation_sum_label",
    multiple="stack",
    kde=True,
    height=WIDTH,
    aspect=1.3,
)
plt.show()

In [None]:
# Same stacked duration distribution, restricted to fixations shorter than 500 ms.
# displot is figure-level and always creates its own figure, so the size is
# given through height/aspect; a preceding plt.figure() would only leave an
# empty extra figure behind.
sns.displot(
    df_fix[df_fix.duration_ms < 500],
    x="duration_ms",
    hue="fixation_sum_label",
    multiple="stack",
    kde=True,
    height=WIDTH,
    aspect=1.3,
)
plt.show()

In [None]:
# Box plot of fixation duration for each level annotation.
fig, ax = plt.subplots(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.boxplot(x="level_pXc_annotation", y="duration_ms", data=df_fix, ax=ax)
plt.xticks(rotation=45)
ax.set_title("Fixation duration per level_pXc_annotation")
plt.show()

In [None]:
# Box plot of fixation duration per session.
plt.figure(figsize=(10, 5))
sns.boxplot(data=df_fix, x="session_short", y="duration_ms", width=0.75, fliersize=1)

plt.xticks(rotation=45)

# Clip the y axis at 1500 ms; longer fixations fall outside the view.
plt.ylim(0, 1500)
# The x axis is the session, so the title names the session (not the category).
plt.title("Fixation duration per session")
plt.ylabel("Fixation duration [ms]")
plt.xlabel("Session")
plt.show()

###  Outlier Fixation

In [None]:
# Rows of df_fix whose is_outlier flag is set.
df_fix_outlier = df_fix.loc[df_fix["is_outlier"] == True]

In [None]:
## calculate the threshold for outliers
# The median duration splits the flagged fixations into a short and a long side.
median_duration = df_fix["duration_ms"].median()
flagged = df_fix[df_fix["is_outlier"] == True]

# Flagged fixation closest to the median from below (at most one row).
outliers_below_median = (
    flagged[flagged["duration_ms"] < median_duration]
    .sort_values("duration_ms", ascending=False)
    .iloc[:1]
)

# Flagged fixation closest to the median from above (at most one row).
outliers_above_median = (
    flagged[flagged["duration_ms"] > median_duration]
    .sort_values("duration_ms")
    .iloc[:1]
)

# Extract the two durations; None when a side has no flagged fixation.
if outliers_below_median.empty:
    highest_outlier_below_median = None
else:
    highest_outlier_below_median = outliers_below_median["duration_ms"].values[0]

if outliers_above_median.empty:
    lowest_outlier_above_median = None
else:
    lowest_outlier_above_median = outliers_above_median["duration_ms"].values[0]

highest_outlier_below_median, lowest_outlier_above_median

In [None]:
# Density of the fixation duration with the outlier threshold marked.
plt.figure(figsize=(WIDTH * 1.3, WIDTH), dpi=dpi)
sns.kdeplot(df_fix["duration_ms"], fill=True)

plt.xlabel("Fixation Duration [ms]")
plt.ylabel("Density")
plt.legend(["Fixation Count"])

# Dashed red line at the outlier threshold.
# NOTE(review): 577 is hard-coded; presumably it is the value the preceding
# cell reports as `lowest_outlier_above_median` - confirm and derive it.
threshold = 577
plt.axvline(x=threshold, color="r", linestyle="--")
# Label the line slightly to the right of it.
plt.text(threshold + 13, 0.003, "Outlier Threshold = " + str(threshold), rotation=90)

# Dedicated names: binding `max` / `min` here would shadow the builtins for
# every cell that runs afterwards.
duration_max = df_fix["duration_ms"].max()
duration_min = df_fix["duration_ms"].min()
duration_mean = df_fix["duration_ms"].mean()

# Fewer ticks on the y axis.
plt.yticks(
    ticks=[0, 0.001, 0.002, 0.003, 0.004, 0.005],
    labels=[0, 0.001, 0.002, 0.003, 0.004, 0.005],
)
plt.tight_layout()
plt.text(
    1000,
    0.004,
    f"Max: {duration_max} ms,\nMin: {duration_min} ms,\nMean: {round(duration_mean,2)} ms",
)
plt.show()

In [None]:
# Layered histogram of the duration of the flagged (outlier) fixations, coloured by category.
plt.figure(figsize=(10, 5))
sns.histplot(
    data=df_fix_outlier, x="duration_ms", hue="fixation_pXc_label", multiple="layer"
)
plt.xticks(rotation=45)
plt.title("Fixation duration per fixation_pXc_label")
plt.show()
# A trailing plt.figure() with nothing drawn into it was removed here; it
# only produced an empty figure.

In [None]:
# Four renderings of the duration histogram over ALL fixations, one per
# seaborn `multiple` mode, on a 2 x 2 grid.
# A local alias is used on purpose: rebinding `df_fix_outlier` to the full
# frame would silently turn every later "outlier" statistic into an
# all-fixations statistic.
hist_source = df_fix

fig, axs = plt.subplots(2, 2, figsize=(15, 10))

panels = [
    ("layer", "Layered Histogram"),
    ("dodge", "Dodged Histogram"),
    ("stack", "Stacked Histogram"),
    ("fill", "Filled Histogram"),
]
# axs.flat walks the grid row by row: [0, 0], [0, 1], [1, 0], [1, 1].
for ax, (mode, title) in zip(axs.flat, panels):
    sns.histplot(
        data=hist_source,
        x="duration_ms",
        hue="fixation_pXc_label",
        multiple=mode,
        ax=ax,
        bins=50,
    )
    ax.set_title(title)
    ax.tick_params(axis="x", rotation=45)

# Adjust layout to prevent overlap.
plt.tight_layout()
plt.show()

In [None]:
# Split the fixations by the is_outlier flag once; both parts are reused below.
flagged_rows = df_fix[df_fix.is_outlier == True]
regular_rows = df_fix[df_fix.is_outlier == False]
n_flagged = len(flagged_rows)

# Flagged fixations per category, absolute and as a share of all flagged ones.
Outlier_label = (
    flagged_rows.groupby(["fixation_pXc_label"])["fixation_id"]
    .count()
    .reset_index()
    .rename(columns={"fixation_id": "outlier"})
)
Outlier_label["outlier_norm"] = Outlier_label["outlier"] / n_flagged

# Non-flagged fixations per category, absolute and as a share of all non-flagged ones.
NoOutlier = (
    regular_rows.groupby(["fixation_pXc_label"])["fixation_id"]
    .count()
    .reset_index()
    .rename(columns={"fixation_id": "no_outlier"})
)
NoOutlier["no_outlier_norm"] = NoOutlier["no_outlier"] / len(regular_rows)

# Put both share columns side by side (outer merge keeps categories present in only one part).
NoOutlier = NoOutlier.merge(Outlier_label, on="fixation_pXc_label", how="outer")
NoOutlier.plot(
    x="fixation_pXc_label",
    y=["no_outlier_norm", "outlier_norm"],
    kind="bar",
    legend=False,
    figsize=(4*1.3, 4)
)

plt.xticks(rotation=45)
plt.ylabel("Relative Count")
plt.xlabel("Category")
plt.legend([ "No Outlier", "Outlier (MAD)"])
plt.ylim(0, 0.6)

# Annotate the plot with the total number of flagged fixations.
plt.text(0, 0.56, "Total Outliers: " + str(n_flagged))

print("The ammount of outliers is:", n_flagged)

In [None]:
def _count_by_label(frame, count_col):
    """Count fixations per ``fixation_pXc_label`` in ``frame``.

    Returns a DataFrame with the label column, the count in ``count_col``
    and ``<count_col>_norm``, the count divided by the number of rows in
    ``frame``.
    """
    counts = (
        frame.groupby(["fixation_pXc_label"])["fixation_id"]
        .count()
        .reset_index()
        .rename(columns={"fixation_id": count_col})
    )
    counts[count_col + "_norm"] = counts[count_col] / len(frame)
    return counts


# Take the flagged fixations straight from df_fix: an earlier cell rebinds
# `df_fix_outlier` to the full frame, which would make the "outlier" counts
# below cover every fixation.
outlier_fix = df_fix[df_fix.is_outlier == True]

# Flagged fixations shorter than 1 s, and the cumulative ">= threshold" groups.
LongFix = _count_by_label(outlier_fix[outlier_fix.duration_ms < 1000], "outlier_count")
LongFix1 = _count_by_label(outlier_fix[outlier_fix.duration_ms >= 1000], "outlier_count_1sec")
LongFix15 = _count_by_label(outlier_fix[outlier_fix.duration_ms >= 1500], "outlier_count_1.5sec")
LongFix3 = _count_by_label(outlier_fix[outlier_fix.duration_ms >= 3000], "outlier_count_3sec")

# Non-flagged fixations for comparison.
NoOutlier = _count_by_label(df_fix[df_fix.is_outlier == False], "no_outlier")

# Combine everything into one table keyed by category; a category missing
# from a group gets 0.
for part in (LongFix1, LongFix15, LongFix3, NoOutlier):
    LongFix = LongFix.merge(part, on="fixation_pXc_label", how="outer")
LongFix = LongFix.fillna(0)

LongFix.plot(
    x="fixation_pXc_label",
    y=[
        "outlier_count_norm",
        "outlier_count_1sec_norm",
        "outlier_count_1.5sec_norm",
        "outlier_count_3sec_norm",
        "no_outlier_norm",
    ],
    kind="bar",
    # The x axis is the category and the bars are shares, not session totals.
    title="Relative amount of fixations per category",
    colormap="rocket",
    legend=False,
)

# NOTE(review): the first series only contains flagged fixations below 1 s,
# so "All Outliers" may not be the right legend entry - confirm.
plt.legend(
    [
        "All Outliers",
        "duration >= 1 sec",
        "duration >=  1.5 sec",
        "duration >=  3 sec",
        "No Outliers",
    ]
)

In [None]:
# Stacked bar chart of long fixations per category, split into duration bands.
# (pandas and matplotlib are already imported at the top of the notebook.)
# NOTE(review): this rebinds `df`; kept for compatibility with later cells.
df = LongFix

fig, ax = plt.subplots(figsize=(WIDTH*1.3, WIDTH), dpi = dpi)

# Labels for the categories.
labels = df["fixation_pXc_label"]

# The LongFix columns are cumulative (>= 1 s, >= 1.5 s, >= 3 s). Stacking them
# directly would count a 3 s fixation three times, so convert them to
# disjoint bands first.
at_least_1s = df["outlier_count_1sec"]
at_least_1_5s = df["outlier_count_1.5sec"]
at_least_3s = df["outlier_count_3sec"]

count_1sec = at_least_1s - at_least_1_5s    # 1 s <= duration < 1.5 s
count_1_5sec = at_least_1_5s - at_least_3s  # 1.5 s <= duration < 3 s
count_3sec = at_least_3s                    # duration >= 3 s

# Plot each band on top of the previous ones.
ax.bar(labels, count_1sec, label="1s-1.5s")
ax.bar(labels, count_1_5sec, bottom=count_1sec, label="1.5s-3s")
ax.bar(labels, count_3sec, bottom=count_1sec + count_1_5sec, label=" >3s")

ax.set_xlabel("Category")
ax.set_ylabel("Fixation Count")
ax.legend()

# Rotate x-axis labels for better readability.
plt.xticks(rotation=45)
plt.tight_layout()  # Adjust layout to prevent overlap

plt.show()