In [None]:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

import sys
sys.path.append('/src/')
from visualization.visualize import print_image_with_point, show_complete_fixation_with_all_frames_all_gaze

# Use matplotlib's default plot style.
plt.style.use('default')
# Shared figure settings: the plots below use figsize=(WIDTH*1.3, WIDTH) and dpi=dpi.
WIDTH = 6
dpi = 100

In [None]:
# Root folder; every path below is built by appending to it.
folder_path = "path/to/folder/with/data/"

# Paths of the evaluation CSV files.
eval_path = folder_path + "data/cyprus_eval_frames.csv"
eval_path_results = folder_path + "data/cyprus_eval_frames_results.csv"
cyprus_eval_frames_results_predictions = folder_path + "data/cyprus_eval_frames_results_predictions.csv"
cyprus_eval_complete_sumpXc = folder_path + "data/cyprus_eval_complete_sumpXc.csv"

# Read the label mapping and collect the distinct category names it maps to.
p = folder_path + "label_mapping.json"
with open(p, 'r') as f:
    label_mapping = json.load(f)
# The file is only needed for parsing; the category set is derived from the parsed dict.
categories = {*label_mapping["category_mapping"].values()}
# note:
# cma active agent
# sa passive agent

In [None]:
# Load the CSV located at `cyprus_eval_frames_results_predictions` into a DataFrame.
df_res_pred = pd.read_csv(cyprus_eval_frames_results_predictions)

In [None]:
# Rename the "response" column to "manual_label".
# The guard keeps this cell idempotent: once "response" has been moved, re-running the
# cell must not fail with a KeyError. pop() removes the column and returns it, so the
# values end up in "manual_label" (appended as the last column, overwriting an existing one).
if "response" in df_res_pred.columns:
    df_res_pred["manual_label"] = df_res_pred.pop("response")

# Predictions made at level 4 are relabelled as "ambiguous".
df_res_pred.loc[df_res_pred["level_sum"] == 4, "predicted_sum"] = "ambiguous"
df_res_pred.loc[df_res_pred["level_pXc"] == 4, "predicted_pXc"] = "ambiguous"

# Check whether the prediction matches the manual label, separately for the two
# ways of calculating the score (sum and pXc).
df_res_pred["correct_sum"] = df_res_pred["manual_label"] == df_res_pred["predicted_sum"]
df_res_pred["correct_pXc"] = df_res_pred["manual_label"] == df_res_pred["predicted_pXc"]

# show the table
df_res_pred

In [None]:
### Split the results by manual label: rows labelled "ambiguous" vs. all other rows.
is_ambiguous = df_res_pred["manual_label"] == "ambiguous"
df_res_pred_clean = df_res_pred[~is_ambiguous]
df_res_pred_amb = df_res_pred[is_ambiguous]

###  Analysis

In [None]:
# Plain assignment: `df` is a second name for the same DataFrame object, not a copy.
df = df_res_pred

In [None]:
# Count how many pXc predictions match the manual label (True) and how many do not (False).
df["correct_pXc"].value_counts() 

In [None]:
# Correct / incorrect prediction counts per level, as a DataFrame.
# After unstack() the groupby key "level_pXc" already is the index, so no
# reset_index()/set_index() round trip is needed.
# Note: set_index(..., inplace=True) returns None, so its result must never be
# assigned back to the variable.
df_level = (
    df.groupby("level_pXc")["correct_pXc"]
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
    # Always expose both outcomes in the fixed order (False, True), even when one of
    # them never occurs, so column-order-based colours and legends stay correct.
    .reindex(columns=[False, True], fill_value=0)
)
df_level

In [None]:
# Plot the results per level as a stacked bar plot.
# The figure is created explicitly (like the other plots in this notebook):
# `dpi` is a figure property, not a DataFrame.plot() parameter, so it is set here
# and the axes are handed to pandas via `ax=`.
fig, ax = plt.subplots(figsize=(WIDTH*1.3, WIDTH), dpi=dpi)
df_level.plot(kind='bar', stacked=True, color=["red", "green"], ax=ax)
ax.set_xlabel("Prediction Level")
ax.set_ylabel("Fixation Count")
ax.legend(["Incorrect", "Correct"])
# One tick per level that is actually present in df_level, labelled with the level
# itself instead of a hard-coded "1".."4".
ax.set_xticks(np.arange(len(df_level.index)))
ax.set_xticklabels([str(int(level)) for level in df_level.index], rotation=0)
plt.show()

In [None]:
# Calculate recall, precision and accuracy for each category (one category vs. the rest).

# modify below -----------------------------------------------------------
heuristic = "predicted_pXc"  #'predicted_sum'#

#df = df[df.level_pXc == 3] # choose whether it should be just for a specific level
# modify above -----------------------------------------------------------

# Set union: "ambiguous" is listed exactly once, even if the label mapping already
# contains it (a duplicate would make the column lengths below disagree).
categorie_list = sorted(set(categories) | {"ambiguous"})
recall_dict = {}
precision_dict = {}
accuracy_dict = {}

for category in categorie_list:
    # Boolean masks, computed once per category.
    is_labelled = df["manual_label"] == category
    is_predicted = df[heuristic] == category

    TP = int((is_labelled & is_predicted).sum())
    FP = int((~is_labelled & is_predicted).sum())
    TN = int((~is_labelled & ~is_predicted).sum())
    FN = int((is_labelled & ~is_predicted).sum())

    # how good can the model find all correct ones
    recall_dict[category] = TP / (TP + FN) if (TP + FN) > 0 else 0
    # how good can the model find only correct ones
    precision_dict[category] = TP / (TP + FP) if (TP + FP) > 0 else 0
    accuracy_dict[category] = (
        (TP + TN) / (TP + TN + FP + FN) if (TP + TN + FP + FN) > 0 else 0
    )

# Column names (including their spelling) are kept as they are because the cells
# below read them.
per_cat_columns = {
    "category": list(recall_dict.keys()),
    "recall": list(recall_dict.values()),
    "precision": list(precision_dict.values()),
    "accuracy": list(accuracy_dict.values()),
    # Count the manual labels on the same frame the metrics were computed on (`df`),
    # so the amounts stay consistent when the level filter above is enabled.
    "ammount": list(
        df["manual_label"]
        .value_counts()
        .reindex(categorie_list)
        .fillna(0)
        .astype(int)
    ),
}
# create df_per_cat, keeping only categories that occur in the manual labels
df_per_cat = pd.DataFrame(per_cat_columns)
df_per_cat = df_per_cat[df_per_cat["ammount"] != 0].reset_index(drop=True)
df_per_cat["weighted_recall"] = df_per_cat["recall"] * df_per_cat["ammount"]
df_per_cat["weigth"] = df_per_cat["ammount"] / df_per_cat["ammount"].sum()
df_per_cat

In [None]:
# Plot recall and precision per category as grouped bars.
fig, ax = plt.subplots(figsize=(WIDTH*1.3, WIDTH), dpi=dpi)
width = 0.35  # bar width; the precision bars are shifted right by one bar width

positions = np.arange(len(categorie_list))
# Materialise the dict views as lists so ax.bar receives ordinary sequences of heights.
recall_values = list(recall_dict.values())
precision_values = list(precision_dict.values())

recall_bars = ax.bar(positions, recall_values, width, label='Recall', color=plt.cm.Blues(700))
precision_bars = ax.bar(positions + width, precision_values, width, label='Precision', color=plt.cm.Blues(100))

ax.set_xlabel('Category')
ax.set_ylabel('Score')
ax.set_title('Recall and Precision ')
# Centre each tick between the two bars of its category.
ax.set_xticks(positions + width / 2)
ax.set_xticklabels(categorie_list, rotation=90)
ax.legend()

def autolabel(bars):
    """Write the height of every bar just above it.

    bars: iterable of bar patches, e.g. the container returned by ``ax.bar``.
    Annotations are drawn on the ``ax`` defined in this cell.
    """
    for bar in bars:
        height = bar.get_height()
        ax.annotate('{}'.format(height),
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points above the bar top
                    textcoords="offset points",
                    ha='center', va='bottom')

# just to read the values
#autolabel(recall_bars)
#autolabel(precision_bars)
plt.show()



In [None]:
from sklearn.metrics import accuracy_score, balanced_accuracy_score, average_precision_score 
heuristic = 'predicted_pXc'#'predicted_sum'# 

# Summary over the per-category table: the plain means give every category the same
# weight, the weighted means weight each category by its "ammount" column.
n_categories = len(df_per_cat)
total_amount = sum(df_per_cat["ammount"])

mean_accuracy = sum(df_per_cat["accuracy"]) / n_categories
weighted_accuracy = sum(df_per_cat["accuracy"]* df_per_cat["ammount"]) / total_amount
print(f'mean accuracy: {mean_accuracy:.3f}')
print(f'Weighted mean accuracy: {weighted_accuracy:.3f}')

balanced_recall = sum(df_per_cat["recall"]) / n_categories
weighted_recall = sum(df_per_cat["weighted_recall"]) / total_amount
print(f'my Balanced recall: {balanced_recall:.3f}')
print(f'my Weighted balanced recall: {weighted_recall:.3f}')

balanced_precision = sum(df_per_cat["precision"]) / n_categories
weighted_precision = sum(df_per_cat["precision"]* df_per_cat["ammount"]) / total_amount
print(f'my Balanced precision: {balanced_precision:.3f}')
print(f'my Weighted balanced precision: {weighted_precision:.3f}')

In [None]:
# Plot a row-normalised confusion matrix (manual label vs. prediction of `heuristic`).
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Category names, in the order used for the rows and columns of the matrix.
# NOTE(review): hard-coded list; confirm it matches the labels that occur in the data.
category_names = ['ambiguous', 'building', 'person', 'signs', 'street', 'vegetation', 'vehicle']

# Create the confusion matrix (rows: true label, columns: predicted label)
cm = confusion_matrix(df['manual_label'], df[heuristic], labels=category_names)

# Normalise every row so it sums to 1. Rows without a single true sample would divide
# by zero; they are left at 0 instead of turning into NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_percentage = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals > 0,
)

# Plot the confusion matrix
fig, ax = plt.subplots(figsize=(WIDTH*1.3, WIDTH), dpi=dpi)
im = ax.imshow(cm_percentage, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)

# Set the category names as tick labels and label both axes
ax.set(xticks=np.arange(cm.shape[1]),
       yticks=np.arange(cm.shape[0]),
       xticklabels=category_names, yticklabels=category_names,
       xlabel="Predicted Category", ylabel="True Category")

# Rotate the x-axis labels for better readability
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Write the value into each cell; white text on dark cells, black text on light cells
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm_percentage[i, j], '.2f'),
                ha="center", va="center", color="white" if cm_percentage[i, j] > 0.5 else "black")

# Show the plot
plt.show()

In [None]:
# Print some basic statistics derived from the per-category table.
from sklearn.metrics import accuracy_score, balanced_accuracy_score, average_precision_score 
heuristic = 'predicted_pXc'#'predicted_sum'# 

df_cleaned = df#[df["manual_label"] != "ambiguous"]
#df_cleaned = df_cleaned[df_cleaned["manual_label"] != "sign"]

# (label, value) pairs in print order; the "Weighted" entries weight each category by
# its "ammount", the others are plain means over the categories.
summary_rows = [
    ('mean accuracy', sum(df_per_cat["accuracy"]) / len(df_per_cat)),
    ('Weighted mean accuracy', sum(df_per_cat["accuracy"]* df_per_cat["ammount"]) / sum(df_per_cat["ammount"])),
    ('my Balanced recall', sum(df_per_cat["recall"]) / len(df_per_cat)),
    ('my Weighted balanced recall', sum(df_per_cat["weighted_recall"]) / sum(df_per_cat["ammount"])),
    ('my Balanced precision', sum(df_per_cat["precision"]) / len(df_per_cat)),
    ('my Weighted balanced precision', sum(df_per_cat["precision"]* df_per_cat["ammount"]) / sum(df_per_cat["ammount"])),
]
for label, value in summary_rows:
    print(f'{label}: {value:.3f}')

In [None]:
# Scatter plot of recall against precision with one marker per category.
# The marker area scales with the category's "ammount"; the category name is written
# next to each marker, with manually tuned offsets where labels would overlap.

fig, ax = plt.subplots(figsize=(WIDTH*1.3, WIDTH), dpi=dpi)
marker_sizes = [a*5 for a in df_per_cat["ammount"]]
scatter = ax.scatter(
    df_per_cat["recall"],
    df_per_cat["precision"],
    marker="x",
    s=marker_sizes,
    color=plt.cm.Blues(180),
)
#ax.set_title('Recall vs Precision')
ax.set(xlabel='Recall', ylabel='Precision', xlim=(0, 1.1), ylim=(0, 1.1))

# Label offsets in points, per category; a negative y value puts the label below the marker.
adjusted_positions = {
    'vehicle': (0, -15)  # 15 points below the marker
    #'person': (10, 5)
}
default_offset = (0, 10)  # used for every category without an entry above

# Annotate each marker with its category name.
for category, x_recall, y_precision in zip(
    df_per_cat["category"], df_per_cat["recall"], df_per_cat["precision"]
):
    offset = adjusted_positions.get(category, default_offset)
    ax.annotate(category, (x_recall, y_precision), textcoords="offset points", xytext=offset, ha='center')

plt.show()
