In [None]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

import os
# Switch the working directory to the TRAVEl checkout and export the config
# location. Both are done before `travel` is imported below; keep this order.
# NOTE(review): presumably `travel` reads TRAVEl_config_path during import/init — confirm.
os.chdir("/nfs/turbo/coe-chaijy/sstorks/simulation_informed_pcr4nlu/TRAVEl")
os.environ['TRAVEl_config_path'] = "/nfs/turbo/coe-chaijy/sstorks/simulation_informed_pcr4nlu/TRAVEl/config.yml"
from travel import init_travel
init_travel()

In [None]:
from travel.constants import CONFIG_PATH, DATA_CACHE_DIR
from travel.data.ego4d import FRAME_KEEP_FREQUENCY

# Sanity check: echo the resolved config path, the data cache directory and
# the Ego4D frame-keep frequency constant.
print(f"{CONFIG_PATH} {DATA_CACHE_DIR}")
print(f"{FRAME_KEEP_FREQUENCY}")

# Visualize Some Ego4D Examples

In [None]:
from travel.data.ego4d import Ego4DMistakeDetectionDataset

# Options for the validation-split dataset used by the cells below.
dataset_kwargs = dict(
    data_split="val",
    mismatch_augmentation=True,
    multi_frame=False,
    debug_n_examples_per_class=250,
)
dataset = Ego4DMistakeDetectionDataset(**dataset_kwargs)

In [None]:
from collections import Counter
from pprint import pprint

# Tally the examples by (mistake flag, mistake type). Frames are not loaded
# because only the labels are needed here.
mistake_dist = Counter(
    (example.mistake, example.mistake_type)
    for example in dataset.get_batches(1, load_frames=False)
)

pprint(mistake_dist.most_common())

In [None]:
import random

# How many examples of each kind go into the visualization / annotation set.
N_SUCCESS_EXAMPLES = 20
N_EXAMPLES_PER_MISTAKE_TYPE = 5
MISTAKE_TYPES_TO_SAMPLE = (
    "Action Incomplete",
    "MisalignSRL_V",
    "MisalignSRL_ARG1",
    "MisalignSRL_V_ARG1",
)


def _sample_up_to(pool, k):
    """Return `k` distinct random items from `pool`, or all of them if it holds fewer.

    `random.sample` raises ValueError when `k > len(pool)`; clamping keeps the
    notebook running when a class is under-represented in the loaded split.
    """
    if len(pool) < k:
        print(f"Warning: requested {k} examples but only {len(pool)} are available; using all of them.")
    return random.sample(pool, min(k, len(pool)))


# Bucket the dataset in a single pass instead of iterating it once per class.
pos = []
neg_by_type = {mistake_type: [] for mistake_type in MISTAKE_TYPES_TO_SAMPLE}
for ex in dataset:
    if not ex.mistake:
        pos.append(ex)
        continue
    # Match with `==` (not a dict lookup) so mistake types only have to compare
    # equal to the label strings, exactly like the original per-class filters.
    for mistake_type, bucket in neg_by_type.items():
        if ex.mistake_type == mistake_type:
            bucket.append(ex)
            break

neg_inc = neg_by_type["Action Incomplete"]
neg_v = neg_by_type["MisalignSRL_V"]
neg_arg1 = neg_by_type["MisalignSRL_ARG1"]
neg_v_arg1 = neg_by_type["MisalignSRL_V_ARG1"]

# Successes first, then each mistake type in a fixed order.
examples_to_visualize = _sample_up_to(pos, N_SUCCESS_EXAMPLES)
for neg_pool in (neg_inc, neg_v, neg_arg1, neg_v_arg1):
    examples_to_visualize += _sample_up_to(neg_pool, N_EXAMPLES_PER_MISTAKE_TYPE)



In [None]:
# Shuffle in place so the examples are no longer grouped in the order they were
# sampled. Re-running this cell reshuffles the list again.
random.shuffle(examples_to_visualize)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): `np` and `variance_of_laplacian` are no longer used in this cell;
# the imports are kept in case other cells rely on them.
from travel.data.utils.image import variance_of_laplacian
from travel.constants import RESULTS_DIR

# Output folder for the per-example figures and the ground-truth answer key.
this_results_dir = os.path.join(RESULTS_DIR, "data_quality_annotation", "dqa_06272025")
os.makedirs(this_results_dir, exist_ok=True)

# One "<idx>. <Mistake|Success> (<mistake type>)" line per example.
text_lines = []

for example_idx, example in enumerate(examples_to_visualize):
    outcome = 'Mistake' if example.mistake else 'Success'
    text_lines.append(f"{example_idx}. {outcome} ({example.mistake_type})\n")

    print("")
    print(example_idx, example.procedure_description)
    print(f"{outcome} ({example.mistake_type})")
    print(f"Example ID: {example.example_id}")
    print("Verb noun pair:", example.verb_noun_pair)

    # squeeze=False always yields a 2-D axes array, so the single-frame case
    # needs no special handling.
    fig, axarr = plt.subplots(1, len(example.frames), figsize=(22, 4), squeeze=False)

    # The caption carries the index and procedure text only, never the label.
    fig.text(0.5, 0.03, f"{example_idx}. {example.procedure_description}", ha='center', fontsize=12)
    for frame, ax in zip(example.frames, axarr[0]):
        if frame is not None:
            ax.imshow(frame)
            ax.axis('off')  # Hide the axes for better visualization

    # Lay out BEFORE saving so the PNG matches what is displayed; the bottom
    # strip of the figure is reserved for the caption.
    fig.tight_layout(rect=(0, 0.1, 1, 1))
    fig.savefig(os.path.join(this_results_dir, f"{example_idx}.png"), bbox_inches='tight')
    plt.show()
    # Release the figure so they do not pile up in memory across the loop.
    plt.close(fig)

with open(os.path.join(this_results_dir, "answers.txt"), "w") as f:
    f.writelines(text_lines)
    

# Select Examples for Ego4D-PMD Mistake Detection Label Data Quality Analysis

In [None]:
from travel.constants import RESULTS_DIR

# Folder containing answers.txt and the annotator answer files read below.
analysis_dir = os.path.join(
    RESULTS_DIR,
    "data_quality_annotation",
    "dqa_06272025",
)


In [None]:
from pprint import pprint

# Read the answer key; each line is parsed as "<idx>. <Label> (<mistake type>)".
with open(os.path.join(analysis_dir, "answers.txt"), "r") as f:
    lines_gt = f.readlines()

labels_gt = []
mistake_types_gt = []
for line_gt in lines_gt:
    # Text between the first and second "." of the line.
    entry = line_gt.split(".")[1].strip()
    first_word = entry.split()[0]
    # Drop the label word and the parentheses, leaving only the mistake type.
    mistake_type = entry.replace(first_word, "").strip().replace("(", "").replace(")", "")
    labels_gt.append(first_word.strip())
    mistake_types_gt.append(mistake_type)

pprint(labels_gt)
pprint(mistake_types_gt)


In [None]:
# Pick which annotator's answers to score (switch by swapping the comment).
# ann_fname = "answers_shane.txt"
ann_fname = "answers_itamar.txt"

with open(os.path.join(analysis_dir, ann_fname), "r") as f:
    lines_ann = f.readlines()

# The answer is the first non-blank character after the "<idx>." prefix:
# "M" is read as Mistake, any other character as Success.
labels_ann = []
for line_ann in lines_ann:
    answer_char = line_ann.split(".")[1].strip()[0]
    if answer_char == "M":
        labels_ann.append("Mistake")
    else:
        labels_ann.append("Success")

pprint(labels_ann)

In [None]:
def _accuracy(pairs):
    """Return the fraction of (ground truth, annotation) pairs that agree.

    Returns NaN for an empty collection instead of raising ZeroDivisionError.
    """
    pairs = list(pairs)
    if not pairs:
        return float("nan")
    return sum(1 for gt, ann in pairs if ann == gt) / len(pairs)


# zip() silently stops at the shorter list, so surface a length mismatch
# rather than quietly scoring only the overlapping prefix.
if len(labels_ann) != len(labels_gt):
    print(f"Warning: {len(labels_ann)} annotations vs. {len(labels_gt)} ground-truth labels; "
          "scoring only the overlapping prefix.")

human_accuracy = _accuracy(zip(labels_gt, labels_ann))

print("Human accuracy:", human_accuracy)

# sorted() keeps the per-type report in the same order on every run
# (plain set iteration order is not stable across kernels).
for example_type in sorted(set(mistake_types_gt)):
    ex_subset = [(gt, ann) for gt, ann, t in zip(labels_gt, labels_ann, mistake_types_gt) if t == example_type]
    human_accuracy_subset = _accuracy(ex_subset)
    print(f"Human accuracy ({example_type}):", human_accuracy_subset)

# Every example whose mistake type is not the literal string "None".
ex_allmistakes = [(gt, ann) for gt, ann, t in zip(labels_gt, labels_ann, mistake_types_gt) if t != "None"]
human_accuracy_allmistakes = _accuracy(ex_allmistakes)
print("Human accuracy (All Mistakes):", human_accuracy_allmistakes)


In [None]:
from sklearn.metrics import cohen_kappa_score

ann_fname1 = "answers_shane.txt"
ann_fname2 = "answers_itamar.txt"


def _read_annotator_labels(path):
    """Read an annotator answer file and return `(raw_lines, labels)`.

    For each line, the first non-blank character after the first "." is the
    answer: "M" maps to "Mistake", any other character to "Success".
    """
    with open(path, "r") as f:
        raw_lines = f.readlines()
    answer_chars = [line.split(".")[1].strip()[0] for line in raw_lines]
    labels = ["Mistake" if c == "M" else "Success" for c in answer_chars]
    return raw_lines, labels


# Parse both annotators with the same helper so the two label lists are
# guaranteed to be derived identically.
lines_ann1, labels_ann1 = _read_annotator_labels(os.path.join(analysis_dir, ann_fname1))
lines_ann2, labels_ann2 = _read_annotator_labels(os.path.join(analysis_dir, ann_fname2))

# Agreement between the two annotators.
print(cohen_kappa_score(labels_ann1, labels_ann2))

# Visualize VQG Training Data from Ego4D

In [None]:
from matplotlib import pyplot as plt
import os
import pickle
from pprint import pprint

# NOTE(review): this rebinds RESULTS_DIR, which earlier cells import from
# travel.constants. Those cells re-import it before use, but be careful when
# running cells out of order. The path is relative, so it resolves against the
# current working directory.
RESULTS_DIR = "coe-chaijy/sstorks/simulation_informed_pcr4nlu/TRAVEl/saved_results/vqg_learning/VQG_data_debug_Llama-2-7b-hf_icl5_20240429110645"
data_path = os.path.join(RESULTS_DIR, "frameVQA_examples.pkl")
# pickle can execute arbitrary code while loading; only open files you trust.
# The context manager closes the file handle once the data is loaded.
with open(data_path, "rb") as f:
    data = pickle.load(f)

print(f"{len(data)} examples generated")
for example in data[100:120]:
    # pprint(example)

    # One figure per candidate question set, all drawn over the same frame.
    for question_set in example.candidate_question_sets:
        fig, ax = plt.subplots(figsize=(8, 10))

        # Display the image
        ax.imshow(example.frame)

        # Procedure description and the first two question/answer pairs are
        # written at y-coordinates below the bottom edge of the image.
        text_offset = 30  # Pixel offset for text below the image
        ax.text(0, example.frame.height + 35, question_set.procedure_description, fontsize=9, style='italic')
        ax.text(0, example.frame.height + 70, f"{question_set.questions[0]} ({question_set.answers[0].name})", fontsize=9)
        ax.text(0, example.frame.height + 105, f"{question_set.questions[1]} ({question_set.answers[1].name})", fontsize=9)

        # Set the limits of the axes and hide them
        ax.set_xlim([0, example.frame.width])
        ax.set_ylim([example.frame.height + text_offset + 50, -10])  # Expanded to fit the text below the image
        ax.axis('off')

        # Display the figure, then release it so figures do not accumulate.
        plt.show()
        plt.close(fig)