## Setup

In [1]:
import json
from pathlib import Path
from shutil import copyfile

import pandas as pd
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset root: two directory levels above the current working directory,
# then "generated-dataset-30_000/data".
base_path = Path.cwd().parents[1] / "generated-dataset-30_000" / "data"

# Fail early, and report the path that was tried, when the notebook is started
# from a directory where this relative layout does not hold.
assert base_path.exists(), f"Dataset directory not found: {base_path}"

In [3]:
def get_instances():
    for file in base_path.glob("*/data.json"):
        with open(file) as f:
            as_json = json.load(f)

        def answer_mapping(answer_type, answer, options, correct_option):
            match answer_type:
                case "binary":
                    correct = str(options[correct_option])
                    return {"True": "A", "False": "B"}[correct]
                case "multi":
                    return correct_option
                case _:
                    return answer

        action_sequence = [entry["action"] for entry in as_json["prompt_sequence"]]

        yield from (
            {
                "sample_id": int(str(file.parts[-2])),
                "question_id": question_id,
                "textual_description": as_json["textual_description"],
                "question_type": qa_pair["question_type"],
                "question": qa_pair["question"],
                "answer_type": qa_pair["answer_type"],
                "answer": answer_mapping(
                    qa_pair["answer_type"], qa_pair["answer"], qa_pair["options"], qa_pair["correct_option"]
                ),
                "action_sequence": action_sequence,
            }
            for question_id, qa_pair in enumerate(as_json["qa_pairs"])
        )


# Drain the generator behind a progress counter, then build one row per question.
instance_progress = tqdm(get_instances())
data = pd.DataFrame.from_records(list(instance_progress))

150000it [00:03, 48643.11it/s]


In [4]:
# test_ids.txt sits next to the data directory and holds one sample id per line.
# Blank lines are skipped so a stray empty line does not crash int().
test_ids_file = base_path.parent / "test_ids.txt"
test_ids = [int(entry) for entry in test_ids_file.read_text().splitlines() if entry.strip()]
len(test_ids), test_ids[:5]

(3000, [27000, 27001, 27002, 27003, 27004])

In [5]:
# Boolean mask over the question rows whose sample id is listed in test_ids.
in_test_split = data["sample_id"].isin(test_ids)
test_data = data.loc[in_test_split]
test_data

Unnamed: 0,sample_id,question_id,textual_description,question_type,question,answer_type,answer,action_sequence
0,29691,0,"As the first action, the person is sitting dow...",last_open,What was the final activity demonstrated by th...,open,The person is golfing (swinging a club).,"[sitting down, waving, jumping once, golfing (..."
1,29691,1,"As the first action, the person is sitting dow...",right_after_multi,What did the person do immediately after jumpi...,multi,B,"[sitting down, waving, jumping once, golfing (..."
2,29691,2,"As the first action, the person is sitting dow...",count_multi,Can you provide the number of times the person...,multi,A,"[sitting down, waving, jumping once, golfing (..."
3,29691,3,"As the first action, the person is sitting dow...",before_open,Which was done some time before someone was go...,open,"We see someone who is sitting down, waving, an...","[sitting down, waving, jumping once, golfing (..."
4,29691,4,"As the first action, the person is sitting dow...",count_number_multi,Which activity does the person carry out 0 tim...,multi,A,"[sitting down, waving, jumping once, golfing (..."
...,...,...,...,...,...,...,...,...
149975,27341,0,Kicking a ball is the last action performed by...,comparison_timestamp_same_binary,Are the actions at 11.162 and 0.763 identical ...,binary,B,"[kicking a ball, drinking with the left hand, ..."
149976,27341,1,Kicking a ball is the last action performed by...,comparison_first_last_different_binary,Can we say that the initial action is distinct...,binary,A,"[kicking a ball, drinking with the left hand, ..."
149977,27341,2,Kicking a ball is the last action performed by...,interval_part_sequence_open,Within the specified time interval of 1.046 to...,open,3,"[kicking a ball, drinking with the left hand, ..."
149978,27341,3,Kicking a ball is the last action performed by...,comparison_counting_binary,Does the individual engage in drinking with th...,binary,B,"[kicking a ball, drinking with the left hand, ..."


In [6]:
# Output folder for the human study, created next to the data directory if missing.
path_study = base_path.parent.joinpath("human-study")
path_study.mkdir(exist_ok=True)
path_study

PosixPath('/workspaces/tsqa-pipeline-v2/generated-dataset-30_000/human-study')

In [7]:
# Collect the rendered video of every test sample under human-study/videos/<sample_id>.mp4.
video_target = path_study / "videos"
video_target.mkdir(exist_ok=True)

# The loop variable is deliberately not called `id`: at notebook top level it
# would keep shadowing the builtin `id()` for every later cell.
for sample_id in tqdm(set(test_data["sample_id"])):
    video = base_path / str(sample_id) / "render_smpl_compressed.mp4"
    copyfile(video, video_target / f"{sample_id}.mp4")

100%|██████████| 3000/3000 [00:03<00:00, 943.28it/s] 


In [7]:
# Load the sample-id -> URL table. Column names are passed explicitly, so the
# first line of the file is read as data. The second column is reduced to its
# first run of digits, converted to int, and used as the index.
url_table_path = path_study / "QuAnTS Human Evaluation File IDs GIFs.csv"
sample_id_2_url = (
    pd.read_csv(url_table_path, names=["url", "sample_id"])
    .assign(sample_id=lambda frame: frame["sample_id"].str.extract(r"(\d+)", expand=False).astype(int))
    .set_index("sample_id")
)
sample_id_2_url

Unnamed: 0_level_0,url
sample_id,Unnamed: 1_level_1
27999,https://drive.google.com/uc?id=13QZwXkrhga-5ci...
27998,https://drive.google.com/uc?id=1EG48SP5A-0aYIN...
27997,https://drive.google.com/uc?id=1bt1OZCLazP4-YX...
27996,https://drive.google.com/uc?id=1EzRaCB9nCcunig...
27995,https://drive.google.com/uc?id=1l1Mhoyymu26yuF...
...,...
27004,https://drive.google.com/uc?id=1Cf796oRfXyD0XH...
27003,https://drive.google.com/uc?id=16eeEc7wbTx-zpy...
27002,https://drive.google.com/uc?id=1FLY5Qi9XCaqTgW...
27001,https://drive.google.com/uc?id=1G8MSsUO3VwqCb1...


In [8]:
# Left-join the per-sample URL onto every question row, matching the
# "sample_id" column against the URL table's index. Samples without an
# entry in the URL table end up with a missing url.
test_data_urls = test_data.merge(
    sample_id_2_url,
    how="left",
    left_on="sample_id",
    right_index=True,
)
test_data_urls

Unnamed: 0,sample_id,question_id,textual_description,question_type,question,answer_type,answer,action_sequence,url
0,29691,0,"As the first action, the person is sitting dow...",last_open,What was the final activity demonstrated by th...,open,The person is golfing (swinging a club).,"[sitting down, waving, jumping once, golfing (...",
1,29691,1,"As the first action, the person is sitting dow...",right_after_multi,What did the person do immediately after jumpi...,multi,B,"[sitting down, waving, jumping once, golfing (...",
2,29691,2,"As the first action, the person is sitting dow...",count_multi,Can you provide the number of times the person...,multi,A,"[sitting down, waving, jumping once, golfing (...",
3,29691,3,"As the first action, the person is sitting dow...",before_open,Which was done some time before someone was go...,open,"We see someone who is sitting down, waving, an...","[sitting down, waving, jumping once, golfing (...",
4,29691,4,"As the first action, the person is sitting dow...",count_number_multi,Which activity does the person carry out 0 tim...,multi,A,"[sitting down, waving, jumping once, golfing (...",
...,...,...,...,...,...,...,...,...,...
149975,27341,0,Kicking a ball is the last action performed by...,comparison_timestamp_same_binary,Are the actions at 11.162 and 0.763 identical ...,binary,B,"[kicking a ball, drinking with the left hand, ...",https://drive.google.com/uc?id=1S4MYjs6g2AgzCa...
149976,27341,1,Kicking a ball is the last action performed by...,comparison_first_last_different_binary,Can we say that the initial action is distinct...,binary,A,"[kicking a ball, drinking with the left hand, ...",https://drive.google.com/uc?id=1S4MYjs6g2AgzCa...
149977,27341,2,Kicking a ball is the last action performed by...,interval_part_sequence_open,Within the specified time interval of 1.046 to...,open,3,"[kicking a ball, drinking with the left hand, ...",https://drive.google.com/uc?id=1S4MYjs6g2AgzCa...
149978,27341,3,Kicking a ball is the last action performed by...,comparison_counting_binary,Does the individual engage in drinking with th...,binary,B,"[kicking a ball, drinking with the left hand, ...",https://drive.google.com/uc?id=1S4MYjs6g2AgzCa...


In [9]:
only_answer_type = "open"

# Keep only questions of the selected answer type and, per sample, the first
# such question; then order by sample id and number the rows 0..n-1 as
# "study_entry_id". Boolean indexing already returns a new frame, so no
# up-front copy of the full table is needed, and nothing is mutated in place.
has_selected_answer_type = test_data_urls["answer_type"] == only_answer_type
eval_data = (
    test_data_urls[has_selected_answer_type]
    .drop_duplicates(subset="sample_id")
    .sort_values(by="sample_id", ignore_index=True)
    .rename_axis("study_entry_id")
)
eval_data

Unnamed: 0_level_0,sample_id,question_id,textual_description,question_type,question,answer_type,answer,action_sequence,url
study_entry_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,27000,0,The sequence starts with bowing from 0:0.0 to ...,right_after_open,"After engaging in bowing, what followed immedi...",open,Someone is waving.,"[bowing, waving, picking something up with bot...",https://drive.google.com/uc?id=1p3r-97sKD2987-...
1,27001,0,The person is starting off the sequence with k...,last_open,What was the last action performed by the person?,open,We see someone who is kicking a ball.,"[kicking a ball, playing guitar, eating with t...",https://drive.google.com/uc?id=1G8MSsUO3VwqCb1...
2,27002,0,The person is starting off the sequence with e...,descriptive_identification_open,"At 11.943, what action is the person performing?",open,Someone is running.,"[eating with the right hand, bowing, running, ...",https://drive.google.com/uc?id=1FLY5Qi9XCaqTgW...
3,27003,1,The person is starting the sequence with picki...,right_after_open,"After the person performed holding a baby, wha...",open,We see someone who is sitting down.,"[picking something up with both hands, holding...",https://drive.google.com/uc?id=16eeEc7wbTx-zpy...
4,27005,4,The person is starting off the sequence with s...,interval_whole_sequence_open,How many different actions does the person per...,open,3,"[skipping rope, skipping rope, waving, holding...",https://drive.google.com/uc?id=1Nzzlxnbu71uko4...
...,...,...,...,...,...,...,...,...,...
2516,29994,1,The person is starting the sequence with catch...,count_number_open,The person is involved in performing 1 instanc...,open,"catching a ball, kicking a ball, golfing (swin...","[catching a ball, kicking a ball, kicking a ba...",
2517,29995,3,The sequence starts with throwing a ball. This...,extremum_least_open,"Given the sequence, which activity had the lea...",open,"throwing a ball, skipping rope, shaking hands,...","[throwing a ball, skipping rope, shaking hands...",
2518,29996,0,The first activity the person is doing is danc...,last_open,What was the final action undertaken by the pe...,open,Someone is kicking a ball.,"[dancing, picking something up with both hands...",
2519,29997,2,The first activity the person is doing is play...,extremum_least_open,Which activity had the smallest number of inst...,open,kicking a ball and eating with the right hand,"[playing guitar, kicking a ball, eating with t...",


In [10]:
# Spot-check: the action list attached to the first study entry.
eval_data["action_sequence"].iloc[0]

['bowing',
 'waving',
 'picking something up with both hands',
 'picking something up with both hands']

In [11]:
# Columns handed to the study; taken as a copy so eval_data stays untouched.
study_columns = ["sample_id", "url", "question", "answer"]
export_data = eval_data[study_columns].copy()

# Binary answers are stored as option letters; spell them out for the study.
# Every other answer type is exported unchanged.
if only_answer_type == "binary":
    export_data = export_data.replace({"answer": {"A": "yes", "B": "no"}})

export_data

Unnamed: 0_level_0,sample_id,url,question,answer
study_entry_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,27000,https://drive.google.com/uc?id=1p3r-97sKD2987-...,"After engaging in bowing, what followed immedi...",Someone is waving.
1,27001,https://drive.google.com/uc?id=1G8MSsUO3VwqCb1...,What was the last action performed by the person?,We see someone who is kicking a ball.
2,27002,https://drive.google.com/uc?id=1FLY5Qi9XCaqTgW...,"At 11.943, what action is the person performing?",Someone is running.
3,27003,https://drive.google.com/uc?id=16eeEc7wbTx-zpy...,"After the person performed holding a baby, wha...",We see someone who is sitting down.
4,27005,https://drive.google.com/uc?id=1Nzzlxnbu71uko4...,How many different actions does the person per...,3
...,...,...,...,...
2516,29994,,The person is involved in performing 1 instanc...,"catching a ball, kicking a ball, golfing (swin..."
2517,29995,,"Given the sequence, which activity had the lea...","throwing a ball, skipping rope, shaking hands,..."
2518,29996,,What was the final action undertaken by the pe...,Someone is kicking a ball.
2519,29997,,Which activity had the smallest number of inst...,kicking a ball and eating with the right hand


In [12]:
# One CSV per answer type, written into the human-study folder (index column included).
export_path = path_study / f"study_data_{only_answer_type}.csv"
export_data.to_csv(export_path)