# ACQUIRED Dataset Demo

In [None]:
import os, sys, json, random
import pprint
pp = pprint.PrettyPrinter(depth=6)

from IPython.display import HTML, IFrame
from PIL import Image, ImageDraw, ImageFont
from ipywidgets import Video

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
def red_text(text):
    """Return ``text`` wrapped in the ANSI escape ``31`` (red) and a trailing reset."""
    return "".join(("\x1b[31m", text, "\x1b[0m"))

def blue_text(text):
    """Return ``text`` wrapped in the ANSI escape ``94`` (bright blue) and a trailing reset."""
    prefix, suffix = "\x1b[94m", "\x1b[0m"
    return prefix + text + suffix

def green_text(text):
    """Return ``text`` wrapped in the ANSI escape ``32`` (green) and a trailing reset."""
    return "".join(("\x1b[32m", text, "\x1b[0m"))

def magenta_text(text):
    """Return ``text`` wrapped in the ANSI escape ``35`` (magenta) and a trailing reset."""
    prefix, suffix = "\x1b[35m", "\x1b[0m"
    return prefix + text + suffix

def yellow_text(text):
    """Return ``text`` wrapped in the ANSI escape ``93`` (bright yellow) and a trailing reset."""
    return "".join(("\x1b[93m", text, "\x1b[0m"))

def bold_text(text):
    """Return ``text`` wrapped in the ANSI escape ``1;128`` and a trailing reset.

    Parameter ``1`` selects bold. Because the reset comes right after
    ``text``, any colour applied to ``text`` beforehand is also reset there.
    """
    # NOTE(review): 128 does not look like a standard SGR parameter -- confirm
    # it is intentional before changing it.
    prefix, suffix = "\x1b[1;128m", "\x1b[0m"
    return prefix + text + suffix

## Essential Functions

In [None]:
def load_data(splits, dataset_root="./Dataset"):
    """Load and concatenate the JSON contents of one or more dataset splits.

    Args:
        splits: Split name(s) without the ``.json`` extension. Either an
            iterable of names or a single name given as a plain string.
        dataset_root: Directory that contains the ``<split>.json`` files.

    Returns:
        One list holding the elements of every split file, in the order the
        splits were given. Each file is expected to contain a JSON array.

    Raises:
        FileNotFoundError: If a ``<split>.json`` file does not exist.
        json.JSONDecodeError: If a file is not valid JSON.
    """
    # A bare string would otherwise be iterated character by character and
    # looked up as "t.json", "r.json", ...
    if isinstance(splits, str):
        splits = [splits]

    data = []
    for split in splits:
        filename = os.path.join(dataset_root, split + ".json")
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which may fail on non-ASCII text.
        with open(filename, "r", encoding="utf-8") as f:
            data.extend(json.load(f))

    return data


def sample_one_datapoint(data, video_source="local", local_video_root="acquired_dataset"):
    """Print one randomly chosen datapoint and display its video.

    The question is printed in blue, the correct answer in green and the
    wrong answer in red.

    Args:
        data: Non-empty sequence of datapoint dicts. The keys read are
            ``"video_id"``, ``"video_url"``, ``"domain"``, ``"question"``,
            ``"answer1"``, ``"answer2"`` and ``"correct_answer_key"``, plus
            ``"video_path"`` when ``video_source`` is ``"local"``.
        video_source: ``"local"`` to show the video file found under
            ``local_video_root``, or ``"url"`` to embed the datapoint's
            ``"video_url"`` in an iframe.
        local_video_root: Directory that the datapoint's ``"video_path"`` is
            joined onto. Only used when ``video_source`` is ``"local"``.

    Returns:
        None. All output goes to stdout and the notebook display.
    """
    assert video_source in ["local", "url"], \
        "video_source must be 'local' or 'url', got {!r}".format(video_source)

    sample = random.choice(data)

    # The part of the video id before the first "-" / ":" names the source
    # dataset; anything other than "oopsqa" is reported as Ego4D.
    id_prefix = sample["video_id"].split("-")[0].split(":")[0]
    video_from = "Oops!" if id_prefix == "oopsqa" else "Ego4D"

    print("Video URL: {}".format(sample["video_url"]))
    print("Video ID:  {}".format(sample["video_id"]))
    print("Domain:    {}".format(sample["domain"]))
    print("Source:    {}".format(video_from))
    print("---"*42)
    print("{} {}".format(bold_text(blue_text("Question:")), blue_text(sample["question"])))

    # Green marks the correct answer, red the wrong one.
    if sample["correct_answer_key"] == "answer1":
        answer1_color_func, answer2_color_func = green_text, red_text
    else:
        answer1_color_func, answer2_color_func = red_text, green_text
    print("{} {}".format(bold_text(answer1_color_func("Answer 1:")), answer1_color_func(sample["answer1"])))
    # Fixed: this line used to print sample["answer1"] a second time.
    print("{} {}".format(bold_text(answer2_color_func("Answer 2:")), answer2_color_func(sample["answer2"])))
    print("Correct Answer: {}".format(sample["correct_answer_key"]))

    if video_source == "url":
        video_src = '<iframe width="560" height="315" src="{}" frameborder="0" allowfullscreen></iframe>'.format(sample["video_url"])
        display(HTML(video_src))
    else:
        # The local path is only built here, so "url" mode does not need a
        # "video_path" key on the datapoint.
        video_path = os.path.join(local_video_root, sample["video_path"])
        display(Video.from_file(
            video_path, width=560, height=315,
        ))

## Show One Random Sample

The video source can be either `local`, which requires that you have already downloaded the videos by following the README.md, or `url`, which embeds the video for quick visualization before you download the whole video set.

In [None]:
# Splits to load, and where the sampled video is displayed from.
splits = ["train", "val", "test"]
video_source = "local"  # "url"

# Gather the datapoints of all selected splits into one list.
data = load_data(splits, dataset_root="./Dataset")

print("Number of questions for splits in {} = {}".format(splits, len(data)))
print("===" * 42)

# Print one random question/answer pair and display its video.
sample_one_datapoint(data, video_source=video_source, local_video_root="acquired_dataset")

# End of Demo