In [None]:
# https://github.com/facebookresearch/detectron2/blob/master/projects/DensePose/densepose/vis/base.py
import numpy as np
import cv2


class TextVisualizer(object):
    """Draw a text label, with optional blended background box and frame, on an image.

    Transparency values follow the blending formula used in ``visualize``:
    ``result = image * transparency + color * (1 - transparency)``. A value of
    ``1.0`` therefore leaves the image untouched (the box/frame is skipped),
    while ``0.0`` paints the solid color.
    """

    _COLOR_GRAY = (218, 227, 218)
    _COLOR_WHITE = (255, 255, 255)

    def __init__(
        self,
        font_face=cv2.FONT_HERSHEY_DUPLEX,
        font_color_bgr=_COLOR_GRAY,
        font_scale=0.35,
        font_line_type=cv2.LINE_AA,
        font_line_thickness=1,
        font_scale_with_image=False,
        fill_color_bgr=_COLOR_WHITE,
        fill_color_transparency=1.0,
        frame_color_bgr=_COLOR_WHITE,
        frame_color_transparency=1.0,
        frame_thickness=1,
    ):
        """Store the drawing options.

        Args:
            font_face: OpenCV font identifier passed to ``cv2.putText``.
            font_color_bgr: 3-tuple color of the text.
            font_scale: base font scale (see ``get_font_scale``).
            font_line_type: OpenCV line type passed to ``cv2.putText``.
            font_line_thickness: stroke thickness of the text.
            font_scale_with_image: if True, the font scale grows with the
                image size (see ``get_font_scale``).
            fill_color_bgr: color blended into the box behind the text.
            fill_color_transparency: blend weight of the image inside the box;
                the box is only drawn when this is below 1.0.
            frame_color_bgr: color blended into the frame region.
            frame_color_transparency: blend weight of the image inside the
                frame region; the frame is only drawn when this is below 1.0.
            frame_thickness: how many pixels the frame region extends beyond
                the text box on every side.
        """
        self.font_face = font_face
        self.font_color_bgr = font_color_bgr
        self.font_scale = font_scale
        self.font_line_type = font_line_type
        self.font_line_thickness = font_line_thickness
        self.font_scale_with_image = font_scale_with_image
        self.fill_color_bgr = fill_color_bgr
        self.fill_color_transparency = fill_color_transparency
        self.frame_color_bgr = frame_color_bgr
        self.frame_color_transparency = frame_color_transparency
        self.frame_thickness = frame_thickness

    @staticmethod
    def _blend_box(image_bgr, x0, y0, x1, y1, color_bgr, transparency):
        """Blend ``color_bgr`` into ``image_bgr[y0:y1, x0:x1]`` in place.

        Coordinates are clamped at 0 so that a box touching the top/left image
        border does not turn into a negative (wrap-around) slice. Ends beyond
        the image are already clipped by NumPy slicing.
        """
        x0, y0, x1, y1 = (max(int(v), 0) for v in (x0, y0, x1, y1))
        region = image_bgr[y0:y1, x0:x1, :]
        if region.size == 0:
            return
        blended = region * transparency + np.array(color_bgr) * (1.0 - transparency)
        # Cast explicitly to the image dtype; the former `np.float` alias was
        # removed from NumPy and raised AttributeError here.
        image_bgr[y0:y1, x0:x1, :] = blended.astype(image_bgr.dtype)

    def visualize(self, image_bgr, txt, topleft_xy):
        """Draw ``txt`` with its top-left corner at ``topleft_xy``.

        Args:
            image_bgr: image array indexed as [row, column, channel]; it is
                modified in place.
            txt: the string to draw.
            topleft_xy: (x, y) position of the label's top-left corner;
                values are converted to int.

        Returns:
            The same ``image_bgr`` array that was passed in.
        """
        txt_w, txt_h, txt_bsl = self.get_text_size_wh(txt, image_bgr.shape[:2])
        topleft_xy = tuple(map(int, topleft_xy))
        # cv2.putText is given the bottom-left corner of the text, so compute
        # it before the baseline is added to the box height.
        bottomleft_xy = (topleft_xy[0], topleft_xy[1] + txt_h)
        txt_h += txt_bsl
        x, y = topleft_xy
        if self.frame_color_transparency < 1.0:
            t = self.frame_thickness
            self._blend_box(
                image_bgr,
                x - t,
                y - t,
                x + txt_w + t,
                y + txt_h + t,
                self.frame_color_bgr,
                self.frame_color_transparency,
            )
        if self.fill_color_transparency < 1.0:
            self._blend_box(
                image_bgr,
                x,
                y,
                x + txt_w,
                y + txt_h,
                self.fill_color_bgr,
                self.fill_color_transparency,
            )
        cv2.putText(
            image_bgr,
            txt,
            bottomleft_xy,
            self.font_face,
            self.get_font_scale(image_bgr.shape[:2]),
            self.font_color_bgr,
            self.font_line_thickness,
            self.font_line_type,
        )
        return image_bgr

    def get_font_scale(self, image_hw):
        """Return the font scale to use for an image of size ``image_hw``.

        With ``font_scale_with_image`` enabled, the base scale is multiplied by
        ``min(height, width) / 512`` and never drops below 0.2; otherwise the
        base scale is returned unchanged.
        """
        font_scale = self.font_scale
        if self.font_scale_with_image:
            font_scale = max(font_scale * min(image_hw[0], image_hw[1]) / 512, 0.2)
        return font_scale

    def get_text_size_wh(self, txt, image_hw):
        """Return ``(width, height, baseline)`` of ``txt`` as reported by ``cv2.getTextSize``."""
        ((txt_w, txt_h), txt_bsl) = cv2.getTextSize(
            txt,
            self.font_face,
            self.get_font_scale(image_hw),
            self.font_line_thickness,
        )
        return txt_w, txt_h, txt_bsl

## NSFF

In [None]:
# Root folder that the NSFF rendering cell reads "<video>/dense/..." from.
video_dir = "/home/ruilongli/data/NSFF/nvidia_data_full/"
# Not referenced by the cells visible in this notebook.
frame_dir = "/home/ruilongli/workspace/nsff_pl/data/"

# (sequence name, playback fps) pairs; kept together so they cannot drift apart.
_nsff_sequences = [
    ("Balloon1-2", 15),
    ("Balloon2-2", 30),
    ("DynamicFace-2", 15),
    ("Jumping", 30),
    ("Playground", 30),
    ("Skating-2", 30),
    ("Truck-2", 30),
    ("Umbrella", 15),
]
video_names = [seq_name for seq_name, _seq_fps in _nsff_sequences]
fps_list = [seq_fps for _seq_name, seq_fps in _nsff_sequences]

# Camera file stems cam01 .. cam12.
cameras = ["cam{:02d}".format(cam_idx) for cam_idx in range(1, 13)]

In [None]:
import os
import imageio
import cv2
import numpy as np


# Label style used for every panel of the NSFF videos.
text_visualizer = TextVisualizer(
    font_color_bgr=(0, 0, 0),
    font_scale=2,
    font_line_thickness=4,
    fill_color_transparency=0.3,
    font_scale_with_image=True,
)

# Make sure the output folder exists before any writer is opened into it.
os.makedirs("suppl", exist_ok=True)

for video_name, fps in zip(video_names, fps_list):
    # Every entry of "mv_images" is a per-frame folder holding one jpg per camera.
    mv_root = os.path.join(video_dir, video_name, "dense", "mv_images")
    video_frames = sorted(os.listdir(mv_root))

    # images_multiview[c] holds the labelled frames of cameras[c].
    # All cameras are loaded even though the canvas below only uses four of
    # them, because the preview cells further down index into this array.
    images_multiview = []
    for i, camera in enumerate(cameras):
        images = []
        for video_frame in video_frames:
            image_path = os.path.join(mv_root, video_frame, "%s.jpg" % camera)
            image = imageio.imread(image_path)
            images.append(text_visualizer.visualize(image, "capture%d" % i, (0, 0)))
        images_multiview.append(images)
    images_multiview = np.array(images_multiview)

    # Training trajectory frames.
    train_root = os.path.join(video_dir, video_name, "dense", "images")
    frames = sorted(os.listdir(train_root))
    images_train = []
    for frame in frames:
        image = imageio.imread(os.path.join(train_root, frame))
        # Upsampled 2x; presumably so its height matches the two vertically
        # stacked camera views it is placed next to -- TODO confirm.
        image = cv2.resize(image, (0, 0), fx=2, fy=2)
        images_train.append(text_visualizer.visualize(image, "train traj.", (0, 0)))
    images_train = np.stack(images_train)

    # Layout (left to right): cameras 0 over 4 | cameras 2 over 6 | 10-px white bar | training view.
    separator = np.full(
        (images_train.shape[0], images_train.shape[1], 10, 3),
        255,
        dtype=images_train.dtype,
    )
    canvas = np.concatenate([
        np.concatenate([images_multiview[0], images_multiview[4]], axis=-3),
        np.concatenate([images_multiview[2], images_multiview[6]], axis=-3),
        separator,
        images_train,
    ], axis=-2)

    # The context manager closes the video file even if encoding a frame fails.
    with imageio.get_writer("suppl/%s.mp4" % video_name, fps=fps) as writer:
        for img in canvas:
            writer.append_data(cv2.resize(img, (0, 0), fx=0.3, fy=0.3))

## Nerfies & HyperNeRF

In [None]:
# Playback rate used for every Nerfies / HyperNeRF video.
fps = 15

# Dataset roots; each sequence lives in "<root>/<sequence name>/".
nerfies_dir = "/home/ruilongli/data/nerfies/"
hypernerf_dir = "/home/ruilongli/data/hypernerf/"

# Sequences to render from each dataset.
nerfies_names = [
    "broom",
    "tail",
    "toby-sit",
]
hypernerf_names = [
    "broom2",
    "vrig-3dprinter",
    "vrig-chicken",
    "vrig-peel-banana",
]

In [None]:
import os
import imageio
import cv2
import numpy as np
import json


# Label style used for every panel of the Nerfies / HyperNeRF videos.
text_visualizer = TextVisualizer(
    font_color_bgr=(0, 0, 0),
    font_scale=2,
    font_line_thickness=4,
    fill_color_transparency=0.3,
    font_scale_with_image=True,
)


def _stereo_pair_exists(image_dir, frame_id):
    """Return True when both left1_<t>.png and right1_<t>.png exist in image_dir.

    <t> is the second "_"-separated field of ``frame_id``.
    """
    suffix = frame_id.split("_")[1]
    return all(
        os.path.exists(os.path.join(image_dir, "%s1_%s.png" % (side, suffix)))
        for side in ("left", "right")
    )


# Make sure the output folder exists before any writer is opened into it.
os.makedirs("suppl", exist_ok=True)

for video_name in nerfies_names + hypernerf_names:
    data_dir = nerfies_dir if video_name in nerfies_names else hypernerf_dir

    image_dir = os.path.join(data_dir, video_name, "rgb", "2x")
    with open(os.path.join(data_dir, video_name, "dataset.json"), "r") as fp:
        meta_data = json.load(fp)
    ids = meta_data["ids"]
    train_ids = meta_data["train_ids"]

    # Keep only the left/right ids for which both images of the pair are on disk.
    # The side test comes first so ids without "left"/"right" are never split.
    left_ids = [
        frame_id for frame_id in ids
        if "left" in frame_id and _stereo_pair_exists(image_dir, frame_id)
    ]
    right_ids = [
        frame_id for frame_id in ids
        if "right" in frame_id and _stereo_pair_exists(image_dir, frame_id)
    ]

    # images_multiview[0] / [1]: labelled frames of the left / right subset.
    images_multiview = []
    for i, subset_ids in enumerate([left_ids, right_ids]):
        images = []
        for frame_id in subset_ids:
            image_path = os.path.join(image_dir, "%s.png" % frame_id)
            if not os.path.exists(image_path):
                continue
            image = imageio.imread(image_path)
            images.append(text_visualizer.visualize(image, "capture%d" % i, (0, 0)))
        images_multiview.append(images)
    images_multiview = np.array(images_multiview)

    # Training trajectory frames; ids without an image on disk are skipped.
    images_train = []
    for frame_id in train_ids:
        image_path = os.path.join(image_dir, "%s.png" % frame_id)
        if not os.path.exists(image_path):
            continue
        image = imageio.imread(image_path)
        images_train.append(text_visualizer.visualize(image, "train traj.", (0, 0)))
    images_train = np.stack(images_train)
    print(images_multiview.shape, images_train.shape)

    # Layout (left to right): subset 0 | subset 1 | 10-px white bar | training view.
    separator = np.full(
        (images_train.shape[0], images_train.shape[1], 10, 3),
        255,
        dtype=images_train.dtype,
    )
    canvas = np.concatenate([
        images_multiview[0],
        images_multiview[1],
        separator,
        images_train,
    ], axis=-2)

    # The context manager closes the video file even if encoding a frame fails.
    with imageio.get_writer("suppl/%s.mp4" % video_name, fps=fps) as writer:
        for img in canvas:
            writer.append_data(cv2.resize(img, (0, 0), fx=0.3, fy=0.3))


## Ours

In [None]:
# Playback rate of the rendered videos.
fps = 30

# Root folder; each capture lives in "<data_dir>/<video name>/".
data_dir = "/home/ruilongli/workspace/nerfbios/datasets/iphone-captures_v3"

# Captures to render, in output order.
video_names = list((
    "hang-dance-1_0-250-1_aligned_gq95_bk120",
    "block-move_0-350-1_aligned_gq95_bk120",
    "teddy-move_0-350-1_aligned_gq95_bk120",
    "wheel-rotate_0-250-1_aligned_gq95_bk120",
))

In [None]:
import os
import imageio
import cv2
import numpy as np
import json


# Label style used for every panel of these videos.
text_visualizer = TextVisualizer(
    font_color_bgr=(0, 0, 0),
    font_scale=1.5,
    font_line_thickness=2,
    fill_color_transparency=0.3,
    font_scale_with_image=True,
)

# Make sure the output folder exists before any writer is opened into it.
os.makedirs("suppl", exist_ok=True)

for video_name in video_names:
    image_dir = os.path.join(data_dir, video_name, "rgb", "2x")
    with open(os.path.join(data_dir, video_name, "dataset.json"), "r") as fp:
        meta_data = json.load(fp)
    train_ids = meta_data["train_ids"]

    # images_multiview[0] / [1]: frames of cameras "000" / "001", one per training id.
    images_multiview = []
    for camera in ["000", "001"]:
        images = []
        for frame_id in train_ids:
            image_path = os.path.join(
                image_dir, "2_%s_%s.png" % (camera, frame_id.split("_")[-1])
            )
            if os.path.exists(image_path):
                image = imageio.imread(image_path)
            else:
                # Mid-gray placeholder for a missing frame.
                # NOTE(review): the 480x360 size is hard-coded and must match
                # the real images for the stacking below to work -- confirm.
                image = np.full((480, 360, 3), 128, dtype=np.uint8)
            images.append(text_visualizer.visualize(image, "capture (test)", (0, 0)))
        images_multiview.append(images)
    images_multiview = np.array(images_multiview)

    # Training frames.
    images_train = []
    for frame_id in train_ids:
        image_path = os.path.join(image_dir, "%s.png" % frame_id)
        image = imageio.imread(image_path)
        images_train.append(text_visualizer.visualize(image, "capture (train)", (0, 0)))
    images_train = np.stack(images_train)
    print(images_multiview.shape, images_train.shape)

    # Layout (left to right): camera 000 | camera 001 | 10-px white bar | training view.
    # The wheel-rotate capture is rendered without the camera-001 panel.
    separator = np.full(
        (images_train.shape[0], images_train.shape[1], 10, 3),
        255,
        dtype=images_train.dtype,
    )
    panels = [images_multiview[0]]
    if video_name != "wheel-rotate_0-250-1_aligned_gq95_bk120":
        panels.append(images_multiview[1])
    panels.extend([separator, images_train])
    canvas = np.concatenate(panels, axis=-2)

    # Frames are written at full resolution. The context manager closes the
    # video file even if encoding a frame fails.
    with imageio.get_writer("suppl/%s.mp4" % video_name, fps=fps) as writer:
        for img in canvas:
            writer.append_data(img)


In [None]:
import mediapy

# Preview the first entry of images_multiview as a GIF (height 100, 15 fps).
# Uses whatever images_multiview the most recently executed rendering cell left behind.
mediapy.show_video(images_multiview[0], height=100, codec='gif', fps=15)
# images_multiview.shape

In [None]:
# Preview the composed canvas of the last rendered video as a GIF (height 100, 15 fps).
mediapy.show_video(canvas, height=100, codec='gif', fps=15)


In [None]:
import mediapy

# Same preview as the earlier images_multiview[0] cell: first entry as a GIF (height 100, 15 fps).
mediapy.show_video(images_multiview[0], height=100, codec='gif', fps=15)
# images_multiview.shape

In [None]:
# Preview entry 6 of images_multiview as a GIF (height 100, 15 fps).
# NOTE(review): only the NSFF cell builds images_multiview with more than two
# entries (one per camera in `cameras`); the later rendering cells rebuild it
# with two, so after a top-to-bottom run this index is out of range.
mediapy.show_video(images_multiview[6], height=100, codec='gif', fps=15)


In [None]:
# Preview the labelled training frames of the last rendered video as a GIF (height 100, 15 fps).
mediapy.show_video(images_train, height=100, codec='gif', fps=15)


In [None]:
# Display the shape of the stacked training frames left by the last rendering cell.
images_train.shape

In [None]:
# Display the shape of the multi-view frame array left by the last rendering cell.
images_multiview.shape