In [140]:
# Registry of the runs to evaluate, keyed by "<method>_<dataset>".
# Each value is an (xid, wids) pair; both are used as path components
# when locating the rendered test video of a run (see gcs_image_accessor).
zoo = dict(
    zero123_dtu=(
        91692112,
        "1 32 18 4 5 6 7 8 9 10 26 12 13 14 15".split(),
    ),
    ours_dtu=(
        91650514,
        "16 32 63 49 50 36 22 38 54 25 71 72 73 44 45".split(),
    ),
    zero123_mipnerf360=(
        91688416,
        "15 9 3 18 5 34 35".split(),
    ),
    ours_mipnerf360=(
        91657828,
        "15 9 3 11 5 6 14".split(),
    ),
    oursanchored_mipnerf360=(
        92764541,
        "1 9 3 11 5 6 14".split(),
    ),
)

# For each Mip-NeRF 360 scene, the index of the one video frame that
# gcs_image_accessor skips when yielding frames for that scene.
# NOTE(review): presumably this is the conditioning/input view -- confirm.
MIPNERF_UID_TO_VIEW_IDX = dict(
    bicycle=2,
    bonsai=0,
    counter=20,
    garden=0,
    kitchen=0,
    room=0,
    stump=11,
)
MIPNERF360_SCENE_UIDS = [
    'bicycle',
    'bonsai',
    'counter',
    'garden',
    'kitchen',
    'room',
    'stump',
]

def _mipnerf360_wid_to_scene(wid):
    # gross_hack
    return MIPNERF360_SCENE_UIDS[(int(wid) - 1) % 7]

In [141]:
import mediapy
import os
import glob
import einops
import numpy as np

# Root directory under which each run's outputs live, laid out as
# <GCS_PREFIX>/<xid>/<wid>/<anything>/save/<name>test.mp4
GCS_PREFIX = "/gcs/xcloud-shared/kylesargent/zero123_training_runs/"

def gcs_image_accessor(xid, wid, dataset, yield_gt=False):
    """Yield frames from the single ``*test.mp4`` video of one work unit.

    Every video frame is split along its width into five equal panels. The
    first panel is used as ground truth and the second as the prediction;
    the remaining three panels are discarded.

    Args:
        xid: Experiment id; first path component under ``GCS_PREFIX``.
        wid: Work-unit id; second path component. For ``'mipnerf360'`` it
            also selects the scene via ``_mipnerf360_wid_to_scene``.
        dataset: Dataset name. ``'mipnerf360'`` looks up the excluded frame
            per scene; any other value excludes frame index 25.
        yield_gt: If true, yield the ground-truth panel instead of the
            prediction panel.

    Yields:
        One ``(h, w, 3)`` frame per video frame, skipping the excluded index.

    Raises:
        ValueError: if the path pattern does not match exactly one video.
    """
    pattern = os.path.join(GCS_PREFIX, str(xid), str(wid), '*', 'save', '*test.mp4')
    matches = glob.glob(pattern)
    if len(matches) != 1:
        # Same exception type as the bare unpacking it replaces, but the
        # message now says which run is missing or ambiguous.
        raise ValueError(
            f"expected exactly one video matching {pattern!r}, "
            f"found {len(matches)}: {matches}")
    video_path = matches[0]

    video = mediapy.read_video(video_path)

    gt, pred, _, _, _ = einops.rearrange(
        video, "t h (five w) three -> five t h w three", five=5, three=3)
    frames = gt if yield_gt else pred

    if dataset == 'mipnerf360':
        scene = _mipnerf360_wid_to_scene(wid)
        excluded_idx = MIPNERF_UID_TO_VIEW_IDX[scene]
    else:
        # NOTE(review): hard-coded for every non-mipnerf360 dataset;
        # presumably the DTU input-view index -- confirm.
        excluded_idx = 25

    for idx, frame in enumerate(frames):
        if idx != excluded_idx:
            yield frame

In [142]:
def pixelnerf_image_accessor(dataset):
    """Yield the PixelNeRF evaluation images for ``dataset``, in path order.

    Args:
        dataset: ``'mipnerf360'`` or ``'dtu'``.

    Yields:
        One image per ``*.png`` found under ``<path>/dtu_1v/*/``, as returned
        by ``mediapy.read_image``.

    Raises:
        NotImplementedError: for any other dataset name.
    """
    if dataset == 'mipnerf360':
        path = '/home/jupyter/stylegan3/pixelnerf_datasets/clip_final_eval_out_zeroshot_mipnerf360_withlpips_worldscale.3'
    elif dataset == 'dtu':
        path = "/home/jupyter/stylegan3/pixelnerf_datasets/clip_final_eval_out_zeroshot_withlpips_worldscale.5"
    else:
        raise NotImplementedError(f"no PixelNeRF outputs registered for dataset {dataset!r}")
    # NOTE(review): the 'dtu_1v' subdirectory is used for both datasets,
    # including mipnerf360 -- presumably the eval script's output name; confirm.
    # glob returns matches in arbitrary order; sort so the yielded sequence
    # is the same on every run.
    image_paths = sorted(glob.glob(os.path.join(path, 'dtu_1v', '*', "*.png")))
    for image_path in image_paths:
        yield mediapy.read_image(image_path)

In [143]:
import collections
from tqdm import tqdm

# Frames collected per "<method>_<dataset>" key, plus one "gt_<dataset>"
# entry per dataset holding the ground-truth frames.
image_sets = collections.defaultdict(list)

for key, (xid, wids) in tqdm(zoo.items()):
    print(key)
    method, dataset = key.split('_')
    pred_key = f"{method}_{dataset}"
    gt_key = f"gt_{dataset}"

    # Predicted frames of every work unit of this run.
    for wid in wids:
        image_sets[pred_key] += list(gcs_image_accessor(xid, wid, dataset))

    # Ground truth is taken once per dataset, from the first run that covers it.
    if gt_key not in image_sets:
        for wid in wids:
            image_sets[gt_key] += list(
                gcs_image_accessor(xid, wid, dataset, yield_gt=True))

# PixelNeRF baselines are read from local PNGs rather than from run videos.
for pixelnerf_dataset in ("dtu", "mipnerf360"):
    image_sets[f"pixelnerf_{pixelnerf_dataset}"].extend(
        list(pixelnerf_image_accessor(pixelnerf_dataset)))

  0%|          | 0/5 [00:00<?, ?it/s]

zero123_dtu


 20%|██        | 1/5 [00:26<01:45, 26.46s/it]

ours_dtu


 40%|████      | 2/5 [00:40<00:57, 19.03s/it]

zero123_mipnerf360


 60%|██████    | 3/5 [00:54<00:33, 16.93s/it]

ours_mipnerf360


 80%|████████  | 4/5 [01:02<00:13, 13.25s/it]

oursanchored_mipnerf360


100%|██████████| 5/5 [01:11<00:00, 14.30s/it]


In [144]:
# validate
# Stack every list of frames into one array and print a one-line summary
# (key, shape, min, max, dtype) so mismatched sets are easy to spot.
image_arrs = {k: np.array(v) for (k, v) in image_sets.items()}
for k, v in image_arrs.items():
    print((k, v.shape, v.min(), v.max(), v.dtype))

('zero123_dtu', (720, 304, 400, 3), 0, 255, dtype('uint8'))
('gt_dtu', (720, 304, 400, 3), 0, 255, dtype('uint8'))
('ours_dtu', (720, 304, 400, 3), 0, 255, dtype('uint8'))
('zero123_mipnerf360', (1413, 256, 256, 3), 0, 255, dtype('uint8'))
('gt_mipnerf360', (1413, 256, 256, 3), 0, 255, dtype('uint8'))
('ours_mipnerf360', (1413, 256, 256, 3), 0, 255, dtype('uint8'))
('oursanchored_mipnerf360', (1413, 256, 256, 3), 0, 255, dtype('uint8'))
('pixelnerf_dtu', (720, 300, 400, 3), 18, 247, dtype('uint8'))
('pixelnerf_mipnerf360', (1413, 256, 256, 3), 92, 252, dtype('uint8'))


In [1]:
import torch

In [2]:
from metrics import metric_utils
from collections import namedtuple
import torch

# Minimal stand-in for the options object handed to the metric functions:
# a single-process, single-GPU setup on the current CUDA device.
DumbOptCls = namedtuple("DumbOpt", "device rank num_gpus")
opts = DumbOptCls(
    device=torch.ones(1).cuda().device,
    rank=0,
    num_gpus=1,
)

In [3]:
# gt_ds = image_arrs['gt_dtu']
# pred_ds = image_arrs['zero123_dtu']
import importlib

def _hijack_feature_stats(metric_module, gt_ds, pred_ds):
    """Reload a metric module and route its feature-stat calls to in-memory data.

    Reloads ``metric_module`` and ``metrics.metric_utils`` (discarding any
    patches left by an earlier call), then replaces the
    ``compute_feature_stats_for_dataset`` / ``..._for_generator`` functions
    seen by ``metric_module`` so they call
    ``metric_utils.hijackable_compute_stats`` on ``gt_ds`` / ``pred_ds``.
    """
    importlib.reload(metric_module)
    from metrics import metric_utils
    importlib.reload(metric_utils)

    metric_module.metric_utils.compute_feature_stats_for_dataset = (
        lambda **kwargs: metric_utils.hijackable_compute_stats(dataset=gt_ds, **kwargs))
    metric_module.metric_utils.compute_feature_stats_for_generator = (
        lambda **kwargs: metric_utils.hijackable_compute_stats(dataset=pred_ds, **kwargs))


def compute_is(gt_ds, pred_ds):
    """Compute the Inception Score of ``pred_ds`` with a single split.

    Args:
        gt_ds: Ground-truth images (sized sequence or array).
        pred_ds: Predicted images; ``len(pred_ds)`` is passed as ``num_gen``.

    Returns:
        Whatever ``inception_score.compute_is`` returns (the recorded outputs
        in this notebook show a 2-tuple).
    """
    from metrics import inception_score
    _hijack_feature_stats(inception_score, gt_ds, pred_ds)
    return inception_score.compute_is(
        opts, num_gen=len(pred_ds), num_splits=1)


def compute_fid(gt_ds, pred_ds):
    """Compute the Frechet Inception Distance between ``gt_ds`` and ``pred_ds``.

    Args:
        gt_ds: Ground-truth images; ``len(gt_ds)`` is passed as ``max_real``.
        pred_ds: Predicted images; ``len(pred_ds)`` is passed as ``num_gen``.

    Returns:
        The result of ``frechet_inception_distance.compute_fid``.
    """
    from metrics import frechet_inception_distance
    _hijack_feature_stats(frechet_inception_distance, gt_ds, pred_ds)
    return frechet_inception_distance.compute_fid(
        opts, max_real=len(gt_ds), num_gen=len(pred_ds))


def compute_kid(gt_ds, pred_ds):
    """Compute the Kernel Inception Distance between ``gt_ds`` and ``pred_ds``.

    Uses 100 subsets of at most 100 images each.

    Args:
        gt_ds: Ground-truth images; ``len(gt_ds)`` is passed as ``max_real``.
        pred_ds: Predicted images; ``len(pred_ds)`` is passed as ``num_gen``.

    Returns:
        The result of ``kernel_inception_distance.compute_kid``.
    """
    from metrics import kernel_inception_distance
    _hijack_feature_stats(kernel_inception_distance, gt_ds, pred_ds)
    return kernel_inception_distance.compute_kid(
        opts, max_real=len(gt_ds), num_gen=len(pred_ds), num_subsets=100, max_subset_size=100)

In [5]:
import skimage
import numpy as np

# Smoke test for compute_fid on synthetic data: 100 copies of one image
# versus the same 100 copies reversed along axis 2.
# NOTE(review): the recorded output of this cell is about -1.5e+128, which is
# not a valid FID. Presumably the identical images make the feature covariance
# degenerate -- confirm before trusting this as a sanity check.
astronaut = skimage.data.astronaut()

# Read-only broadcast view: every one of the 100 entries is the same image.
ds = np.broadcast_to(astronaut, (100, 512, 512, 3))

# .copy() materialises the reversed view as a regular contiguous array.
compute_fid(ds, ds[:, :, ::-1].copy())

  return default_collate([torch.as_tensor(b) for b in batch])
  return default_collate([torch.as_tensor(b) for b in batch])


-1.5056731859921695e+128

In [147]:
# Report IS / FID / KID for every non-ground-truth image set against the
# ground truth of the dataset named by the key's suffix.
for key in image_sets:
    if key.startswith('gt'):
        continue

    if key.endswith("dtu"):
        gt_ds = image_arrs['gt_dtu']
    elif key.endswith("mipnerf360"):
        gt_ds = image_arrs['gt_mipnerf360']
    else:
        # Without this branch an unknown suffix would silently reuse the
        # previous iteration's gt_ds (or hit a NameError on the first key).
        raise ValueError(f"cannot infer dataset from image set key {key!r}")

    pred_ds = image_arrs[key]
    print(key)
    print(f"is: {compute_is(gt_ds, pred_ds)}")
    print(f"fid: {compute_fid(gt_ds, pred_ds)}")
    print(f"kid: {compute_kid(gt_ds, pred_ds)}")
    print()
          

zero123_dtu
is: (7.758379936218262, 0.0)
fid: 93.79453945352734
kid: 0.013764171894728526

ours_dtu
is: (6.810516357421875, 0.0)
fid: 79.76314295588246
kid: 0.009443904277146486

zero123_mipnerf360
is: (5.664248943328857, 0.0)
fid: 177.69337334933545
kid: 0.058030588107638933

ours_mipnerf360
is: (6.3430304527282715, 0.0)
fid: 126.91775902900311
kid: 0.03379108493134468

oursanchored_mipnerf360
is: (6.236597537994385, 0.0)
fid: 121.14152737007001
kid: 0.031454294369476034

pixelnerf_dtu
is: (3.734673023223877, 0.0)
fid: 329.8599216675181
kid: 0.21406792080965908

pixelnerf_mipnerf360
is: (2.114959955215454, 0.0)
fid: 356.10463150517705
kid: 0.3136811732559975



In [None]:
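# Results pasted from an earlier run, kept for reference (no
# oursanchored_mipnerf360 entry). Commented out so that running this cell does
# not raise a SyntaxError. IS and FID match the run recorded above; KID differs
# slightly, presumably because of random subset sampling.
#
# zero123_dtu
# is: (7.758379936218262, 0.0)
# fid: 93.79453945352734
# kid: 0.014226056541982297
#
# ours_dtu
# is: (6.810516357421875, 0.0)
# fid: 79.76314295588246
# kid: 0.00963420555160987
#
# zero123_mipnerf360
# is: (5.664248943328857, 0.0)
# fid: 177.69337334933545
# kid: 0.0570758872119634
#
# ours_mipnerf360
# is: (6.3430304527282715, 0.0)
# fid: 126.91775902900311
# kid: 0.033165712239583305
#
# pixelnerf_dtu
# is: (3.734673023223877, 0.0)
# fid: 329.8599216675181
# kid: 0.2145034626736112
#
# pixelnerf_mipnerf360
# is: (2.114959955215454, 0.0)
# fid: 356.10463150517705
# kid: 0.3131586584004103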