In [22]:
import sys,os,imageio,lpips
root = '/home/youngsun/documents/mvs/mvsnerf_timing'
os.chdir(root)
sys.path.append(root)

from opt import config_parser
from data import dataset_dict
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


# models
from models_timer import *
from renderer_timer import *
from data.ray_utils import get_rays

from tqdm import tqdm


from skimage.metrics import structural_similarity

# pytorch-lightning
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import LightningModule, Trainer, loggers


from data.ray_utils import ray_marcher

import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

torch.cuda.set_device(2)
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
def decode_batch(batch):
    rays = batch['rays']  # (B, 8)
    rgbs = batch['rgbs']  # (B, 3)
    return rays, rgbs

def unpreprocess(data, shape=(1,1,3,1,1)):
    # to unnormalize image for visualization
    # data N V C H W
    device = data.device
    mean = torch.tensor([-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225]).view(*shape).to(device)
    std = torch.tensor([1 / 0.229, 1 / 0.224, 1 / 0.225]).view(*shape).to(device)

    return (data - mean) / std

def read_depth(filename):
    depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (800, 800)
    depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
                       interpolation=cv2.INTER_NEAREST)  # (600, 800)
    depth_h = depth_h[44:556, 80:720]  # (512, 640)
#     depth = cv2.resize(depth_h, None, fx=0.5, fy=0.5,interpolation=cv2.INTER_NEAREST)#!!!!!!!!!!!!!!!!!!!!!!!!!
    mask = depth>0
    return depth_h,mask

loss_fn_vgg = lpips.LPIPS(net='vgg') 
mse2psnr = lambda x : -10. * np.log(x) / np.log(10.)

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/envs/mvsnerf/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth


# quantity evauation

In [24]:
import sys,os,imageio,lpips,cv2,torch,glob
import numpy as np
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity

In [25]:
loss_fn_vgg = lpips.LPIPS(net='vgg') 
mse2psnr = lambda x : -10. * np.log(x) / np.log(10.)



def acc_threshold(abs_err, threshold):
    """
    computes the percentage of pixels whose depth error is less than @threshold
    """
    acc_mask = abs_err < threshold
    return  acc_mask.astype('float') if type(abs_err) is np.ndarray else acc_mask.float()

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/envs/mvsnerf/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth


# DTU no fine tuning

## rendering novel views with nearest 3 views

In [47]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
depth_acc = {}
eval_metric = [0.1,0.05,0.01]
depth_acc[f'abs_err'],depth_acc[f'acc_l_{eval_metric[0]}'],depth_acc[f'acc_l_{eval_metric[1]}'],depth_acc[f'acc_l_{eval_metric[2]}'] = {},{},{},{}
   

for i_scene, scene in enumerate([1]):#,8,21,103,114

    # create timing variables

    # measure time - all processes
    start_all = torch.cuda.Event(enable_timing=True)
    end_all = torch.cuda.Event(enable_timing=True)

    # measure time - Feature extraction and neural volume encoding
    start_mvsnet = torch.cuda.Event(enable_timing=True)
    end_mvsnet = torch.cuda.Event(enable_timing=True)
    
    # measure time - MVSNeRF and volume rendering
    start_mvsnerf = torch.cuda.Event(enable_timing=True)
    end_mvsnerf = torch.cuda.Event(enable_timing=True)
    
    # measure time - MVSNeRF and volume rendering loop
    start_loop = torch.cuda.Event(enable_timing=True)
    end_loop = torch.cuda.Event(enable_timing=True)

    # measure time - MVSNeRF and volume rendering all loops
    start_loops = torch.cuda.Event(enable_timing=True)
    end_loops = torch.cuda.Event(enable_timing=True)
    
    # for combining all recorded time
    records = []
    records_general = {"0_all" : [], "1_loop" : [], "2_loops" : []}
    records_mvsnet = {"0_total" : [], "1_feat" : [], "2_costvol" : [], "3_3dcnn" : []}
    records_mvsnerf = {"0_total" : [],"1_sample" : [], "2_nerf" : [], "3_rend" : []}
    
    psnr,ssim,LPIPS_vgg = [],[],[]
    cmd = f'--datadir /mnt/hdd/mvsnerf_data/dtu/scan{scene}  \
     --dataset_name dtu_ft  \
     --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar \
     --imgScale_train 0.75 --imgScale_test 0.75 --img_downscale 0.75'

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+4*3

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'train'
    pad = 16
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset_train = dataset_dict[args.dataset_name](args, split='train')
    dataset_val = dataset_dict[args.dataset_name](args, split='val')
    val_idx = dataset_val.img_idx

    save_as_image = True
    save_dir = f'results/test_dtu'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()

    with torch.no_grad():

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass       

        for i, batch in enumerate(tqdm(dataset_val)):
                        
            torch.cuda.empty_cache()
            
            ##################
            # time everything
            ##################
            start_all.record()
            #

            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
            depth = batch['depth'].squeeze().numpy()  # (H, W)

            # find nearest image idx from training views
            positions = dataset_train.poses[:,:3,3]
            dis = np.sum(np.abs(positions - dataset_val.poses[[i],:3,3]), axis=-1)
            pair_idx = np.argsort(dis)[:3]
            pair_idx = [dataset_train.img_idx[item] for item in pair_idx]

            imgs_source, proj_mats, near_far_source, pose_source = dataset_train.read_source_views(pair_idx=pair_idx,device=device)
            
            ##################
            # time mvsnet 0
            ##################
            start_mvsnet.record()
            #
            volume_feature, _, _, records_mvsnet = MVSNet(imgs_source, proj_mats, near_far_source, records=records_mvsnet, pad=pad)
            #
            end_mvsnet.record()
            torch.cuda.synchronize()
            records_mvsnet['0_total'].append(start_mvsnet.elapsed_time(end_mvsnet))
            ##############################################################
            
                
            imgs_source = unpreprocess(imgs_source)

            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            
            ##################
            # time loops
            ##################
            start_loops.record()
            #
            
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):
                
                # for loop timing
                ##################
                # time loop
                ##################
                start_loop.record()
                #
                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                
                ##################
                # time mvsnerf 0
                ##################
                start_mvsnerf.record()
                #
                rgb, disp, acc, depth_pred, alpha, extras, records_mvsnerf = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d, records_mvsnerf,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
                #
                end_mvsnerf.record()
                torch.cuda.synchronize()
                records_mvsnerf['0_total'].append(start_mvsnerf.elapsed_time(end_mvsnerf))
                ##############################################################
                
                

                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)
                
                #
                end_loop.record()
                torch.cuda.synchronize()
                records_general['1_loop'].append(start_loop.elapsed_time(end_loop))
                ##############################################################
            
            #
            end_loops.record()
            torch.cuda.synchronize()
            records_general['2_loops'].append(start_loops.elapsed_time(end_loops))
            ##############################################################
            
            #
            end_all.record()
            torch.cuda.synchronize()
            records_general['0_all'].append(start_all.elapsed_time(end_all))
            ##############################################################
            
            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)

            depth_gt, _ =  read_depth(f'/mnt/hdd/mvsnerf_data/dtu/Depths/scan{scene}/depth_map_{val_idx[i]:04d}.pfm')

            # commented out because prediction and gt shape mismatch
#             mask_gt = depth_gt>0
#             abs_err = abs_error(depth_rays_preds, depth_gt/200, mask_gt)

#             eval_metric = [0.01,0.05, 0.1]
#             depth_acc[f'abs_err'][f'{scene}'] = np.mean(abs_err)
#             depth_acc[f'acc_l_{eval_metric[0]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[0]).mean()
#             depth_acc[f'acc_l_{eval_metric[1]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[1]).mean()
#             depth_acc[f'acc_l_{eval_metric[2]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[2]).mean()


            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)

            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)

            if save_as_image:
                imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))

            # quantity
            # mask background since they are outside the far boundle
            mask = depth==0
            imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}_mask.png', mask.astype('uint8')*255)
            rgb_rays[mask],img[mask] = 0.0,0.0
            psnr.append( mse2psnr(np.mean((rgb_rays[~mask]-img[~mask])**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))

            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)


    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)

# a = np.mean(list(depth_acc['abs_err'].values()))
# b = np.mean(list(depth_acc[f'acc_l_{eval_metric[0]}'].values()))
# c = np.mean(list(depth_acc[f'acc_l_{eval_metric[1]}'].values()))
# d = np.mean(list(depth_acc[f'acc_l_{eval_metric[2]}'].values()))
# print(f'============> abs_err: {a} <=================')
# print(f'============> acc_l_{eval_metric[0]}: {b} <=================')
# print(f'============> acc_l_{eval_metric[1]}: {c} <=================')
# print(f'============> acc_l_{eval_metric[2]}: {d} <=================')
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['./ckpts/mvsnerf-v0.tar']
Reloading from ./ckpts/mvsnerf-v0.tar
==> image down scale: 0.75
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.75
===> valing index: [32, 24, 23, 44]


  ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:11<00:00,  2.88s/it]

=====> scene: 1 mean psnr 23.73473304455396 ssim: 0.8938922882080078 lpips: 0.1892389878630638
=====> all mean psnr 23.73473304455396 ssim: 0.8938922882080078 lpips: 0.1892389878630638





In [71]:
def experiment_and_record(records_processed, name='test', scenes=[1], num_src=3, img_scale=1.0, save_as_image=True):
    
    psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
    
    for i_scene, scene in enumerate(scenes):#,8,21,103,114
        
        psnr,ssim,LPIPS_vgg = [],[],[]
        
        # create timing variables

        # measure time - all processes
        start_all = torch.cuda.Event(enable_timing=True)
        end_all = torch.cuda.Event(enable_timing=True)

        # measure time - Feature extraction and neural volume encoding
        start_mvsnet = torch.cuda.Event(enable_timing=True)
        end_mvsnet = torch.cuda.Event(enable_timing=True)

        # measure time - MVSNeRF and volume rendering
        start_mvsnerf = torch.cuda.Event(enable_timing=True)
        end_mvsnerf = torch.cuda.Event(enable_timing=True)

        # measure time - MVSNeRF and volume rendering loop
        start_loop = torch.cuda.Event(enable_timing=True)
        end_loop = torch.cuda.Event(enable_timing=True)

        # measure time - MVSNeRF and volume rendering all loops
        start_loops = torch.cuda.Event(enable_timing=True)
        end_loops = torch.cuda.Event(enable_timing=True)

        # for combining all recorded time
        records = []
        records_general = {"0_all" : [], "1_loop" : [], "2_loops" : []}
        records_mvsnet = {"0_total" : [], "1_feat" : [], "2_costvol" : [], "3_3dcnn" : []}
        records_mvsnerf = {"0_total" : [],"1_sample" : [], "2_nerf" : [], "3_rend" : []}

        cmd = f'--datadir /mnt/hdd/mvsnerf_data/dtu/scan{scene}  \
         --dataset_name dtu_ft  \
         --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar \
         --imgScale_train {img_scale} --imgScale_test {img_scale} --img_downscale {img_scale}'

        args = config_parser(cmd.split())
        args.use_viewdirs = True

        args.N_samples = 128
        args.feat_dim =  8+4*num_src

        # create models
        if 0==i_scene:
            render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
            filter_keys(render_kwargs_train)

            MVSNet = render_kwargs_train['network_mvs']
            render_kwargs_train.pop('network_mvs')


        datadir = args.datadir
        datatype = 'train'
        pad = 16
        args.chunk = 5120


        print('============> rendering dataset <===================')
        dataset_train = dataset_dict[args.dataset_name](args, split='train')
        dataset_val = dataset_dict[args.dataset_name](args, split='val')
        val_idx = dataset_val.img_idx

        save_as_image = True
        save_dir = f'results/test_dtu_{name}'
        os.makedirs(save_dir, exist_ok=True)
        MVSNet.train()
        MVSNet = MVSNet.cuda()

        with torch.no_grad():

            try:
                tqdm._instances.clear() 
            except Exception:     
                pass       

            for i, batch in enumerate(tqdm(dataset_val)):

                torch.cuda.empty_cache()

                ##################
                # time everything
                ##################
                start_all.record()
                #

                rays, img = decode_batch(batch)
                rays = rays.squeeze().to(device)  # (H*W, 3)
                img = img.squeeze().cpu().numpy()  # (H, W, 3)
                depth = batch['depth'].squeeze().numpy()  # (H, W)

                # find nearest image idx from training views
                positions = dataset_train.poses[:,:3,3]
                dis = np.sum(np.abs(positions - dataset_val.poses[[i],:3,3]), axis=-1)
                pair_idx = np.argsort(dis)[:num_src]
                pair_idx = [dataset_train.img_idx[item] for item in pair_idx]

                imgs_source, proj_mats, near_far_source, pose_source = dataset_train.read_source_views(pair_idx=pair_idx,device=device)

                ##################
                # time mvsnet 0
                ##################
                start_mvsnet.record()
                #
                volume_feature, _, _, records_mvsnet = MVSNet(imgs_source, proj_mats, near_far_source, records=records_mvsnet, pad=pad)
                #
                end_mvsnet.record()
                torch.cuda.synchronize()
                records_mvsnet['0_total'].append(start_mvsnet.elapsed_time(end_mvsnet))
                ##############################################################


                imgs_source = unpreprocess(imgs_source)

                N_rays_all = rays.shape[0]
                rgb_rays, depth_rays_preds = [],[]

                ##################
                # time loops
                ##################
                start_loops.record()
                #

                for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                    # for loop timing
                    ##################
                    # time loop
                    ##################
                    start_loop.record()
                    #
                    xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                        N_samples=args.N_samples)

                    # Converting world coordinate to ndc coordinate
                    H, W = img.shape[:2]
                    inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                    w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                    xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                                 near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                    # rendering

                    ##################
                    # time mvsnerf 0
                    ##################
                    start_mvsnerf.record()
                    #
                    rgb, disp, acc, depth_pred, alpha, extras, records_mvsnerf = rendering(args, pose_source, xyz_coarse_sampled,
                                                                           xyz_NDC, z_vals, rays_o, rays_d, records_mvsnerf,
                                                                           volume_feature,imgs_source, **render_kwargs_train)
                    #
                    end_mvsnerf.record()
                    torch.cuda.synchronize()
                    records_mvsnerf['0_total'].append(start_mvsnerf.elapsed_time(end_mvsnerf))
                    ##############################################################



                    rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                    rgb_rays.append(rgb)
                    depth_rays_preds.append(depth_pred)

                    #
                    end_loop.record()
                    torch.cuda.synchronize()
                    records_general['1_loop'].append(start_loop.elapsed_time(end_loop))
                    ##############################################################

                #
                end_loops.record()
                torch.cuda.synchronize()
                records_general['2_loops'].append(start_loops.elapsed_time(end_loops))
                ##############################################################

                #
                end_all.record()
                torch.cuda.synchronize()
                records_general['0_all'].append(start_all.elapsed_time(end_all))
                ##############################################################
                
                depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
                depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)

                rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
                img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)

                if save_as_image:
                    imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
                
                # quantity
                # mask background since they are outside the far boundle
                mask = depth==0
                imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}_mask.png', mask.astype('uint8')*255)
                rgb_rays[mask],img[mask] = 0.0,0.0
                psnr.append( mse2psnr(np.mean((rgb_rays[~mask]-img[~mask])**2)))
                ssim.append( structural_similarity(rgb_rays, img, multichannel=True))

                img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
                img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
                LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

            psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)

    records_processed = append_records(records_processed, records_general, records_mvsnet, records_mvsnerf, 
                                       np.mean(psnr_all), np.mean(ssim_all), np.mean(LPIPS_vgg_all), 
                                       name)
            
    return records_processed

In [72]:
def init_records():
    return {'experiment name':[],
            'novel scenes synthesized':[],
            'psnr':[],
            'ssim':[],
            'lpips':[],
            'total time':[],
            'mvsnet total time':[],
            'mvsnet feature extraction':[],
            'mvsnet cost volume':[],
            'mvsnet 3D-CNN':[],
            'mvsnerf total time':[],
            'mvsnerf volume sampling':[],
            'mvsnerf nerf':[],
            'mvsnerf rendering':[]
           }

def append_records(records_processed, records_general, records_mvsnet, records_mvsnerf, 
                   psnr, ssim, lpips,
                   name='this experiment wants a name'):

    records_processed['experiment name'] += [f'{name}']
    records_processed['novel scenes synthesized'] += [len(records_general['0_all'])]
    
    records_processed['psnr'] += [psnr]
    records_processed['ssim'] += [ssim]
    records_processed['lpips'] += [lpips]
    
    records_processed['total time'] += [np.mean(records_general['0_all'])]

    records_processed['mvsnet total time'] += [np.mean(records_mvsnet['0_total'])]
    records_processed['mvsnet feature extraction'] += [np.mean(records_mvsnet['1_feat'])]
    records_processed['mvsnet cost volume'] += [np.mean(records_mvsnet['2_costvol'])]
    records_processed['mvsnet 3D-CNN'] += [np.mean(records_mvsnet['3_3dcnn'])]
    
    records_processed['mvsnerf total time'] += [np.mean(records_mvsnerf['0_total']) * len (records_mvsnerf['0_total']) / len(records_general['0_all'])]
    records_processed['mvsnerf volume sampling'] += [np.mean(records_mvsnerf['1_sample']) * len (records_mvsnerf['1_sample']) / len(records_general['0_all'])]
    records_processed['mvsnerf nerf'] += [np.mean(records_mvsnerf['2_nerf']) * len (records_mvsnerf['2_nerf']) / len(records_general['0_all'])]
    records_processed['mvsnerf rendering'] += [np.mean(records_mvsnerf['3_rend']) * len (records_mvsnerf['3_rend']) / len(records_general['0_all'])]

    return records_processed

In [50]:
records_processed = init_records()

records_processed = append_records(records_processed, records_general, records_mvsnet, records_mvsnerf, 'test')

In [51]:
records_processed = append_records(records_processed, records_general, records_mvsnet, records_mvsnerf, 'test2')

In [52]:
records_processed

{'experiment name': ['test', 'test2'],
 'total time': [1601.7471313476562, 1601.7471313476562],
 'novel scenes synthesized': [4, 4],
 'mvsnet total time': [61.2093448638916, 61.2093448638916],
 'mvsnet feature extraction': [4.20684814453125, 4.20684814453125],
 'mvsnet cost volume': [17.603840351104736, 17.603840351104736],
 'mvsnet 3D-CNN': [39.14752006530762, 39.14752006530762],
 'mvsnerf total time': [1451.4196434020996, 1451.4196434020996],
 'mvsnerf volume sampling': [77.8173440694809, 77.8173440694809],
 'mvsnerf nerf': [1349.6506843566895, 1349.6506843566895],
 'mvsnerf rendering': [13.000448025763035, 13.000448025763035]}

In [73]:
## 이걸로 계속 돌리고 쌓으면 됨!

records_processed = init_records()
records_processed = experiment_and_record(records_processed, name='test3', scenes=[1,2,3,4], num_src=3, img_scale=0.5, save_as_image=True)

Found ckpts ['./ckpts/mvsnerf-v0.tar']
Reloading from ./ckpts/mvsnerf-v0.tar
==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


  ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.33s/it]


==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.28s/it]


==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.26s/it]


==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.38s/it]


In [58]:
records_processed = experiment_and_record(records_processed, name='test3', scenes=[1,2,3,4], num_src=3, img_scale=0.5, save_as_image=True)

Found ckpts ['./ckpts/mvsnerf-v0.tar']
Reloading from ./ckpts/mvsnerf-v0.tar
==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


  ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.30s/it]


=====> scene: 1 mean psnr 19.779164493385384 ssim: 0.8381005525588989 lpips: 0.2410932220518589
==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.26s/it]


=====> scene: 2 mean psnr 17.855236492613876 ssim: 0.7069318890571594 lpips: 0.25165697932243347
==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:04<00:00,  1.23s/it]


=====> scene: 3 mean psnr 20.201428833914655 ssim: 0.8010879158973694 lpips: 0.2353186160326004
==> image down scale: 0.5
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]
==> image down scale: 0.5
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:05<00:00,  1.36s/it]

=====> scene: 4 mean psnr 18.070365974106757 ssim: 0.8202603459358215 lpips: 0.19477754086256027
=====> all mean psnr 18.97654894850517 ssim: 0.7915952205657959 lpips: 0.23071158956736326





In [74]:
pd.DataFrame(records_processed).round({'psnr':1, 'ssim':2, 'lpips':2, 
                                       'total time':0, 'mvsnet total time':0, 'mvsnerf total time':0, 
                                       'mvsnet feature extraction' : 1, 'mvsnet cost volume':1, 'mvsnet 3D-CNN':1,
                                       'mvsnerf volume sampling': 1, 'mvsnerf nerf' : 1, 'mvsnerf rendering' : 1
                                      })

Unnamed: 0,experiment name,novel scenes synthesized,psnr,ssim,lpips,total time,mvsnet total time,mvsnet feature extraction,mvsnet cost volume,mvsnet 3D-CNN,mvsnerf total time,mvsnerf volume sampling,mvsnerf nerf,mvsnerf rendering
0,test3,4,19.0,0.79,0.23,741.0,36.0,4.5,10.8,20.7,631.0,32.4,588.2,5.6


## rendering novel views with fixed 3 source views

In [None]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    cmd = f'--datadir /mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/scan{scene}  \
    --dataset_name dtu_ft  \
    --ckpt ./ckpts//mvsnerf-v0.tar'

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+3*4

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 24
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():

        imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(device=device)
        volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
        imgs_source = unpreprocess(imgs_source)

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()
            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
            depth = batch['depth'].squeeze().numpy()  # (H, W)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # mask background since they are outside the far boundle
            mask = depth==0
            imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}_mask.png', mask.astype('uint8')*255)
            rgb_rays[mask],img[mask] = 0.0,0.0
            psnr.append( mse2psnr(np.mean((rgb_rays[~mask]-img[~mask])**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)

    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

# Pairs generation

In [3]:
import json,torch
import sys,os
import numpy as np
root = '/home/hengfei/Desktop/research/mvsnerf'
os.chdir(root)
sys.path.append(root)
pairs = torch.load('./configs/pairs.th')

# llff
root_dir = '/home/hengfei/Desktop/research/mvsnerf/xgaze/'
for scene in ['xgaze_11images_cropped_colmapCODE']:#
    poses_bounds = np.load(os.path.join(root_dir, scene, 'poses_bounds.npy'))  # (N_images, 11)
    poses = poses_bounds[:, :15].reshape(-1, 3, 5)  # (N_images, 3, 5)
    poses = np.concatenate([poses[..., 1:2], - poses[..., :1], poses[..., 2:4]], -1)

    ref_position = np.mean(poses[..., 3],axis=0, keepdims=True)
    dist = np.sum(np.abs(poses[..., 3] - ref_position), axis=-1)
    pair_idx = np.argsort(dist)[:11]
#     pair_idx = torch.randperm(len(poses))[:20].tolist()

    pairs[f'{scene}_test'] = pair_idx[::6]
    pairs[f'{scene}_val'] = pair_idx[::6]
    pairs[f'{scene}_train'] = np.delete(pair_idx, range(0,11,6))

torch.save(pairs,'/home/hengfei/Desktop/research/mvsnerf/configs/pairs.th')

# nerf 

In [22]:
root = '/mnt/new_disk_2/anpei/code/nerf/logs/'
root_gt = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt = cv2.imread(f'{root_gt}/{scene}/{scene}/{scene}_00009999_{j:02d}.png')[...,::-1]
        gt, img = gt[:,:800].astype('float')/255.0, img.astype('float')/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.resize(cv2.imread(file)[...,::-1],(960,640))
        gt =  cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/{scene}_{idx:03d}.png')[...,::-1]
        gt, img = gt[:,:960].astype('float')/255.0, img.astype('float')/255.0


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/scan{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: chair mean psnr 31.070870959993407 ssim: 0.970869913728838 lpips: 0.05512496456503868
=====> scene: drums mean psnr 25.464383523724557 ssim: 0.9430287321705997 lpips: 0.1010842639952898
=====> scene: ficus mean psnr 29.72717081186501 ssim: 0.9688198661712594 lpips: 0.04721927270293236
=====> scene: hotdog mean psnr 34.63162021512352 ssim: 0.9798700143526428 lpips: 0.0885334312915802
=====> scene: lego mean psnr 32.65761069614622 ssim: 0.9751430050524844 lpips: 0.05375238787382841
=====> scene: materials mean psnr 30.220202654922936 ssim: 0.9677394226502894 lpips: 0.1052329633384943
=====> scene: mic mean psnr 31.810551677509977 ssim: 0.9810118386928188 lpips: 0.03268271638080478
=====> scene: ship mean psnr 29.487980342358682 ssim: 0.9079920156014059 lpips: 0.2625834122300148
=====> all mean psnr 30.633798860205538 ssim: 0.9618093510525423 lpips: 0.09327667654724792
=====> scene: fern mean psnr 23.87081932481545 ssim: 0.828319405500272 lpips: 0.29106350988149643
=====> sc

# ours

In [10]:
root = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}/{scene}/{scene}_00009999_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file).astype('float')[...,::-1]
        gt, img = img[:,:800]/255.0, img[:,800:1600]/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/{scene}/{scene}/{scene}_00009999_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt, img = img[:,:960].astype('float')/255.0, img[:,960:960*2].astype('float')/255.0

        H_crop, W_crop = np.array(gt.shape[:2])//10
        img = img[H_crop:-H_crop,W_crop:-W_crop]
        gt = gt[H_crop:-H_crop,W_crop:-W_crop]


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

root = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/dtu_scan{scene}_1h/dtu_scan{scene}_1h/00010239_*'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1][:,640:1280]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: chair mean psnr 26.746873810513947 ssim: 0.9321234340308168 lpips: 0.15475058555603027
=====> scene: drums mean psnr 22.28406117543553 ssim: 0.8964614019632969 lpips: 0.21542910858988762
=====> scene: ficus mean psnr 26.365789669973488 ssim: 0.9438413276712496 lpips: 0.15914445742964745
=====> scene: hotdog mean psnr 32.489636248742805 ssim: 0.9699785599978382 lpips: 0.11295554973185062
=====> scene: lego mean psnr 26.79832336361502 ssim: 0.9245065827858229 lpips: 0.18708691000938416
=====> scene: materials mean psnr 24.957611270986945 ssim: 0.9249186604651752 lpips: 0.1740873008966446
=====> scene: mic mean psnr 29.449610622444368 ssim: 0.9693072200690339 lpips: 0.092950988560915
=====> scene: ship mean psnr 26.60832366062154 ssim: 0.8780999869891254 lpips: 0.28621142730116844
=====> all mean psnr 26.962528727791707 ssim: 0.9299046467465449 lpips: 0.17282704100944102
=====> scene: fern mean psnr 22.61357364768159 ssim: 0.77000724312094 lpips: 0.2827577739953995
=====> sc

# ibrnet

In [6]:
# root = '/mnt/new_disk2/anpei/code/IBRNet/logs'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
# psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
# for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#,
#     psnr,ssim,LPIPS_vgg = [],[],[]
    
#     files = sorted(glob.glob(f'{root}/nerf-3view-finetuning-nearest-{scene}/010000_*'))
#     for j, file in enumerate(files):

#         idx = pairs[f'{scene}_val'][j]
#         img = cv2.imread(file).astype('float')[...,::-1]
#         gt, img = img[:,800:800*2]/255.0, img[:,800*3:800*4]/255.0

# #         H_crop, W_crop = np.array(gt.shape[:2])//10
# #         img = img[H_crop:-H_crop,W_crop:-W_crop]
# #         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

#         psnr.append( mse2psnr(np.mean((gt-img)**2)))
#         ssim.append( structural_similarity(gt, img, multichannel=True))

#         img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
#         img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
#         LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

#     print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
#     psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
# print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'/mnt/new_disk2/anpei/code/IBRNet/logs/llff-3view-finetuning-nearest-{scene}/010000_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt, img = img[:,1008:1008*2].astype('float')/255.0, img[:,1008*3:1008*4].astype('float')/255.0
        img, gt = cv2.resize(img,(960,640)), cv2.resize(gt,(960,640))

        H_crop, W_crop = np.array(gt.shape[:2])//10
        img = img[H_crop:-H_crop,W_crop:-W_crop]
        gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        
        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

root = '/mnt/new_disk2/anpei/code/IBRNet/logs'
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/dtu-3view-finetuning-nearest-scan{scene}/010000_*'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1][:,3*640:4*640]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: fern mean psnr 22.64474646040451 ssim: 0.7736232480476191 lpips: 0.26588304713368416
=====> scene: flower mean psnr 26.553349019087786 ssim: 0.9092690161984827 lpips: 0.14575103670358658
=====> scene: fortress mean psnr 30.338842953903075 ssim: 0.9368867837660259 lpips: 0.13289865292608738
=====> scene: horns mean psnr 25.01290939681414 ssim: 0.9040335882553917 lpips: 0.1899307444691658
=====> scene: leaves mean psnr 22.076508076698556 ssim: 0.8430354849586478 lpips: 0.17987846583127975
=====> scene: orchids mean psnr 19.007830032899616 ssim: 0.7045611776629173 lpips: 0.2861044891178608
=====> scene: room mean psnr 31.05473820815669 ssim: 0.9723299877991765 lpips: 0.08911459799855947
=====> scene: trex mean psnr 22.339864946223464 ssim: 0.8421255627008343 lpips: 0.22207806631922722
=====> all mean psnr 24.878598636773482 ssim: 0.8607331061736369 lpips: 0.1889548875624314
=====> scene: 1 mean psnr 30.99564992655386 ssim: 0.9548394719193786 lpips: 0.1285402663052082
=====> 

# pixel nerf

In [6]:
root = '/mnt/new_disk2/anpei/code/pixel-nerf/visuals/dtu'
root_gt = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'{scene}_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.imread(f'{root}/{scene}_{idx:03d}.png')[...,::-1]
        gt = cv2.imread(f'{root_gt}/{scene}/{scene}/{scene}_00009999_{j:02d}.png')[...,::-1]
        gt, img = gt[:,:800].astype('float')/255.0, img.astype('float')/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'{scene}_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.resize(cv2.imread(f'{root}/{scene}_{idx:03d}.png')[...,::-1],(960,640))
        gt =  cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/{scene}_{idx:03d}.png')[...,::-1]
        gt, img = gt[:,:960].astype('float')/255.0, img.astype('float')/255.0


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all,depth_acc = [],[],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'dtu_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.imread(f'{root}/scan{scene}_{idx:03d}.png')[...,::-1]
        
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())
        
        # depth
#         depth_pred = torch.load(f'{root}/scan{scene}_{idx:03d}_depth.th')
#         depth_gt,_ =  read_depth(f'/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Depths/scan{scene}/depth_map_{idx:04d}.pfm')
        
#         mask_gt = depth_gt>0
#         abs_err = abs_error(depth_pred*1.5, depth_gt/200, mask_gt).numpy()

#         eval_metric = [0.01,0.05, 0.1]
#         depth_acc[f'abs_err'][f'{scene}'] = np.mean(abs_err)
#         depth_acc[f'acc_l_{eval_metric[0]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[0]).mean()
#         depth_acc[f'acc_l_{eval_metric[1]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[1]).mean()
#         depth_acc[f'acc_l_{eval_metric[2]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[2]).mean()

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: chair mean psnr 7.175962813343725 ssim: 0.6243642351905847 lpips: 0.38591109961271286
=====> scene: drums mean psnr 8.148548711878252 ssim: 0.6701584468514097 lpips: 0.42121122032403946
=====> scene: ficus mean psnr 6.608732738834844 ssim: 0.668716265099144 lpips: 0.3350602239370346
=====> scene: hotdog mean psnr 6.799387670799135 ssim: 0.6689815218041557 lpips: 0.43327029794454575
=====> scene: lego mean psnr 7.740217521658803 ssim: 0.6710903029993184 lpips: 0.42670799791812897
=====> scene: materials mean psnr 7.609290420358684 ssim: 0.6441046576733512 lpips: 0.43245941400527954
=====> scene: mic mean psnr 7.707203698223274 ssim: 0.7294597852809476 lpips: 0.32929887622594833
=====> scene: ship mean psnr 7.295484760785579 ssim: 0.5836685948507447 lpips: 0.5257005095481873
=====> all mean psnr 7.385603541985287 ssim: 0.657567976218707 lpips: 0.4112024549394846
=====> scene: fern mean psnr 12.397648684821284 ssim: 0.5312397318110376 lpips: 0.6500117480754852
=====> scene: 