In [1]:
import sys,os,imageio,lpips
root = '/home/honglanqing/hushoukang/mvsnerf'
os.chdir(root)
sys.path.append(root)

from opt import config_parser
from data import dataset_dict
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


# models
from models import *
from renderer import *
from data.ray_utils import get_rays

from tqdm import tqdm


from skimage.metrics import structural_similarity

# pytorch-lightning
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import LightningModule, Trainer, loggers


from data.ray_utils import ray_marcher

%load_ext autoreload
%autoreload 2

torch.cuda.set_device(1)
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
def decode_batch(batch):
    rays = batch['rays']  # (B, 8)
    rgbs = batch['rgbs']  # (B, 3)
    return rays, rgbs

def unpreprocess(data, shape=(1,1,3,1,1)):
    # to unnormalize image for visualization
    # data N V C H W
    device = data.device
    mean = torch.tensor([-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225]).view(*shape).to(device)
    std = torch.tensor([1 / 0.229, 1 / 0.224, 1 / 0.225]).view(*shape).to(device)

    return (data - mean) / std

def read_depth(filename):
    depth_h = np.array(read_pfm(filename)[0], dtype=np.float32) # (800, 800)
    depth_h = cv2.resize(depth_h, None, fx=0.5, fy=0.5,
                       interpolation=cv2.INTER_NEAREST)  # (600, 800)
    depth_h = depth_h[44:556, 80:720]  # (512, 640)
#     depth = cv2.resize(depth_h, None, fx=0.5, fy=0.5,interpolation=cv2.INTER_NEAREST)#!!!!!!!!!!!!!!!!!!!!!!!!!
    mask = depth>0
    return depth_h,mask

loss_fn_vgg = lpips.LPIPS(net='vgg') 
mse2psnr = lambda x : -10. * np.log(x) / np.log(10.)

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /home/honglanqing/anaconda3/envs/pytorch1.8_skhu/lib/python3.7/site-packages/lpips/weights/v0.1/vgg.pth


# llff no fine tuning

## rendering novel views with nearest 3 source views

In [None]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
# for i_scene, scene in enumerate([ 'xgaze_11images_cropped_colmapCODE']):#
for i_scene, scene in enumerate([ 'room','fortress', 'flower','orchids','leaves','horns','trex','fern']):#
# for i_scene, scene in enumerate([ 'flower']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
#     cmd = f'--datadir /home/hengfei/Desktop/research/mvsnerf/xgaze/{scene}  \
#      --dataset_name llff \
#      --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar '

    cmd = f'--datadir /home/honglanqing/data/nerf_llff_data/{scene}  \
     --dataset_name llff \
     --net_type v0 --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar '

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim = 8+12

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 24
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():
        
        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()

                
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
        
            # find nearest image idx
            img_idx = torch.load('configs/pairs.th')[f'{scene}_train']
            positions = dataset.poses[img_idx,:3,3]
            dis = np.sum(np.abs(positions - dataset.poses[val_idx[i],:3,3]), axis=-1)
            pair_idx = np.argsort(dis)[:3]
            pair_idx = [img_idx[item] for item in pair_idx]
            
            imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(pair_idx=pair_idx,device=device)
            volume_feature, img_feat, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad, lindisp=False)
            imgs_source = unpreprocess(imgs_source)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples, lindisp=False)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                        near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test, lindisp=False)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                    xyz_NDC, z_vals, rays_o, rays_d,volume_feature,imgs_source, img_feat=None,**render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
#             img_vis = np.concatenate((torch.cat(torch.split(imgs_source*255, [1,1,1], dim=1),-1).squeeze().permute(1,2,0).cpu().numpy(),img_vis),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # center crop 0.8 ratio
            H_crop, W_crop = np.array(rgb_rays.shape[:2])//10
            img = img[H_crop:-H_crop,W_crop:-W_crop]
            rgb_rays = rgb_rays[H_crop:-H_crop,W_crop:-W_crop]
            
            psnr.append( mse2psnr(np.mean((rgb_rays-img)**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            import pdb
            pdb.set_trace()
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())
            
        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
    
    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
41 41 /home/honglanqing/data/nerf_llff_data/room
scale_factor 8.030018355528686
===> valing index: [35 15 38 21]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

original focal 3070.63827088164
porcessed focal [731.1043502099143, 649.8705335199238]




> [0;32m/tmp/ipykernel_35453/1443191394.py[0m(121)[0;36m<module>[0;34m()[0m
[0;32m    119 [0;31m            [0;32mimport[0m [0mpdb[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    120 [0;31m            [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 121 [0;31m            [0mLPIPS_vgg[0m[0;34m.[0m[0mappend[0m[0;34m([0m [0mloss_fn_vgg[0m[0;34m([0m[0mimg_tensor[0m[0;34m,[0m [0mimg_gt_tensor[0m[0;34m)[0m[0;34m.[0m[0mitem[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    122 [0;31m[0;34m[0m[0m
[0m[0;32m    123 [0;31m        [0mprint[0m[0;34m([0m[0;34mf'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> rgb_rays.shape
(512, 768, 3)
ipdb> torch.from_numpy(rgb_rays).shape
torch.Size([512, 768, 3])
ipdb> torch.from_numpy(rgb_rays)[None].shape
torch.Size([1, 512, 768

#### for room

In [None]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([ 'room']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    cmd = f'--datadir /mnt/new_disk_2/anpei/Dataset/MVSNeRF/nerf_llff_data/{scene}  \
     --dataset_name llff \
     --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar '


    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim = 8+12

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 24
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():
        
        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()
            if 1!=i:
                continue
                
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
        
            # find nearest image idx
            img_idx = torch.load('configs/pairs.th')[f'{scene}_train']
            positions = dataset.poses[img_idx,:3,3]
            dis = np.sum(np.abs(positions - dataset.poses[val_idx[i],:3,3]), axis=-1)
            pair_idx = np.argsort(dis)[:3]
            pair_idx = img_idx[:3]#[img_idx[item] for item in pair_idx]
            
            imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(pair_idx=pair_idx,device=device)
            volume_feature, img_feat, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad, lindisp=False)
            imgs_source = unpreprocess(imgs_source)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples, lindisp=False)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                        near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test, lindisp=False)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                    xyz_NDC, z_vals, rays_o, rays_d,volume_feature,imgs_source, img_feat=None,**render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)


            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
#             img_vis = np.concatenate((torch.cat(torch.split(imgs_source*255, [1,1,1], dim=1),-1).squeeze().permute(1,2,0).cpu().numpy(),img_vis),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # center crop 0.8 ratio
            H_crop, W_crop = np.array(rgb_rays.shape[:2])//10
            img = img[H_crop:-H_crop,W_crop:-W_crop]
            rgb_rays = rgb_rays[H_crop:-H_crop,W_crop:-W_crop]
            
            psnr.append( mse2psnr(np.mean((rgb_rays-img)**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())
            
        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
    
    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

## rendering novel views with fixed 3 source views

In [47]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fortress','flower','orchids', 'room','leaves','horns','trex','fern']):#'flower','orchids', 'room','leaves','fern','horns','trex','fortress'
    psnr,ssim,LPIPS_vgg = [],[],[]
#     cmd = f'--datadir /mnt/new_disk_2/anpei/Dataset/MVSNeRF/nerf_llff_data/{scene}  \
#      --dataset_name llff \
#      --ckpt ./ckpts/mvsnerf-v0.tar  \
#      --net_type v0 --netwidth 128 --netdepth 6'

    cmd = f'--datadir /home/honglanqing/data/nerf_llff_data/{scene}  \
     --dataset_name llff \
     --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar  \
     --net_type v0 --netwidth 128 --netdepth 6'    
    
    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+3*4

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 24
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():

        imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(device=device)
        volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
        imgs_source = unpreprocess(imgs_source)

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()
            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
                imageio.imwrite(f'runs_new/mvsnerf-modify/results/{scene}_{val_idx[i]:03d}.png', (rgb_rays*255).astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # center crop 0.8 ratio
            H_crop, W_crop = np.array(rgb_rays.shape[:2])//10
            img = img[H_crop:-H_crop,W_crop:-W_crop]
            rgb_rays = rgb_rays[H_crop:-H_crop,W_crop:-W_crop]
            
            psnr.append( mse2psnr(np.mean((rgb_rays-img)**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())
        
        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
        
    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
42 42 /home/honglanqing/data/nerf_llff_data/fortress
scale_factor 9.564056923447355
===> valing index: [21  9 40 25]
original focal 3371.3189700388566
porcessed focal [802.6949928663945, 713.5066603256839]
====> ref idx: [15 20 26]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:40<00:00, 10.05s/it]


=====> scene: fortress mean psnr 23.313624253650566 ssim: 0.7998276948928833 lpips: 0.27299268171191216
34 34 /home/honglanqing/data/nerf_llff_data/flower
scale_factor 16.93030140064795
===> valing index: [20  6 22  5]
original focal 3575.0586059510074
porcessed focal [851.204429988335, 756.6261599896312]
====> ref idx: [12 29 11]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:39<00:00,  9.95s/it]


=====> scene: flower mean psnr 21.386200093997523 ssim: 0.8095268607139587 lpips: 0.2635017856955528
25 25 /home/honglanqing/data/nerf_llff_data/orchids
scale_factor 10.866254797560998
===> valing index: [12 10 16 19]
original focal 3124.62276683125
porcessed focal [743.957801626488, 661.2958236679893]
====> ref idx: [ 8 13 11]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.58s/it]


=====> scene: orchids mean psnr 16.314854412973737 ssim: 0.5679171085357666 lpips: 0.4050118550658226
41 41 /home/honglanqing/data/nerf_llff_data/room
scale_factor 8.030018355528686
===> valing index: [35 15 38 21]
original focal 3070.63827088164
porcessed focal [731.1043502099143, 649.8705335199238]
====> ref idx: [14 39 34]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:55<00:00, 13.85s/it]


=====> scene: room mean psnr 23.524102544355397 ssim: 0.9166499972343445 lpips: 0.20913172513246536
26 26 /home/honglanqing/data/nerf_llff_data/leaves
scale_factor 26.362457265215173
===> valing index: [13 11 16  4]
original focal 3428.4755177313386
porcessed focal [816.3036946979378, 725.6032841759447]
====> ref idx: [12 18  8]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.61s/it]


=====> scene: leaves mean psnr 15.391822650084222 ssim: 0.5242440700531006 lpips: 0.41956865787506104
62 62 /home/honglanqing/data/nerf_llff_data/horns
scale_factor 8.315313360692242
===> valing index: [33 40 31 59]
original focal 3368.8237176028883
porcessed focal [802.1008851435449, 712.9785645720398]
====> ref idx: [23 32 24]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:43<00:00, 10.87s/it]


=====> scene: horns mean psnr 20.83228774323398 ssim: 0.7862597703933716 lpips: 0.3289877027273178
55 55 /home/honglanqing/data/nerf_llff_data/trex
scale_factor 10.958461140504273
===> valing index: [20 21 53 22]
original focal 3329.8699571672205
porcessed focal [792.8261802779097, 704.734382469253]
====> ref idx: [52 19 47]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:45<00:00, 11.44s/it]


=====> scene: trex mean psnr 20.687225106496015 ssim: 0.7780523896217346 lpips: 0.27430441230535507
20 20 /home/honglanqing/data/nerf_llff_data/fern
scale_factor 12.738972134007064
===> valing index: [12 13  5 19]
original focal 3260.5263328805895
porcessed focal [776.3157935429975, 690.0584831493311]
====> ref idx: [17  2  7]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:43<00:00, 10.83s/it]

=====> scene: fern mean psnr 19.277901354731778 ssim: 0.6552879214286804 lpips: 0.37551266700029373
=====> all mean psnr 20.091002269940404 ssim: 0.7297207117080688 lpips: 0.3186264359392226





# nerf no fine tuning

## rendering novel views with nearest 3 views

In [26]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['ship','mic','chair','lego','drums','ficus','materials','hotdog']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
#     cmd = f'--datadir /mnt/new_disk_2/anpei/Dataset/nerf_synthetic/{scene}  \
#      --dataset_name blender --white_bkgd \
#     --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar '

    cmd = f'--datadir /home/honglanqing/data/nerf_synthetic/{scene}  \
     --dataset_name blender --white_bkgd \
    --net_type v0 --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar '

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+12

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'train'
    pad = 16
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset_train = dataset_dict[args.dataset_name](args, split='train')
    dataset_val = dataset_dict[args.dataset_name](args, split='val')
    val_idx = dataset_val.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset_val)):
            torch.cuda.empty_cache()

            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
        
            # find nearest image idx from training views
            positions = dataset_train.poses[:,:3,3]
            dis = np.sum(np.abs(positions - dataset_val.poses[[i],:3,3]), axis=-1)
            pair_idx = np.argsort(dis)[:3]
            pair_idx = [dataset_train.img_idx[item] for item in pair_idx]
            
            imgs_source, proj_mats, near_far_source, pose_source = dataset_train.read_source_views(pair_idx=pair_idx,device=device)
            volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
            imgs_source = unpreprocess(imgs_source)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                intrinsic_ref[:2] *= args.imgScale_test/args.imgScale_train
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
#             img_vis = np.concatenate((torch.cat(torch.split(imgs_source*255, [1,1,1], dim=1),-1).squeeze().permute(1,2,0).cpu().numpy(),img_vis),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # center crop 0.8 ratio
            H_crop, W_crop = np.array(rgb_rays.shape[:2])//10
            img = img[H_crop:-H_crop,W_crop:-W_crop]
            rgb_rays = rgb_rays[H_crop:-H_crop,W_crop:-W_crop]
            
            psnr.append( mse2psnr(np.mean((rgb_rays-img)**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)

    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)

print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
===> training index: [12 32 44 17 47  3 19  2 33 77 95 54 11 98 67 87]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [80 86 22 20]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:36<00:00,  9.20s/it]


=====> scene: ship mean psnr 23.910442338047737 ssim: 0.832594096660614 lpips: 0.32471855729818344
===> training index: [61 80 64  2 85 15 97 93 53 44 71 68 32 90 99  6]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [20 49 55 72]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:36<00:00,  9.22s/it]


=====> scene: mic mean psnr 22.9280946907667 ssim: 0.9375986456871033 lpips: 0.0955142118036747
===> training index: [62 56 26 67 92 31 63 77 85 82 47 41 55 61 99 25]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [ 8 24 32 78]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:36<00:00,  9.16s/it]


=====> scene: chair mean psnr 24.497580105731302 ssim: 0.9028964042663574 lpips: 0.13612907379865646
===> training index: [6, 43, 33, 13, 17, 19, 20, 25, 30, 37, 46, 48, 49, 55, 59, 65]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [63, 70, 18, 28]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:37<00:00,  9.30s/it]


=====> scene: lego mean psnr 22.374371048712224 ssim: 0.8998879194259644 lpips: 0.1829792596399784
===> training index: [43, 81, 14, 3, 9, 11, 20, 21, 22, 40, 41, 42, 46, 51, 52, 55]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [79, 74, 91, 68]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:37<00:00,  9.25s/it]


=====> scene: drums mean psnr 19.808898152038488 ssim: 0.8620845079421997 lpips: 0.22344421222805977
===> training index: [92 69 56 98  3 64 61 45  8 62 83 17  7 44 32 90]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [38 23  0  5]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:37<00:00,  9.30s/it]


=====> scene: ficus mean psnr 21.283852152446965 ssim: 0.8884677886962891 lpips: 0.15958010032773018
===> training index: [34 73 94 97 47 19 58 16 21 13 82 61 18 79 78 37]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [36 63 46 96]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:36<00:00,  9.20s/it]


=====> scene: materials mean psnr 21.52506433562085 ssim: 0.8886674046516418 lpips: 0.1660989411175251
===> training index: [48 61  0  3 33 73 78 58 28 63 71 38 22 30  9 31]


  0%|                                                                                                                                                               | 0/4 [00:00<?, ?it/s]

===> valing index: [26 60 13 47]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:37<00:00,  9.48s/it]

=====> scene: hotdog mean psnr 30.49515343129361 ssim: 0.9586866497993469 lpips: 0.11133340187370777
=====> all mean psnr 23.352932031832232 ssim: 0.8963603973388672 lpips: 0.17497471976093948





## rendering novel views with fixed 3 source views

In [27]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['ship','mic','chair','lego','drums','ficus','materials','hotdog']):#'ship','mic','chair','lego','drums','ficus','materials','hotdog'
    psnr,ssim,LPIPS_vgg = [],[],[]
#     cmd = f'--datadir /home/honglanqing/data/nerf_synthetic/{scene}  \
#      --dataset_name blender --white_bkgd \
#      --ckpt /mnt/new_disk_2/anpei/code/MVS-NeRF/runs_new/mvs-nerf-color-skip-no-border/ckpts/79999.tar \
#      --net_type v2 --netwidth 128 --netdepth 6'

    cmd = f'--datadir /home/honglanqing/data/nerf_synthetic/{scene}  \
     --dataset_name blender --white_bkgd \
    --net_type v0 --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar '
    
    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim = 8+3*4

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 16
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():

        imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(device=device)
        volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
        imgs_source = unpreprocess(imgs_source)

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()
            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
            img_vis = np.concatenate((torch.cat(torch.split(imgs_source*255, [1,1,1], dim=1),-1).squeeze().permute(1,2,0).cpu().numpy(),img_vis),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            psnr.append( mse2psnr(np.mean((rgb_rays-img)**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)

    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_depth_spiral.mp4', np.stack(depths_vis), fps=10, quality=10)
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
===> valing index: [80 86 22 20]
====> ref idx: [12 32 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:53<00:00, 13.37s/it]


=====> scene: ship mean psnr 18.630262643026924 ssim: 0.7773339152336121 lpips: 0.36529964953660965
===> valing index: [20 49 55 72]
====> ref idx: [61 80 64]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:58<00:00, 14.71s/it]


=====> scene: mic mean psnr 19.361852138596063 ssim: 0.9227213859558105 lpips: 0.1804120633751154
===> valing index: [ 8 24 32 78]
====> ref idx: [62 56 26]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.55s/it]


=====> scene: chair mean psnr 20.521792024706603 ssim: 0.8802194595336914 lpips: 0.2157134786248207
===> valing index: [63, 70, 18, 28]
====> ref idx: [6, 43, 33]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.70s/it]


=====> scene: lego mean psnr 17.0593615244548 ssim: 0.8484376072883606 lpips: 0.30572066456079483
===> valing index: [79, 74, 91, 68]
====> ref idx: [43, 81, 14]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.66s/it]


=====> scene: drums mean psnr 16.56296054723107 ssim: 0.8482185006141663 lpips: 0.21876754984259605
===> valing index: [38 23  0  5]
====> ref idx: [92 69 56]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.58s/it]


=====> scene: ficus mean psnr 19.00421108254332 ssim: 0.8853785395622253 lpips: 0.25366575084626675
===> valing index: [36 63 46 96]
====> ref idx: [34 73 94]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:38<00:00,  9.58s/it]


=====> scene: materials mean psnr 14.78830890112095 ssim: 0.8034148812294006 lpips: 0.35606543719768524
===> valing index: [26 60 13 47]
====> ref idx: [48 61  0]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:42<00:00, 10.68s/it]

=====> scene: hotdog mean psnr 26.50119909542007 ssim: 0.9523382782936096 lpips: 0.15973489359021187
=====> all mean psnr 19.053743494637473 ssim: 0.864757776260376 lpips: 0.25692243594676256





# DTU no fine tuning

## rendering novel views with nearest 3 views

In [7]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
depth_acc = {}
eval_metric = [0.1,0.05,0.01]
depth_acc[f'abs_err'],depth_acc[f'acc_l_{eval_metric[0]}'],depth_acc[f'acc_l_{eval_metric[1]}'],depth_acc[f'acc_l_{eval_metric[2]}'] = {},{},{},{}
for i_scene, scene in enumerate([1,8,21,103,114]):#,8,21,103,114
# for i_scene, scene in enumerate([1]):#,8,21,103,114
    psnr,ssim,LPIPS_vgg = [],[],[]
    cmd = f'--datadir /home/honglanqing/data/DTU/mvs_training/dtu/scan{scene}  \
     --dataset_name dtu_ft  \
     --net_type v0 --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar ' 
#      --net_type v0 --ckpt runs_fine_tuning/scan1-ft-rgb-update/ckpts/latest.tar' 
    
#      --net_type v0 --ckpt runs_fine_tuning/scan1-ft-paper/ckpts/latest.tar '
    
#      --net_type v0 --ckpt ./ckpts//mvsnerf-v0.tar '

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+12

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'train'
    pad = 16
    args.chunk = 5120


    print('============> rendering dataset <===================')
    dataset_train = dataset_dict[args.dataset_name](args, split='train')
    dataset_val = dataset_dict[args.dataset_name](args, split='val')
    val_idx = dataset_val.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    


    with torch.no_grad():

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset_val)):
            torch.cuda.empty_cache()
            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
            depth = batch['depth'].squeeze().numpy()  # (H, W)
        
            # find nearest image idx from training views
            positions = dataset_train.poses[:,:3,3]
            dis = np.sum(np.abs(positions - dataset_val.poses[[i],:3,3]), axis=-1)
            pair_idx = np.argsort(dis)[:3]
            pair_idx = [dataset_train.img_idx[item] for item in pair_idx]
            
            imgs_source, proj_mats, near_far_source, pose_source = dataset_train.read_source_views(pair_idx=pair_idx,device=device)
            volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
            imgs_source = unpreprocess(imgs_source)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)

            depth_gt, _ =  read_depth(f'/home/honglanqing/data/DTU/mvs_training/dtu/Depths/scan{scene}/depth_map_{val_idx[i]:04d}.pfm')
        
            mask_gt = depth_gt>0
            abs_err = abs_error(depth_rays_preds, depth_gt/200, mask_gt)

            eval_metric = [0.01,0.05, 0.1]
            depth_acc[f'abs_err'][f'{scene}'] = np.mean(abs_err)
            depth_acc[f'acc_l_{eval_metric[0]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[0]).mean()
            depth_acc[f'acc_l_{eval_metric[1]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[1]).mean()
            depth_acc[f'acc_l_{eval_metric[2]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[2]).mean()

            
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))
                
            # quantity
            # mask background since they are outside the far boundle
            mask = depth==0
            imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}_mask.png', mask.astype('uint8')*255)
            rgb_rays[mask],img[mask] = 0.0,0.0
            psnr.append( mse2psnr(np.mean((rgb_rays[~mask]-img[~mask])**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
        

    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)

a = np.mean(list(depth_acc['abs_err'].values()))
b = np.mean(list(depth_acc[f'acc_l_{eval_metric[0]}'].values()))
c = np.mean(list(depth_acc[f'acc_l_{eval_metric[1]}'].values()))
d = np.mean(list(depth_acc[f'acc_l_{eval_metric[2]}'].values()))
print(f'============> abs_err: {a} <=================')
print(f'============> acc_l_{eval_metric[0]}: {b} <=================')
print(f'============> acc_l_{eval_metric[1]}: {c} <=================')
print(f'============> acc_l_{eval_metric[2]}: {d} <=================')
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
==> image down scale: 1.0
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]


  0%|                                                                                                                                               | 0/4 [00:00<?, ?it/s]

==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:22<00:00,  5.58s/it]


=====> scene: 1 mean psnr 27.22732432193617 ssim: 0.9372664093971252 lpips: 0.15456868708133698
==> image down scale: 1.0
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]


  0%|                                                                                                                                               | 0/4 [00:00<?, ?it/s]

==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:20<00:00,  5.18s/it]


=====> scene: 8 mean psnr 27.360482260702646 ssim: 0.9209671020507812 lpips: 0.21667785197496414
==> image down scale: 1.0
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]


  0%|                                                                                                                                               | 0/4 [00:00<?, ?it/s]

==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:21<00:00,  5.32s/it]


=====> scene: 21 mean psnr 21.633413162347757 ssim: 0.8904734253883362 lpips: 0.16485727205872536
==> image down scale: 1.0
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]


  0%|                                                                                                                                               | 0/4 [00:00<?, ?it/s]

==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:21<00:00,  5.39s/it]


=====> scene: 103 mean psnr 29.089540691947978 ssim: 0.9606113433837891 lpips: 0.16754594817757607
==> image down scale: 1.0
===> training index: [25, 21, 33, 22, 14, 15, 26, 30, 31, 35, 34, 43, 46, 29, 16, 36]


  0%|                                                                                                                                               | 0/4 [00:00<?, ?it/s]

==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:20<00:00,  5.17s/it]

=====> scene: 114 mean psnr 28.091805907377125 ssim: 0.949100911617279 lpips: 0.13727304339408875
=====> all mean psnr 26.680513268862335 ssim: 0.9316838383674622 lpips: 0.16818456053733827





## rendering novel views with fixed 3 source views

In [5]:
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
# for i_scene, scene in enumerate([1]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    cmd = f'--datadir /home/honglanqing/data/DTU/mvs_training/dtu/scan{scene}  \
    --dataset_name dtu_ft  \
    --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar '
    
#     --ckpt runs_new/mvsnerf-modify/ckpts/latest.tar'
    
    
#     --ckpt runs_fine_tuning/scan1-ft-paper/ckpts/latest.tar '
    
#     --ckpt ./ckpts//mvsnerf-v0.tar'

    args = config_parser(cmd.split())
    args.use_viewdirs = True

    args.N_samples = 128
    args.feat_dim =  8+3*4

    # create models
    if 0==i_scene:
        render_kwargs_train, render_kwargs_test, start, grad_vars = create_nerf_mvs(args, use_mvs=True, dir_embedder=False, pts_embedder=True)
        filter_keys(render_kwargs_train)

        MVSNet = render_kwargs_train['network_mvs']
        render_kwargs_train.pop('network_mvs')


    datadir = args.datadir
    datatype = 'val'
    pad = 24
    args.chunk = 1024 #5120


    print('============> rendering dataset <===================')
    dataset = dataset_dict[args.dataset_name](args, split=datatype)
    val_idx = dataset.img_idx
    
    save_as_image = True
    save_dir = f'results/test3'
    os.makedirs(save_dir, exist_ok=True)
    MVSNet.train()
    MVSNet = MVSNet.cuda()
    
    with torch.no_grad():

        imgs_source, proj_mats, near_far_source, pose_source = dataset.read_source_views(device=device)
        volume_feature, _, _ = MVSNet(imgs_source, proj_mats, near_far_source, pad=pad)
#         ckpts = torch.load(args.ckpt)
#         volume_feature = ckpts['volume']['feat_volume']
        
        imgs_source = unpreprocess(imgs_source)

        try:
            tqdm._instances.clear() 
        except Exception:     
            pass
        
        for i, batch in enumerate(tqdm(dataset)):
            torch.cuda.empty_cache()
            
            rays, img = decode_batch(batch)
            rays = rays.squeeze().to(device)  # (H*W, 3)
            img = img.squeeze().cpu().numpy()  # (H, W, 3)
            depth = batch['depth'].squeeze().numpy()  # (H, W)
        
            N_rays_all = rays.shape[0]
            rgb_rays, depth_rays_preds = [],[]
            for chunk_idx in range(N_rays_all//args.chunk + int(N_rays_all%args.chunk>0)):

                xyz_coarse_sampled, rays_o, rays_d, z_vals = ray_marcher(rays[chunk_idx*args.chunk:(chunk_idx+1)*args.chunk],
                                                    N_samples=args.N_samples)

                # Converting world coordinate to ndc coordinate
                H, W = img.shape[:2]
                inv_scale = torch.tensor([W - 1, H - 1]).to(device)
                w2c_ref, intrinsic_ref = pose_source['w2cs'][0], pose_source['intrinsics'][0].clone()
                xyz_NDC = get_ndc_coordinate(w2c_ref, intrinsic_ref, xyz_coarse_sampled, inv_scale,
                                             near=near_far_source[0], far=near_far_source[1], pad=pad*args.imgScale_test)


                # rendering
                rgb, disp, acc, depth_pred, alpha, extras = rendering(args, pose_source, xyz_coarse_sampled,
                                                                       xyz_NDC, z_vals, rays_o, rays_d,
                                                                       volume_feature,imgs_source, **render_kwargs_train)
    
                
                rgb, depth_pred = torch.clamp(rgb.cpu(),0,1.0).numpy(), depth_pred.cpu().numpy()
                rgb_rays.append(rgb)
                depth_rays_preds.append(depth_pred)

            
            depth_rays_preds = np.concatenate(depth_rays_preds).reshape(H, W)
            depth_rays_preds, _ = visualize_depth_numpy(depth_rays_preds, near_far_source)
            
            rgb_rays = np.concatenate(rgb_rays).reshape(H, W, 3)
            img_vis = np.concatenate((img*255,rgb_rays*255,depth_rays_preds),axis=1)
            
            if save_as_image:
                imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}.png', img_vis.astype('uint8'))
                os.makedirs('runs_new/mvsnerf-modify/results', exist_ok=True)
                imageio.imwrite(f'runs_new/mvsnerf-modify/results/scan{scene}_{val_idx[i]:03d}.png', (rgb_rays*255).astype('uint8'))
            else:
                rgbs.append(img_vis.astype('uint8'))

            # quantity
            # mask background since they are outside the far boundle
            mask = depth==0
            imageio.imwrite(f'{save_dir}/scan{scene}_{val_idx[i]:03d}_mask.png', mask.astype('uint8')*255)
            rgb_rays[mask],img[mask] = 0.0,0.0
            psnr.append( mse2psnr(np.mean((rgb_rays[~mask]-img[~mask])**2)))
            ssim.append( structural_similarity(rgb_rays, img, multichannel=True))
            
            img_tensor = torch.from_numpy(rgb_rays)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
            img_gt_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0
            LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

        print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
        psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)

    if not save_as_image:
        imageio.mimwrite(f'{save_dir}/{scene}_spiral.mp4', np.stack(rgbs), fps=20, quality=10)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

Found ckpts ['runs_new/mvsnerf-modify/ckpts/latest.tar']
Reloading from runs_new/mvsnerf-modify/ckpts/latest.tar
==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:33<00:00,  8.40s/it]


=====> scene: 1 mean psnr 22.881604527099558 ssim: 0.8879632949829102 lpips: 0.23583434522151947
==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:33<00:00,  8.45s/it]


=====> scene: 8 mean psnr 21.561876771171217 ssim: 0.8705674409866333 lpips: 0.2916260063648224
==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:30<00:00,  7.56s/it]


=====> scene: 21 mean psnr 17.774449750657556 ssim: 0.8091757893562317 lpips: 0.2539396323263645
==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:33<00:00,  8.45s/it]


=====> scene: 103 mean psnr 22.71532497414603 ssim: 0.9276988506317139 lpips: 0.2320169433951378
==> image down scale: 1.0
===> valing index: [32, 24, 23, 44]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:34<00:00,  8.62s/it]

=====> scene: 114 mean psnr 22.506578454721925 ssim: 0.9139024019241333 lpips: 0.18781418725848198
=====> all mean psnr 21.487966895559257 ssim: 0.8818615674972534 lpips: 0.24024622291326522





# Pairs generation

In [3]:
import json,torch
import sys,os
import numpy as np
root = '/home/hengfei/Desktop/research/mvsnerf'
os.chdir(root)
sys.path.append(root)
pairs = torch.load('./configs/pairs.th')

# llff
root_dir = '/home/hengfei/Desktop/research/mvsnerf/xgaze/'
for scene in ['xgaze_11images_cropped_colmapCODE']:#
    poses_bounds = np.load(os.path.join(root_dir, scene, 'poses_bounds.npy'))  # (N_images, 11)
    poses = poses_bounds[:, :15].reshape(-1, 3, 5)  # (N_images, 3, 5)
    poses = np.concatenate([poses[..., 1:2], - poses[..., :1], poses[..., 2:4]], -1)

    ref_position = np.mean(poses[..., 3],axis=0, keepdims=True)
    dist = np.sum(np.abs(poses[..., 3] - ref_position), axis=-1)
    pair_idx = np.argsort(dist)[:11]
#     pair_idx = torch.randperm(len(poses))[:20].tolist()

    pairs[f'{scene}_test'] = pair_idx[::6]
    pairs[f'{scene}_val'] = pair_idx[::6]
    pairs[f'{scene}_train'] = np.delete(pair_idx, range(0,11,6))

torch.save(pairs,'/home/hengfei/Desktop/research/mvsnerf/configs/pairs.th')

# quantity evauation

In [3]:
import sys,os,imageio,lpips,cv2,torch,glob
import numpy as np
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity

In [4]:
loss_fn_vgg = lpips.LPIPS(net='vgg') 
mse2psnr = lambda x : -10. * np.log(x) / np.log(10.)



def acc_threshold(abs_err, threshold):
    """
    computes the percentage of pixels whose depth error is less than @threshold
    """
    acc_mask = abs_err < threshold
    return  acc_mask.astype('float') if type(abs_err) is np.ndarray else acc_mask.float()

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /home/honglanqing/anaconda3/envs/pytorch1.8_skhu/lib/python3.7/site-packages/lpips/weights/v0.1/vgg.pth


# nerf 

In [5]:
root = '/mnt/new_disk_2/anpei/code/nerf/logs/'
root_gt = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt = cv2.imread(f'{root_gt}/{scene}/{scene}/{scene}_00009999_{j:02d}.png')[...,::-1]
        gt, img = gt[:,:800].astype('float')/255.0, img.astype('float')/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.resize(cv2.imread(file)[...,::-1],(960,640))
        gt =  cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/{scene}_{idx:03d}.png')[...,::-1]
        gt, img = gt[:,:960].astype('float')/255.0, img.astype('float')/255.0


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/scan{scene}_test/testset_200000/*.png'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th'

# ours

In [6]:
root = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/{scene}/{scene}/{scene}_00009999_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file).astype('float')[...,::-1]
        gt, img = img[:,:800]/255.0, img[:,800:1600]/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/{scene}/{scene}/{scene}_00009999_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt, img = img[:,:960].astype('float')/255.0, img[:,960:960*2].astype('float')/255.0

        H_crop, W_crop = np.array(gt.shape[:2])//10
        img = img[H_crop:-H_crop,W_crop:-W_crop]
        gt = gt[H_crop:-H_crop,W_crop:-W_crop]


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

root = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/dtu_scan{scene}_1h/dtu_scan{scene}_1h/00010239_*'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1][:,640:1280]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th'

# ibrnet

In [6]:
# root = '/mnt/new_disk2/anpei/code/IBRNet/logs'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
# psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
# for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#,
#     psnr,ssim,LPIPS_vgg = [],[],[]
    
#     files = sorted(glob.glob(f'{root}/nerf-3view-finetuning-nearest-{scene}/010000_*'))
#     for j, file in enumerate(files):

#         idx = pairs[f'{scene}_val'][j]
#         img = cv2.imread(file).astype('float')[...,::-1]
#         gt, img = img[:,800:800*2]/255.0, img[:,800*3:800*4]/255.0

# #         H_crop, W_crop = np.array(gt.shape[:2])//10
# #         img = img[H_crop:-H_crop,W_crop:-W_crop]
# #         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

#         psnr.append( mse2psnr(np.mean((gt-img)**2)))
#         ssim.append( structural_similarity(gt, img, multichannel=True))

#         img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
#         img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
#         LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

#     print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
#     psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
# print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'/mnt/new_disk2/anpei/code/IBRNet/logs/llff-3view-finetuning-nearest-{scene}/010000_*'))
    for j, file in enumerate(files):

        idx = pairs[f'{scene}_val'][j]
        img = cv2.imread(file)[...,::-1]
        gt, img = img[:,1008:1008*2].astype('float')/255.0, img[:,1008*3:1008*4].astype('float')/255.0
        img, gt = cv2.resize(img,(960,640)), cv2.resize(gt,(960,640))

        H_crop, W_crop = np.array(gt.shape[:2])//10
        img = img[H_crop:-H_crop,W_crop:-W_crop]
        gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        
        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

root = '/mnt/new_disk2/anpei/code/IBRNet/logs'
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    files = sorted(glob.glob(f'{root}/dtu-3view-finetuning-nearest-scan{scene}/010000_*'))
    for j, file in enumerate(files):

        idx = pairs[f'dtu_val'][j]
        img = cv2.imread(file)[...,::-1][:,3*640:4*640]
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: fern mean psnr 22.64474646040451 ssim: 0.7736232480476191 lpips: 0.26588304713368416
=====> scene: flower mean psnr 26.553349019087786 ssim: 0.9092690161984827 lpips: 0.14575103670358658
=====> scene: fortress mean psnr 30.338842953903075 ssim: 0.9368867837660259 lpips: 0.13289865292608738
=====> scene: horns mean psnr 25.01290939681414 ssim: 0.9040335882553917 lpips: 0.1899307444691658
=====> scene: leaves mean psnr 22.076508076698556 ssim: 0.8430354849586478 lpips: 0.17987846583127975
=====> scene: orchids mean psnr 19.007830032899616 ssim: 0.7045611776629173 lpips: 0.2861044891178608
=====> scene: room mean psnr 31.05473820815669 ssim: 0.9723299877991765 lpips: 0.08911459799855947
=====> scene: trex mean psnr 22.339864946223464 ssim: 0.8421255627008343 lpips: 0.22207806631922722
=====> all mean psnr 24.878598636773482 ssim: 0.8607331061736369 lpips: 0.1889548875624314
=====> scene: 1 mean psnr 30.99564992655386 ssim: 0.9548394719193786 lpips: 0.1285402663052082
=====> 

# pixel nerf

In [6]:
root = '/mnt/new_disk2/anpei/code/pixel-nerf/visuals/dtu'
root_gt = '/mnt/new_disk2/anpei/code/MVS-NeRF/runs_fine_tuning/'
pairs = torch.load('/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')
psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['chair','drums','ficus','hotdog','lego','materials','mic','ship']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'{scene}_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.imread(f'{root}/{scene}_{idx:03d}.png')[...,::-1]
        gt = cv2.imread(f'{root_gt}/{scene}/{scene}/{scene}_00009999_{j:02d}.png')[...,::-1]
        gt, img = gt[:,:800].astype('float')/255.0, img.astype('float')/255.0

#         H_crop, W_crop = np.array(gt.shape[:2])//10
#         img = img[H_crop:-H_crop,W_crop:-W_crop]
#         gt = gt[H_crop:-H_crop,W_crop:-W_crop]

        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]
for i_scene, scene in enumerate(['fern','flower','fortress','horns','leaves', 'orchids', 'room',  'trex']):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'{scene}_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.resize(cv2.imread(f'{root}/{scene}_{idx:03d}.png')[...,::-1],(960,640))
        gt =  cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/{scene}_{idx:03d}.png')[...,::-1]
        gt, img = gt[:,:960].astype('float')/255.0, img.astype('float')/255.0


        psnr.append( mse2psnr(np.mean((gt-img)**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

psnr_all,ssim_all,LPIPS_vgg_all,depth_acc = [],[],[],[]
for i_scene, scene in enumerate([1,8,21,103,114]):#,
    psnr,ssim,LPIPS_vgg = [],[],[]
    
    pairs_idx = pairs[f'dtu_val']
    for j, file in enumerate(pairs_idx):

        idx = pairs_idx[j]
        img = cv2.imread(f'{root}/scan{scene}_{idx:03d}.png')[...,::-1]
        
        gt = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}.png')[...,::-1]
        mask = cv2.imread(f'/mnt/new_disk2/anpei/code/MVS-NeRF/results/test3/scan{scene}_{idx:03d}_mask.png')==255
        gt, img = gt[:,:640].astype('float')/255.0, img.astype('float')/255.0

        gt[mask],img[mask] = 0.0,0.0
        psnr.append( mse2psnr(np.mean((gt[~mask]-img[~mask])**2)))
        ssim.append( structural_similarity(gt, img, multichannel=True))

        img_tensor = torch.from_numpy(img)[None].permute(0,3,1,2).float()*2-1.0 # image should be RGB, IMPORTANT: normalized to [-1,1]
        img_gt_tensor = torch.from_numpy(gt)[None].permute(0,3,1,2).float()*2-1.0
        LPIPS_vgg.append( loss_fn_vgg(img_tensor, img_gt_tensor).item())
        
        # depth
#         depth_pred = torch.load(f'{root}/scan{scene}_{idx:03d}_depth.th')
#         depth_gt,_ =  read_depth(f'/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Depths/scan{scene}/depth_map_{idx:04d}.pfm')
        
#         mask_gt = depth_gt>0
#         abs_err = abs_error(depth_pred*1.5, depth_gt/200, mask_gt).numpy()

#         eval_metric = [0.01,0.05, 0.1]
#         depth_acc[f'abs_err'][f'{scene}'] = np.mean(abs_err)
#         depth_acc[f'acc_l_{eval_metric[0]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[0]).mean()
#         depth_acc[f'acc_l_{eval_metric[1]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[1]).mean()
#         depth_acc[f'acc_l_{eval_metric[2]}'][f'{scene}'] = acc_threshold(abs_err,eval_metric[2]).mean()

    print(f'=====> scene: {scene} mean psnr {np.mean(psnr)} ssim: {np.mean(ssim)} lpips: {np.mean(LPIPS_vgg)}')   
    psnr_all.append(psnr);ssim_all.append(ssim);LPIPS_vgg_all.append(LPIPS_vgg)
print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') 

=====> scene: chair mean psnr 7.175962813343725 ssim: 0.6243642351905847 lpips: 0.38591109961271286
=====> scene: drums mean psnr 8.148548711878252 ssim: 0.6701584468514097 lpips: 0.42121122032403946
=====> scene: ficus mean psnr 6.608732738834844 ssim: 0.668716265099144 lpips: 0.3350602239370346
=====> scene: hotdog mean psnr 6.799387670799135 ssim: 0.6689815218041557 lpips: 0.43327029794454575
=====> scene: lego mean psnr 7.740217521658803 ssim: 0.6710903029993184 lpips: 0.42670799791812897
=====> scene: materials mean psnr 7.609290420358684 ssim: 0.6441046576733512 lpips: 0.43245941400527954
=====> scene: mic mean psnr 7.707203698223274 ssim: 0.7294597852809476 lpips: 0.32929887622594833
=====> scene: ship mean psnr 7.295484760785579 ssim: 0.5836685948507447 lpips: 0.5257005095481873
=====> all mean psnr 7.385603541985287 ssim: 0.657567976218707 lpips: 0.4112024549394846
=====> scene: fern mean psnr 12.397648684821284 ssim: 0.5312397318110376 lpips: 0.6500117480754852
=====> scene: 