In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [None]:
import os
# Make the project folder on the mounted Drive the working directory,
# so relative paths used later resolve inside it.
os.chdir("/content/gdrive/MyDrive/Colab Notebooks/NeRF/")

In [None]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[K     |████████████████████████████████| 549 kB 8.2 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 93.7 MB/s 
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.11.1 timm-0.6.12


In [None]:
!pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting einops
  Downloading einops-0.6.0-py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 554 kB/s 
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.0


In [None]:
!pip install tensorboardX

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorboardX
  Downloading tensorboardX-2.5.1-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 7.3 MB/s 
Installing collected packages: tensorboardX
Successfully installed tensorboardX-2.5.1


Parameters

In [None]:
class Parameters_gen_video:
    """Hand-filled settings object for the video-generation run.

    Every option is a plain instance attribute with a default value that the
    notebook user overwrites after construction.

    ``VisionNerfModel`` reads ``args.distributed``; this class used to define
    only the misspelled ``distribted``, which made that read fail with an
    AttributeError. The correctly spelled attribute is now the real one and
    ``distribted`` is kept as a read/write alias for backward compatibility.
    """
    def __init__(self):
        self.config=''  # config file path
        self.expname=''  # experiment name
        self.ckptdir=''  # checkpoint folder
        self.ckpt_path=''  # weights npy for coarse net
        self.outdir=''  # output vid directory
        self.local_rank=0
        self.data_path=''  # dataset to train
        self.img_hw=[1024,768]  # img sz
        self.focal=131.25
        self.radius=1.3
        self.data_index=[]
        self.z_near=0.8
        self.z_far=1.8
        self.fps=12
        self.no_reload=False
        self.distributed=False  # read by VisionNerfModel as args.distributed
        self.num_frames=40
        self.elevation=0.0
        self.chunk_size=128
        self.im_feat_dim=128
        self.mlp_feat_dim=512
        self.freq_num=10
        self.mlp_block_num=2
        self.coarse_only=False
        self.anti_alias_pooling=1
        self.num_source_views=1
        self.freeze_pos_embed=False
        self.no_skip_conv=False
        self.lrate_feature=1e-3
        self.lrate_mlp=5e-4
        self.lrate_decay_factor=0.5
        self.lrate_decay_steps=50000
        self.warmup_steps=10000
        self.scheduler='steplr'
        self.use_warmup=False
        self.bbox_step=100000
        self.N_samples=64
        self.N_importance=128
        self.inv_uniform=False
        self.det=False
        self.white_bkgd=False

    @property
    def distribted(self):
        """Deprecated misspelling of ``distributed``; returns the same value."""
        return self.distributed

    @distribted.setter
    def distribted(self, value):
        # Writes through to the correctly spelled attribute so both names agree.
        self.distributed = value

In [None]:
class Parameters_eval:
    """Hand-filled settings object for the evaluation run.

    All options are plain instance attributes initialised to defaults; the
    notebook user overwrites the ones that matter (paths, experiment name, ...)
    after constructing the object.
    """

    def __init__(self):
        # --- run identification and file locations ---
        self.config = ''
        self.expname = ''
        self.ckptdir = ''
        self.ckpt_path = ''
        self.outdir = ''
        self.local_rank = 0
        self.include_src = False

        # --- data selection ---
        self.data_path = ''
        self.data_type = 'srn'
        self.img_hw = [1024, 768]
        self.data_range = [0, 50]
        self.data_indices = [0]
        self.use_data_index = False
        self.pose_index = 64

        # --- execution ---
        self.no_reload = False
        self.distributed = False
        self.skip = 1
        self.chunk_size = 128

        # --- network configuration ---
        self.im_feat_dim = 128
        self.mlp_feat_dim = 512
        self.freq_num = 10
        self.mlp_block_num = 2
        self.coarse_only = False
        self.anti_alias_pooling = 1
        self.num_source_views = 1
        self.freeze_pos_embed = False
        self.no_skip_conv = False

        # --- optimiser / schedule ---
        self.lrate_feature = 1e-3
        self.lrate_mlp = 5e-4
        self.lrate_decay_factor = 0.5
        self.lrate_decay_steps = 50000
        self.warmup_steps = 10000
        self.scheduler = 'steplr'
        self.use_warmup = False
        self.bbox_step = 100000

        # --- remaining options (consumed outside this part of the notebook) ---
        self.N_samples = 64
        self.N_importance = 64
        self.inv_uniform = False
        self.det = False
        self.white_bkgd = False

Model

Model/Criterion.py

In [None]:
import torch
import torch.nn as nn

TINY_NUMBER = 1e-6      # float32 only has 7 decimal digits precision

def img2mse(x, y, mask=None):
    '''
    Mean squared error between two images, optionally weighted by a mask.

    :param x: img 1, [(...), 3]
    :param y: img 2, [(...), 3]
    :param mask: optional, [(...)]; broadcast over the last (channel) axis
    :return: mse score (scalar tensor)
    '''
    squared_error = (x - y) * (x - y)
    if mask is not None:
        # Normalise by (mask weight * channel count); TINY_NUMBER keeps the
        # division finite when the mask is all zeros.
        numerator = torch.sum(squared_error * mask.unsqueeze(-1))
        denominator = torch.sum(mask) * x.shape[-1] + TINY_NUMBER
        return numerator / denominator
    return torch.mean(squared_error)

class Criterion(nn.Module):
    '''Training loss module: masked MSE between predicted and target colours.'''

    def __init__(self):
        super().__init__()

    def forward(self, outputs, ray_batch, scalars_to_log):
        '''
        training criterion

        :param outputs: dict providing 'rgb' and 'mask' tensors
        :param ray_batch: dict providing the target 'rgb' tensor
        :param scalars_to_log: passed through unchanged
        :return: (loss, scalars_to_log)
        '''
        prediction = outputs['rgb']
        weights = outputs['mask'].float()
        target = ray_batch['rgb']
        return img2mse(prediction, target, weights), scalars_to_log

Network/Resnet_mlp.py

In [None]:
import numpy as np
import torch
import torch.nn as nn

class GaussianActivation(nn.Module):
    """Element-wise activation ``exp(-0.5 * x**2 / a**2)``.

    Args:
        a: width parameter of the bump (stored as ``self.a``).
    """

    def __init__(self, a=1.0):
        super().__init__()
        self.a = a

    def forward(self, x):
        exponent = -0.5 * x ** 2 / self.a ** 2
        return torch.exp(exponent)

class ResnetBlock(torch.nn.Module):
    """Two-layer fully connected residual block with pre-activation.

    Computes ``fc_1(act(fc_0(act(x)))) + shortcut(x)`` where ``act`` is ReLU
    (default) or ``GaussianActivation`` and the shortcut is a bias-free linear
    projection only when the input and output widths differ.

    Args:
        input_size: width of the incoming features
        hidden_size: width after the first linear layer
        output_size: width of the block output
        use_gaussian: use GaussianActivation instead of ReLU
    """
    def __init__(self, input_size, hidden_size, output_size, use_gaussian=False):
        super().__init__()

        if use_gaussian:
            self.prelu_0 = GaussianActivation()
            self.prelu_1 = GaussianActivation()
        else:
            # NOTE: both ReLUs operate in place (see forward for the consequence).
            self.prelu_0 = torch.nn.ReLU(inplace=True)
            self.prelu_1 = torch.nn.ReLU(inplace=True)

        self.fc_0 = torch.nn.Linear(input_size, hidden_size)
        self.fc_1 = torch.nn.Linear(hidden_size, output_size)

        # Identity shortcut when the widths match, otherwise a linear projection.
        self.shortcut = (
            torch.nn.Linear(input_size, output_size, bias=False)
            if input_size != output_size else None)


    def forward(self, x):
        """Apply the block to ``x`` and return ``residual + shortcut``.

        NOTE(review): with the default in-place ReLU, ``self.prelu_0(x)``
        overwrites ``x`` itself, so the shortcut on the next line is built from
        the rectified tensor (and the caller's tensor is modified too). The
        Gaussian variant is not in place. Statement order therefore matters;
        changing it would alter what existing checkpoints compute.
        """
        residual = self.fc_1(self.prelu_1(self.fc_0(self.prelu_0(x))))
        shortcut = x if self.shortcut is None else self.shortcut(x)
        return residual + shortcut


class PosEncodeResnet(torch.nn.Module):
    """Residual MLP fed with a positionally encoded input plus a raw input.

    The number of frequency bases is not a constructor argument; it is read
    from ``args.freq_num``.
    """

    def __init__(self, args, pos_size, x_size,
                 hidden_size, output_size, block_num,
                 freq_factor=np.pi, use_gaussian=False):
        """
        Args:
            args: settings object; only ``args.freq_num`` is read here
            pos_size: channel count of the input that gets positionally encoded
            x_size: size of the input vector that is NOT encoded
            hidden_size: hidden channels
            output_size: output channels
            block_num: how many resnet blocks
            freq_factor: multiplier applied to the power-of-two frequencies
            use_gaussian: use GaussianActivation instead of ReLU
        """
        super().__init__()

        self.args = args
        self.freq_factor = freq_factor

        # Each encoded channel yields itself plus one sin and one cos per frequency.
        encoded_width = pos_size * (2 * self.args.freq_num + 1)
        self.input_layer = torch.nn.Linear(encoded_width + x_size, hidden_size)

        residual_stack = []
        for _ in range(block_num):
            residual_stack.append(
                ResnetBlock(hidden_size, hidden_size, hidden_size, use_gaussian=use_gaussian)
            )
        self.blocks = torch.nn.ModuleList(residual_stack)

        self.output_prelu = (
            GaussianActivation() if use_gaussian else torch.nn.ReLU(inplace=True)
        )
        self.output_layer = torch.nn.Linear(hidden_size, output_size)
        self.softplus = torch.nn.Softplus()
        self.sigmoid = torch.nn.Sigmoid()

    def posenc(self, x):
        """Encode ``x`` as ``[x, sin(x * f_k), cos(x * f_k)]`` per channel.

        ``f_k = freq_factor * 2**k`` for ``k < args.freq_num``. The result keeps
        the first two dimensions of ``x`` and flattens everything else, so a
        rank-3 input is expected.
        """
        octaves = torch.arange(self.args.freq_num, device=x.device)
        freq_multiplier = (self.freq_factor * 2 ** octaves).view(1, 1, 1, -1)
        x_expand = x.unsqueeze(-1)
        phase = x_expand * freq_multiplier
        stacked = torch.cat([x_expand, torch.sin(phase), torch.cos(phase)], -1)
        return stacked.view(x.shape[:2] + (-1,))

    def forward(self, pos_x, in_x):
        """
        Args:
            pos_x: input to be encoded with positional encodings
            in_x: input NOT to be encoded with positional encodings
        Returns:
            Tensor whose last axis holds sigmoid-activated values followed by a
            single softplus-activated value in the final slot.
        """
        hidden = torch.cat([self.posenc(pos_x), in_x], dim=-1)
        hidden = self.input_layer(hidden)
        for layer in self.blocks:
            hidden = layer(hidden)
        raw = self.output_layer(self.output_prelu(hidden))
        leading = self.sigmoid(raw[..., :-1])
        trailing = self.softplus(raw[..., -1:])
        return torch.cat([leading, trailing], -1)

Network/Vit.py

In [None]:
import types
import math

import timm
import torch
import torch.nn as nn
import torch.nn.functional as F


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding (bias-free, reflect padding equal to the dilation)"""
    options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
        padding_mode='reflect',
    )
    return nn.Conv2d(in_planes, out_planes, **options)

def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution (bias-free)"""
    options = dict(kernel_size=1, stride=stride, bias=False, padding_mode='reflect')
    return nn.Conv2d(in_planes, out_planes, **options)


class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its argument untouched."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        passthrough = x
        return passthrough


class BasicBlock(nn.Module):
    """Two 3x3 convolutions with normalisation and an additive skip connection.

    Args:
        inplanes: input channels
        planes: output channels
        stride: stride of the first convolution
        downsample: optional module applied to the input before the addition
        norm_layer: normalisation layer factory (defaults to ``nn.BatchNorm2d``);
            it is built with ``track_running_stats=False, affine=True``
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes, track_running_stats=False, affine=True)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes, track_running_stats=False, affine=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + skip)``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The skip branch is the raw input unless a downsample module was given.
        skip = x if self.downsample is None else self.downsample(x)

        out += skip
        return self.relu(out)

# Module-level store filled by the forward hooks created below.
activations = {}


def get_activation(name):
    """Build a forward hook that stores a module's output in ``activations[name]``."""
    def _record(model, input, output):
        # Must not return a value: a non-None hook result would replace the output.
        activations.update({name: output})

    return _record


class Slice(nn.Module):
    """Drop the first ``start_index`` entries along dimension 1."""

    def __init__(self, start_index=1):
        super().__init__()
        self.start_index = start_index

    def forward(self, x):
        kept = x[:, self.start_index:]
        return kept


class AddReadout(nn.Module):
    """Add the leading readout entry to every remaining entry along dimension 1.

    With ``start_index == 2`` the readout is the mean of entries 0 and 1,
    otherwise it is entry 0 alone.
    """

    def __init__(self, start_index=1):
        super().__init__()
        self.start_index = start_index

    def forward(self, x):
        readout = x[:, 0]
        if self.start_index == 2:
            readout = (readout + x[:, 1]) / 2
        remaining = x[:, self.start_index:]
        return remaining + readout.unsqueeze(1)


class ProjectReadout(nn.Module):
    """Concatenate entry 0 onto every remaining entry and project back down.

    The projection is ``Linear(2 * in_features, in_features)`` followed by GELU.
    """

    def __init__(self, in_features, start_index=1):
        super().__init__()
        self.start_index = start_index

        self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())

    def forward(self, x):
        remaining = x[:, self.start_index:]
        # Broadcast entry 0 so it can be paired with each remaining entry.
        readout = x[:, 0].unsqueeze(1).expand_as(remaining)
        paired = torch.cat((remaining, readout), -1)
        return self.project(paired)


class Transpose(nn.Module):
    """Module wrapper around ``Tensor.transpose(dim0, dim1)``."""

    def __init__(self, dim0, dim1):
        super().__init__()
        self.dim0 = dim0
        self.dim1 = dim1

    def forward(self, x):
        return x.transpose(self.dim0, self.dim1)

def forward_vit(pretrained, x):
    """Run the hooked transformer and post-process the four captured activations.

    Args:
        pretrained: object exposing ``model`` (with ``forward_flex`` and
            ``patch_size``), ``activations`` (keys "1".."4") and
            ``act_postprocess1`` .. ``act_postprocess4``
        x: 4-D input batch
    Returns:
        Tuple of the four post-processed feature maps.
    """
    _, _, height, width = x.shape

    # Only run for its side effect: the forward pass refreshes pretrained.activations.
    pretrained.model.forward_flex(x)

    # Token grid for the actual input size; this is used instead of the
    # fixed-size module stored at index 2 of each post-processing chain.
    grid = torch.Size(
        [
            height // pretrained.model.patch_size[1],
            width // pretrained.model.patch_size[0],
        ]
    )
    to_grid = nn.Unflatten(2, grid)

    results = []
    for key in ("1", "2", "3", "4"):
        chain = getattr(pretrained, "act_postprocess" + key)
        feat = chain[0:2](pretrained.activations[key])
        if feat.ndim == 3:
            feat = to_grid(feat)
        results.append(chain[3 : len(chain)](feat))

    return tuple(results)

def _resize_pos_embed(self, posemb, gs_h, gs_w):
    posemb_tok, posemb_grid = (
        posemb[:, : self.start_index],
        posemb[0, self.start_index :],
    )

    gs_old = int(math.sqrt(len(posemb_grid)))

    posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
    posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
    posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)

    posemb = torch.cat([posemb_tok, posemb_grid], dim=1)

    return posemb

def forward_flex(self, x):
    """Transformer forward pass with the position embedding resized to the input.

    Meant to be bound as a method on a vision-transformer instance. ``x`` must
    be a 4-D batch; the returned tensor is the normalised token sequence.
    """
    batch, _, height, width = x.shape

    pos_embed = self._resize_pos_embed(
        self.pos_embed, height // self.patch_size[1], width // self.patch_size[0]
    )

    if hasattr(self.patch_embed, "backbone"):
        x = self.patch_embed.backbone(x)
        if isinstance(x, (list, tuple)):
            # last feature if backbone outputs list/tuple of features
            x = x[-1]

    tokens = self.patch_embed.proj(x).flatten(2).transpose(1, 2)

    # Prefix tokens: the class token, plus the distillation token when present.
    prefix = [self.cls_token.expand(batch, -1, -1)]
    if getattr(self, "dist_token", None) is not None:
        prefix.append(self.dist_token.expand(batch, -1, -1))
    tokens = torch.cat(prefix + [tokens], dim=1)

    tokens = self.pos_drop(tokens + pos_embed)

    for block in self.blocks:
        tokens = block(tokens)

    return self.norm(tokens)

def get_readout_oper(vit_features, features, use_readout, start_index=1):
    """Build one readout-handling module per entry of ``features``.

    Args:
        vit_features: token width, used only by the "project" variant
        features: sequence whose length decides how many modules are returned
        use_readout: "ignore" (Slice), "add" (AddReadout) or "project"
            (ProjectReadout)
        start_index: forwarded to the chosen module
    Returns:
        List with ``len(features)`` modules. For "ignore" and "add" the list
        repeats one shared parameter-free instance; "project" creates a
        separate instance per entry.
    Raises:
        AssertionError: for any other ``use_readout``. It is raised explicitly
            rather than via ``assert`` so the check survives ``python -O``
            (where it previously vanished and surfaced as UnboundLocalError).
    """
    if use_readout == "ignore":
        return [Slice(start_index)] * len(features)
    if use_readout == "add":
        return [AddReadout(start_index)] * len(features)
    if use_readout == "project":
        return [ProjectReadout(vit_features, start_index) for _ in features]
    raise AssertionError(
        "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'"
    )

def _make_vit_b16_backbone(
    model,
    features=[96, 192, 384, 768],
    size=[384, 384],
    hooks=[2, 5, 8, 11],
    vit_features=768,
    use_readout="ignore",
    start_index=1,
):
    """Wrap a vision transformer so four intermediate blocks can be tapped.

    Args:
        model: transformer exposing ``blocks`` (indexable modules)
        features: output channels of the four post-processing chains
        size: nominal input size; only used for the fixed-size Unflatten stored
            at index 2 of each chain
        hooks: indices into ``model.blocks`` whose outputs are captured
        vit_features: token width of the transformer
        use_readout: readout handling, see ``get_readout_oper``
        start_index: number of leading non-patch tokens
    Returns:
        ``nn.Module`` with attributes ``model``, ``activations`` and
        ``act_postprocess1`` .. ``act_postprocess4``; ``model`` additionally
        gets ``start_index``, ``patch_size``, ``forward_flex`` and
        ``_resize_pos_embed`` attached.

    The captured outputs are kept in a dict owned by the returned module.
    Previously every backbone wrote into one module-global dict, so two
    backbones living in the same process overwrote each other's activations.
    The default list arguments are never mutated here.
    """
    pretrained = nn.Module()
    pretrained.model = model

    captured = {}

    def _capture(key):
        def hook(module, inputs, output):
            # Returns None on purpose so the block output itself is untouched.
            captured[key] = output

        return hook

    for position, key in enumerate(("1", "2", "3", "4")):
        pretrained.model.blocks[hooks[position]].register_forward_hook(_capture(key))

    pretrained.activations = captured

    readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)

    # Every chain starts the same way: readout handling, [B, N, C] -> [B, C, N],
    # reshape N into a grid, then a 1x1 convolution to the target width.
    # Chain 1 then upsamples x4, chain 2 upsamples x2, chain 3 keeps the
    # resolution and chain 4 halves it with a stride-2 convolution.
    pretrained.act_postprocess1 = nn.Sequential(
        readout_oper[0],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[0],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.ConvTranspose2d(
            in_channels=features[0],
            out_channels=features[0],
            kernel_size=4,
            stride=4,
            padding=0,
            bias=True,
            dilation=1,
            groups=1,
        ),
    )

    pretrained.act_postprocess2 = nn.Sequential(
        readout_oper[1],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[1],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.ConvTranspose2d(
            in_channels=features[1],
            out_channels=features[1],
            kernel_size=2,
            stride=2,
            padding=0,
            bias=True,
            dilation=1,
            groups=1,
        ),
    )

    pretrained.act_postprocess3 = nn.Sequential(
        readout_oper[2],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[2],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
    )

    pretrained.act_postprocess4 = nn.Sequential(
        readout_oper[3],
        Transpose(1, 2),
        nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
        nn.Conv2d(
            in_channels=vit_features,
            out_channels=features[3],
            kernel_size=1,
            stride=1,
            padding=0,
        ),
        nn.Conv2d(
            in_channels=features[3],
            out_channels=features[3],
            kernel_size=3,
            stride=2,
            padding=1,
        ),
    )

    pretrained.model.start_index = start_index
    pretrained.model.patch_size = [16, 16]

    # We inject this function into the VisionTransformer instances so that
    # we can use it with interpolated position embeddings without modifying the library source.
    pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
    pretrained.model._resize_pos_embed = types.MethodType(
        _resize_pos_embed, pretrained.model
    )

    return pretrained

def _make_pretrained_vitb16_128(
    pretrained, use_readout="ignore", hooks=None
):
    """Create timm's ``vit_base_patch16_224`` at ``img_size=128`` and wrap it.

    Args:
        pretrained: forwarded to ``timm.create_model(pretrained=...)``
        use_readout: readout handling passed on to ``_make_vit_b16_backbone``
        hooks: block indices to tap; ``None`` selects ``[2, 5, 8, 11]``
    Returns:
        The wrapper module built by ``_make_vit_b16_backbone``.
    """
    model = timm.create_model("vit_base_patch16_224", img_size=128, pretrained=pretrained)

    # Identity check: `== None` would defer to a custom __eq__ on the argument.
    if hooks is None:
        hooks = [2, 5, 8, 11]
    return _make_vit_b16_backbone(
        model,
        features=[96, 192, 384, 768],
        hooks=hooks,
        use_readout=use_readout
    )

def _make_encoder(
    backbone,
    features,
    use_pretrained,
    groups=1,
    expand=False,
    hooks=None,
    use_readout="ignore"
):
    if backbone == "vitb16_128":
        pretrained = _make_pretrained_vitb16_128(
            use_pretrained,
            hooks=hooks,
            use_readout=use_readout
        )
        scratch = _make_scratch(
            [96, 192, 384, 768], features, groups=groups, expand=expand
        )  # ViT-B/16 - 84.6% Top1 (backbone)
    else:
        print(f"Backbone '{backbone}' not implemented")
        assert False

    return pretrained, scratch

def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    out_shape4 = out_shape
    if expand == True:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0],
        out_shape1,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        groups=groups,
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1],
        out_shape2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        groups=groups,
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2],
        out_shape3,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        groups=groups,
    )
    scratch.layer4_rn = nn.Conv2d(
        in_shape[3],
        out_shape4,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        groups=groups,
    )

    return scratch

class VIT(nn.Module):
    """Image encoder: hooked vision transformer plus an optional convolutional skip path.

    Four intermediate transformer activations are reduced to ``features``
    channels each, resized to a common resolution, concatenated and fused by
    ``scratch.output_conv``. With ``use_skip_conv`` the fused map has
    ``features // 2`` channels and is concatenated with a ``features // 2``
    channel map computed from the raw image by ``scratch.skip_conv``; without
    it the fused map has ``features`` channels.

    Args:
        features: base channel count (see above)
        backbone: backbone name; only "vitb16_128" has hook indices defined
        readout: readout handling passed to the backbone factory
        channels_last: convert the input to channels-last memory format first
        train_pos_embed: value assigned to ``pos_embed.requires_grad``
        norm_layer: normalisation layer factory (defaults to ``nn.BatchNorm2d``)
        use_skip_conv: enable the convolutional skip path
    """

    def __init__(
        self,
        features=256,
        backbone="vitb16_128",
        readout="project",
        channels_last=False,
        train_pos_embed=True,
        norm_layer=None,
        use_skip_conv=True,
    ):

        super(VIT, self).__init__()

        self.channels_last = channels_last

        hooks = {
            "vitb16_128": [2, 5, 8, 11],
        }

        # Instantiate backbone and reassemble blocks
        self.pretrained, self.scratch = _make_encoder(
            backbone,
            features,
            True,  # use_pretrained: passed down to timm.create_model(pretrained=...)
            groups=1,
            expand=False,
            hooks=hooks[backbone],
            use_readout=readout
        )

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.use_skip_conv = use_skip_conv
        if use_skip_conv:
            self.scratch.output_conv = nn.Sequential(
                nn.ReLU(True),
                nn.Conv2d(4*features, features, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
            )

            # Projects the 64-channel stem to features//2 for the first
            # BasicBlock's skip connection.
            downsample = nn.Sequential(
                conv1x1(64, features//2, 1),
                norm_layer(features//2, track_running_stats=False, affine=True),
            ) # HACK

            self.scratch.skip_conv = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False, padding_mode='reflect'),
                norm_layer(64, track_running_stats=False, affine=True),
                nn.ReLU(inplace=True),
                BasicBlock(64, features//2, 1, downsample, norm_layer),
                BasicBlock(features//2, features//2, 1, None, norm_layer),
                BasicBlock(features//2, features//2, 1, None, norm_layer),
            )
        else:
            self.scratch.output_conv = nn.Sequential(
                nn.ReLU(True),
                nn.Conv2d(4*features, features, kernel_size=3, stride=1, padding=1),
            )

        self.pretrained.model.pos_embed.requires_grad = train_pos_embed

    def forward(self, x):
        """Encode an image batch ``x`` ([B, 3, H, W]) into a feature map.

        The output resolution is that of the skip path when it is enabled,
        otherwise that of the first post-processed transformer activation.
        """
        if self.channels_last == True:
            # Tensor.contiguous returns the converted tensor; the result used
            # to be discarded, which made the option a no-op.
            x = x.contiguous(memory_format=torch.channels_last)

        layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)

        reduced = [
            self.scratch.layer1_rn(layer_1),
            self.scratch.layer2_rn(layer_2),
            self.scratch.layer3_rn(layer_3),
            self.scratch.layer4_rn(layer_4),
        ]

        if self.use_skip_conv:
            skip_x = self.scratch.skip_conv(x)
            sz = skip_x.shape[-2:]
        else:
            sz = layer_1.shape[-2:]

        # Bring all four levels to the same resolution before fusing them.
        new_latents = torch.cat(
            [
                F.interpolate(level, sz, mode='bilinear', align_corners=True)
                for level in reduced
            ],
            1,
        )

        out = self.scratch.output_conv(new_latents)

        if self.use_skip_conv:
            out = torch.cat([out, skip_x], 1)

        return out

Model/Model.py

In [None]:
import os
import torch
import numpy as np
#from network.resnet_mlp import PosEncodeResnet
#from network.vit import VIT

def de_parallel(model):
    """Return ``model.module`` if the object has that attribute, else ``model`` itself."""
    return getattr(model, 'module', model)


class VisionNerfModel(object):
    """Owns the networks, optimizer and schedulers of the model and their checkpoints.

    Attributes created by ``__init__``:
        net_coarse / net_fine: ``PosEncodeResnet`` MLPs (``net_fine`` is None
            when ``args.coarse_only`` is set)
        feature_net: ``VIT`` image encoder
        optimizer, scheduler, warmup_scheduler: Adam, StepLR and an optional
            linear warm-up
        start_step: step restored from the loaded checkpoint, or 0
    """

    def __init__(self, args, load_opt=True, load_scheduler=True):
        """Build all components on ``cuda:<args.local_rank>`` and reload a checkpoint.

        Args:
            args: settings object (e.g. ``Parameters_eval``); the attributes
                read here include ``local_rank``, ``im_feat_dim``,
                ``mlp_feat_dim``, ``mlp_block_num``, ``coarse_only``,
                ``freeze_pos_embed``, ``no_skip_conv``, the ``lrate_*`` values,
                ``scheduler``, ``use_warmup``, ``warmup_steps``, ``ckptdir``,
                ``expname`` and ``distributed``
            load_opt: restore the optimizer state from the checkpoint
            load_scheduler: restore the scheduler state from the checkpoint
        Raises:
            NotImplementedError: if ``args.scheduler`` is not 'steplr'
        """
        self.args = args
        device = torch.device('cuda:{}'.format(args.local_rank))

        self.freq_factor = np.pi

        pos_c = 3
        # NOTE(review): presumably image feature + two 3-vectors; confirm with the caller.
        in_c = args.im_feat_dim + 3 + 3
        # create coarse network
        self.net_coarse = PosEncodeResnet(args, pos_c, in_c, args.mlp_feat_dim,
                                          4, args.mlp_block_num).to(device)
        if args.coarse_only:
            self.net_fine = None
        else:
            # create fine network
            self.net_fine = PosEncodeResnet(args, pos_c, in_c, args.mlp_feat_dim,
                                            4, args.mlp_block_num).to(device)

        # create feature extraction network; placed on the same device as the
        # MLPs (a bare .cuda() would pick the current default GPU instead of
        # cuda:<local_rank>)
        self.feature_net = VIT(args.im_feat_dim,
                               train_pos_embed=not args.freeze_pos_embed,
                               use_skip_conv=not args.no_skip_conv).to(device)

        # optimizer and learning rate scheduler: the encoder gets its own
        # learning rate, the MLPs use the optimizer default (lrate_mlp)
        params = [
            {'params': self.net_coarse.parameters()},
            {'params': self.feature_net.parameters(), 'lr': args.lrate_feature},
        ]

        if self.net_fine is not None:
            params.append({'params': self.net_fine.parameters()})

        self.optimizer = torch.optim.Adam(params, lr=args.lrate_mlp)

        if args.scheduler == 'steplr':
            self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                            step_size=args.lrate_decay_steps,
                                                            gamma=args.lrate_decay_factor)
        else:
            raise NotImplementedError

        if args.use_warmup:
            # Linear ramp from 1/warmup_steps up to 1 over warmup_steps steps.
            self.warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer,
                        lr_lambda=lambda step: np.clip((step+1), 0, args.warmup_steps) / args.warmup_steps)
        else:
            self.warmup_scheduler = None

        out_folder = os.path.join(args.ckptdir, args.expname)
        self.start_step = self.load_from_ckpt(out_folder,
                                              load_opt=load_opt,
                                              load_scheduler=load_scheduler)

        # Wrap after loading so checkpoints are read into the bare modules.
        if args.distributed:

            self.net_coarse = torch.nn.parallel.DistributedDataParallel(
                self.net_coarse,
                device_ids=[args.local_rank],
                output_device=args.local_rank
            )

            self.feature_net = torch.nn.parallel.DistributedDataParallel(
                self.feature_net,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                find_unused_parameters=True
            )

            if self.net_fine is not None:
                self.net_fine = torch.nn.parallel.DistributedDataParallel(
                    self.net_fine,
                    device_ids=[args.local_rank],
                    output_device=args.local_rank
                )

    def encode(self, x):
        """
        Args:
            x: input tensor [b, v, h, w, c]
        Returns:
            Extracted feature maps [b, v, out_c, out_h, out_w]
        """
        b, v, h, w, c = x.shape
        x = x*2. - 1. # Normalization for transformer
        # Fold the view axis into the batch and move channels first for the encoder.
        feat_maps = self.feature_net(x.reshape([-1, h, w, c]).permute(0, 3, 1, 2))
        _, nc, nh, nw = feat_maps.shape
        feat_maps = feat_maps.reshape([b, v, nc, nh, nw])
        return feat_maps

    def posenc(self, x):
        """Encode ``x`` as ``[x, sin(x * f_k), cos(x * f_k)]`` per channel.

        ``f_k = freq_factor * 2**k`` for ``k < args.freq_num``. The result keeps
        the first two dimensions of ``x`` and flattens the rest. The frequency
        tensor is created on ``x.device`` so the method works wherever ``x``
        lives, not only on ``cuda:<local_rank>``.
        """
        freq_multiplier = (
            self.freq_factor * 2 ** torch.arange(
                                        self.args.freq_num,
                                        device=x.device
                                    )
        ).view(1, 1, 1, -1)
        x_expand = x.unsqueeze(-1)
        sin_val = torch.sin(x_expand * freq_multiplier)
        cos_val = torch.cos(x_expand * freq_multiplier)
        return torch.cat(
            [x_expand, sin_val, cos_val], -1
        ).view(x.shape[:2] + (-1,))

    def switch_to_eval(self):
        """Put every network into evaluation mode."""
        self.net_coarse.eval()
        if self.net_fine is not None:
            self.net_fine.eval()
        self.feature_net.eval()

    def switch_to_train(self):
        """Put every network into training mode."""
        self.net_coarse.train()
        if self.net_fine is not None:
            self.net_fine.train()
        self.feature_net.train()

    def save_model(self, filename):
        """Save optimizer, scheduler and network weights to ``filename``.

        Weights are taken from the unwrapped modules (see ``de_parallel``).
        """
        to_save = {'optimizer': self.optimizer.state_dict(),
                   'scheduler': self.scheduler.state_dict(),
                   'net_coarse': de_parallel(self.net_coarse).state_dict(),
                   'feature_net': de_parallel(self.feature_net).state_dict(),
                   }

        if self.net_fine is not None:
            to_save['net_fine'] = de_parallel(self.net_fine).state_dict()

        torch.save(to_save, filename)

    def load_model(self, filename, load_opt=True, load_scheduler=True):
        """Restore state written by ``save_model``.

        Args:
            filename: checkpoint path
            load_opt: also restore the optimizer state
            load_scheduler: also restore the scheduler state
        """
        if self.args.distributed:
            # Each process maps the tensors onto its own GPU.
            to_load = torch.load(filename, map_location='cuda:{}'.format(self.args.local_rank))
        else:
            to_load = torch.load(filename)

        if load_opt:
            self.optimizer.load_state_dict(to_load['optimizer'])
        if load_scheduler:
            self.scheduler.load_state_dict(to_load['scheduler'])

        self.net_coarse.load_state_dict(to_load['net_coarse'])
        self.feature_net.load_state_dict(to_load['feature_net'])

        # A coarse-only checkpoint leaves the fine network at its initial weights.
        if self.net_fine is not None and 'net_fine' in to_load.keys():
            self.net_fine.load_state_dict(to_load['net_fine'])

    def load_from_ckpt(self, out_folder,
                       load_opt=True,
                       load_scheduler=True,
                       force_latest_ckpt=False):
        '''Load model from existing checkpoints and return the current step

        Args:
            out_folder: the directory that stores ckpts
            load_opt: restore the optimizer state
            load_scheduler: restore the scheduler state
            force_latest_ckpt: ignore ``args.ckpt_path`` and use the last
                ``.pth`` file (sorted by name) found in ``out_folder``
        Returns:
            The current starting step. It is parsed from the six characters in
            front of the 4-character extension; 0 is returned when nothing is
            loaded or when those characters are not a number.
        '''

        # all existing ckpts
        ckpts = []
        if os.path.exists(out_folder):
            ckpts = [os.path.join(out_folder, f)
                     for f in sorted(os.listdir(out_folder)) if f.endswith('.pth')]

        if self.args.ckpt_path is not None and not force_latest_ckpt:
            if os.path.isfile(self.args.ckpt_path):  # load the specified ckpt
                ckpts = [self.args.ckpt_path]

        if len(ckpts) > 0 and not self.args.no_reload:
            fpath = ckpts[-1]
            self.load_model(fpath, load_opt, load_scheduler)
            try:
                step = int(fpath[-10:-4])
            except ValueError:
                # The weights are loaded already; a file name without a step
                # suffix should not abort the run.
                print('Could not parse a step number from {}, using step=0'.format(fpath))
                step = 0
            print('Reloading from {}, starting at step={}'.format(fpath, step))
        else:
            print('No ckpts found, training from scratch...')
            step = 0

        return step

Model/Projection.py

In [None]:
import torch
import torch.nn.functional as F
from einops import rearrange, repeat

def divide_safe(num, denom):
    """Divide ``num`` by ``denom`` after nudging tiny denominators.

    Every denominator entry that is <= 1e-20 (which includes all negative
    entries) has 1e-8 added to it before the division.
    """
    eps = 1e-8
    needs_nudge = torch.le(denom, 1e-20).to(torch.float)
    return num / (denom + eps * needs_nudge)

def meshgrid_pinhole(h, w,
                    is_homogenous=True, device=None):
    '''Build a per-pixel coordinate grid for an image.

    Args:
        h: grid height
        w: grid width
        is_homogenous: if True, append a constant channel of ones
        device: device on which the coordinate tensors are created
    Returns:
        Tensor [height, width, 2] holding (x, y) per pixel, or
        [height, width, 3] holding (x, y, 1) when ``is_homogenous`` is True.
    '''
    col_coords = torch.linspace(0, w-1, steps=w, device=device)
    row_coords = torch.linspace(0, h-1, steps=h, device=device)
    # rows vary along dim 0, columns along dim 1
    grid_y, grid_x = torch.meshgrid(row_coords, col_coords)

    channels = [grid_x, grid_y]
    if is_homogenous:
        channels.append(torch.ones_like(grid_x))
    return torch.stack(channels, 2)

def normalize(pixel_locations, h, w):
    '''Map pixel coordinates to the [-1, 1] range.

    Args:
        pixel_locations: tensor whose last axis is (x, y); the scale factor is
            shaped [1, 1, 1, 1, 2], so the input must broadcast against five axes
        h: image height
        w: image width
    Returns:
        ``2 * pixel_locations / (w-1, h-1) - 1``: pixel 0 maps to -1 and pixel
        (w-1, h-1) maps to +1.
    '''
    extent = torch.tensor([w-1., h-1.], device=pixel_locations.device)
    extent = extent.view([1, 1, 1, 1, 2])
    return 2 * pixel_locations / extent - 1.


class Projector():
    '''Projects 3D sample points into source views and samples image features there.'''

    def __init__(self, device):
        # stored for callers; the methods below take the device from their inputs
        self.device = device

    def normalize(self, pixel_locations, h, w):
        '''Map pixel coordinates to [-1, 1].

        Args:
            pixel_locations: tensor whose last axis is (x, y); must broadcast
                against the [1, 1, 1, 1, 2] scale factor
            h: image height
            w: image width
        Returns:
            ``2 * pixel_locations / (w-1, h-1) - 1``
        '''
        extent = torch.tensor([w-1., h-1.], device=pixel_locations.device)
        extent = extent.view([1, 1, 1, 1, 2])
        return 2 * pixel_locations / extent - 1.

    def normalize_pts(self, xyz, bbox3d):
        '''Rescale 3D points with a per-batch bounding box.

        ``near`` is read from index 1 and ``far`` from index 0 of the last axis
        of ``bbox3d``; the result is ``2 * (xyz - near) / (far - near) - 1``.
        NOTE(review): which of the two indices holds the min / max bound is not
        visible here - confirm against the caller.
        '''
        lo = bbox3d[:, None, None, None, :, 1]
        hi = bbox3d[:, None, None, None, :, 0]
        return 2 * (xyz - lo) / (hi - lo) - 1.

    def compute_projections(self, xyz, train_ints, train_exts):
        '''Project 3D points into cameras
        Args:
            xyz: [batch, N_rays, N_samples, 3]
            train_ints: intrinsics [batch, num_views, 4, 4]
            train_exts: extrinsics [batch, num_views, 4, 4]; they are inverted
                before being applied to the points
        Returns:
            Pixel locations [batch, #views, N_rays, N_samples, 2] and
            camera-space homogeneous points [batch, #views, N_rays, N_samples, 4]
        '''
        batch, N_rays, N_samples, _ = xyz.shape
        num_views = train_ints.shape[1]

        flat_pts = xyz.reshape(batch, -1, 3)  # [batch, n_points, 3]
        flat_pts_h = torch.cat([flat_pts, torch.ones_like(flat_pts[..., :1])], dim=-1)  # [batch, n_points, 4]
        # one copy of the (transposed) point set per view: [batch, n_views, 4, n_points]
        per_view_pts = flat_pts_h.permute([0, 2, 1])[:, None].repeat(1, num_views, 1, 1)

        xyz_c = torch.inverse(train_exts) @ per_view_pts  # camera coordinates
        projections = (train_ints @ xyz_c).permute(0, 1, 3, 2)  # [batch, n_views, n_points, 4]

        # perspective divide; the depth is clamped so non-positive depths do not divide by ~0
        depth = torch.clamp(projections[..., 2:3], min=1e-8)
        pixel_locations = torch.clamp(projections[..., :2] / depth, min=-1e6, max=1e6)

        out_pixels = pixel_locations.reshape((batch, num_views, N_rays, N_samples, 2))
        out_xyz_c = xyz_c.permute(0, 1, 3, 2).reshape((batch, num_views, N_rays, N_samples, 4))
        return out_pixels, out_xyz_c

    def compute_directions(self, dir, train_exts):
        '''Rotate view directions with the inverse of each view's 3x3 rotation block
        Args:
            dir: [batch, N_rays, N_samples, 3]
            train_exts: extrinsics [batch, num_views, 4, 4]
        Returns:
            Viewing directions [batch, #views, N_rays, N_samples, 3]
        '''
        _, N_rays, N_samples, _ = dir.shape
        num_views = train_exts.shape[1]
        # [batch, n_views, 3, N_rays*N_samples] so the rotation can be applied by matmul
        stacked_dirs = repeat(dir, 'b nr ns c -> b nv c (nr ns)', nv=num_views)
        rotations = train_exts[..., :3, :3]  # [batch, n_views, 3, 3]
        rotated = torch.inverse(rotations) @ stacked_dirs
        return rearrange(rotated, 'b nv c (nr ns) -> b nv nr ns c', nr=N_rays, ns=N_samples)

    def compute_pixel(self,  xyz, train_imgs, train_ints, train_exts, featmaps):
        '''Original pixelNeRF projection (2D -> samples)
        Args:
            xyz: [batch, n_rays, n_samples, 3]
            train_imgs: [batch, n_views, h, w, 3]
            train_ints: [batch, n_views, 4, 4]
            train_exts: [batch, n_views, 4, 4]
            featmaps: [batch, n_views, c, h, w]
        Returns: rgb_feat_sampled: [batch, n_rays, n_samples, n_views, 3+n_feat],
                 xyz_c: [batch, n_views, n_rays, n_samples, 4]
        '''
        _, views, h, w = train_imgs.shape[:-1]

        # channels-first layout, rescaled with x * 2 - 1
        imgs_chw = train_imgs.permute(0, 1, 4, 2, 3) * 2. - 1.  # [batch, n_views, 3, h, w]

        # where every query point lands in every reference image
        pixel_locations, xyz_c = self.compute_projections(xyz, train_ints, train_exts)
        grid = self.normalize(pixel_locations, h, w).flatten(0, 1)  # [batch*n_views, n_rays, n_samples, 2]

        def _sample(maps):
            # bilinear lookup at the projected locations, then views moved next to channels
            picked = F.grid_sample(maps.flatten(0, 1), grid, align_corners=True)
            return rearrange(picked, '(b v) c nr ns -> b nr ns v c', v=views)

        rgb_sampled = _sample(imgs_chw)
        feat_sampled = _sample(featmaps)
        rgb_feat_sampled = torch.cat([rgb_sampled, feat_sampled], dim=-1)  # [batch, n_rays, n_samples, n_views, c+3]

        return rgb_feat_sampled, xyz_c

Model/Render_ray.py

In [None]:
from collections import OrderedDict
import torch
import torch.nn.functional as F
from einops import rearrange, repeat

########################################################################################################################
# helper functions for nerf ray rendering
########################################################################################################################


def sample_pdf(bins, weights, N_samples, det=False):
    '''Draw samples from a piecewise-constant PDF by inverse-CDF sampling.

    Args:
        bins: tensor of shape [batch, N_rays, M+1], M is the number of bins
        weights: tensor of shape [batch, N_rays, M]; left untouched (a smoothed
            copy is used internally)
        N_samples: number of samples along each ray
        det: if True, will perform deterministic sampling (evenly spaced
            quantiles in [0, 1]); otherwise quantiles are drawn with torch.rand
    Returns: [batch, N_rays, N_samples]
    '''

    batch, n_rays = bins.shape[:2]
    M = weights.shape[-1]
    # Out-of-place on purpose: the previous `weights += 1e-5` modified the
    # caller's tensor (or the tensor a caller's view points into).
    weights = weights + 1e-5
    # Get pdf
    pdf = weights / torch.sum(weights, dim=-1, keepdim=True)    # [batch, N_rays, M]
    cdf = torch.cumsum(pdf, dim=-1)  # [batch, N_rays, M]
    cdf = torch.cat([torch.zeros_like(cdf[..., 0:1]), cdf], dim=-1) # [batch, N_rays, M+1]

    # Take uniform samples
    if det:
        u = torch.linspace(0., 1., N_samples, device=bins.device)
        u = u[None, None, :].repeat(batch, n_rays, 1)       # [batch, N_rays, N_samples]
    else:
        u = torch.rand(batch, n_rays, N_samples, device=bins.device)

    # Invert CDF: for every sample count how many of the first M CDF knots are
    # <= u. cdf[..., 0] is 0, so the count is at least 1 and at most M.
    above_inds = (u.unsqueeze(-1) >= cdf[..., None, :M]).sum(dim=-1)  # [batch, N_rays, N_samples]

    # random sample inside each bin
    below_inds = torch.clamp(above_inds-1, min=0)
    inds_g = torch.stack((below_inds, above_inds), dim=-1)     # [batch, N_rays, N_samples, 2]

    # expand() gives gather the per-sample view without materialising copies
    cdf_g = torch.gather(input=cdf.unsqueeze(2).expand(-1, -1, N_samples, -1),
                         dim=-1, index=inds_g)  # [batch, N_rays, N_samples, 2]
    bins_g = torch.gather(input=bins.unsqueeze(2).expand(-1, -1, N_samples, -1),
                          dim=-1, index=inds_g)  # [batch, N_rays, N_samples, 2]

    # guard against (near-)empty CDF intervals: fall back to a denominator of 1
    denom = cdf_g[..., 1] - cdf_g[..., 0]      # [batch, N_rays, N_samples]
    denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
    t = (u - cdf_g[..., 0]) / denom

    samples = bins_g[..., 0] + t * (bins_g[..., 1]-bins_g[..., 0])

    return samples

def sample_along_camera_ray(ray_o, ray_d, depth_range,
                            N_samples,
                            inv_uniform=False,
                            det=False):
    '''Sample 3D points along each ray between the near and far depth.

    :param ray_o: origin of the ray in scene coordinate system; tensor of shape [N_rays, 3] or [Batch, N_rays, 3]
    :param ray_d: ray direction vectors in scene coordinate system; tensor of shape [N_rays, 3] or [Batch, N_rays, 3]
    :param depth_range: [B, 2] (near_depth, far_depth); both must be positive with far > near
    :param N_samples: number of samples per ray; with a single sample the depth is the near depth
    :param inv_uniform: if True, uniformly sampling inverse depth
    :param det: if True, will perform deterministic sampling; otherwise each
        depth is jittered uniformly inside the interval bounded by the mid-points
        to its neighbours
    :return: (pts, z_vals) with pts of shape [Batch, N_rays, N_samples, 3] and
        z_vals of shape [Batch, N_rays, N_samples]
    '''

    if ray_o.ndim == 2:
        ray_o = ray_o[None, :]
    if ray_d.ndim == 2:
        ray_d = ray_d[None, :]

    # will sample inside [near_depth, far_depth]
    near_depth_value = depth_range[:, 0]
    far_depth_value = depth_range[:, 1]
    assert torch.all(near_depth_value > 0) and torch.all(far_depth_value > 0) and torch.all(far_depth_value > near_depth_value)

    # broadcast the per-batch bounds to one value per ray: [Batch, N_rays]
    per_ray_ones = torch.ones_like(ray_d[..., 0])
    near_depth = near_depth_value[..., None] * per_ray_ones
    far_depth = far_depth_value[..., None] * per_ray_ones

    if inv_uniform:
        start, stop = 1. / near_depth, 1. / far_depth
    else:
        start, stop = near_depth, far_depth

    # max(..., 1) keeps N_samples == 1 finite; dividing by N_samples - 1 == 0
    # used to turn every depth into NaN.
    step = (stop - start) / max(N_samples - 1, 1)
    sample_idx = torch.arange(N_samples, dtype=start.dtype, device=start.device)
    z_vals = start[..., None] + sample_idx * step[..., None]  # [Batch, N_rays, N_samples]
    if inv_uniform:
        z_vals = 1. / z_vals

    if not det:
        # get intervals between samples
        mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        upper = torch.cat([mids, z_vals[..., -1:]], dim=-1)
        lower = torch.cat([z_vals[..., 0:1], mids], dim=-1)
        # uniform samples in those intervals
        t_rand = torch.rand_like(z_vals)
        z_vals = lower + (upper - lower) * t_rand   # [Batch, N_rays, N_samples]

    ray_d = ray_d.unsqueeze(2).repeat(1, 1, N_samples, 1)  # [Batch, N_rays, N_samples, 3]
    ray_o = ray_o.unsqueeze(2).repeat(1, 1, N_samples, 1)
    pts = z_vals.unsqueeze(-1) * ray_d + ray_o       # [Batch, N_rays, N_samples, 3]
    return pts, z_vals


########################################################################################################################
# ray rendering of nerf
########################################################################################################################

def raw2outputs(raw, z_vals, white_bkgd=False):
    '''Composite per-sample colour and density into per-ray outputs.

    Args:
        raw: raw network output; tensor of shape [batch, N_rays, N_samples, 4]
            (first three channels colour, last channel density)
        z_vals: depth of point samples along rays; tensor of shape [batch, N_rays, N_samples]
        white_bkgd: if True, add (1 - accumulated weight) to the colour
    Returns:
        OrderedDict with 'rgb' [batch, N_rays, 3], 'depth' [batch, N_rays],
        'weights' [batch, N_rays, N_samples], 'mask' (all ones, [batch, N_rays]),
        'alpha' [batch, N_rays, N_samples] and 'z_vals' (the input depths).
    '''
    color = raw[..., :3]     # [batch, N_rays, N_samples, 3]
    density = raw[..., 3]    # [batch, N_rays, N_samples]

    # spacing between consecutive samples; the last spacing is duplicated so
    # every sample has one
    gaps = z_vals[..., 1:] - z_vals[..., :-1]
    gaps = torch.cat((gaps, gaps[..., -1:]), dim=-1)  # [batch, N_rays, N_samples]

    # opacity from density scaled by the interval length (pixelNeRF-style)
    alpha = 1. - torch.exp(-gaps * torch.relu(density))  # [batch, N_rays, N_samples]

    # transmittance before each sample: exclusive cumulative product of (1 - alpha)
    transmittance = torch.cumprod(1. - alpha + 1e-10, dim=-1)[..., :-1]
    transmittance = torch.cat((torch.ones_like(transmittance[..., 0:1]), transmittance), dim=-1)

    weights = alpha * transmittance     # [batch, N_rays, N_samples]
    rgb_map = torch.sum(weights[..., None] * color, dim=2)  # [batch, N_rays, 3]

    if white_bkgd:
        rgb_map = rgb_map + (1. - torch.sum(weights, dim=-1, keepdim=True))

    depth_map = torch.sum(weights * z_vals, dim=-1)     # [batch, N_rays]

    ret = OrderedDict()
    ret['rgb'] = rgb_map
    ret['depth'] = depth_map
    ret['weights'] = weights   # used for importance sampling of fine samples
    ret['mask'] = torch.ones_like(rgb_map[..., 0])
    ret['alpha'] = alpha
    ret['z_vals'] = z_vals
    return ret


def render_rays(ray_batch,
                model,
                featmaps,
                projector,
                N_samples,
                inv_uniform=False,
                N_importance=0,
                det=False,
                white_bkgd=False):
    '''Render a batch of rays with the coarse network and, optionally, the fine one.

    Args:
        ray_batch: dict; the keys read here are 'ray_o' [batch, N_rays, 3],
            'ray_d' [batch, N_rays, 3], 'depth_range', 'src_rgbs',
            'src_intrinsics' and 'src_c2w_mats'
        model: object exposing `net_coarse` and `net_fine`
        featmaps: feature maps handed to `projector.compute_pixel`
        projector: projector object
        N_samples: coarse samples along each ray
        inv_uniform: if True, uniformly sample inverse depth for coarse model
        N_importance: extra samples drawn from the coarse weights for the fine
            model; 0 disables the fine pass
        det: if True, will deterministicly sample depths
        white_bkgd: if True, assume background is white
    Return:
        {'outputs_coarse': {...}, 'outputs_fine': {...} or None}
    '''
    def _query(net, points, n_pts):
        # Shared by the coarse and fine passes: project the points, gather
        # image features and run `net`. Returns raw [batch, N_rays, n_pts, 4].
        sampled, cam_xyz = projector.compute_pixel(points, ray_batch['src_rgbs'],
                                                   ray_batch['src_intrinsics'],
                                                   ray_batch['src_c2w_mats'],
                                                   featmaps=featmaps)  # [batch, N_rays, n_pts, N_views, x]
        cam_xyz = cam_xyz[:, 0, ..., :3]  # HACK only use the first view
        sampled = sampled.squeeze(3)      # HACK consider only one camera now so remove the axis

        world_dirs = repeat(ray_batch['ray_d'], 'b nr c -> b nr ns c', ns=n_pts)
        cam_dirs = projector.compute_directions(world_dirs, ray_batch['src_c2w_mats'])
        cam_dirs = cam_dirs[:, 0, ..., :3]  # HACK only use the first view

        net_input = torch.cat([sampled, cam_dirs], -1)
        raw = net(cam_xyz.flatten(0, 1), net_input.flatten(0, 1))  # [batch*N_rays*n_pts, 4]
        return raw.reshape([batch, N_rays, n_pts, 4])

    ret = {'outputs_coarse': None,
           'outputs_fine': None}

    # ---- coarse pass --------------------------------------------------------
    # pts: [batch, N_rays, N_samples, 3], z_vals: [batch, N_rays, N_samples]
    pts, z_vals = sample_along_camera_ray(ray_o=ray_batch['ray_o'],
                                          ray_d=ray_batch['ray_d'],
                                          depth_range=ray_batch['depth_range'],
                                          N_samples=N_samples, inv_uniform=inv_uniform, det=det)
    batch, N_rays, N_samples = pts.shape[:3]

    raw_coarse = _query(model.net_coarse, pts, N_samples)
    outputs_coarse = raw2outputs(raw_coarse, z_vals, white_bkgd=white_bkgd)
    ret['outputs_coarse'] = outputs_coarse

    if N_importance <= 0:
        return ret

    # ---- fine pass ----------------------------------------------------------
    assert model.net_fine is not None
    # detach since we would like to decouple the coarse and fine networks;
    # the first and last coarse weights are dropped to match the mid-point bins
    weights = outputs_coarse['weights'].clone().detach()[..., 1:-1]   # [batch, N_rays, N_samples-2]
    if inv_uniform:
        inv_z_vals = 1. / z_vals
        inv_z_vals_mid = .5 * (inv_z_vals[..., 1:] + inv_z_vals[..., :-1])   # [batch, N_rays, N_samples-1]
        # bins and weights are flipped before sampling
        inv_z_samples = sample_pdf(bins=torch.flip(inv_z_vals_mid, dims=[-1]),
                                   weights=torch.flip(weights, dims=[-1]),
                                   N_samples=N_importance, det=det)  # [batch, N_rays, N_importance]
        z_samples = 1. / inv_z_samples
    else:
        # take mid-points of depth samples
        z_vals_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])   # [batch, N_rays, N_samples-1]
        z_samples = sample_pdf(bins=z_vals_mid, weights=weights,
                               N_samples=N_importance, det=det)  # [batch, N_rays, N_importance]

    # merge coarse and importance depths, sorted with increasing depth
    z_vals, _ = torch.sort(torch.cat((z_vals, z_samples), dim=-1), dim=-1)
    N_total_samples = N_samples + N_importance

    viewdirs = ray_batch['ray_d'][:, :, None].expand([-1, -1, N_total_samples, -1])
    ray_o = ray_batch['ray_o'][:, :, None].repeat(1, 1, N_total_samples, 1)
    pts = z_vals.unsqueeze(-1) * viewdirs + ray_o  # [batch, N_rays, N_samples + N_importance, 3]

    raw_fine = _query(model.net_fine, pts, N_total_samples)
    ret['outputs_fine'] = raw2outputs(raw_fine, z_vals, white_bkgd=white_bkgd)

    return ret

Model/Render_image.py

In [None]:
from collections import OrderedDict

import torch


def render_single_image(ray_sampler,
                        ray_batch,
                        model,
                        featmaps,
                        projector,
                        chunk_size,
                        N_samples,
                        inv_uniform=False,
                        N_importance=0,
                        det=False,
                        white_bkgd=False,
                        render_stride=1):
    '''Render a full image chunk by chunk and assemble the per-pixel outputs.

    Args:
        ray_sampler: sampler for this view; only its `H` and `W` are read here
        ray_batch: dict of ray data; per-ray entries are indexed along dim 0
        model:  {'net_coarse': , 'net_fine': , ...}
        featmaps: feature maps forwarded to `render_rays`
        projector: projector object forwarded to `render_rays`
        chunk_size: number of rays in a chunk
        N_samples: samples along each ray (for both coarse and fine model)
        inv_uniform: if True, uniformly sample inverse depth for coarse model
        N_importance: forwarded to `render_rays`
        det: if True, use deterministic sampling
        white_bkgd: if True, assume background is white
        render_stride: stride for rendering; fixes the output height/width
    Return:
        OrderedDict with 'outputs_coarse' (dict of CPU tensors reshaped to the
        strided image size) and 'outputs_fine' (same, or None when the renderer
        returns no fine outputs)
    '''
    def _stash(store, outputs):
        # cache one chunk's results on cpu, dropping the leading batch axis
        for name in outputs:
            store[name].append(outputs[name].squeeze(0).cpu())

    def _stitch(store, out_h, out_w):
        # concatenate the chunks of every entry and fold them into image layout
        for name in store:
            if name == 'random_sigma':
                continue
            merged = torch.cat(store[name], dim=0).reshape((out_h, out_w, -1))
            store[name] = merged.squeeze()

    all_ret = OrderedDict([('outputs_coarse', OrderedDict()),
                           ('outputs_fine', OrderedDict())])

    # entries shared by all rays are passed through whole instead of being sliced
    shared_keys = ['intrinsics', 'c2w_mat', 'depth_range',
                   'src_rgbs', 'src_intrinsics', 'src_c2w_mats']

    N_rays = ray_batch['ray_o'].shape[0]

    for i in range(0, N_rays, chunk_size):
        chunk = OrderedDict()
        for k in ray_batch:
            value = ray_batch[k]
            if k in shared_keys:
                chunk[k] = value
            elif value is not None:
                # slice this chunk's rays and add a leading batch axis
                chunk[k] = value[None, i:i+chunk_size]
            else:
                chunk[k] = None

        ret = render_rays(chunk, model, featmaps,
                          projector=projector,
                          N_samples=N_samples,
                          inv_uniform=inv_uniform,
                          N_importance=N_importance,
                          det=det,
                          white_bkgd=white_bkgd)
        coarse_out = ret['outputs_coarse']
        fine_out = ret['outputs_fine']

        # the first chunk decides which entries exist
        if i == 0:
            for k in coarse_out:
                all_ret['outputs_coarse'][k] = []

            if fine_out is None:
                all_ret['outputs_fine'] = None
            else:
                for k in fine_out:
                    all_ret['outputs_fine'][k] = []

        _stash(all_ret['outputs_coarse'], coarse_out)
        if fine_out is not None:
            _stash(all_ret['outputs_fine'], fine_out)

    # a strided dummy image is used only to obtain the output height and width
    rgb_strided = torch.ones(ray_sampler.H, ray_sampler.W, 3)[::render_stride, ::render_stride, :]
    out_h, out_w = rgb_strided.shape[0], rgb_strided.shape[1]

    # merge chunk results and reshape
    _stitch(all_ret['outputs_coarse'], out_h, out_w)
    if all_ret['outputs_fine'] is not None:
        _stitch(all_ret['outputs_fine'], out_h, out_w)

    return all_ret

Model/Sample_ray.py

In [None]:
import numpy as np
import torch
import torch.nn.functional as F


# Module-level NumPy random generator with a fixed seed (234).
# RaySamplerSingleImage.sample_random_pixel draws its pixel indices from it.
rng = np.random.RandomState(234)


def bbox_sample(bboxes, N_rand):
    """Draw random integer pixel coordinates inside a bounding box.

    Args:
        bboxes: bounding box values indexed as (xmin, ymin, xmax, ymax), i.e.
            ``bboxes[0]`` .. ``bboxes[3]``
        N_rand: number of pixels to sample
    Returns:
        (y, x): two long tensors of length N_rand
    """
    x_min, y_min, x_max, y_max = bboxes[0], bboxes[1], bboxes[2], bboxes[3]

    # x is drawn before y; the "+ 1" makes the max coordinate reachable after
    # truncation to integer
    x = (torch.rand(N_rand) * (x_max + 1 - x_min) + x_min).long()
    y = (torch.rand(N_rand) * (y_max + 1 - y_min) + y_min).long()
    return y, x

def bbox_sample_full(bboxes, N_rand, h=128, w=128, prob=0.8):
    """Sample pixels mostly inside a bounding box, the rest over the whole image.

    Args:
        bboxes: bounding box values indexed as (xmin, ymin, xmax, ymax), i.e.
            ``bboxes[0]`` .. ``bboxes[3]``
        N_rand: number of pixels to sample
        h: image height
        w: image width
        prob: fraction of the samples taken inside the bbox
            (``int(N_rand * prob)`` of them)
    Returns:
        (y, x): long tensors of length N_rand, bbox samples first, then the
        whole-image samples
    """
    n_inside = int(N_rand * prob)
    n_anywhere = N_rand - n_inside
    x_min, y_min, x_max, y_max = bboxes[0], bboxes[1], bboxes[2], bboxes[3]

    # draw order (x in box, y in box, x anywhere, y anywhere) is kept fixed
    x_in = (torch.rand(n_inside) * (x_max + 1 - x_min) + x_min).long()
    y_in = (torch.rand(n_inside) * (y_max + 1 - y_min) + y_min).long()
    x_any = (torch.rand(n_anywhere) * w).long()
    y_any = (torch.rand(n_anywhere) * h).long()

    return torch.cat([y_in, y_any]), torch.cat([x_in, x_any])

########################################################################################################################
# ray batch sampling
########################################################################################################################

class RaySamplerSingleImage(object):
    '''Generates and serves the camera rays of one target view.'''

    def __init__(self, data, device, resize_factor=1, render_stride=1):
        '''Read the target/source view data and precompute all rays.

        Args:
            data: dict; 'tgt_intrinsic', 'tgt_c2w_mat', 'rgb_path', 'depth_range'
                and 'img_hw' are required, 'tgt_rgb', 'src_rgbs', 'src_masks',
                'src_intrinsics', 'src_c2w_mats' and 'tgt_bbox' are optional
            device: stored on the instance
            resize_factor: scale applied to H, W, the intrinsics and the target image
            render_stride: pixel stride used when generating rays
        '''
        super().__init__()

        def _optional(key):
            # value stored under `key`, or None when the entry is absent
            return data[key] if key in data.keys() else None

        self.render_stride = render_stride
        self.rgb = _optional('tgt_rgb')
        self.intrinsics = data['tgt_intrinsic']
        self.c2w_mat = data['tgt_c2w_mat']
        self.rgb_path = data['rgb_path']
        self.depth_range = data['depth_range']
        self.device = device
        self.batch_size = len(self.intrinsics)

        self.H = int(data['img_hw'][0])
        self.W = int(data['img_hw'][1])

        # optional down/up-scaling of the output
        if resize_factor != 1:
            self.W = int(self.W * resize_factor)
            self.H = int(self.H * resize_factor)
            # in-place: this also rescales the tensor held in `data`
            self.intrinsics[:, :2, :3] *= resize_factor
            if self.rgb is not None:
                rgb_chw = self.rgb.permute(0, 3, 1, 2)
                rgb_chw = F.interpolate(rgb_chw, scale_factor=resize_factor)
                self.rgb = rgb_chw.permute(0, 2, 3, 1)

        self.rays_o, self.rays_d = self.get_rays_single_image(self.H, self.W, self.intrinsics, self.c2w_mat)
        if self.rgb is not None:
            # one colour per ray
            self.rgb = self.rgb.reshape(-1, 3)

        self.src_rgbs = _optional('src_rgbs')
        self.src_masks = _optional('src_masks')
        self.src_intrinsics = _optional('src_intrinsics')
        self.src_c2w_mats = _optional('src_c2w_mats')
        # unlike the entries above, this attribute only exists when the key is present
        if 'tgt_bbox' in data.keys():
            self.tgt_bbox = data['tgt_bbox']

    @staticmethod
    def _cuda_or_none(tensor):
        '''Move `tensor` to the GPU, passing None through unchanged.'''
        return tensor.cuda() if tensor is not None else None

    def get_rays_single_image(self, H, W, intrinsics, c2w):
        '''Generate rays for a single image (batch size = 1).

        Args:
            H: image height
            W: image width
            intrinsics: intrinsic matrices; the [:, :3, :3] block is inverted
            c2w: camera to world extrinsic matrices; [:, :3, :3] rotates the
                directions and [:, :3, 3] is the ray origin
        Returns:
            Tensors of ray origin and direction, each flattened to [-1, 3].
        '''
        stride = self.render_stride
        cols = np.arange(W)[::stride]
        rows = np.arange(H)[::stride]
        u, v = np.meshgrid(cols, rows)
        u = u.reshape(-1).astype(dtype=np.float32)  # + 0.5    # add half pixel
        v = v.reshape(-1).astype(dtype=np.float32)  # + 0.5

        # homogeneous pixel coordinates, (3, n_pixels), replicated per batch element
        pixels = torch.from_numpy(np.stack((u, v, np.ones_like(u)), axis=0))
        batched_pixels = pixels.unsqueeze(0).repeat(self.batch_size, 1, 1)

        rotation = c2w[:, :3, :3]
        inv_K = torch.inverse(intrinsics[:, :3, :3])
        rays_d = rotation.bmm(inv_K).bmm(batched_pixels).transpose(1, 2)
        rays_d = rays_d.reshape(-1, 3)

        origin = c2w[:, :3, 3].unsqueeze(1)
        rays_o = origin.repeat(1, rays_d.shape[0], 1).reshape(-1, 3)
        return rays_o, rays_d

    def get_all(self):
        '''Return every ray of the image, with all tensors moved to the GPU.'''
        to_gpu = self._cuda_or_none
        ret = {}
        ret['ray_o'] = self.rays_o.cuda()
        ret['ray_d'] = self.rays_d.cuda()
        ret['depth_range'] = self.depth_range.cuda()
        ret['intrinsics'] = self.intrinsics.cuda()
        ret['c2w_mat'] = self.c2w_mat.cuda()
        ret['rgb'] = to_gpu(self.rgb)
        ret['src_rgbs'] = to_gpu(self.src_rgbs)
        ret['src_intrinsics'] = to_gpu(self.src_intrinsics)
        ret['src_c2w_mats'] = to_gpu(self.src_c2w_mats)
        ret['src_masks'] = to_gpu(self.src_masks)
        return ret

    def sample_random_pixel(self, N_rand, sample_mode, center_ratio=0.8):
        '''Pick N_rand distinct flat pixel indices.

        Args:
            N_rand: number of indices to draw (without replacement)
            sample_mode: 'center' restricts the draw to the central region,
                'uniform' draws from all H*W pixels
            center_ratio: fraction of each image side kept in 'center' mode
        Raises:
            Exception: for any other `sample_mode`
        '''
        if sample_mode == 'uniform':
            # Random from one image
            return rng.choice(self.H*self.W, size=(N_rand,), replace=False)

        if sample_mode != 'center':
            raise Exception("unknown sample mode!")

        border_H = int(self.H * (1 - center_ratio) / 2.)
        border_W = int(self.W * (1 - center_ratio) / 2.)

        # pixel coordinates: u runs over rows, v over columns
        u, v = np.meshgrid(np.arange(border_H, self.H - border_H),
                           np.arange(border_W, self.W - border_W))
        u = u.reshape(-1)
        v = v.reshape(-1)

        picked = rng.choice(u.shape[0], size=(N_rand,), replace=False)
        # flatten (row, col) to a single index
        return v[picked] + self.W * u[picked]

    def random_sample(self, N_rand, sample_mode, center_ratio=0.8):
        '''Generate a bundle of randomly sampled rays.
        Args:
            N_rand: number of rays to be casted
            sample_mode: forwarded to `sample_random_pixel`
            center_ratio: forwarded to `sample_random_pixel`
        Returns:
            A dictionary of ray information (tensors moved to the GPU), including
            the chosen indices under 'selected_inds'.
        '''
        select_inds = self.sample_random_pixel(N_rand, sample_mode, center_ratio)

        rays_o = self.rays_o[select_inds]
        rays_d = self.rays_d[select_inds]
        rgb = self.rgb[select_inds] if self.rgb is not None else None

        to_gpu = self._cuda_or_none
        ret = {}
        ret['ray_o'] = rays_o.cuda()
        ret['ray_d'] = rays_d.cuda()
        ret['intrinsics'] = self.intrinsics.cuda()
        ret['c2w_mat'] = self.c2w_mat.cuda()
        ret['depth_range'] = self.depth_range.cuda()
        ret['rgb'] = to_gpu(rgb)
        ret['src_rgbs'] = to_gpu(self.src_rgbs)
        ret['src_intrinsics'] = to_gpu(self.src_intrinsics)
        ret['src_c2w_mats'] = to_gpu(self.src_c2w_mats)
        ret['selected_inds'] = select_inds
        ret['src_masks'] = to_gpu(self.src_masks)
        return ret

class RaySamplerMultipleImages(object):
    """Ray sampler for multiple images (batch size > 1).

    Pre-computes one ray per grid pixel for every target view in the batch and
    serves them either all at once (get_all) or as a random subset per view
    (random_sample). Returned tensors are moved to the GPU with .cuda().
    """
    def __init__(self, data, device, cur_step, resize_factor=1, render_stride=1, bbox_steps=100000):
        """
        Args:
            data: batch dictionary. Required keys: 'tgt_intrinsic', 'tgt_c2w_mat',
                'rgb_path', 'depth_range', 'img_hw'. Optional keys: 'tgt_rgb',
                'src_rgbs', 'src_masks', 'src_intrinsics', 'src_c2w_mats', 'tgt_bbox'.
            device: stored as self.device (the methods of this class move data with .cuda()).
            cur_step: current step; compared against bbox_steps in 'bbox' sampling mode.
            resize_factor: scale applied to H, W, the target intrinsics and the target image.
            render_stride: pixel stride of the grid on which rays are generated.
            bbox_steps: once cur_step exceeds this value, 'bbox' sampling behaves like 'uniform'.
        """
        super().__init__()

        def optional(key):
            # Optional batch entries are represented by None when absent.
            return data[key] if key in data.keys() else None

        self.render_stride = render_stride
        self.rgb = optional('tgt_rgb') # [b, h, w, 3]
        self.intrinsics = data['tgt_intrinsic'] # [b, 4, 4]
        self.c2w_mat = data['tgt_c2w_mat'] # [b, 4, 4]
        self.rgb_path = data['rgb_path']
        self.depth_range = data['depth_range'] # [b, 2]
        self.device = device
        self.batch_size = len(self.intrinsics)
        self.cur_step = cur_step
        self.bbox_steps = bbox_steps

        self.H = int(data['img_hw'][0][0])
        self.W = int(data['img_hw'][1][0])

        # half-resolution output
        if resize_factor != 1:
            self.W = int(self.W * resize_factor)
            self.H = int(self.H * resize_factor)
            # Scale a private copy: an in-place multiply on data['tgt_intrinsic'] would
            # rescale the caller's tensor again every time a sampler is built from it.
            self.intrinsics = self.intrinsics.clone()
            self.intrinsics[:, :2, :3] *= resize_factor
            if self.rgb is not None:
                self.rgb = F.interpolate(self.rgb.permute(0, 3, 1, 2), scale_factor=resize_factor).permute(0, 2, 3, 1)

        self.rays_o, self.rays_d = self.get_rays_multiple_images(self.H, self.W, self.intrinsics, self.c2w_mat)
        if self.rgb is not None:
            # Flatten the image so that pixel index = row * W + column.
            self.rgb = self.rgb.reshape(self.batch_size, -1, 3)

        self.src_rgbs = optional('src_rgbs')
        self.src_masks = optional('src_masks')
        self.src_intrinsics = optional('src_intrinsics')
        self.src_c2w_mats = optional('src_c2w_mats')
        # Always defined so that bbox sampling can report a clear error when it is missing.
        self.tgt_bbox = optional('tgt_bbox')

    def get_rays_multiple_images(self, H, W, intrinsics, c2w):
        '''Generate rays for multiple images (batch size > 1).
        Args:
            H: image height
            W: image width
            intrinsics: 4 by 4 intrinsic matrix (batched)
            c2w: 4 by 4 camera to world extrinsic matrix (batched)
        Returns:
            Tensors of ray origin and direction, one ray per pixel of the
            (render_stride-subsampled) grid.
        '''
        u, v = np.meshgrid(np.arange(W)[::self.render_stride], np.arange(H)[::self.render_stride])
        u = u.reshape(-1).astype(dtype=np.float32)  # + 0.5    # add half pixel
        v = v.reshape(-1).astype(dtype=np.float32)  # + 0.5
        pixels = np.stack((u, v, np.ones_like(u)), axis=0)  # (3, H*W)
        pixels = torch.from_numpy(pixels)
        batched_pixels = pixels.unsqueeze(0).repeat(self.batch_size, 1, 1)

        # Back-project homogeneous pixel coordinates and rotate them into the world frame.
        rays_d = (c2w[:, :3, :3].bmm(torch.inverse(intrinsics[:, :3, :3])).bmm(batched_pixels)).transpose(1, 2) # B x HW x 3
        # Every ray of a view starts at that view's camera position.
        rays_o = c2w[:, :3, 3].unsqueeze(1).repeat(1, rays_d.shape[1], 1)  # B x HW x 3

        return rays_o, rays_d

    def get_all(self):
        '''Return every ray of every view, plus camera and source-view data, on the GPU.'''
        ret = {'ray_o': self.rays_o.cuda(), # [b, h*w, 3]
               'ray_d': self.rays_d.cuda(), # [b, h*w, 3]
               'depth_range': self.depth_range.cuda(), # [b, 2]
               'intrinsics': self.intrinsics.cuda(), # [b, 4, 4]
               'c2w_mat': self.c2w_mat.cuda(), # [b, 4, 4]
               'rgb': self.rgb.cuda() if self.rgb is not None else None, # [b, h*w, 3]
               'src_rgbs': self.src_rgbs.cuda() if self.src_rgbs is not None else None, # [b, v, h, w, 3]
               'src_intrinsics': self.src_intrinsics.cuda() if self.src_intrinsics is not None else None, # [b, v, 4, 4]
               'src_c2w_mats': self.src_c2w_mats.cuda() if self.src_c2w_mats is not None else None, # [b, v, 4, 4]
               'src_masks': self.src_masks.cuda() if self.src_masks is not None else None, # [b, v, h, w, 1]
        }
        return ret

    def sample_random_pixel(self, N_rand, sample_mode, batch_idx, center_ratio=0.8):
        '''Pick N_rand flat pixel indices (row * W + column) for one view of the batch.
        Args:
            N_rand: number of pixels to pick
            sample_mode: 'center' | 'uniform' | 'bbox' | 'bbox_sample_full'
            batch_idx: index of the view inside the batch (selects the bounding box)
            center_ratio: fraction of each image side kept in 'center' mode
        Returns:
            The selected flat pixel indices.
        Raises:
            ValueError: a bbox mode is requested but the batch carried no 'tgt_bbox'.
            Exception: unknown sample mode.
        '''
        if sample_mode == 'center':
            border_H = int(self.H * (1 - center_ratio) / 2.)
            border_W = int(self.W * (1 - center_ratio) / 2.)

            # pixel coordinates (u runs over rows, v over columns)
            u, v = np.meshgrid(np.arange(border_H, self.H - border_H),
                               np.arange(border_W, self.W - border_W))
            u = u.reshape(-1)
            v = v.reshape(-1)

            select_inds = rng.choice(u.shape[0], size=(N_rand,), replace=False)
            select_inds = v[select_inds] + self.W * u[select_inds]

        elif sample_mode == 'uniform' or (sample_mode == 'bbox' and self.cur_step > self.bbox_steps):
            # Random from one image
            select_inds = rng.choice(self.H*self.W, size=(N_rand,), replace=False)

        elif sample_mode in ('bbox', 'bbox_sample_full'):
            if self.tgt_bbox is None:
                raise ValueError("sample mode '{}' requires 'tgt_bbox' in the input data".format(sample_mode))
            if sample_mode == 'bbox':
                u, v = bbox_sample(self.tgt_bbox[batch_idx], N_rand)
            else:
                u, v = bbox_sample_full(self.tgt_bbox[batch_idx], N_rand, h=self.H, w=self.W, prob=0.8)
            select_inds = v + self.W * u
        else:
            raise Exception("unknown sample mode!")

        return select_inds

    def random_sample(self, N_rand, sample_mode, center_ratio=0.8):
        '''Generate a bundle of randomly sampled rays.
        Args:
            N_rand: number of rays to be casted per view
            sample_mode: pixel sampling strategy (see sample_random_pixel)
            center_ratio: only used by the 'center' strategy
        Returns:
            A dictionary of ray information.
        '''
        # NOTE(review): indices are drawn from the full H*W grid while rays are generated on a
        # render_stride-subsampled grid; this looks like it assumes render_stride == 1 - confirm.
        select_inds = np.stack(
            [self.sample_random_pixel(N_rand, sample_mode, x, center_ratio)
             for x in range(self.batch_size)],
            0)

        views = range(self.batch_size)
        rays_o = torch.stack([self.rays_o[i, select_inds[i]] for i in views], 0)
        rays_d = torch.stack([self.rays_d[i, select_inds[i]] for i in views], 0)

        if self.rgb is not None:
            rgb = torch.stack([self.rgb[i, select_inds[i]] for i in views], 0)
        else:
            rgb = None

        ret = {'ray_o': rays_o.cuda(),
               'ray_d': rays_d.cuda(),
               'intrinsics': self.intrinsics.cuda(),
               'c2w_mat': self.c2w_mat.cuda(),
               'depth_range': self.depth_range.cuda(),
               'rgb': rgb.cuda() if rgb is not None else None,
               'src_rgbs': self.src_rgbs.cuda() if self.src_rgbs is not None else None,
               'src_intrinsics': self.src_intrinsics.cuda() if self.src_intrinsics is not None else None,
               'src_c2w_mats': self.src_c2w_mats.cuda() if self.src_c2w_mats is not None else None,
               'selected_inds': select_inds,
               'src_masks': self.src_masks.cuda() if self.src_masks is not None else None,
        }
        return ret

Utils.py

In [None]:
import subprocess
import torch
import numpy as np
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
import matplotlib as mpl
from matplotlib import cm
import cv2
import os
from datetime import datetime
import shutil

HUGE_NUMBER = 1e10
TINY_NUMBER = 1e-6      # float32 only has 7 decimal digits precision


def img_HWC2CHW(x):
    """Reorder the axes of a 3-D tensor from (0, 1, 2) to (2, 0, 1), i.e. HWC -> CHW."""
    return x.permute(2, 0, 1)


def gray2rgb(x):
    """Append a trailing axis to a 2-D tensor and repeat it three times: [H, W] -> [H, W, 3]."""
    return x.unsqueeze(2).repeat(1, 1, 3)


def to8b(x):
    """Clip an array to [0, 1], scale by 255 and cast to uint8."""
    return (255 * np.clip(x, 0, 1)).astype(np.uint8)


def mse2psnr(x):
    """Compute -10 * log10(x + TINY_NUMBER); TINY_NUMBER keeps the log finite at x == 0."""
    return -10. * np.log(x+TINY_NUMBER) / np.log(10.)


def get_single(data_dict):
    """Keep only the first element of every entry of a batched sample.

    Tensor entries keep a leading batch axis of size one, non-tensor entries
    become one-element lists, and 'img_hw' keeps its [heights, widths] layout
    with each part cut down to its first element.
    """
    def first_only(key, value):
        if key == 'img_hw':
            return [value[0][:1], value[1][:1]]
        head = value[0]
        return head.unsqueeze(0) if isinstance(head, torch.Tensor) else [head]

    return {key: first_only(key, data_dict[key]) for key in data_dict}

def get_views(data_dict, src_indices, tgt_indices):
    """Build one source/target sample per index pair from a batched multi-view sample.

    Per-view entries are indexed along their second axis (the first axis is
    kept as the batch axis).

    Args:
        data_dict: sample from eval dataset
        src_indices: source view indices [#views]
        tgt_indices: target view indices [#views]

    Returns:
        A list of data_dict's, one per entry of src_indices
    """
    shared_keys = ('rgb_path', 'img_id', 'img_hw', 'depth_range')
    target_fields = (('tgt_bbox', 'bbox'), ('tgt_mask', 'masks'), ('tgt_rgb', 'rgbs'),
                     ('tgt_c2w_mat', 'c2w_mats'), ('tgt_intrinsic', 'intrinsics'))
    source_fields = (('src_masks', 'masks'), ('src_rgbs', 'rgbs'),
                     ('src_c2w_mats', 'c2w_mats'), ('src_intrinsics', 'intrinsics'))

    samples = []
    for i, src_idx in enumerate(src_indices):
        tgt_idx = tgt_indices[i]
        sample = {key: data_dict[key] for key in shared_keys}
        for out_key, in_key in target_fields:
            sample[out_key] = data_dict[in_key][:, tgt_idx]
        for out_key, in_key in source_fields:
            # HACK assuming one view: a new leading axis is added in front of the selection
            sample[out_key] = data_dict[in_key][:, src_idx][None, :]
        samples.append(sample)

    return samples

def get_views_single(data_dict, src_indices, tgt_indices):
    """Build one source/target sample per index pair from an un-batched multi-view sample.

    Per-view entries are indexed along their first axis.

    Args:
        data_dict: sample from eval dataset
        src_indices: source view indices [#views]
        tgt_indices: target view indices [#views]

    Returns:
        A list of data_dict's, one per entry of src_indices
    """
    shared_keys = ('rgb_path', 'img_id', 'img_hw', 'depth_range')
    target_fields = (('tgt_bbox', 'bbox'), ('tgt_mask', 'masks'), ('tgt_rgb', 'rgbs'),
                     ('tgt_c2w_mat', 'c2w_mats'), ('tgt_intrinsic', 'intrinsics'))
    source_fields = (('src_masks', 'masks'), ('src_rgbs', 'rgbs'),
                     ('src_c2w_mats', 'c2w_mats'), ('src_intrinsics', 'intrinsics'))

    samples = []
    for i, src_idx in enumerate(src_indices):
        tgt_idx = tgt_indices[i]
        sample = {key: data_dict[key] for key in shared_keys}
        for out_key, in_key in target_fields:
            sample[out_key] = data_dict[in_key][tgt_idx]
        for out_key, in_key in source_fields:
            # HACK assuming one view: a new leading axis is added in front of the selection
            sample[out_key] = data_dict[in_key][src_idx][None, :]
        samples.append(sample)

    return samples

def save_current_code(outdir):
    """Copy the current working directory into a time-stamped 'code_<MM_DD-HH:MM:SS>' folder under outdir.

    Entries matching the ignore patterns below (data, checkpoints, logs,
    outputs, images, videos, caches, VCS/IDE folders, archives) are skipped.
    """
    stamp = datetime.now().strftime("%m_%d-%H:%M:%S")  # current date and time
    skipped = shutil.ignore_patterns('data*', 'pretrained*', 'logs*', 'out*', '*.png', '*.mp4',
                                     '*__pycache__*', '*.git*', '*.idea*', '*.zip', '*.jpg')
    snapshot_dir = os.path.join(outdir, 'code_{}'.format(stamp))
    shutil.copytree('.', snapshot_dir, ignore=skipped)

# Get git commit hash
def get_git_revision_hash():
    """Run `git rev-parse HEAD` and return its raw output as given by subprocess.check_output."""
    command = ['git', 'rev-parse', 'HEAD']
    return subprocess.check_output(command)

def img2mse(x, y, mask=None):
    '''
    Mean squared error between two images, optionally restricted by a mask.
    :param x: img 1, [(...), 3]
    :param y: img 2, [(...), 3]
    :param mask: optional, [(...)]; weights each pixel's squared error
    :return: mse score
    '''
    squared_error = (x - y) * (x - y)
    if mask is not None:
        weighted_sum = torch.sum(squared_error * mask.unsqueeze(-1))
        # mask sum times channel count; TINY_NUMBER guards against an all-zero mask
        normalizer = torch.sum(mask) * x.shape[-1] + TINY_NUMBER
        return weighted_sum / normalizer
    return torch.mean(squared_error)


def img2psnr(x, y, mask=None):
    """PSNR between two images: img2mse(x, y, mask) as a Python number, passed through mse2psnr."""
    mse_value = img2mse(x, y, mask).item()
    return mse2psnr(mse_value)


def cycle(iterable):
    """Yield the items of `iterable` endlessly, iterating it again from the start after every pass."""
    while True:
        yield from iterable


def get_vertical_colorbar(h, vmin, vmax, cmap_name='jet', label=None, cbar_precision=2):
    '''
    Render a vertical colorbar with six evenly spaced ticks as an RGB image.
    :param h: height of the returned image in pixels (the width follows the figure's aspect ratio)
    :param vmin: min value
    :param vmax: max value
    :param cmap_name: name of the matplotlib colormap
    :param label: optional colorbar label
    :param cbar_precision: number of decimals of the tick labels (0 prints integers)
    :return: float32 image [h, w, 3] with values in [0, 1]
    '''
    fig = Figure(figsize=(2, 8), dpi=100)
    fig.subplots_adjust(right=1.5)
    canvas = FigureCanvasAgg(fig)

    # Do some plotting.
    ax = fig.add_subplot(111)
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; fall back to the colormap registry there.
    get_cmap = getattr(cm, 'get_cmap', None)
    cmap = get_cmap(cmap_name) if get_cmap is not None else mpl.colormaps[cmap_name]
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

    tick_cnt = 6
    tick_loc = np.linspace(vmin, vmax, tick_cnt)
    cb1 = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
                                    norm=norm,
                                    ticks=tick_loc,
                                    orientation='vertical')

    tick_label = [str(np.round(x, cbar_precision)) for x in tick_loc]
    if cbar_precision == 0:
        # drop the trailing ".0" left by rounding to zero decimals
        tick_label = [x[:-2] for x in tick_label]

    cb1.set_ticklabels(tick_label)

    cb1.ax.tick_params(labelsize=18, rotation=0)

    if label is not None:
        cb1.set_label(label)

    fig.tight_layout()

    # Rasterize the figure and read the RGBA buffer back as an array.
    canvas.draw()
    s, (width, height) = canvas.print_to_buffer()

    im = np.frombuffer(s, np.uint8).reshape((height, width, 4))

    # Drop alpha and convert to float in [0, 1].
    im = im[:, :, :3].astype(np.float32) / 255.
    if h != im.shape[0]:
        # Resize to the requested height, keeping the aspect ratio.
        w = int(im.shape[1] / im.shape[0] * h)
        im = cv2.resize(im, (w, h), interpolation=cv2.INTER_AREA)

    return im


def colorize_np(x, cmap_name='jet', mask=None, range=None, append_cbar=False, cbar_in_image=False, cbar_precision=2):
    '''
    turn a grayscale image into a color image
    :param x: input grayscale, [H, W]; it is not modified
    :param cmap_name: the colorization method
    :param mask: the mask image, [H, W]; pixels outside the mask are painted white
    :param range: the range for scaling, automatic if None, [min, max]
    :param append_cbar: if append the color bar
    :param cbar_in_image: put the color bar inside the image to keep the output image the same size as the input image
    :param cbar_precision: number of decimals of the color bar tick labels
    :return: colorized image, [H, W, 3] (wider when the color bar is appended next to the image)
    '''
    if range is not None:
        vmin, vmax = range
    elif mask is not None:
        # vmin, vmax = np.percentile(x[mask], (2, 100))
        masked_values = x[mask]
        # smallest non-zero value inside the mask
        vmin = np.min(masked_values[np.nonzero(masked_values)])
        vmax = np.max(masked_values)
        # Work on a copy: writing into x would silently overwrite the caller's array
        # (and the tensor behind it when x shares memory with one).
        x = x.copy()
        x[np.logical_not(mask)] = vmin
    else:
        vmin, vmax = np.percentile(x, (1, 100))
        vmax += TINY_NUMBER

    x = np.clip(x, vmin, vmax)
    x = (x - vmin) / (vmax - vmin)
    # x = np.clip(x, 0., 1.)

    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9; fall back to the colormap registry there.
    get_cmap = getattr(cm, 'get_cmap', None)
    cmap = get_cmap(cmap_name) if get_cmap is not None else mpl.colormaps[cmap_name]
    x_new = cmap(x)[:, :, :3]

    if mask is not None:
        # blend towards white outside the mask
        mask = np.float32(mask[:, :, np.newaxis])
        x_new = x_new * mask + np.ones_like(x_new) * (1. - mask)

    if append_cbar:
        # The color bar is only rendered when it is actually used.
        cbar = get_vertical_colorbar(h=x.shape[0], vmin=vmin, vmax=vmax, cmap_name=cmap_name, cbar_precision=cbar_precision)
        if cbar_in_image:
            x_new[:, -cbar.shape[1]:, :] = cbar
        else:
            x_new = np.concatenate((x_new, np.zeros_like(x_new[:, :5, :]), cbar), axis=1)

    return x_new


# tensor
def colorize(x, cmap_name='jet', mask=None, range=None, append_cbar=False, cbar_in_image=False):
    """Tensor front-end of colorize_np.

    Converts x (and the optional mask) to numpy, binarizes the mask at 0.99 and
    erodes it once with a 3x3 kernel, colorizes, and returns the result as a
    tensor on the device x came from.
    """
    target_device = x.device
    values = x.cpu().numpy()
    if mask is not None:
        binary_mask = (mask.cpu().numpy() > 0.99).astype(np.uint8)
        structuring = np.ones((3, 3), np.uint8)
        mask = cv2.erode(binary_mask, structuring, iterations=1).astype(bool)

    colored = colorize_np(values, cmap_name, mask, range, append_cbar, cbar_in_image)
    return torch.from_numpy(colored).to(target_device)

data/data_utils.py

In [None]:
import cv2
import numpy as np
import math
from PIL import Image
import torchvision.transforms as transforms
import torch
from scipy.spatial.transform import Rotation as R

# Module-level random generator with a fixed seed (234).
rng = np.random.RandomState(234)
# Threshold used as "numerically zero" by the quaternion helpers: 4x the float64 machine epsilon.
_EPS = np.finfo(float).eps * 4.0
TINY_NUMBER = 1e-6      # float32 only has 7 decimal digits precision


def vector_norm(data, axis=None, out=None):
    """Return length, i.e. Euclidean norm, of ndarray along axis.

    The input is copied to float64 first. When `out` is given, the result is
    written into it and nothing is returned. Otherwise a float is returned for
    1-D input and an array (at least 1-D) for anything else.
    """
    values = np.array(data, dtype=np.float64, copy=True)
    if out is not None:
        values *= values
        np.sum(values, axis=axis, out=out)
        np.sqrt(out, out)
        return None

    if values.ndim == 1:
        return math.sqrt(np.dot(values, values))

    values *= values
    lengths = np.atleast_1d(np.sum(values, axis=axis))
    np.sqrt(lengths, lengths)
    return lengths


def quaternion_about_axis(angle, axis):
    """Return quaternion for rotation about axis.

    The first three components hold the axis scaled to length sin(angle / 2)
    (left unscaled when its length is below _EPS); the fourth holds cos(angle / 2).
    """
    half_angle = angle / 2.0
    quat = np.zeros((4, ), dtype=np.float64)
    quat[:3] = axis[:3]
    axis_length = vector_norm(quat)
    if axis_length > _EPS:
        quat *= math.sin(half_angle) / axis_length
    quat[3] = math.cos(half_angle)
    return quat


def quaternion_matrix(quaternion):
    """Return homogeneous rotation matrix from quaternion.

    Components 0..2 are treated as the vector part and component 3 as the
    scalar part (the layout produced by quaternion_about_axis). A quaternion
    whose squared norm is below _EPS yields the identity.
    """
    quat = np.array(quaternion[:4], dtype=np.float64, copy=True)
    norm_sq = np.dot(quat, quat)
    if norm_sq < _EPS:
        return np.identity(4)

    # Scaling by sqrt(2 / |q|^2) makes every entry of the outer product carry the factor 2 / |q|^2.
    quat *= math.sqrt(2.0 / norm_sq)
    prod = np.outer(quat, quat)
    xx, yy, zz = prod[0, 0], prod[1, 1], prod[2, 2]
    xy, xz, xw = prod[0, 1], prod[0, 2], prod[0, 3]
    yz, yw, zw = prod[1, 2], prod[1, 3], prod[2, 3]

    rows = (
        (1.0 - yy - zz, xy - zw, xz + yw, 0.0),
        (xy + zw, 1.0 - xx - zz, yz - xw, 0.0),
        (xz - yw, yz + xw, 1.0 - xx - yy, 0.0),
        (0.0, 0.0, 0.0, 1.0),
    )
    return np.array(rows, dtype=np.float64)


def rectify_inplane_rotation(src_pose, tar_pose, src_img, th=40):
    """Undo a large in-plane (z-axis) rotation of a source view relative to the target view.

    Args:
        src_pose: source camera pose, 4x4
        tar_pose: target camera pose, 4x4
        src_img: source image as a float array [h, w, ...], scaled to 8-bit by multiplying with 255
        th: threshold in degrees; below it the inputs are returned unchanged
    Returns:
        (pose, image): the original objects if |z rotation| < th, otherwise a new
        pose with the z rotation removed and the image rotated accordingly
        (float32, divided by 255 after warping).
    """
    relative = np.linalg.inv(tar_pose).dot(src_pose)
    relative_rot = relative[:3, :3]
    r = R.from_matrix(relative_rot)
    euler = r.as_euler('zxy', degrees=True)
    euler_z = euler[0]
    if np.abs(euler_z) < th:
        return src_pose, src_img

    # Counter-rotate the source orientation about z; the camera position is kept.
    R_rectify = R.from_euler('z', -euler_z, degrees=True).as_matrix()
    src_R_rectified = src_pose[:3, :3].dot(R_rectify)
    out_pose = np.eye(4)
    out_pose[:3, :3] = src_R_rectified
    out_pose[:3, 3:4] = src_pose[:3, 3:4]
    h, w = src_img.shape[:2]
    center = ((w - 1.) / 2., (h - 1.) / 2.)
    M = cv2.getRotationMatrix2D(center, -euler_z, 1)
    # Clip BEFORE casting: a float outside [0, 255] wraps around when cast to uint8,
    # so clipping the already-cast array would have no effect.
    src_img = np.clip(255 * src_img, a_min=0, a_max=255).astype(np.uint8)
    # Pixels rotated in from outside the image are filled with white.
    rotated = cv2.warpAffine(src_img, M, (w, h), borderValue=(255, 255, 255), flags=cv2.INTER_LANCZOS4)
    rotated = rotated.astype(np.float32) / 255.
    return out_pose, rotated


def _random_crop_center(full, crop):
    """Draw a crop center along one axis; use the middle when the random range is empty."""
    low = crop // 2 + 1
    high = full - crop // 2 - 1
    if low >= high:
        # The image is only 1-2 px larger than the crop: np.random.randint would raise.
        return full // 2
    return np.random.randint(low=low, high=high)


def random_crop(rgb, camera, src_rgbs, src_cameras, size=(400, 600), center=None):
    """Crop the target image and all source images with one shared window.

    `camera` and `src_cameras` are updated in place (entries 0/1 receive the
    crop height/width, entries 4/8 are shifted by the crop offset) and returned.

    Args:
        rgb: target image [h, w, c]
        camera: flat target camera vector
        src_rgbs: source images, indexable as [n, h, w, c] after np.array
        src_cameras: flat source camera vectors [n, ...]
        size: (out_h, out_w) of the crop
        center: optional (center_h, center_w); drawn at random when None
    Returns:
        (rgb_out, camera, src_rgbs, src_cameras). If the crop is not strictly
        smaller than the image in both dimensions, the inputs are returned unchanged.
    """
    h, w = rgb.shape[:2]
    out_h, out_w = size[0], size[1]
    if out_w >= w or out_h >= h:
        return rgb, camera, src_rgbs, src_cameras

    if center is not None:
        center_h, center_w = center
    else:
        center_h = _random_crop_center(h, out_h)
        center_w = _random_crop_center(w, out_w)

    top = center_h - out_h // 2
    left = center_w - out_w // 2
    # Slice exactly out_h x out_w pixels so the crop matches the size written into the
    # cameras even for odd sizes (center +/- size // 2 would be one pixel short).
    rgb_out = rgb[top:top + out_h, left:left + out_w, :]
    src_rgbs = np.array(src_rgbs)
    src_rgbs = src_rgbs[:, top:top + out_h, left:left + out_w, :]
    camera[0] = out_h
    camera[1] = out_w
    camera[4] -= left
    camera[8] -= top
    src_cameras[:, 4] -= left
    src_cameras[:, 8] -= top
    src_cameras[:, 0] = out_h
    src_cameras[:, 1] = out_w
    return rgb_out, camera, src_rgbs, src_cameras


def random_flip(rgb, camera, src_rgbs, src_cameras):
    """Mirror the target image and the source images left-right and adjust the cameras in place.

    The flip is unconditional. Camera entry 2 is negated and entry 4 is
    mirrored as width - 1 - value, using each image set's own width.
    """
    tgt_h, tgt_w = rgb.shape[:2]
    src_h, src_w = src_rgbs.shape[1:3]

    flipped_rgb = np.flip(rgb, axis=1).copy()
    flipped_src = np.flip(src_rgbs, axis=-2).copy()

    camera[2] *= -1
    camera[4] = tgt_w - 1. - camera[4]
    src_cameras[:, 2] *= -1
    src_cameras[:, 4] = src_w - 1. - src_cameras[:, 4]
    return flipped_rgb, camera, flipped_src, src_cameras


def get_color_jitter_params(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2):
    """Sample one set of color-jitter parameters.

    A ColorJitter is built only to turn the four arguments into its stored
    brightness/contrast/saturation/hue attributes; the return value is
    whatever ColorJitter.get_params yields for them.
    """
    jitter = transforms.ColorJitter(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)
    ranges = (jitter.brightness, jitter.contrast, jitter.saturation, jitter.hue)
    return transforms.ColorJitter.get_params(*ranges)


def color_jitter(img, transform):
    '''
    Apply a sampled color jitter to one image.
    Args:
        img: np.float32 [h, w, 3]
        transform: the value returned by get_color_jitter_params. Either a callable
            (torchvision versions where ColorJitter.get_params returns a transform), or
            the tuple (fn_idx, brightness, contrast, saturation, hue) returned by newer
            torchvision, where fn_idx gives the order of the four adjustments and a
            factor of None means "skip".
    Returns: transformed np.float32
    '''
    img = Image.fromarray((255.*img).astype(np.uint8))
    if callable(transform):
        img_trans = transform(img)
    else:
        # Newer torchvision: get_params only returns the parameters, so the adjustments
        # are applied here in the sampled order (0: brightness, 1: contrast,
        # 2: saturation, 3: hue).
        fn_idx, brightness, contrast, saturation, hue = transform
        adjust = transforms.functional
        img_trans = img
        for fn_id in fn_idx:
            fn_id = int(fn_id)
            if fn_id == 0 and brightness is not None:
                img_trans = adjust.adjust_brightness(img_trans, brightness)
            elif fn_id == 1 and contrast is not None:
                img_trans = adjust.adjust_contrast(img_trans, contrast)
            elif fn_id == 2 and saturation is not None:
                img_trans = adjust.adjust_saturation(img_trans, saturation)
            elif fn_id == 3 and hue is not None:
                img_trans = adjust.adjust_hue(img_trans, hue)
    img_trans = np.array(img_trans).astype(np.float32) / 255.
    return img_trans


def color_jitter_all_rgbs(rgb, ref_rgbs, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2):
    """Apply one shared, randomly sampled color jitter to an image and all its reference images.

    Returns:
        (jittered rgb, array of jittered reference images)
    """
    shared_transform = get_color_jitter_params(brightness, contrast, saturation, hue)
    jittered_rgb = color_jitter(rgb, shared_transform)
    jittered_refs = np.array([color_jitter(ref_rgb, shared_transform) for ref_rgb in ref_rgbs])
    return jittered_rgb, jittered_refs


def deepvoxels_parse_intrinsics(filepath, trgt_sidelength, invert_y=False):
    """Read an intrinsics text file and rescale it to a square target resolution.

    The file is read line by line: `f cx cy ...`, the grid barycenter, the
    near plane, the scale, `height width`, and optionally an integer
    world2cam flag (treated as False when missing or unparsable).

    Args:
        filepath: path of the intrinsics text file
        trgt_sidelength: side length the intrinsics are rescaled to
        invert_y: negate the vertical focal length
    Returns:
        (4x4 intrinsic matrix, grid_barycenter tensor, scale, near_plane, world2cam_poses)
    """
    def read_floats(handle):
        # one line of whitespace-separated numbers
        return [float(token) for token in handle.readline().split()]

    # Get camera intrinsics
    with open(filepath, 'r') as handle:
        f, cx, cy = read_floats(handle)[:3]
        grid_barycenter = torch.Tensor(read_floats(handle))
        near_plane = float(handle.readline())
        scale = float(handle.readline())
        height, width = read_floats(handle)

        try:
            world2cam_poses = bool(int(handle.readline()))
        except ValueError:
            world2cam_poses = False

    # Rescale principal point and focal length from the file's resolution to the target side length.
    cx = cx / width * trgt_sidelength
    cy = cy / height * trgt_sidelength
    f = trgt_sidelength / height * f

    fx = f
    fy = -f if invert_y else f

    # Build the intrinsic matrices
    full_intrinsic = np.array([[fx, 0., cx, 0.],
                               [0., fy, cy, 0],
                               [0., 0, 1, 0],
                               [0, 0, 0, 1]])

    return full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses


def angular_dist_between_2_vectors(vec1, vec2):
    """Row-wise angle (radians) between two 2-D arrays of vectors.

    Each row is normalized by its length plus TINY_NUMBER (so zero vectors do
    not divide by zero) and the cosine is clipped to [-1, 1] before arccos.
    """
    def normalized(vectors):
        return vectors / (np.linalg.norm(vectors, axis=1, keepdims=True) + TINY_NUMBER)

    cosines = np.sum(normalized(vec1) * normalized(vec2), axis=-1)
    return np.arccos(np.clip(cosines, -1.0, 1.0))


def batched_angular_dist_rot_matrix(R1, R2):
    '''
    calculate the angular distance between two rotation matrices (batched)
    :param R1: the first rotation matrix [N, 3, 3]
    :param R2: the second rotation matrix [N, 3, 3]
    :return: angular distance in radians [N, ]
    '''
    assert all(dim == 3 for dim in (R1.shape[-1], R2.shape[-1], R1.shape[-2], R2.shape[-2]))
    # The angle of the relative rotation R2^T R1 follows from its trace: cos = (trace - 1) / 2.
    relative = np.matmul(R2.transpose(0, 2, 1), R1)
    cos_angle = (np.trace(relative, axis1=1, axis2=2) - 1) / 2.
    # The cosine is kept strictly inside (-1, 1) before arccos.
    return np.arccos(np.clip(cos_angle, a_min=-1 + TINY_NUMBER, a_max=1 - TINY_NUMBER))


def get_nearest_pose_ids(tar_pose, ref_poses, num_select, tar_id=-1, angular_dist_method='vector',
                         scene_center=(0, 0, 0)):
    '''
    Select the reference views closest to a target view.
    Args:
        tar_pose: target pose [4, 4]
        ref_poses: reference poses [N, 4, 4]
        num_select: the number of nearest views to select (capped at N - 1)
        tar_id: index of the target view inside ref_poses; when >= 0 that view is never selected
        angular_dist_method: 'matrix' (angle between rotations), 'vector' (angle between the
            camera positions as seen from scene_center) or 'dist' (Euclidean distance between
            camera positions)
        scene_center: reference point used by the 'vector' method
    Returns: the selected indices, nearest first
    '''
    num_cams = len(ref_poses)
    num_select = min(num_select, num_cams-1)
    batched_tar_pose = tar_pose[None, ...].repeat(num_cams, 0)

    if angular_dist_method == 'matrix':
        dists = batched_angular_dist_rot_matrix(batched_tar_pose[:, :3, :3], ref_poses[:, :3, :3])
    elif angular_dist_method == 'vector':
        tar_cam_locs = batched_tar_pose[:, :3, 3]
        ref_cam_locs = ref_poses[:, :3, 3]
        scene_center = np.array(scene_center)[None, ...]
        tar_vectors = tar_cam_locs - scene_center
        ref_vectors = ref_cam_locs - scene_center
        dists = angular_dist_between_2_vectors(tar_vectors, ref_vectors)
    elif angular_dist_method == 'dist':
        tar_cam_locs = batched_tar_pose[:, :3, 3]
        ref_cam_locs = ref_poses[:, :3, 3]
        dists = np.linalg.norm(tar_cam_locs - ref_cam_locs, axis=1)
    else:
        raise Exception('unknown angular distance calculation method!')

    if tar_id >= 0:
        assert tar_id < num_cams
        # make sure not to select the target id itself; infinity (rather than a finite
        # sentinel) also works when 'dist' yields distances larger than any fixed constant
        dists[tar_id] = np.inf

    sorted_ids = np.argsort(dists)
    selected_ids = sorted_ids[:num_select]
    # print(angular_dists[selected_ids] * 180 / np.pi)
    return selected_ids

data/DVR.py

In [None]:
import os
import glob

import imageio
import numpy as np
import tqdm
import torch
import torch.nn.functional as F
import  torchvision.transforms as T
from torch.utils.data import Dataset


def parse_pose(path, num_views):
    """Load per-view intrinsics and camera-to-world matrices from a cameras archive.

    For every view i the archive must contain 'camera_mat_<i>' and either
    'world_mat_inv_<i>' (used directly as camera-to-world) or 'world_mat_<i>'
    (world-to-camera; a 3-row matrix is completed with [0, 0, 0, 1] and inverted).

    Args:
        path: path of the .npz archive
        num_views: number of views to read (indices 0 .. num_views - 1)
    Returns:
        (intrinsics, c2w_mats), each stacked along a new first axis
    """
    intrinsics = []
    c2w_mats = []

    # np.load keeps an .npz archive open until it is closed; the with-block releases the
    # file handle, which matters when this is called once per object of a large dataset.
    with np.load(path) as cameras:
        for i in range(num_views):
            # ShapeNet
            wmat_inv_key = "world_mat_inv_" + str(i)
            wmat_key = "world_mat_" + str(i)
            kmat_key = "camera_mat_" + str(i)
            if wmat_inv_key in cameras:
                c2w_mat = cameras[wmat_inv_key]
            else:
                w2c_mat = cameras[wmat_key]
                if w2c_mat.shape[0] == 3:
                    w2c_mat = np.vstack((w2c_mat, np.array([0, 0, 0, 1])))
                c2w_mat = np.linalg.inv(w2c_mat)

            intrinsics.append(cameras[kmat_key])
            c2w_mats.append(c2w_mat)

    intrinsics = np.stack(intrinsics, 0)
    c2w_mats = np.stack(c2w_mats, 0)

    return intrinsics, c2w_mats

class DVRDataset(Dataset):
    """
    Dataset from DVR (Niemeyer et al. 2020)
    Provides 3D-R2N2 and NMR renderings

    Every item is one randomly chosen target view of an object together with
    `num_source_views` randomly chosen source views of the same object.
    """
    def __init__(self, args, mode,
                **kwargs):
        """
        Args:
            args.data_path: path to data directory
            args.img_hw: image size (resize if needed)
            args.debug: if true, only the first object is kept
            args.num_source_views: number of source views returned per item
            args.closest_n_views: if > 0, source views are drawn among this many nearest views
            mode: train | test | val mode
        Raises:
            ValueError: if mode is not one of train | val | test
        """
        super().__init__()
        self.base_path = args.data_path
        self.dataset_name = os.path.basename(args.data_path)
        assert os.path.exists(self.base_path)

        # every sub-directory of the data path is one category
        cats = [x for x in glob.glob(os.path.join(args.data_path, "*")) if os.path.isdir(x)]

        list_prefix = "softras_" # Train on all categories and eval on them

        if mode not in ("train", "val", "test"):
            # Fail early with a clear message instead of an unbound `file_lists` further down.
            raise ValueError("unknown mode '{}', expected 'train', 'val' or 'test'".format(mode))
        file_lists = [os.path.join(x, list_prefix + mode + ".lst") for x in cats]

        print("Loading NMR dataset", self.base_path, "name:", self.dataset_name, "mode:", mode)

        self.mode = mode

        # Collect (category, object directory) pairs from the per-category list files.
        all_objs = []
        for file_list in file_lists:
            if not os.path.exists(file_list):
                continue
            base_dir = os.path.dirname(file_list)
            cat = os.path.basename(base_dir)
            with open(file_list, "r") as f:
                objs = [(cat, os.path.join(base_dir, x.strip())) for x in f.readlines()]
            all_objs.extend(objs)

        self.all_objs = all_objs

        if args.debug:
            self.all_objs = self.all_objs[:1]


        # Per object: sorted image paths plus the camera parameters of every view.
        self.intrinsics = []
        self.poses = []
        self.rgb_paths = []
        for _, path in tqdm.tqdm(self.all_objs):
            curr_paths = sorted(glob.glob(os.path.join(path, "image", "*")))
            self.rgb_paths.append(curr_paths)

            pose_path = os.path.join(path, 'cameras.npz')
            intrinsics, c2w_mats = parse_pose(pose_path, len(curr_paths))

            self.poses.append(c2w_mats)
            self.intrinsics.append(intrinsics)

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        assert(len(self.rgb_paths) == len(self.poses))

        self.define_transforms()
        self.img_hw = args.img_hw

        self.num_views = args.num_source_views
        self.closest_n_views = args.closest_n_views

        # Default near/far plane depth
        self.z_near = 1.2
        self.z_far = 4.0

    def __len__(self):
        """Number of objects."""
        return len(self.intrinsics)

    def define_transforms(self):
        """Create the image and mask transforms (ToTensor followed by an identity Normalize)."""
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Return one target view and `num_views` source views of object `index`.

        The target view is drawn at random. 'rgb_path' in the returned dictionary
        is the path of the target image.
        """
        train_poses = self.poses[index]

        render_idx = np.random.choice(len(train_poses), 1, replace=False)[0]

        intrinsic = self.intrinsics[index][render_idx].copy()

        rgb_path = self.rgb_paths[index, render_idx]
        render_pose = train_poses[render_idx]
        if self.closest_n_views > 0:
            nearest_pose_ids = get_nearest_pose_ids(render_pose,
                                                    train_poses,
                                                    self.closest_n_views,
                                                    tar_id=render_idx,
                                                    angular_dist_method='vector')
        else:
            # all views except the target are candidates
            nearest_pose_ids = np.arange(len(train_poses))
            nearest_pose_ids = np.delete(nearest_pose_ids, render_idx)
        nearest_pose_ids = np.random.choice(nearest_pose_ids, self.num_views, replace=False)

        # Read target RGB
        img = imageio.imread(rgb_path)[..., :3]
        # foreground = every pixel that is not pure white
        mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
        tgt_rgb = self.img_transforms(img)
        tgt_mask = self.mask_transforms(mask)

        # Scale the focal terms by half the image size and put the principal point at the image center.
        intrinsic[0, 0] *= img.shape[1] / 2.0
        intrinsic[1, 1] *= img.shape[0] / 2.0
        intrinsic[0, 2] = img.shape[1] / 2.0
        intrinsic[1, 2] = img.shape[0] / 2.0

        h, w = tgt_rgb.shape[-2:]
        if (h != self.img_hw[0]) or (w != self.img_hw[1]):
            scale = self.img_hw[-1] / img.shape[1]
            intrinsic[:2] *= scale

            tgt_rgb = F.interpolate(tgt_rgb[None, :], size=self.img_hw, mode="area")[0]
            tgt_mask = F.interpolate(tgt_mask[None, :], size=self.img_hw, mode="area")[0]

        # Bounding box of the foreground: first/last row and column that contain mask pixels.
        yy = torch.any(tgt_mask, axis=2)
        xx = torch.any(tgt_mask, axis=1)
        ynz = torch.nonzero(yy)[:, 1]
        xnz = torch.nonzero(xx)[:, 1]
        ymin, ymax = ynz[[0, -1]]
        xmin, xmax = xnz[[0, -1]]
        tgt_bbox = torch.FloatTensor([xmin, ymin, xmax, ymax])

        # Read source RGB
        src_rgb_paths = [self.rgb_paths[index][x] for x in nearest_pose_ids]
        src_c2w_mats = np.array([train_poses[x] for x in nearest_pose_ids])
        src_intrinsics = np.array(self.intrinsics[index][nearest_pose_ids])

        src_intrinsics[..., 0, 0] *= img.shape[1] / 2.0
        src_intrinsics[..., 1, 1] *= img.shape[0] / 2.0
        src_intrinsics[..., 0, 2] = img.shape[1] / 2.0
        src_intrinsics[..., 1, 2] = img.shape[0] / 2.0

        src_rgbs = []
        src_masks = []
        # The loop uses its own names so that `rgb_path` keeps pointing at the target image.
        for i, src_rgb_path in enumerate(src_rgb_paths):
            src_img = imageio.imread(src_rgb_path)[..., :3]
            src_mask = (src_img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
            rgb = self.img_transforms(src_img)
            src_mask = self.mask_transforms(src_mask)

            h, w = rgb.shape[-2:]
            if (h != self.img_hw[0]) or (w != self.img_hw[1]):
                scale = self.img_hw[-1] / w
                src_intrinsics[i, :2] *= scale

                rgb = F.interpolate(rgb[None, :], size=self.img_hw, mode="area")[0]
                src_mask = F.interpolate(src_mask[None, :], size=self.img_hw, mode="area")[0]

            src_rgbs.append(rgb)
            src_masks.append(src_mask)

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": rgb_path,
            "img_id": index,
            "img_hw": self.img_hw,
            "tgt_mask": tgt_mask.permute([1, 2, 0]).float(),
            "tgt_rgb": tgt_rgb.permute([1, 2, 0]).float(),
            "tgt_c2w_mat": torch.FloatTensor(render_pose),
            "tgt_intrinsic": torch.FloatTensor(intrinsic),
            "tgt_bbox": tgt_bbox,
            "src_masks": torch.stack(src_masks).permute([0, 2, 3, 1]).float(),
            "src_rgbs": torch.stack(src_rgbs).permute([0, 2, 3, 1]).float(),
            "src_c2w_mats": torch.FloatTensor(src_c2w_mats),
            "src_intrinsics": torch.FloatTensor(src_intrinsics),
            "depth_range": torch.FloatTensor(depth_range)
        }

data/DVR_eval.py

In [None]:
import os
import glob

import imageio
import numpy as np
import tqdm
import torch
import torch.nn.functional as F
import  torchvision.transforms as T
from torch.utils.data import Dataset

def parse_pose(path, num_views):
    """Load per-view camera matrices from a DVR-style ``.npz`` camera archive.

    For every view index ``i`` in ``range(num_views)`` the archive must hold
    ``camera_mat_<i>`` and either ``world_mat_inv_<i>`` (taken as the
    camera-to-world matrix as is) or ``world_mat_<i>`` (inverted here; a
    3-row matrix is first padded with the row ``[0, 0, 0, 1]``).

    NOTE(review): the SRN cells later in this notebook define a one-argument
    ``parse_pose(path)`` under the same name; when cells run top to bottom
    that later definition replaces this one.

    Args:
        path: path to the ``.npz`` archive.
        num_views: number of views to read.

    Returns:
        Tuple ``(intrinsics, c2w_mats)``, each stacked along axis 0 in view
        order.

    Raises:
        KeyError: if a required key is missing from the archive.
    """
    intrinsics = []
    c2w_mats = []

    # np.load on an .npz returns an NpzFile that keeps the archive open;
    # the context manager closes it instead of leaking one handle per object.
    with np.load(path) as cameras:
        for i in range(num_views):
            # ShapeNet
            wmat_inv_key = "world_mat_inv_" + str(i)
            wmat_key = "world_mat_" + str(i)
            kmat_key = "camera_mat_" + str(i)
            if wmat_inv_key in cameras:
                c2w_mat = cameras[wmat_inv_key]
            else:
                w2c_mat = cameras[wmat_key]
                if w2c_mat.shape[0] == 3:
                    # Pad 3x4 to a homogeneous 4x4 so it can be inverted.
                    w2c_mat = np.vstack((w2c_mat, np.array([0, 0, 0, 1])))
                c2w_mat = np.linalg.inv(w2c_mat)

            intrinsics.append(cameras[kmat_key])
            c2w_mats.append(c2w_mat)

    intrinsics = np.stack(intrinsics, 0)
    c2w_mats = np.stack(c2w_mats, 0)

    return intrinsics, c2w_mats

class DVREvalDataset(Dataset):
    """
    Evaluation dataset from DVR (Niemeyer et al. 2020)
    Provides 3D-R2N2 and NMR renderings

    One item is one object together with *all* of its views (images, masks,
    bounding boxes, poses and intrinsics).
    """
    def __init__(self, args, mode,
                **kwargs):
        """
        Args:
            args.data_path: path to data directory (one sub-directory per category)
            args.img_hw: image size (resize if needed)
            args.debug: if truthy, keep only the first object
            args.num_source_views: stored as ``self.num_views``
            args.closest_n_views: stored as ``self.closest_n_views``
            mode: train | test | val mode

        Raises:
            ValueError: if ``mode`` is not one of ``train``, ``val``, ``test``.
        """
        super().__init__()
        self.base_path = args.data_path
        self.dataset_name = os.path.basename(args.data_path)
        assert os.path.exists(self.base_path)

        # Fail early with a clear message; previously an unknown mode left
        # ``file_lists`` unbound and surfaced later as an UnboundLocalError.
        if mode not in ("train", "val", "test"):
            raise ValueError(
                "mode must be 'train', 'val' or 'test', got {!r}".format(mode)
            )

        cats = [x for x in glob.glob(os.path.join(args.data_path, "*")) if os.path.isdir(x)]

        list_prefix = "gen_"

        # Each category directory carries a split file named gen_<mode>.lst.
        file_lists = [os.path.join(x, list_prefix + mode + ".lst") for x in cats]

        print("Loading NMR dataset", self.base_path, "name:", self.dataset_name, "mode:", mode)
        self.mode = mode

        all_objs = []
        for file_list in file_lists:
            # Categories without a split file for this mode are skipped.
            if not os.path.exists(file_list):
                continue
            base_dir = os.path.dirname(file_list)
            cat = os.path.basename(base_dir)
            with open(file_list, "r") as f:
                objs = [(cat, os.path.join(base_dir, x.strip())) for x in f.readlines()]
            all_objs.extend(objs)

        self.all_objs = all_objs

        if args.debug:
            self.all_objs = self.all_objs[:1]

        if mode == "val" or mode == "test":
            self.all_objs = self.all_objs[:100] # HACK to avoid reading too much things

        self.intrinsics = []
        self.poses = []
        self.rgb_paths = []
        for _, path in tqdm.tqdm(self.all_objs):
            curr_paths = sorted(glob.glob(os.path.join(path, "image", "*")))
            self.rgb_paths.append(curr_paths)

            pose_path = os.path.join(path, 'cameras.npz')
            # NOTE(review): this needs the two-argument DVR parse_pose(path, num_views);
            # the SRN cells later in this notebook define a one-argument parse_pose
            # under the same name - confirm which one is bound when this runs.
            intrinsics, c2w_mats = parse_pose(pose_path, len(curr_paths))

            self.poses.append(c2w_mats)
            self.intrinsics.append(intrinsics)

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        assert(len(self.rgb_paths) == len(self.poses))

        self.define_transforms()
        self.img_hw = args.img_hw

        self.num_views = args.num_source_views
        self.closest_n_views = args.closest_n_views

        # default near/far plane depth
        self.z_near = 1.2
        self.z_far = 4.0

    def __len__(self):
        """Number of objects (one intrinsics entry is stored per object)."""
        return len(self.intrinsics)

    def define_transforms(self):
        """Create the image/mask transforms.

        ``Normalize`` with mean 0 and std 1 leaves values unchanged, so both
        pipelines amount to ``ToTensor``.
        """
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Load every view of object ``index``.

        Returns:
            dict with the first image path, the object index, ``img_hw``,
            per-view ``bbox`` (xmin, ymin, xmax, ymax), ``masks`` and ``rgbs``
            in (V, H, W, C) layout, ``c2w_mats``, ``intrinsics`` and
            ``depth_range`` (near, far).
        """
        rgb_paths = self.rgb_paths[index]
        # np.array(...) copies, so the in-place edits below never touch the
        # matrices stored on the dataset.
        c2w_mats = np.array(self.poses[index])
        intrinsics = np.array(self.intrinsics[index])

        rgbs = []
        masks = []
        bboxes = []

        # Read all RGB
        for i in range(len(rgb_paths)):
            img = imageio.imread(rgb_paths[i])[..., :3]
            # Foreground = every pixel that is not pure white (255, 255, 255).
            mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
            rgb = self.img_transforms(img)
            mask = self.mask_transforms(mask)

            # Rescale focal terms by half the image size and put the principal
            # point at the image centre (presumably the stored matrices use
            # normalized [-1, 1] image coordinates - TODO confirm).
            intrinsics[i, 0, 0] *= img.shape[1] / 2.0
            intrinsics[i, 1, 1] *= img.shape[0] / 2.0
            intrinsics[i, 0, 2] = img.shape[1] / 2.0
            intrinsics[i, 1, 2] = img.shape[0] / 2.0

            h, w = rgb.shape[-2:]
            if (h != self.img_hw[0]) or (w != self.img_hw[1]):
                # Scale the first two intrinsic rows by the width ratio.
                scale = self.img_hw[-1] / w
                intrinsics[i, :2] *= scale

                rgb = F.interpolate(rgb[None, :], size=self.img_hw, mode="area")[0]
                mask = F.interpolate(mask[None, :], size=self.img_hw, mode="area")[0]

            rgbs.append(rgb)
            masks.append(mask)

            # Bounding box of the foreground: first/last row and column that
            # contain any non-zero mask value. An all-background image yields
            # empty index tensors and the [[0, -1]] lookup fails.
            yy = torch.any(mask, axis=2)
            xx = torch.any(mask, axis=1)
            ynz = torch.nonzero(yy)[:, 1]
            xnz = torch.nonzero(xx)[:, 1]
            ymin, ymax = ynz[[0, -1]]
            xmin, xmax = xnz[[0, -1]]
            bbox = torch.FloatTensor([xmin, ymin, xmax, ymax])

            bboxes.append(bbox)

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": rgb_paths[0],
            "img_id": index,
            "img_hw": self.img_hw,
            "bbox": torch.stack(bboxes, 0),
            "masks": torch.stack(masks).permute([0, 2, 3, 1]).float(),
            "rgbs": torch.stack(rgbs).permute([0, 2, 3, 1]).float(),
            "c2w_mats": torch.FloatTensor(c2w_mats),
            "intrinsics": torch.FloatTensor(intrinsics),
            "depth_range": torch.FloatTensor(depth_range)
        }

data/srn.py

In [None]:
import os
import glob

import imageio
import numpy as np
import tqdm
import torch
import torch.nn.functional as F
import  torchvision.transforms as T
from torch.utils.data import Dataset

def parse_intrinsic(path):
    """Build a 4x4 pinhole intrinsic matrix from an SRN ``intrinsics.txt``.

    Only the first line of the file is used; it must hold four numbers, read
    as ``focal cx cy <ignored>``.
    """
    with open(path, "r") as handle:
        header = handle.readlines()[0]
    focal, cx, cy, _ = (float(token) for token in header.split())

    return np.array([
        [focal, 0, cx, 0],
        [0, focal, cy, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])

def parse_pose(path):
    """Read a text file of 16 numbers and return it as a 4x4 float32 matrix."""
    values = np.loadtxt(path, dtype=np.float32)
    return values.reshape(4, 4)

class SRNDataset(Dataset):
    """
    Dataset from SRN (V. Sitzmann et al. 2020)

    One item is one randomly chosen target view of an object plus
    ``num_views`` randomly chosen source views of the same object.
    """
    def __init__(self, args, mode, **kwargs):
        """
        Args:
            args.data_path: path to data directory (``"_" + mode`` is appended)
            args.img_hw: image size (resize if needed)
            args.debug: if truthy, keep only the first object
            args.num_source_views: number of source views returned per item
            args.closest_n_views: if > 0, sample the source views among this
                many nearest poses
            mode: train | test | val mode
        """
        super().__init__()
        self.base_path = args.data_path + "_" + mode
        self.dataset_name = os.path.basename(args.data_path)

        print("Loading SRN dataset", self.base_path, "name:", self.dataset_name)
        self.mode = mode
        assert os.path.exists(self.base_path)

        is_chair = "chair" in self.dataset_name
        if is_chair and mode == "train":
            # Ugly thing from SRN's public dataset
            tmp = os.path.join(self.base_path, "chairs_2.0_train")
            if os.path.exists(tmp):
                self.base_path = tmp

        intrinsic_paths = sorted(
            glob.glob(os.path.join(self.base_path, "*", "intrinsics.txt"))
        )

        if args.debug:
            intrinsic_paths = intrinsic_paths[:1]

        self.intrinsics = []
        self.poses = []
        self.rgb_paths = []
        for path in tqdm.tqdm(intrinsic_paths):
            obj_dir = os.path.dirname(path)
            curr_paths = sorted(glob.glob(os.path.join(obj_dir, "rgb", "*")))
            self.rgb_paths.append(curr_paths)

            pose_paths = [self._pose_path_for(f) for f in curr_paths]
            c2w_mats = [parse_pose(x) for x in
                    pose_paths]
            self.poses.append(c2w_mats)

            self.intrinsics.append(parse_intrinsic(path))

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        assert(len(self.rgb_paths) == len(self.poses))

        self.define_transforms()
        self.img_hw = args.img_hw

        self.num_views = args.num_source_views
        self.closest_n_views = args.closest_n_views

        # Default near/far plane depth
        if is_chair:
            self.z_near = 1.25
            self.z_far = 2.75
        else:
            self.z_near = 0.8
            self.z_far = 1.8

    @staticmethod
    def _pose_path_for(rgb_path):
        """Map ``<obj>/rgb/<name>.<ext>`` to ``<obj>/pose/<name>.txt``.

        Only the last directory and the extension are rewritten, so parent
        directories that happen to contain "rgb" or "png" are left alone
        (a plain ``str.replace`` on the whole path would corrupt them).
        """
        obj_dir = os.path.dirname(os.path.dirname(rgb_path))
        stem = os.path.splitext(os.path.basename(rgb_path))[0]
        return os.path.join(obj_dir, "pose", stem + ".txt")

    def __len__(self):
        """Number of objects (one intrinsic matrix is stored per object)."""
        return len(self.intrinsics)

    def define_transforms(self):
        """Create the image/mask transforms.

        ``Normalize`` with mean 0 and std 1 leaves values unchanged, so both
        pipelines amount to ``ToTensor``.
        """
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Sample one target view and ``num_views`` source views of object ``index``.

        Sampling uses ``np.random`` and is therefore not deterministic unless
        the caller seeds NumPy.

        Returns:
            dict with the target image path (``rgb_path``), object index,
            ``img_hw``, target mask/RGB in (H, W, C) layout, target pose,
            intrinsic and bbox (xmin, ymin, xmax, ymax), source masks/RGBs in
            (V, H, W, C) layout, source poses/intrinsics and ``depth_range``.
        """
        # Copy so the in-place rescaling below never alters the stored matrix.
        intrinsic = self.intrinsics[index].copy()

        train_poses = self.poses[index]

        render_idx = np.random.choice(len(train_poses), 1, replace=False)[0]
        tgt_rgb_path = self.rgb_paths[index, render_idx]
        render_pose = train_poses[render_idx]
        if self.closest_n_views > 0:
            nearest_pose_ids = get_nearest_pose_ids(render_pose,
                                                    train_poses,
                                                    self.closest_n_views,
                                                    tar_id=render_idx,
                                                    angular_dist_method='vector')
        else:
            # Every view except the target is a candidate source view.
            nearest_pose_ids = np.arange(len(train_poses))
            nearest_pose_ids = np.delete(nearest_pose_ids, render_idx)
        nearest_pose_ids = np.random.choice(nearest_pose_ids, self.num_views, replace=False)

        # Read target RGB
        img = imageio.imread(tgt_rgb_path)[..., :3]
        # Foreground = every pixel that is not pure white (255, 255, 255).
        mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
        tgt_rgb = self.img_transforms(img)
        tgt_mask = self.mask_transforms(mask)

        h, w = tgt_rgb.shape[-2:]
        if (h != self.img_hw[0]) or (w != self.img_hw[1]):
            # Scale the first two intrinsic rows by the width ratio.
            scale = self.img_hw[-1] / img.shape[1]
            intrinsic[:2] *= scale

            tgt_rgb = F.interpolate(tgt_rgb[None, :], size=self.img_hw, mode="area")[0]
            tgt_mask = F.interpolate(tgt_mask[None, :], size=self.img_hw, mode="area")[0]

        # Bounding box of the foreground: first/last row and column that
        # contain any non-zero mask value. An all-background image yields
        # empty index tensors and the [[0, -1]] lookup fails.
        yy = torch.any(tgt_mask, axis=2)
        xx = torch.any(tgt_mask, axis=1)
        ynz = torch.nonzero(yy)[:, 1]
        xnz = torch.nonzero(xx)[:, 1]
        ymin, ymax = ynz[[0, -1]]
        xmin, xmax = xnz[[0, -1]]
        tgt_bbox = torch.FloatTensor([xmin, ymin, xmax, ymax])

        # Read source RGB
        src_rgb_paths = [self.rgb_paths[index][x] for x in nearest_pose_ids]
        src_c2w_mats = np.array([train_poses[x] for x in nearest_pose_ids])
        src_intrinsics = np.array([self.intrinsics[index]] * len(nearest_pose_ids))

        src_rgbs = []
        src_masks = []
        # The loop variable has its own name: reusing ``rgb_path`` here used to
        # overwrite the target path, so the returned "rgb_path" pointed at the
        # last source view instead of the target.
        for i, src_rgb_path in enumerate(src_rgb_paths):
            img = imageio.imread(src_rgb_path)[..., :3]
            mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
            rgb = self.img_transforms(img)
            mask = self.mask_transforms(mask)

            h, w = rgb.shape[-2:]
            if (h != self.img_hw[0]) or (w != self.img_hw[1]):
                scale = self.img_hw[-1] / w
                src_intrinsics[i, :2] *= scale

                rgb = F.interpolate(rgb[None, :], size=self.img_hw, mode="area")[0]
                mask = F.interpolate(mask[None, :], size=self.img_hw, mode="area")[0]

            src_rgbs.append(rgb)
            src_masks.append(mask)

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": tgt_rgb_path,
            "img_id": index,
            "img_hw": self.img_hw,
            "tgt_mask": tgt_mask.permute([1, 2, 0]).float(),
            "tgt_rgb": tgt_rgb.permute([1, 2, 0]).float(),
            "tgt_c2w_mat": torch.FloatTensor(render_pose),
            "tgt_intrinsic": torch.FloatTensor(intrinsic),
            "tgt_bbox": tgt_bbox,
            "src_masks": torch.stack(src_masks).permute([0, 2, 3, 1]).float(),
            "src_rgbs": torch.stack(src_rgbs).permute([0, 2, 3, 1]).float(),
            "src_c2w_mats": torch.FloatTensor(src_c2w_mats),
            "src_intrinsics": torch.FloatTensor(src_intrinsics),
            "depth_range": torch.FloatTensor(depth_range)
        }

data/srn_eval.py

In [None]:
import os
import glob

import imageio
import numpy as np
import tqdm
import torch
import torch.nn.functional as F
import  torchvision.transforms as T
from torch.utils.data import Dataset

def parse_intrinsic(path):
    """Parse the first line of an SRN ``intrinsics.txt`` into a 4x4 matrix.

    The line must contain four numbers: focal length, principal point x,
    principal point y, and a fourth value that is ignored.
    """
    with open(path, "r") as src:
        first_line = src.readlines()[0]
    focal, cx, cy, _ = [float(field) for field in first_line.split()]

    rows = [
        [focal, 0, cx, 0],
        [0, focal, cy, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ]
    return np.array(rows)

def parse_pose(path):
    """Load 16 numbers from a text file as a 4x4 float32 matrix."""
    flat = np.loadtxt(path, dtype=np.float32)
    pose = flat.reshape(4, 4)
    return pose

class SRNEvalDataset(Dataset):
    """
    Dataset from SRN (V. Sitzmann et al. 2020)

    Evaluation variant: one item is one object together with *all* of its
    views (images, masks, bounding boxes, poses and intrinsics).
    """
    def __init__(self, args, mode, **kwargs):
        """
        Args:
            args.data_path: path to data directory (``"_" + mode`` is appended)
            args.img_hw: image size (resize if needed)
            args.debug: if truthy, keep only the first 10 objects
            args.num_source_views: stored as ``self.num_views``
            args.closest_n_views: stored as ``self.closest_n_views``
            mode: train | test | val mode
        """
        super().__init__()
        self.base_path = args.data_path + "_" + mode
        self.dataset_name = os.path.basename(args.data_path)

        print("Loading SRN dataset", self.base_path, "name:", self.dataset_name)
        self.mode = mode
        assert os.path.exists(self.base_path)

        is_chair = "chair" in self.dataset_name
        if is_chair and mode == "train":
            # Ugly thing from SRN's public dataset
            tmp = os.path.join(self.base_path, "chairs_2.0_train")
            if os.path.exists(tmp):
                self.base_path = tmp

        intrinsic_paths = sorted(
            glob.glob(os.path.join(self.base_path, "*", "intrinsics.txt"))
        )

        if args.debug:
            intrinsic_paths = intrinsic_paths[:10]

        self.intrinsics = []
        self.poses = []
        self.rgb_paths = []
        for path in tqdm.tqdm(intrinsic_paths):
            obj_dir = os.path.dirname(path)
            curr_paths = sorted(glob.glob(os.path.join(obj_dir, "rgb", "*")))
            self.rgb_paths.append(curr_paths)

            pose_paths = [self._pose_path_for(f) for f in curr_paths]
            c2w_mats = [parse_pose(x) for x in
                    pose_paths]
            self.poses.append(c2w_mats)

            self.intrinsics.append(parse_intrinsic(path))

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        assert(len(self.rgb_paths) == len(self.poses))

        self.define_transforms()
        self.img_hw = args.img_hw

        self.num_views = args.num_source_views
        self.closest_n_views = args.closest_n_views

        # Default near/far plane depth
        if is_chair:
            self.z_near = 1.25
            self.z_far = 2.75
        else:
            self.z_near = 0.8
            self.z_far = 1.8

    @staticmethod
    def _pose_path_for(rgb_path):
        """Map ``<obj>/rgb/<name>.<ext>`` to ``<obj>/pose/<name>.txt``.

        Only the last directory and the extension are rewritten, so parent
        directories that happen to contain "rgb" or "png" are left alone
        (a plain ``str.replace`` on the whole path would corrupt them).
        """
        obj_dir = os.path.dirname(os.path.dirname(rgb_path))
        stem = os.path.splitext(os.path.basename(rgb_path))[0]
        return os.path.join(obj_dir, "pose", stem + ".txt")

    def __len__(self):
        """Number of objects (one intrinsic matrix is stored per object)."""
        return len(self.intrinsics)

    def define_transforms(self):
        """Create the image/mask transforms.

        ``Normalize`` with mean 0 and std 1 leaves values unchanged, so both
        pipelines amount to ``ToTensor``.
        """
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Load every view of object ``index``.

        Returns:
            dict with the first image path, the object index, ``img_hw``,
            per-view ``bbox`` (xmin, ymin, xmax, ymax), ``masks`` and ``rgbs``
            in (V, H, W, C) layout, ``c2w_mats``, ``intrinsics`` and
            ``depth_range`` (near, far).
        """
        rgb_paths = self.rgb_paths[index]
        c2w_mats = np.array(self.poses[index])
        # One copy of the object's intrinsic matrix per view; np.array builds
        # a fresh array, so per-view rescaling does not touch the stored one.
        intrinsics = np.array([self.intrinsics[index]] * len(rgb_paths))

        rgbs = []
        masks = []
        bboxes = []

        # Read all RGB
        for i in range(len(rgb_paths)):

            img = imageio.imread(rgb_paths[i])[..., :3]
            # Foreground = every pixel that is not pure white (255, 255, 255).
            mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
            rgb = self.img_transforms(img)
            mask = self.mask_transforms(mask)

            h, w = rgb.shape[-2:]
            if (h != self.img_hw[0]) or (w != self.img_hw[1]):
                # Scale the first two intrinsic rows by the width ratio.
                scale = self.img_hw[-1] / w
                intrinsics[i, :2] *= scale

                rgb = F.interpolate(rgb[None, :], size=self.img_hw, mode="area")[0]
                mask = F.interpolate(mask[None, :], size=self.img_hw, mode="area")[0]

            rgbs.append(rgb)
            masks.append(mask)

            # Bounding box of the foreground: first/last row and column that
            # contain any non-zero mask value. An all-background image yields
            # empty index tensors and the [[0, -1]] lookup fails.
            yy = torch.any(mask, axis=2)
            xx = torch.any(mask, axis=1)
            ynz = torch.nonzero(yy)[:, 1]
            xnz = torch.nonzero(xx)[:, 1]
            ymin, ymax = ynz[[0, -1]]
            xmin, xmax = xnz[[0, -1]]
            bbox = torch.FloatTensor([xmin, ymin, xmax, ymax])
            bboxes.append(bbox)

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": rgb_paths[0],
            "img_id": index,
            "img_hw": self.img_hw,
            "bbox": torch.stack(bboxes, 0),
            "masks": torch.stack(masks).permute([0, 2, 3, 1]).float(),
            "rgbs": torch.stack(rgbs).permute([0, 2, 3, 1]).float(),
            "c2w_mats": torch.FloatTensor(c2w_mats),
            "intrinsics": torch.FloatTensor(intrinsics),
            "depth_range": torch.FloatTensor(depth_range)
        }

data/__init__.py

In [None]:
# Training dataset classes, looked up by dataset name.
dataset_dict = dict(
    srn=SRNDataset,
    dvr=DVRDataset,
)

# Evaluation dataset classes under the same names.
eval_dataset_dict = dict(
    srn=SRNEvalDataset,
    dvr=DVREvalDataset,
)

data/create_training_dataset.py

In [None]:
from typing import Optional
from operator import itemgetter

import numpy as np
import torch
from torch.utils.data import Dataset, Sampler
from torch.utils.data import DistributedSampler, WeightedRandomSampler


class DatasetFromSampler(Dataset):
    """Expose the indices produced by a `Sampler` as an indexable dataset.

    Args:
        sampler: PyTorch sampler
    """

    def __init__(self, sampler: Sampler):
        """Store the sampler; its indices are materialised on first access."""
        self.sampler_list = None
        self.sampler = sampler

    def __getitem__(self, index: int):
        """Return the ``index``-th value drawn from the sampler.

        The sampler is iterated once, on the first call, and the resulting
        list is cached in ``sampler_list`` for later lookups.

        Args:
            index: position in the cached list of sampled indices
        Returns:
            Single element by index
        """
        cached = self.sampler_list
        if cached is None:
            cached = list(self.sampler)
            self.sampler_list = cached
        return cached[index]

    def __len__(self) -> int:
        """
        Returns:
            int: length reported by the wrapped sampler
        """
        return len(self.sampler)


class DistributedSamplerWrapper(DistributedSampler):
    """
    Wrapper over `Sampler` for distributed training.
    Allows you to use any sampler in distributed mode.
    It is especially useful in conjunction with
    `torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSamplerWrapper instance as a DataLoader
    sampler, and load a subset of subsampled data of the original dataset
    that is exclusive to it.
    .. note::
        Sampler is assumed to be of constant size.
    """

    def __init__(
        self,
        sampler,
        num_replicas: Optional[int] = None,
        rank: Optional[int] = None,
        shuffle: bool = True,
    ):
        """
        Args:
            sampler: Sampler used for subsampling
            num_replicas (int, optional): Number of processes participating in
              distributed training
            rank (int, optional): Rank of the current process
              within ``num_replicas``
            shuffle (bool, optional): If true (default),
              sampler will shuffle the indices
        """
        super(DistributedSamplerWrapper, self).__init__(
            DatasetFromSampler(sampler),
            num_replicas=num_replicas,
            rank=rank,
            shuffle=shuffle,
        )
        self.sampler = sampler

    def __iter__(self):
        """Yield this replica's share of the wrapped sampler's indices.

        The wrapped sampler is re-wrapped on every call so that a fresh draw
        is made, then ``DistributedSampler`` picks which positions of that
        draw belong to this replica.
        """
        self.dataset = DatasetFromSampler(self.sampler)
        indexes_of_indexes = super().__iter__()
        subsampler_indexes = self.dataset
        # Plain lookups instead of operator.itemgetter(*idx): itemgetter
        # returns a bare item (not a tuple) for exactly one index, which made
        # iter() fail, and it rejects an empty index list outright.
        return iter([subsampler_indexes[i] for i in indexes_of_indexes])


def create_training_dataset(args):
    """Build the training dataset and its sampler from ``args``.

    ``args.data_type`` names a single dataset, or several joined with "+"
    (for example "ibrnet_collect+llff+spaces"). With several datasets,
    ``args.dataset_weights`` gives the resampling rate of each one and must
    sum to 1; every sample of a dataset gets that dataset's weight divided by
    its length.

    Returns:
        ``(train_dataset, train_sampler)``. For a single dataset the sampler
        is a ``DistributedSampler`` when ``args.distributed`` is set and
        ``None`` otherwise; for several datasets it is a
        ``WeightedRandomSampler``, wrapped for distributed use when
        ``args.distributed`` is set.
    """
    print('training dataset: {}'.format(args.data_type))
    mode = 'train'

    # Single dataset: no resampling needed.
    if '+' not in args.data_type:
        train_dataset = dataset_dict[args.data_type](args, mode)
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        else:
            train_sampler = None
        return train_dataset, train_sampler

    # Several datasets: concatenate them and resample by weight.
    names = args.data_type.split('+')
    weights = args.dataset_weights
    assert len(names) == len(weights)
    assert np.abs(np.sum(weights) - 1.) < 1e-6
    print('weights:{}'.format(weights))

    parts = []
    per_sample_weights = []
    for name, weight in zip(names, weights):
        part = dataset_dict[name](args, mode, scenes=args.train_scenes)
        parts.append(part)
        count = len(part)
        per_sample_weights.extend([weight / count] * count)

    train_dataset = torch.utils.data.ConcatDataset(parts)
    train_weights = torch.from_numpy(np.array(per_sample_weights))
    sampler = WeightedRandomSampler(train_weights, len(train_weights))
    if args.distributed:
        train_sampler = DistributedSamplerWrapper(sampler)
    else:
        train_sampler = sampler

    return train_dataset, train_sampler

gen_real.py

In [None]:
import os
import glob
import shutil

import tqdm
import imageio
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn.functional as F
import torchvision.transforms as T




def parse_intrinsic(focal, cx, cy):
    """Assemble a 4x4 pinhole intrinsic matrix.

    ``focal`` fills both diagonal focal entries; ``cx`` and ``cy`` fill the
    third column of the first and second row.
    """
    x_row = [focal, 0, cx, 0]
    y_row = [0, focal, cy, 0]
    return np.array([
        x_row,
        y_row,
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ])

class RealRenderDataset(Dataset):
    """
    Dataset for rendering

    Each item is a single input image found under ``<data_path>/rgb``
    (``*.jpg`` first, then ``*.png``, each group sorted), paired with a fixed
    camera pose and an intrinsic matrix built from ``args``.
    """
    def __init__(self, args, **kwargs):
        """
        Args:
            args.data_path: path to data directory
            args.img_hw: image size as (height, width) (resize if needed)
            args.focal: focal length used for every image
            args.radius: camera distance passed to ``trans_t``
            args.z_near, args.z_far: near/far plane depth
        """
        super().__init__()
        self.base_path = args.data_path

        print("Loading real dataset", self.base_path)
        assert os.path.exists(self.base_path)
        self.rgb_paths = sorted(glob.glob(os.path.join(self.base_path, 'rgb','*.jpg'))) + \
            sorted(glob.glob(os.path.join(self.base_path, 'rgb','*.png')))

        self.poses = []
        self.intrinsics = []
        for _ in range(len(self.rgb_paths)):
            # img_hw is (H, W): the principal point is cx = W / 2, cy = H / 2.
            # (The two were previously swapped, which only shows up for
            # non-square images.)
            intrinsic = parse_intrinsic(args.focal, args.img_hw[1]//2, args.img_hw[0]//2)
            cam_pose = trans_t(args.radius)
            self.poses.append(cam_pose)
            self.intrinsics.append(intrinsic)

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        self.define_transforms()
        self.img_hw = args.img_hw

        # default near/far plane depth
        self.z_near = args.z_near
        self.z_far = args.z_far

    def __len__(self):
        """Number of input images."""
        return len(self.rgb_paths)

    def define_transforms(self):
        """Create the image/mask transforms.

        ``Normalize`` with mean 0 and std 1 leaves values unchanged, so both
        pipelines amount to ``ToTensor``.
        """
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Load image ``index`` as a one-view source batch.

        Returns:
            dict with the image path, index, ``img_hw``, ``src_rgbs`` and
            ``src_masks`` in (1, H, W, C) layout, ``src_c2w_mats`` and
            ``src_intrinsics`` with a leading view axis of 1, and
            ``depth_range`` (near, far).
        """
        # Read source RGB
        src_rgb_path = self.rgb_paths[index]
        src_c2w_mat = self.poses[index]
        # Work on a float copy: indexing returns a view into self.intrinsics,
        # so scaling it in place compounded on every access of the same item.
        src_intrinsics = np.array(self.intrinsics[index], dtype=np.float64)

        img = imageio.imread(src_rgb_path)[..., :3]
        # Foreground = every pixel that is not pure white (255, 255, 255).
        mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
        rgb = self.img_transforms(img)
        mask = self.mask_transforms(mask)

        h, w = rgb.shape[-2:]
        if (h != self.img_hw[0]) or (w != self.img_hw[1]):
            scale = self.img_hw[-1] / rgb.shape[-1]
            # The first two columns hold only the focal entries; the principal
            # point (third column) was already built for img_hw in __init__.
            src_intrinsics[:, :2] *= scale

            # interpolate() needs a batch axis for 2-D resizing; without it the
            # (C, H, W) tensor is rejected for a two-element target size.
            rgb = F.interpolate(rgb[None, :], size=self.img_hw, mode="area")[0]
            mask = F.interpolate(mask[None, :], size=self.img_hw, mode="area")[0]

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": src_rgb_path,
            "img_id": index,
            "img_hw": self.img_hw,
            "src_rgbs": rgb[None, ...].permute([0, 2, 3, 1]).float(),
            "src_masks": mask[None, ...].permute([0, 2, 3, 1]).float(),
            "src_c2w_mats": torch.FloatTensor(src_c2w_mat)[None, :],
            "src_intrinsics": torch.FloatTensor(src_intrinsics)[None, :],
            "depth_range": torch.FloatTensor(depth_range)
        }


def trans_t(t):
    """Return the fixed 4x4 float32 base camera matrix with offset ``t``.

    ``t`` is placed in the last column of the second row; all other entries
    are constants.
    """
    rows = [
        [-1, 0, 0, 0],
        [0, 0, -1, t],
        [0, -1, 0, 0],
        [0, 0, 0, 1],
    ]
    return torch.tensor(rows, dtype=torch.float32)

def rot_theta(angle):
    """4x4 float32 rotation by ``angle`` radians about the third (z) axis."""
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    matrix = [
        [cos_a, -sin_a, 0, 0],
        [sin_a, cos_a, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ]
    return torch.tensor(matrix, dtype=torch.float32)

def rot_phi(phi):
    """4x4 float32 rotation by ``phi`` radians about the first (x) axis."""
    cos_p = np.cos(phi)
    sin_p = np.sin(phi)
    matrix = [
        [1, 0, 0, 0],
        [0, cos_p, -sin_p, 0],
        [0, sin_p, cos_p, 0],
        [0, 0, 0, 1],
    ]
    return torch.tensor(matrix, dtype=torch.float32)

def pose_spherical(theta, phi, radius):
    """
    Spherical rendering poses, from NeRF

    Starts from ``trans_t(radius)``, then applies ``rot_phi`` and finally
    ``rot_theta`` on the left. ``theta`` and ``phi`` are given in degrees.
    """
    phi_rad = phi / 180.0 * np.pi
    theta_rad = theta / 180.0 * np.pi

    pose = trans_t(radius)
    pose = rot_phi(phi_rad) @ pose
    pose = rot_theta(theta_rad) @ pose
    return pose

def gen_video(args):
    """Render a 360-degree turntable for each selected input image.

    For every index in ``args.data_index`` (all images when it is empty) the
    function creates ``<outdir>/<expname>/<index:06d>/`` and writes into it:
    ``args.txt`` (all argument values), ``config.txt`` (a copy of
    ``args.config`` when one is given and the copy does not exist yet),
    ``input.png``, one PNG per rendered frame and ``output.gif``.

    Fields read from ``args`` here: ``ckptdir``, ``outdir``, ``expname``,
    ``data_index``, ``config``, ``elevation``, ``num_frames``,
    ``chunk_size``, ``N_samples``, ``inv_uniform``, ``N_importance``,
    ``white_bkgd`` and, optionally, ``fps`` (defaults to 12). The dataset and
    model constructors read further fields.

    Side effect: an empty ``args.data_index`` is replaced by the full index
    list.
    """
    device = "cuda"
    print(f"checkpoints reload from {args.ckptdir}")
    dataset = RealRenderDataset(args)
    # Create VisionNeRF model
    model = VisionNerfModel(args, False, False)
    # create projector
    projector = Projector(device=device)
    model.switch_to_eval()

    if not args.data_index:
        args.data_index = list(range(len(dataset)))

    # Frame rate of the output GIF; falls back to the previous fixed value.
    fps = getattr(args, 'fps', 12)

    for d_idx in args.data_index:
        # Load the sample once; it was previously read a second time just to
        # print its path.
        sample = dataset[d_idx]

        out_folder = os.path.join(args.outdir, args.expname, f'{d_idx:06d}')
        # The object directory is two levels above the image
        # (<object>/rgb/<file>); derive it from the path rather than from a
        # fixed character count.
        object_dir = os.path.dirname(os.path.dirname(str(sample["rgb_path"])))
        print(f'Rendering {object_dir}')
        print(f'videos will be saved to {out_folder}')
        os.makedirs(out_folder, exist_ok=True)
        # save the args and config files
        f = os.path.join(out_folder, 'args.txt')
        with open(f, 'w') as file:
            for arg in sorted(vars(args)):
                attr = getattr(args, arg)
                file.write('{} = {}\n'.format(arg, attr))

        # Treat both None and an empty string as "no config file".
        if args.config:
            f = os.path.join(out_folder, 'config.txt')
            if not os.path.isfile(f):
                shutil.copy(args.config, f)

        pose_index = 0
        data_input = dict(
            rgb_path=sample['rgb_path'],
            img_id=sample['img_id'],
            img_hw=sample['img_hw'],
            tgt_intrinsic=sample['src_intrinsics'][0:1],
            src_masks=sample['src_masks'][pose_index][None, None, :],
            src_rgbs=sample['src_rgbs'][pose_index][None, None, :],
            src_c2w_mats=sample['src_c2w_mats'][pose_index][None, None, :],
            src_intrinsics=sample['src_intrinsics'][pose_index][None, None, :],
            depth_range=sample['depth_range'][None, :]
        )

        input_im = sample['src_rgbs'][pose_index].cpu().numpy()
        filename = os.path.join(out_folder, 'input.png')
        imageio.imwrite(filename, (input_im*255.).astype(np.uint8))

        # Orbit at the midpoint between the near and far planes.
        radius = (dataset.z_near + dataset.z_far) * 0.5
        print("> Using default camera radius", radius)

        # Use 360 pose sequence from NeRF
        render_poses = torch.stack(
            [
                pose_spherical(angle, args.elevation, radius)
                for angle in np.linspace(-180, 180, args.num_frames)[::-1]
            ],
            0,
        )  # (NV, 4, 4)
        # +z is the vertical axis

        imgs = []
        with torch.no_grad():

            for idx, pose in enumerate(tqdm.tqdm(render_poses)):
                filename = os.path.join(out_folder, f'{idx:06}.png')
                data_input['tgt_c2w_mat'] = pose[None, :]

                # load training rays
                ray_sampler = RaySamplerSingleImage(data_input, device, render_stride=1)
                ray_batch = ray_sampler.get_all()
                # NOTE(review): the source image is the same for every pose, so
                # this encoding looks loop-invariant - confirm before hoisting.
                featmaps = model.encode(ray_batch['src_rgbs'])

                ret = render_single_image(ray_sampler=ray_sampler,
                                        ray_batch=ray_batch,
                                        model=model,
                                        projector=projector,
                                        chunk_size=args.chunk_size,
                                        N_samples=args.N_samples,
                                        inv_uniform=args.inv_uniform,
                                        N_importance=args.N_importance,
                                        det=True,
                                        white_bkgd=args.white_bkgd,
                                        render_stride=1,
                                        featmaps=featmaps)

                # Prefer the fine network's output when it is present.
                if ret['outputs_fine']:
                    rgb_im = img_HWC2CHW(ret['outputs_fine']['rgb'].detach().cpu())
                else:
                    rgb_im = img_HWC2CHW(ret['outputs_coarse']['rgb'].detach().cpu())
                # clamping RGB images
                rgb_im = torch.clamp(rgb_im, 0.0, 1.0).permute([1, 2, 0]).cpu().numpy()
                rgb_im = (rgb_im * 255.).astype(np.uint8)
                imageio.imwrite(filename, rgb_im)
                imgs.append(rgb_im)
                torch.cuda.empty_cache()

            imgs = np.stack(imgs, 0)
            imageio.mimsave(os.path.join(out_folder, 'output.gif'), imgs, fps=fps)


In [None]:
# Fresh argument container holding the default video-generation settings.
args = Parameters_gen_video()

In [None]:
# Configure the video-rendering run and launch it. All paths are absolute
# Google Drive locations, so this cell only works with the Drive mounted.

# Output root; the recorded log shows results going to numbered sub-folders
# (".../out_video/000000", ...).
args.outdir='/content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/'
# Input data: a single object directory from the SRN cars test split.
args.data_path='/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9/'
# Checkpoint folder and the specific weights file.
# NOTE(review): the recorded output of this cell reports
# "Reloading from .../Pretrained/srn_chairs_500000.pth" although ckpt_path
# names a cars checkpoint and the data is a car -- confirm which weights are
# actually loaded (ckptdir may take precedence over ckpt_path).
args.ckptdir='/content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/'
args.ckpt_path='/content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/srn_cars500000.pth'
# Image size, replacing the [1024,768] default set in Parameters_gen_video.
args.img_hw=[128,128]
# Forwarded to render_single_image as chunk_size.
args.chunk_size=2048
# Model hyper-parameters; presumably they must match the checkpoint -- TODO confirm.
args.mlp_block_num=6
# Forwarded to render_single_image as white_bkgd.
args.white_bkgd=True
args.im_feat_dim=512
args.skip=1
# NOTE(review): presumably the [start, end) range of object indices to render;
# the recorded output lists folders 000000..000082, which does not match
# [0,3] -- the log may come from an earlier configuration. Verify.
args.data_range=[0,3]
args.distributed=False
args.config=None
# Long-running: the recorded log shows roughly 1.5 minutes per 40-frame object.
gen_video(args)

checkpoints reload from /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/
Loading real dataset /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9/
Reloading from /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/srn_chairs_500000.pth, starting at step=500000
Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000000
> Using default camera radius 1.3


100%|██████████| 40/40 [01:32<00:00,  2.30s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000001
> Using default camera radius 1.3


100%|██████████| 40/40 [01:41<00:00,  2.54s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000002
> Using default camera radius 1.3


100%|██████████| 40/40 [01:41<00:00,  2.53s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000003
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000004
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000005
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000006
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000007
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000008
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000009
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000010
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000011
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000012
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000013
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000014
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000015
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000016
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000017
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000018
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000019
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000020
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000021
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000022
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000023
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000024
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000025
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000026
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000027
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000028
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000029
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000030
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000031
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000032
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000033
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000034
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000035
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000036
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000037
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000038
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000039
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000040
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000041
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000042
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000043
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000044
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000045
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000046
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000047
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000048
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000049
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000050
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000051
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000052
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000053
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000054
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000055
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000056
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000057
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000058
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000059
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000060
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000061
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000062
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000063
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000064
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000065
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000066
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000067
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000068
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000069
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000070
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000071
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000072
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000073
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.25s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000074
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000075
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000076
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000077
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000078
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000079
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000080
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000081
> Using default camera radius 1.3


100%|██████████| 40/40 [01:30<00:00,  2.26s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_cars/cars_test/1a3782ae4bd711b66b418c7d9fedcaa9
videos will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_video/000082
> Using default camera radius 1.3


 98%|█████████▊| 39/40 [01:28<00:02,  2.26s/it]

eval.py

In [None]:
import os
import glob
import shutil
import tqdm
import imageio
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn.functional as F
import torchvision.transforms as T

def parse_intrinsic(path):
    """Build a 4x4 pinhole intrinsic matrix from an intrinsics text file.

    Only the first line of the file is consulted. It must contain exactly
    four whitespace-separated numbers: the focal length, the principal point
    x and y coordinates, and a fourth value that is read but discarded.

    Args:
        path: location of the intrinsics text file.

    Returns:
        A (4, 4) float64 ``np.ndarray`` with the focal length on the first two
        diagonal entries and the principal point in the third column.
    """
    with open(path, "r") as handle:
        header = handle.readlines()[0]

    # Unpacking enforces that the header holds exactly four numeric fields.
    focal, cx, cy, _ = (float(token) for token in header.split())

    intrinsic = np.eye(4)
    intrinsic[0, 0] = focal
    intrinsic[1, 1] = focal
    intrinsic[0, 2] = cx
    intrinsic[1, 2] = cy
    return intrinsic

def parse_pose(path):
    """Load a pose matrix stored as 16 numbers in a text file.

    Args:
        path: location of the pose text file.

    Returns:
        A (4, 4) float32 ``np.ndarray`` holding the values in row-major order.
    """
    flat_values = np.loadtxt(path, dtype=np.float32)
    pose = flat_values.reshape(4, 4)
    return pose

def parse_pose_dvr(path, num_views):
    """Read per-view camera matrices from a ``np.load``-able camera archive.

    For each view index ``i`` in ``range(num_views)`` the archive is expected
    to provide ``camera_mat_<i>`` and either ``world_mat_inv_<i>`` (used
    directly as the camera-to-world matrix) or ``world_mat_<i>`` (inverted
    here; a 3-row matrix is first padded with a ``[0, 0, 0, 1]`` row).

    Args:
        path: location of the camera archive.
        num_views: number of consecutive view indices to read, starting at 0.

    Returns:
        ``(intrinsics, c2w_mats)``: two arrays stacked along a new leading
        view axis.
    """
    cameras = np.load(path)

    def _camera_to_world(view_idx):
        # Prefer the stored inverse; otherwise invert the world matrix.
        inverse_key = "world_mat_inv_" + str(view_idx)
        if inverse_key in cameras:
            return cameras[inverse_key]
        world_to_cam = cameras["world_mat_" + str(view_idx)]
        if world_to_cam.shape[0] == 3:
            bottom_row = np.array([0, 0, 0, 1])
            world_to_cam = np.vstack((world_to_cam, bottom_row))
        return np.linalg.inv(world_to_cam)

    per_view = []
    for view_idx in range(num_views):
        pose = _camera_to_world(view_idx)
        per_view.append((cameras[f"camera_mat_{view_idx}"], pose))

    intrinsics = np.stack([k_mat for k_mat, _ in per_view], 0)
    c2w_mats = np.stack([pose for _, pose in per_view], 0)
    return intrinsics, c2w_mats


class SRNRenderDataset(Dataset):
    """
    Dataset for rendering.

    Every sub-directory of ``args.data_path`` that contains an
    ``intrinsics.txt`` file is one item. An item bundles all images found in
    the object's ``rgb`` folder together with the matching pose files from the
    sibling ``pose`` folder and the object's shared intrinsic matrix.
    """
    def __init__(self, args, **kwargs):
        """
        Args:
            args.data_path: path to data directory
            args.img_hw: image size (resize if needed)

        Raises:
            ValueError: if no ``*/intrinsics.txt`` is found under data_path.
        """
        super().__init__()
        self.base_path = args.data_path
        # normpath drops a trailing separator; without it basename() returns ''
        # and the "chair" check below can never match.
        self.dataset_name = os.path.basename(os.path.normpath(args.data_path))

        print("Loading SRN dataset", self.base_path, "name:", self.dataset_name)
        assert os.path.exists(self.base_path)

        is_chair = "chair" in self.dataset_name

        intrinsic_paths = sorted(
            glob.glob(os.path.join(self.base_path, "*", "intrinsics.txt"))
        )
        print("intrinsic",intrinsic_paths)
        if not intrinsic_paths:
            # np.stack on an empty list would raise an opaque ValueError later.
            raise ValueError(
                "No '*/intrinsics.txt' found under {}".format(self.base_path)
            )

        self.intrinsics = []
        self.poses = []
        self.rgb_paths = []
        for path in tqdm.tqdm(intrinsic_paths):
            obj_dir = os.path.dirname(path)
            curr_paths = sorted(glob.glob(os.path.join(obj_dir, "rgb", "*")))
            self.rgb_paths.append(curr_paths)

            c2w_mats = [parse_pose(self._pose_path_for(p)) for p in curr_paths]
            self.poses.append(c2w_mats)

            self.intrinsics.append(parse_intrinsic(path))

        self.rgb_paths = np.array(self.rgb_paths)
        self.poses = np.stack(self.poses, 0)
        self.intrinsics = np.array(self.intrinsics)

        assert(len(self.rgb_paths) == len(self.poses))

        self.define_transforms()
        self.img_hw = args.img_hw

        # default near/far plane depth
        if is_chair:
            self.z_near = 1.25
            self.z_far = 2.75
        else:
            self.z_near = 0.8
            self.z_far = 1.8

    @staticmethod
    def _pose_path_for(rgb_path):
        """Map ``<obj>/rgb/<name>.<ext>`` to ``<obj>/pose/<name>.txt``.

        Only the last directory and the extension are rewritten, so "rgb" or
        "png" appearing elsewhere in the path is left untouched.
        """
        rgb_dir, file_name = os.path.split(rgb_path)
        stem = os.path.splitext(file_name)[0]
        return os.path.join(os.path.dirname(rgb_dir), "pose", stem + ".txt")

    @staticmethod
    def _resize_chw(image, size):
        """Area-resample one (C, H, W) tensor to ``size`` = (height, width)."""
        # F.interpolate reads dim 0 as batch and dim 1 as channels, so a single
        # image needs a temporary batch axis for a 2-D resize.
        return F.interpolate(image[None], size=tuple(size), mode="area")[0]

    def __len__(self):
        """Number of objects (one intrinsic matrix per object)."""
        return len(self.intrinsics)

    def define_transforms(self):
        """Create the image and mask to-tensor transforms (identity normalization)."""
        self.img_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0, 0.0, 0.0), (1.0, 1.0, 1.0))]
        )
        self.mask_transforms = T.Compose(
            [T.ToTensor(), T.Normalize((0.0,), (1.0,))]
        )

    def __getitem__(self, index):
        """Load every view of object ``index``.

        Returns:
            dict with
              - "rgb_path": path of the last image that was read
              - "img_id": ``index``
              - "img_hw": target image size
              - "src_masks": float tensor, views x H x W x 1; zero where the
                source pixel is pure white
              - "src_rgbs": float tensor, views x H x W x 3
              - "src_c2w_mats": float tensor of the per-view pose matrices
              - "src_intrinsics": float tensor of per-view 4x4 intrinsics,
                rescaled when the image was resized
              - "depth_range": float tensor ``[z_near, z_far]``
        """
        # Read source RGB
        src_rgb_paths = self.rgb_paths[index]
        src_c2w_mats = self.poses[index]
        # np.array copies, so rescaling below never touches self.intrinsics.
        src_intrinsics = np.array([self.intrinsics[index]] * len(src_c2w_mats))

        src_rgbs = []
        src_masks = []
        for view_idx, rgb_path in enumerate(src_rgb_paths):
            img = imageio.imread(rgb_path)[..., :3]
            # Pure-white pixels are treated as background.
            mask = (img.sum(axis=-1) != 255*3)[..., None].astype(np.uint8) * 255
            rgb = self.img_transforms(img)
            mask = self.mask_transforms(mask)

            h, w = rgb.shape[-2:]
            if (h != self.img_hw[0]) or (w != self.img_hw[1]):
                scale = self.img_hw[-1] / w
                # Rescale only this view's matrix; scaling all views on every
                # iteration would compound the factor once per image.
                src_intrinsics[view_idx, :2] *= scale

                rgb = self._resize_chw(rgb, self.img_hw)
                mask = self._resize_chw(mask, self.img_hw)

            src_rgbs.append(rgb)
            src_masks.append(mask)

        depth_range = np.array([self.z_near, self.z_far])

        return {
            "rgb_path": rgb_path,
            "img_id": index,
            "img_hw": self.img_hw,
            "src_masks": torch.stack(src_masks).permute([0, 2, 3, 1]).float(),
            "src_rgbs": torch.stack(src_rgbs).permute([0, 2, 3, 1]).float(),
            "src_c2w_mats": torch.FloatTensor(src_c2w_mats),
            "src_intrinsics": torch.FloatTensor(src_intrinsics),
            "depth_range": torch.FloatTensor(depth_range)
        }

def trans_t(t):
    """Return the 4x4 float32 base pose used by ``pose_spherical``.

    The matrix negates the x axis, swaps the y and z axes with a sign flip,
    and places ``t`` in the translation entry of the second row.

    Args:
        t: offset stored at row 1, column 3.
    """
    rows = (
        (-1, 0, 0, 0),
        (0, 0, -1, t),
        (0, -1, 0, 0),
        (0, 0, 0, 1),
    )
    return torch.tensor(rows, dtype=torch.float32)

def rot_theta(angle):
    """Return the 4x4 float32 homogeneous rotation about the z axis.

    Args:
        angle: rotation angle in radians.
    """
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rows = [
        [cos_a, -sin_a, 0, 0],
        [sin_a, cos_a, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ]
    return torch.tensor(rows, dtype=torch.float32)

def rot_phi(phi):
    """Return the 4x4 float32 homogeneous rotation about the x axis.

    Args:
        phi: rotation angle in radians.
    """
    cos_p = np.cos(phi)
    sin_p = np.sin(phi)
    rows = [
        [1, 0, 0, 0],
        [0, cos_p, -sin_p, 0],
        [0, sin_p, cos_p, 0],
        [0, 0, 0, 1],
    ]
    return torch.tensor(rows, dtype=torch.float32)

def pose_spherical(theta, phi, radius):
    """
    Spherical rendering poses, from NeRF

    Composes ``rot_theta(theta) @ rot_phi(phi) @ trans_t(radius)``.

    Args:
        theta: angle in degrees passed (as radians) to ``rot_theta``.
        phi: angle in degrees passed (as radians) to ``rot_phi``.
        radius: offset passed to ``trans_t``.

    Returns:
        The resulting 4x4 float32 tensor.
    """
    phi_rad = phi / 180.0 * np.pi
    theta_rad = theta / 180.0 * np.pi

    # Apply the base offset first, then the phi rotation, then the theta rotation.
    tilted = rot_phi(phi_rad) @ trans_t(radius)
    return rot_theta(theta_rad) @ tilted

def gen_eval(args):
    """Render target views for selected dataset objects and save them as PNGs.

    For every selected object index the function writes, into
    ``os.path.join(args.outdir, args.expname, f'{index:06d}')``:

    * ``args.txt``   - one ``name = value`` line per attribute of ``args``;
    * ``input.png``  - the source view (``args.pose_index``) given to the model;
    * ``NNNNNN.png`` - one rendering per selected target pose.

    Args:
        args: configuration object. Attributes read directly here are
            ``ckptdir``, ``outdir``, ``expname``, ``use_data_index``,
            ``data_indices``, ``data_range``, ``pose_index``, ``skip``,
            ``include_src``, ``chunk_size``, ``N_samples``, ``inv_uniform``,
            ``N_importance`` and ``white_bkgd``. The object is also passed
            to ``SRNRenderDataset`` and ``VisionNerfModel``.

    Returns:
        None. All results are written to disk.
    """
    device = "cuda"
    print(f"checkpoints reload from {args.ckptdir}")

    dataset = SRNRenderDataset(args)
    # Create VisionNeRF model
    model = VisionNerfModel(args, False, False)
    # create projector
    projector = Projector(device=device)
    model.switch_to_eval()

    # Objects to render: an explicit list, or the half-open range
    # [data_range[0], data_range[1]).
    if args.use_data_index:
        data_index = args.data_indices
    else:
        data_index = np.arange(args.data_range[0], args.data_range[1])

    pose_index = args.pose_index

    for d_idx in data_index:
        # Index the dataset exactly once per object: every lookup reads the
        # object's images from disk, so repeated indexing is pure overhead.
        sample = dataset[d_idx]

        out_folder = os.path.join(args.outdir, args.expname, f'{d_idx:06d}')
        # [:-15] drops the last 15 characters of the image path - presumably
        # the trailing '/rgb/NNNNNN.png'; verify against the dataset layout.
        print(f'Rendering {sample["rgb_path"][:-15]}')
        print(f'images will be saved to {out_folder}')
        os.makedirs(out_folder, exist_ok=True)

        # save the args next to the renderings
        args_file = os.path.join(out_folder, 'args.txt')
        with open(args_file, 'w') as fh:
            for arg in sorted(vars(args)):
                attr = getattr(args, arg)
                fh.write('{} = {}\n'.format(arg, attr))

        # Model input: only the source view at pose_index is kept, with two
        # leading singleton dimensions added by [None, None, :].
        data_input = dict(
            rgb_path=sample['rgb_path'],
            img_id=sample['img_id'],
            img_hw=sample['img_hw'],
            tgt_intrinsic=sample['src_intrinsics'][0:1],
            src_masks=sample['src_masks'][pose_index][None, None, :],
            src_rgbs=sample['src_rgbs'][pose_index][None, None, :],
            src_c2w_mats=sample['src_c2w_mats'][pose_index][None, None, :],
            src_intrinsics=sample['src_intrinsics'][pose_index][None, None, :],
            depth_range=sample['depth_range'][None, :]
        )

        # Save the source view for reference.
        input_im = sample['src_rgbs'][pose_index].cpu().numpy() * 255.
        input_im = input_im.astype(np.uint8)
        filename = os.path.join(out_folder, 'input.png')
        imageio.imwrite(filename, input_im)

        # Target poses: every args.skip-th camera pose of the object.
        render_poses = sample['src_c2w_mats']
        view_indices = np.arange(0, len(render_poses), args.skip)
        render_poses = render_poses[view_indices]

        with torch.no_grad():

            for idx, pose in tqdm.tqdm(zip(view_indices, render_poses), total=len(view_indices)):
                # Skip re-rendering the source view unless explicitly requested.
                if not args.include_src and idx == pose_index:
                    continue
                filename = os.path.join(out_folder, f'{idx:06}.png')
                data_input['tgt_c2w_mat'] = pose[None, :]

                # build the rays of the full target image
                ray_sampler = RaySamplerSingleImage(data_input, device, render_stride=1)
                ray_batch = ray_sampler.get_all()
                featmaps = model.encode(ray_batch['src_rgbs'])

                ret = render_single_image(ray_sampler=ray_sampler,
                                          ray_batch=ray_batch,
                                          model=model,
                                          projector=projector,
                                          chunk_size=args.chunk_size,
                                          N_samples=args.N_samples,
                                          inv_uniform=args.inv_uniform,
                                          N_importance=args.N_importance,
                                          det=True,
                                          white_bkgd=args.white_bkgd,
                                          render_stride=1,
                                          featmaps=featmaps)

                rgb_im = img_HWC2CHW(ret['outputs_fine']['rgb'].detach().cpu())
                # clamping RGB images
                rgb_im = torch.clamp(rgb_im, 0.0, 1.0)
                rgb_im = rgb_im.permute([1, 2, 0]).cpu().numpy()

                rgb_im = (rgb_im * 255.).astype(np.uint8)
                imageio.imwrite(filename, rgb_im)
                torch.cuda.empty_cache()

In [None]:
# Evaluation run: render test objects 0..9 of the SRN chairs test split.
NERF_ROOT = '/content/gdrive/MyDrive/Colab Notebooks/NeRF'

args = Parameters_eval()
args.outdir = f'{NERF_ROOT}/VisionNeRF/out_chairs/'
args.data_path = f'{NERF_ROOT}/Data/srn_chairs/chairs_test/'
args.ckptdir = f'{NERF_ROOT}/VisionNeRF/Pretrained/'
# The checkpoint file is named 'srn_chairs_500000.pth' (with an underscore
# before the step count), as reported by the "Reloading from ..." line of
# the run log; the path previously lacked that underscore.
args.ckpt_path = f'{NERF_ROOT}/VisionNeRF/Pretrained/srn_chairs_500000.pth'
args.img_hw = [128, 128]
args.chunk_size = 2048
args.mlp_block_num = 6
args.white_bkgd = True
args.im_feat_dim = 512
args.skip = 1  # render every pose
args.data_range = [0, 10]  # half-open range of object indices
gen_eval(args)

checkpoints reload from /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/
Loading SRN dataset /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/ name: 
intrinsic ['/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1a8bbf2994788e2743e99e0cae970928/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1ab8a3b55c14a7b27eaeab1f0c9120b7/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1ac6531a337de85f2f7628d6bf38bcc4/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1aeb17f89e1bea954c6deb9ede0648df/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b05971a4373c7d2463600025db2266/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b4071814d1c1ae6e2367b9e27f16a71/intrinsics.txt', '/content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/sr

100%|██████████| 10/10 [00:03<00:00,  3.05it/s]


Reloading from /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/Pretrained/srn_chairs_500000.pth, starting at step=500000
Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1a8bbf2994788e2743e99e0cae970928
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000000


100%|██████████| 251/251 [06:50<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1ab8a3b55c14a7b27eaeab1f0c9120b7
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000001


100%|██████████| 251/251 [06:49<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1ac6531a337de85f2f7628d6bf38bcc4
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000002


100%|██████████| 251/251 [06:50<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1aeb17f89e1bea954c6deb9ede0648df
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000003


100%|██████████| 251/251 [06:50<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b05971a4373c7d2463600025db2266
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000004


100%|██████████| 251/251 [06:50<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b4071814d1c1ae6e2367b9e27f16a71
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000005


100%|██████████| 251/251 [06:51<00:00,  1.64s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b5e876f3559c231532a8e162f399205
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000006


100%|██████████| 251/251 [06:50<00:00,  1.64s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b67a3a1101a9acb905477d2a8504646
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000007


100%|██████████| 251/251 [06:50<00:00,  1.64s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1b81441b7e597235d61420a53a0cb96d
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000008


100%|██████████| 251/251 [06:50<00:00,  1.63s/it]


Rendering /content/gdrive/MyDrive/Colab Notebooks/NeRF/Data/srn_chairs/chairs_test/1c9d7e56ae8c90c87ac6ce513ae497d3
images will be saved to /content/gdrive/MyDrive/Colab Notebooks/NeRF/VisionNeRF/out_chairs/000009


100%|██████████| 251/251 [06:50<00:00,  1.64s/it]
