Skip to content

Commit

Permalink
Compute surface_idx only once per u,v coordinate
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 371788077
  • Loading branch information
podlipensky authored and Copybara-Service committed May 3, 2021
1 parent ee853d4 commit d5c21b2
Show file tree
Hide file tree
Showing 7 changed files with 439 additions and 0 deletions.
282 changes: 282 additions & 0 deletions tensorflow_graphics/rendering/splat.py
@@ -0,0 +1,282 @@
# Copyright 2020 The TensorFlow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Differentiable point splatting functions for rasterize-then-splat."""

import math
from typing import Callable, Tuple

import tensorflow as tf

from tensorflow_graphics.rendering import interpolate
from tensorflow_graphics.rendering import rasterization_backend
from tensorflow_graphics.rendering import utils
from tensorflow_graphics.util import shape


def splat_at_pixel_centers(
    xyz_rgba: Tuple[tf.Tensor, tf.Tensor]
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
  """Splat a buffer of XYZ, RGBA samples onto a pixel grid of the same size.

  This is a specialized splatting function that takes a multi-layer buffer of
  screen-space XYZ positions and RGBA colors and splats each sample into
  a buffer of the same size, using a 3x3 Gaussian kernel of variance 0.25.
  The accumulated layers are then composited back-to-front.

  The specialized part is that the 3x3 kernel is always centered on the
  pixel-coordinates of the sample in the input buffer, *not* the XY position
  stored at that sample, but the weights are defined by using the XY position.
  Computing weights w.r.t. the XY positions, rather than the pixel-centers,
  allows gradients to flow from the output RGBA back to the XY positions. When
  used in rasterize-then-splat, XY positions will always coincide with the pixel
  centers, so the forward computation is the same as if the XY positions defined
  the position of the splat.

  When splatting, the Z of the splat is compared with the Z of the layers under
  the splat sample. The sample is accumulated into the layer with the Z closest
  to the Z of the splat itself.

  Args:
    xyz_rgba: a tuple of a tensor of rasterized XYZ positions with shape
      [num_layers, height, width, 3] and a tensor of RGBA colors [num_layers,
      height, width, 4]. Both are statically checked to have rank 4. Passed as
      a tuple to support tf.vectorized_map.

  Returns:
    A tuple of three tensors:
      * the composited RGBA image with shape [height, width, 4];
      * the raw (un-normalized) accumulated colors of the three accumulation
        layers (foreground, surface, background) with shape
        [3, height, width, 4];
      * the same accumulation layers after normalization, with shape
        [3, height, width, 4].
    The last two are intended for visualization and debugging.
  """
  extra_accumulation_epsilon = 0.05
  xyz_layers, rgba_layers = xyz_rgba

  xyz_layers = tf.convert_to_tensor(xyz_layers)
  shape.check_static(tensor=xyz_layers, tensor_name='xyz_layers', has_rank=4)
  rgba_layers = tf.convert_to_tensor(rgba_layers)
  shape.check_static(tensor=rgba_layers, tensor_name='rgba_layers', has_rank=4)

  gaussian_variance = 0.5**2
  gaussian_exp_scale = -1.0 / (2 * gaussian_variance)

  # The normalization coefficient for the Gaussian must be computed with care so
  # that a full accumulation of neighboring splats adds up to 1.0 + epsilon. We
  # need to trigger normalization when the splats accumulate to a full surface
  # in order to avoid a spurious "spread-splats-to-darken-color" derivative, but
  # we do not want to normalize otherwise (e.g., at the boundary with the
  # background), so we use a small epsilon here.
  taps = (-1, 0, 1)
  weight_sum = 0
  for u in taps:
    for v in taps:
      weight_sum += math.exp(gaussian_exp_scale * (u**2 + v**2))
  gaussian_coef = (1.0 + extra_accumulation_epsilon) / weight_sum

  # Spatial padding that shifts a [height, width, C] buffer by the tap offset
  # (u, v) inside the 1-pixel-bordered accumulation buffer. It depends only on
  # the tap, so it is computed once here and reused for every layer.
  tap_paddings = {
      (u, v): [[max(v + 1, 0), abs(min(v - 1, 0))],
               [max(u + 1, 0), abs(min(u - 1, 0))], [0, 0]]
      for u in taps for v in taps
  }

  # Accumulation buffers need a 1 pixel border because of 3x3 splats.
  border_padding = ((0, 0), (1, 1), (1, 1), (0, 0))
  # 3 accumulation layers (fg, surface, bg) of the same size as the image.
  accumulation_shape = [3, rgba_layers.shape[1], rgba_layers.shape[2]]
  accumulate_rgba = tf.pad(
      tf.zeros(accumulation_shape + [4], dtype=rgba_layers.dtype),
      border_padding)
  accumulate_weights = tf.pad(
      tf.zeros(accumulation_shape + [1], dtype=rgba_layers.dtype),
      border_padding)
  z_layers = xyz_layers[..., 2:3]
  padded_center_z = tf.pad(z_layers, border_padding, constant_values=1.0)

  # The surface index depends only on the tap offset, not on the layer being
  # splatted, so it is computed once per (u, v) and looked up in the layer loop.
  surface_idx_uv_map = {}
  for u in taps:
    for v in taps:
      # Find the layer index of the first surface shared by the center of the
      # splat and the splat filter tap (i.e., sample position).
      # The first surface must appear as the top layer either at center or at
      # tap. The best matching Z between the top center layer and the tap
      # layers is compared against the best match between the center layers
      # and the top tap layer, and the pair of layers with smallest
      # difference in Z is the estimated surface.
      tap_z_layers = tf.pad(
          z_layers, [[0, 0]] + tap_paddings[(u, v)], constant_values=1.0)
      dist_center_to_tap_layers = tf.abs(tap_z_layers - padded_center_z[0, ...])
      best_center_surface_idx = tf.argmin(dist_center_to_tap_layers, axis=0)
      best_center_surface_z = tf.reduce_min(dist_center_to_tap_layers, axis=0)
      dist_tap_to_center_layers = tf.abs(padded_center_z - tap_z_layers[0, ...])
      best_tap_surface_idx = tf.argmin(dist_tap_to_center_layers, axis=0)
      best_tap_surface_z = tf.reduce_min(dist_tap_to_center_layers, axis=0)
      # surface_idx is 0 if the first surface is the top layer for both center
      # and tap, a negative number (of layers) if the surface is occluded at
      # center, and a positive number if occluded at tap.
      surface_idx = tf.where(best_tap_surface_z < best_center_surface_z,
                             -best_tap_surface_idx, best_center_surface_idx)
      surface_idx_uv_map[(u, v)] = surface_idx

  # Masks are cast to the dtype of the accumulation buffers so the masked
  # products below do not mix dtypes when the inputs are not float32.
  mask_dtype = rgba_layers.dtype

  num_layers = rgba_layers.shape[0]
  for l in range(num_layers):
    rgba = rgba_layers[l, ...]
    alpha = rgba_layers[l, :, :, 3:4]
    xyz = xyz_layers[l, ...]

    # Computes the offset from the splat to the pixel underneath the splat. Note
    # that in the forward pass, splat_to_center_pixel will always be zero to
    # within numerical precision, but it is necessary to define the filter tap
    # weights as a function of the splat position so derivatives will flow to
    # the splat. As the splat moves right, the pixel moves left relative to it,
    # so the splat position xy is negated here.
    splat_to_center_pixel = tf.floor(xyz[..., :2]) + (0.5, 0.5) - xyz[..., :2]

    for u in taps:
      for v in taps:
        splat_to_pixel = splat_to_center_pixel + (u, v)
        dist_sqr = tf.math.reduce_sum(splat_to_pixel**2, axis=-1, keepdims=True)
        tap_weights = alpha * gaussian_coef * tf.exp(
            gaussian_exp_scale * dist_sqr)

        tap_rgba = tap_weights * rgba

        tap_padding = tap_paddings[(u, v)]
        tap_rgba = tf.pad(tap_rgba, tap_padding)
        tap_weights = tf.pad(tap_weights, tap_padding)
        surface_idx = surface_idx_uv_map[(u, v)]

        # If the current layer is in front of the surface, accumulate into fg.
        # If at the surface, accumulate into surf. If behind, accumulate into
        # bg. We use a masked accumulation here rather than a scatter, though
        # scatter could also work if there are a lot of layers.
        fg_mask = tf.cast(surface_idx > l, mask_dtype)
        surf_mask = tf.cast(surface_idx == l, mask_dtype)
        bg_mask = tf.cast(surface_idx < l, mask_dtype)
        layer_mask = tf.stack((fg_mask, surf_mask, bg_mask), axis=0)

        masked_tap_rgba = tf.tile(
            tf.expand_dims(tap_rgba, axis=0), (3, 1, 1, 1)) * layer_mask
        masked_tap_weights = tf.tile(
            tf.expand_dims(tap_weights, axis=0), (3, 1, 1, 1)) * layer_mask

        accumulate_rgba += masked_tap_rgba
        accumulate_weights += masked_tap_weights

  # Normalize the accumulated colors by the accumulated weights. Normalization
  # only happens if the accumulate weights are > 1.0. The 1-pixel border added
  # for the 3x3 splats is cropped away first.
  accumulate_rgba = accumulate_rgba[:, 1:-1, 1:-1, :]
  accumulate_weights = accumulate_weights[:, 1:-1, 1:-1, :]
  normalization_scales = 1.0 / (tf.maximum(accumulate_weights - 1.0, 0.0) + 1.0)
  normalized_rgba = accumulate_rgba * normalization_scales

  # Composite the foreground, surface, and background layers back-to-front:
  # start from the last (background) layer and blend the surface and then the
  # foreground layer over it.
  output_rgba = normalized_rgba[-1, ...]
  for i in (2, 3):
    alpha = normalized_rgba[-i, :, :, 3:4]
    output_rgba = normalized_rgba[-i, ...] + (1.0 - alpha) * output_rgba

  return output_rgba, accumulate_rgba, normalized_rgba


def rasterize_then_splat(vertices: tf.Tensor,
                         triangles: tf.Tensor,
                         attributes: tf.Tensor,
                         camera_matrices: tf.Tensor,
                         image_width: int,
                         image_height: int,
                         shading_function: Callable[[tf.Tensor], tf.Tensor],
                         num_layers: int = 1,
                         return_extra_buffers: bool = False):
  """Rasterization with differentiable occlusion using rasterize-then-splat.

  Rasterizes the input triangles to produce surface point samples, applies
  a user-specified shading function, then splats the shaded point
  samples onto the pixel grid.

  The attributes are arbitrary per-vertex quantities (colors, normals, texture
  coordinates, etc.). The rasterization step interpolates these attributes
  across triangles to produce a per-pixel interpolated attributes buffer
  with shape [image_height, image_width, attribute_count]. This buffer is passed
  to the user-provided shading_function, which should turn it into a
  [image_height, image_width, 4] buffer of RGBA colors. The result of the shader
  is multiplied by the rasterizer's foreground mask, which zeroes it out for
  background pixels.

  In the common case that the attributes are RGBA vertex colors, the shading
  function would just pass the rasterized attributes through (i.e.,
  shading_function = lambda x: x).

  Args:
    vertices: tensor of xyz positions. It is statically checked to have rank
      of at least 2 and a last dimension of exactly 3, e.g. [vertex_count, 3]
      or [batch_size, vertex_count, 3].
    triangles: int32 tensor or array with shape [triangle_count, 3].
    attributes: float32 tensor of vertex attributes with shape [batch_size,
      vertex_count, attribute_count].
    camera_matrices: camera matrices whose last two dimensions are statically
      checked to be [4, 4], e.g. [batch_size, 4, 4]. Required; it is always
      converted to a tensor and used to transform the vertices.
    image_width: int specifying desired output image width in pixels.
    image_height: int specifying desired output image height in pixels.
    shading_function: a function that takes a [image_height, image_width,
      attribute_count] rasterized attribute tensor and returns a [image_height,
      image_width, 4] RGBA tensor. It is applied through a nested
      tf.vectorized_map over the two leading dimensions of the interpolated
      attribute buffer.
    num_layers: int specifying number of depth layers to composite.
    return_extra_buffers: if True, the function also returns the raw and the
      normalized accumulation buffers for visualization.

  Returns:
    If return_extra_buffers is False, a [batch_size, image_height, image_width,
    4] tensor of RGBA values. If it is True, a tuple (output, accum, norm_accum)
    where output is that same tensor and accum / norm_accum are the batched raw
    and normalized accumulation buffers produced by splat_at_pixel_centers.
  """
  vertices = tf.convert_to_tensor(vertices)
  triangles = tf.convert_to_tensor(triangles)
  # Converted like the other tensor inputs so that `.shape` below is available
  # even when the caller passes a nested list or other array-like.
  attributes = tf.convert_to_tensor(attributes)
  camera_matrices = tf.convert_to_tensor(camera_matrices)
  shape.check_static(
      tensor=vertices,
      tensor_name='vertices',
      has_rank_greater_than=1,
      has_dim_equals=((-1, 3)))
  shape.check_static(
      tensor=triangles,
      tensor_name='triangles',
      has_rank=2,
      has_dim_equals=((-1, 3)))
  shape.check_static(
      tensor=camera_matrices,
      tensor_name='camera_matrices',
      has_dim_equals=(((-2, 4), (-1, 4))))
  # We don't need derivatives of barycentric coordinates for RtS, so use
  # rasterization_backend directly.
  # Back face culling is necessary when rendering multiple layers so that
  # back faces aren't counted as occluding layers.
  rasterized = rasterization_backend.rasterize(
      vertices,
      triangles,
      camera_matrices, (image_width, image_height),
      enable_cull_face=True,
      num_layers=num_layers,
      backend=rasterization_backend.RasterizationBackends.CPU)

  # Background pixels receive an all-zero attribute vector.
  interpolated = interpolate.interpolate_vertex_attribute(
      attributes, rasterized, tf.zeros((attributes.shape[-1],),
                                       dtype=tf.float32))

  # Nested vectorized map over batch and layer dimensions.
  shaded_buffer = tf.vectorized_map(
      lambda l: tf.vectorized_map(shading_function, l), interpolated.value)
  # Zero out shader result outside of foreground mask.
  shaded_buffer = shaded_buffer * rasterized.foreground_mask
  # Add layers dimension if absent.
  if len(shaded_buffer.shape) == 4:
    shaded_buffer = tf.expand_dims(shaded_buffer, axis=1)

  # Interpolate clip-space positions per pixel; background pixels get
  # (0, 0, 1, 1), so the perspective divide below never divides by zero there.
  clip_space_vertices = utils.transform_homogeneous(camera_matrices, vertices)
  clip_space_buffer = interpolate.interpolate_vertex_attribute(
      clip_space_vertices, rasterized, (0, 0, 1, 1)).value

  # Perspective divide, then map x, y from [-1, 1] to pixel coordinates and z
  # from [-1, 1] to [0, 1].
  ndc_xyz = clip_space_buffer[..., :3] / clip_space_buffer[..., 3:4]
  viewport_xyz = (ndc_xyz + 1.0) * tf.constant([image_width, image_height, 1],
                                               dtype=tf.float32,
                                               shape=[1, 1, 1, 1, 3]) * 0.5
  # Splat each batch element independently.
  output, accum, norm_accum = tf.vectorized_map(splat_at_pixel_centers,
                                                (viewport_xyz, shaded_buffer))
  if return_extra_buffers:
    return output, accum, norm_accum

  return output
Expand Up @@ -53,6 +53,10 @@ def make_look_at_matrix(
return look_at.right_handed(camera_origin, look_at_point, camera_up)


def get_idendity_view_projection_matrix():
  """Returns a 4x4 identity matrix with a leading axis of size 1.

  The result has shape [1, 4, 4].
  """
  identity = tf.eye(4)
  return tf.expand_dims(identity, axis=0)


def compare_images(test_case,
baseline_image,
image,
Expand Down Expand Up @@ -91,6 +95,7 @@ def compare_images(test_case,
(baseline_image.dtype, image.dtype))
shape.check_static(
tensor=baseline_image, tensor_name="baseline_image", has_rank=4)
shape.check_static(tensor=image, tensor_name="image", has_rank=4)
# Flatten height, width and channels dimensions since we're interested in
# error per image.
image_height, image_width = image.shape[1:3]
Expand Down

0 comments on commit d5c21b2

Please sign in to comment.