In [1]:
import torch
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from typing import List, Tuple
from pytorch3d.renderer.cameras import get_screen_to_ndc_transform, get_ndc_to_screen_transform
from src.util.cameras import PatchPerspectiveCameras, get_patch_ndc_to_ndc_transform, get_ndc_to_patch_ndc_transform

In [2]:
# repeat sample image batch_size times with different patches of different sizes
image_size = [(256, 256), (128, 64), (64, 128), (32, 32)]
patch_size = [(64, 64), (32, 32), (16, 16), (8, 8)]

# wrt the top left of the full image in pixels
patch_center = [(0, 0), (0, 0), (0, 0), (0, 0)]

cam_kwargs = {
    "znear": 0.0,
    "zfar": 80.0,
    "focal_length": 10.0,
    "principal_point": ((0.0, 0.0),),
    "R": torch.eye(3).unsqueeze(0),
    "T": torch.zeros(1, 3),
    "device": "cpu",
    "in_ndc": False,
    "image_size": image_size
    }

cam = PatchPerspectiveCameras(**cam_kwargs)

In [3]:
len(cam.get_ndc_camera_transform(**cam_kwargs))

4

In [4]:
world_to_patch_ndc_transform = cam.get_patch_projection_transform(patch_size, patch_center, **cam_kwargs)

c_point_screen tensor([[[0, 0, 0]],

        [[0, 0, 0]],

        [[0, 0, 0]],

        [[0, 0, 0]]])


RuntimeError: expected scalar type Long but found Float

In [None]:
patch_ndc_to_world_transform = world_to_patch_ndc_transform.inverse()

In [None]:
X_patch_ndc = torch.tensor([[0.5, 0.5, 1.0], [0.25, 0.25, 1.0], [0.75, 0.75, 1.0], [1., 1., 1.0]])
X_world = patch_ndc_to_world_transform.transform_points(X_patch_ndc)

In [None]:
world_to_patch_ndc_transform = cam.get_patch_projection_transform(patch_size, patch_center, **cam_kwargs)

In [None]:
X_patch_ndc = world_to_patch_ndc_transform.transform_points(X_world)  

In [None]:
# repeat sample image batch_size times with different patches of different sizes
image_size = [(256, 256)]
patch_size = [(128, 128)]

# wrt corner of the full image in pixels
patch_center = [(128, 128)]

cam_kwargs = {
    "znear": 0.0,
    "zfar": 80.0,
    "focal_length": 1.0,
    "principal_point": ((0.0, 0.0),),
    "R": torch.eye(3).unsqueeze(0),
    "T": torch.zeros(1, 3),
    "device": "cpu",
    "in_ndc": False,
    "image_size": image_size
    }

cam = PatchPerspectiveCameras(**cam_kwargs)

In [None]:
# topleft, topcenter, topright, midleft, midcenter, midright, botleft, botcenter, botright (-1,-1, 1) to (1, 1, 1)
patch_ndc_points = [(-1.0, -1.0, 1.0), (0.0, -1.0, 1.0), (1.0, -1.0, 1.0),
                    (-1.0, 0.0, 1.0), (0.0, 0.0, 1.0), (1.0, 0.0, 1.0),
                    (-1.0, 1.0, 1.0), (0.0, 1.0, 1.0), (1.0, 1.0, 1.0)]


In [None]:
patch_ndc_to_ndc_transform = get_patch_ndc_to_ndc_transform(cameras=cam, 
                                                            image_size=image_size, 
                                                            patch_size=patch_size, 
                                                            patch_center=patch_center)
ndc_points = patch_ndc_to_ndc_transform.transform_points(torch.tensor(patch_ndc_points))
ndc_points

tensor([[-0.5000, -0.5000,  1.0000],
        [ 0.0000, -0.5000,  1.0000],
        [ 0.5000, -0.5000,  1.0000],
        [-0.5000,  0.0000,  1.0000],
        [ 0.0000,  0.0000,  1.0000],
        [ 0.5000,  0.0000,  1.0000],
        [-0.5000,  0.5000,  1.0000],
        [ 0.0000,  0.5000,  1.0000],
        [ 0.5000,  0.5000,  1.0000]])

In [None]:
patch_ndc_revert_transform = get_ndc_to_patch_ndc_transform(cameras=cam,
                                                            image_size=image_size, 
                                                            patch_size=patch_size, 
                                                            patch_center=patch_center)
patch_ndc_points_revert = patch_ndc_revert_transform.transform_points(ndc_points)
patch_ndc_points_revert

tensor([[-1., -1.,  1.],
        [ 0., -1.,  1.],
        [ 1., -1.,  1.],
        [-1.,  0.,  1.],
        [ 0.,  0.,  1.],
        [ 1.,  0.,  1.],
        [-1.,  1.,  1.],
        [ 0.,  1.,  1.],
        [ 1.,  1.,  1.]])

In [None]:
ndc_to_screen_transform = get_ndc_to_screen_transform(cameras=cam, image_size=image_size)
screen_points = ndc_to_screen_transform.transform_points(ndc_points)      
screen_points

tensor([[-192., -192.,    1.],
        [-128., -192.,    1.],
        [ -64., -192.,    1.],
        [-192., -128.,    1.],
        [-128., -128.,    1.],
        [ -64., -128.,    1.],
        [-192.,  -64.,    1.],
        [-128.,  -64.,    1.],
        [ -64.,  -64.,    1.]])

In [None]:
world_points = cam.unproject_points(screen_points) # screen --> world

In [None]:
world_points

tensor([[-192., -192.,    1.],
        [-128., -192.,    1.],
        [ -64., -192.,    1.],
        [-192., -128.,    1.],
        [-128., -128.,    1.],
        [ -64., -128.,    1.],
        [-192.,  -64.,    1.],
        [-128.,  -64.,    1.],
        [ -64.,  -64.,    1.]])

In [None]:
screen_points_revert = cam.transform_points_screen(world_points)
screen_points_revert

tensor([[192., 192.,   1.],
        [128., 192.,   1.],
        [ 64., 192.,   1.],
        [192., 128.,   1.],
        [128., 128.,   1.],
        [ 64., 128.,   1.],
        [192.,  64.,   1.],
        [128.,  64.,   1.],
        [ 64.,  64.,   1.]])

In [None]:
ndc_points_revert = cam.transform_points_ndc(world_points)
ndc_points_revert

tensor([[-0.5000, -0.5000,  1.0000],
        [ 0.0000, -0.5000,  1.0000],
        [ 0.5000, -0.5000,  1.0000],
        [-0.5000,  0.0000,  1.0000],
        [ 0.0000,  0.0000,  1.0000],
        [ 0.5000,  0.0000,  1.0000],
        [-0.5000,  0.5000,  1.0000],
        [ 0.0000,  0.5000,  1.0000],
        [ 0.5000,  0.5000,  1.0000]])