A wigglegram is a short GIF that lets the viewer perceive depth through parallax, as shown below:

<img src="wigglegram_example.gif" width="500"/>
[Image source.](https://www.reddit.com/r/wigglegrams/comments/jm4m2v/seagulls_nimslo/)

Such GIFs are usually captured with a custom analog camera, e.g. [Nishika N8000](https://en.wikipedia.org/wiki/Nimslo):
<img src="camera.jpg" width="500"/>

Our goal today is to generate a wigglegram using a single image.

The repository contains a test image ```cat.jpeg``` and a metric depth map ```cat_depth_map.png``` generated with [ZoeDepth](https://huggingface.co/spaces/shariqfarooq/ZoeDepth).

In [None]:
import matplotlib.pylab as plt
import numpy as np
import torch
from jaxtyping import Float
from torch import Tensor

def dehomogenize_points(points: torch.Tensor) -> torch.Tensor:
    """Convert homogeneous coordinates to Cartesian coordinates.

    Every coordinate except the last is divided by the last (homogeneous)
    coordinate along the final axis.

    Args:
        points: Tensor of shape (..., d + 1); the last entry along the final
            axis is the homogeneous coordinate.

    Returns:
        Tensor of shape (..., d) with the homogeneous coordinate divided out.
    """
    w = points[..., -1:]
    # torch.sign(0) is 0, so scaling the epsilon by the raw sign would leave an
    # exact-zero denominator unguarded. Treat zero as positive so the epsilon
    # always pushes the denominator away from zero.
    sign = torch.sign(w)
    sign = torch.where(sign == 0, torch.ones_like(sign), sign)
    return points[..., :-1] / (w + 1e-8 * sign)

def homogenize_points(points):
    """Append a homogeneous coordinate equal to one along the last axis.

    The appended column matches the dtype and device of ``points``, so a
    tensor of shape (..., d) becomes one of shape (..., d + 1).
    """
    trailing_column = points[..., -1:]
    return torch.cat([points, torch.ones_like(trailing_column)], dim=-1)

In [None]:
# Load the photo, keep every second pixel along both axes, and rescale by 1/255.
image = torch.tensor(plt.imread('cat.jpeg')[::2, ::2] / 255, dtype=torch.float)
# Load the depth map with the same stride and multiply the decoded values by 256.
depths = torch.tensor(plt.imread('cat_depth_map.png')[::2, ::2] * 256, dtype=torch.float)
h, w, c = image.shape

# Show the image and its depth map side by side with the axes hidden.
fig, ax = plt.subplots(1, 2, figsize=(10, 7))
for panel, picture in zip(ax, (image, depths)):
    panel.imshow(picture)
    panel.axis('off')

In [None]:
# Intrinsic matrix for the iPhone 13 rear camera.
# The principal point sits at (0.5, 0.5), i.e. the centre of a [0, 1] x [0, 1] image.
# NOTE(review): 26 is presumably the focal length in mm and 24 / 36 the frame
# dimensions in mm - confirm against the camera specification.
focal_x = 26 / 24
focal_y = 26 / 36
K = torch.tensor([
    [focal_x, 0., 0.5],
    [0., focal_y, 0.5],
    [0., 0., 1.0],
])

In [None]:
def get_point_cloud(
    K: Float[Tensor, '3 3'],
    depths: Float[Tensor, 'h w 3'],
    h: int,
    w: int
) -> Float[Tensor, 'h w 3']:
    """Generate a 3D point cloud using an image with [0, 1] x [0, 1] coordinates.

    Exercise stub: the body is intentionally left unimplemented.

    Args:
        K: 3x3 camera intrinsic matrix.
        depths: Depth map. NOTE(review): annotated as 'h w 3', but a depth map
            is usually single-channel ('h w') - confirm the intended shape.
        h: Image height in pixels.
        w: Image width in pixels.

    Returns:
        Tensor annotated as 'h w 3' holding one 3D point per pixel.

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    raise NotImplementedError

In [None]:
def get_colors(
    image: Float[Tensor, 'h w 3'],
    h: int,
    w: int,
) -> Float[Tensor, 'h w 3']:
    """Create an array with pixel colors.

    Exercise stub: the body is intentionally left unimplemented.

    Args:
        image: Image tensor annotated as 'h w 3'.
        h: Image height in pixels.
        w: Image width in pixels.

    Returns:
        Tensor annotated as 'h w 3' with the color of every pixel.

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    raise NotImplementedError

In [None]:
def get_c_w2c(delta_x: float) -> Float[Tensor, '4 4']:
    """Return a 4x4 world-to-camera matrix that moves the camera along the x axis.

    Exercise stub: the body is intentionally left unimplemented.

    Args:
        delta_x: Camera offset along the x axis.

    Returns:
        The 4x4 world-to-camera matrix.

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    raise NotImplementedError

In [None]:
def project_points(
    points_3d: Float[Tensor, '... 3'],
    c_w2c: Float[Tensor, '4 4'],
    K: Float[Tensor, '3 3']
) -> Float[Tensor, '... 3']:
    """Project a point cloud onto a new screen defined by c_w2c.

    Exercise stub: the body is intentionally left unimplemented.

    Args:
        points_3d: 3D points, with the coordinates on the last axis.
        c_w2c: 4x4 world-to-camera matrix of the new view.
        K: 3x3 camera intrinsic matrix.

    Returns:
        Projected points, annotated as '... 3'. NOTE(review): presumably the
        third component keeps the depth next to the 2D screen position -
        confirm the intended layout.

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    raise NotImplementedError

In [None]:
# Lift the image into 3D using the depth map and the intrinsics.
points_3d = get_point_cloud(K, depths, h, w)
# Project the cloud through a camera offset by -0.1 along the x axis.
shifted_c_w2c = get_c_w2c(-1e-1)
new_points_2d = project_points(points_3d, shifted_c_w2c, K)
# Collect the pixel colors of the source image.
colors = get_colors(image, h, w)

In [None]:
def generate_frame(
    points: Float[Tensor, 'h w 3'],
    colors: Float[Tensor, 'h w 3'], 
    h: int, 
    w: int
) -> Float[Tensor, 'h w 3']:
    """Render one frame from projected points and their colors.

    Exercise stub: the body is intentionally left unimplemented; the comments
    below outline the intended steps.

    Args:
        points: Projected points, annotated as 'h w 3'.
        colors: Colors matching ``points``, annotated as 'h w 3'.
        h: Height of the frame. NOTE(review): callers in this notebook pass
            half of the source image height - confirm whether the canvas is
            meant to be smaller than the point grid.
        w: Width of the frame (see the note on ``h``).

    Returns:
        The rendered frame, annotated as 'h w 3'.

    Raises:
        NotImplementedError: Always, until the exercise is implemented.
    """
    # filter points outside of the frame
    # create canvas
    # fill the canvas
    raise NotImplementedError

In [None]:
# Render a single shifted view as a check before generating the whole sequence.
plt.figure(figsize=(10, 7))
preview = generate_frame(new_points_2d, colors, h // 2, w // 2)
plt.imshow(preview)
plt.axis('off')

In [None]:
# Render one frame per camera offset along x and save each as frame_<i>.png.
camera_shifts = (-0.1, -0.05, 0., 0.05, 0.1)
for i, delta_x in enumerate(camera_shifts):
    c_w2c = get_c_w2c(delta_x)
    new_points_2d = project_points(points_3d, c_w2c, K)
    frame = generate_frame(new_points_2d, colors, h // 2, w // 2)
    plt.imsave(f'frame_{i}.png', frame.numpy())

In [None]:
# generate a gif!
# Reads the numbered frames (frame_0.png, frame_1.png, ...), appends a reversed
# copy of the sequence so the animation plays forward and then backward, and
# writes the result to output.gif. Requires ffmpeg to be available on PATH.
!ffmpeg -i frame_%d.png -filter_complex "[0]reverse[r];[0][r]concat=n=2:v=1:a=0" output.gif