# Part 3: Single-View Geometry

## Usage
This code snippet provides an overall code structure and some interactive plot interfaces for the *Single-View Geometry* section of Assignment 3. In the [main function](#Main-function), we outline the required functionality step by step. Some of the functions that involve interactive plots are already provided, but [the rest](#Your-implementation) are left for you to implement.

## Package installation
- This code uses the `tkinter` package. Installation instructions can be found [here](https://anaconda.org/anaconda/tk).

# Common imports

In [1]:
import numpy as np

%matplotlib tk
import matplotlib.pyplot as plt

from pathlib import Path

data_dir = Path('part3')

# Provided functions

In [2]:
def get_input_lines(im, min_lines=3):
    """
    Allows user to input line segments; computes the line equation of each segment.

    The image is shown in a new figure and the user clicks pairs of endpoints.
    Every completed pair is drawn on the figure and echoed to stdout.

    Inputs:
        im: np.ndarray of shape (height, width, 3)
        min_lines: minimum number of lines required
    Returns:
        lines: np.ndarray of shape (n, 3)
            where each row holds the parameters (a, b, c) of the line equation
            a*x + b*y + c = 0 through the two clicked endpoints
        points: np.ndarray of shape (n, 4)
            where each row holds the clicked endpoints as (x1, y1, x2, y2)
    """
    # n counts the segments accepted so far
    n = 0
    lines = []
    points = []

    h = plt.figure()
    plt.imshow(im)
    plt.show()

    print("Set at least %d lines to compute vanishing point" % min_lines)
    print("Click two endpoints, right key to undo, middle key to stop")
    while True:
        clicked = plt.ginput(2, timeout=0, show_clicks=True)
        # fewer than two points means no complete segment was entered
        # (presumably the user ended the input early -- see the prompt above)
        if not clicked or len(clicked) < 2:
            if n < min_lines:
                print("Need at least %d lines, you have %d now" % (min_lines, n))
                continue
            else:
                # stop getting lines if number of lines is enough
                break

        # unpack user inputs as homogeneous points; store the endpoints as (x1, y1, x2, y2)
        p1 = np.array([clicked[0][0], clicked[0][1], 1])
        p2 = np.array([clicked[1][0], clicked[1][1], 1])
        points.append(np.hstack([p1[:-1], p2[:-1]]))

        # get line equation using cross product
        #   line[0] * x + line[1] * y + line[2] = 0
        line = np.cross(p1, p2)
        lines.append(line)

        print(
            f"({p1[0]:.2f}, {p1[1]:.2f}) <> ({p2[0]:.2f}, {p2[1]:.2f}) -> {line[0]:.2f}*x + {line[1]:.2f}*y + {line[2]:.2f}"
        )

        # plot line segment
        plt.plot([p1[0], p2[0]], [p1[1], p2[1]], color="b")

        n += 1

    plt.close(h)

    # one row per segment in both outputs
    return np.vstack(lines), np.vstack(points)


In [3]:
def plot_lines_and_vp(im, lines, vp, border=50):
    """
    Plots user-input lines and the calculated vanishing point.

    Each line is drawn between its intersections with two opposite edges of a
    bounding box that encloses both the image and the vanishing point.

    Inputs:
        im ((H, W, 3) array): An RGB image of height H and width W.
        lines ((N, 3) array): The line equations, one (a, b, c) row per line.
        vp ((2, ) array): A vanishing point
        border (float, optional): Margin added around the bounding box.
    """

    ny, nx, _ = im.shape
    bx1 = min(0, vp[0]) - border
    bx2 = max(nx - 1, vp[0]) + border
    by1 = min(0, vp[1]) - border
    by2 = max(ny - 1, vp[1]) + border

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.imshow(im)
    for line in lines:
        # Compare magnitudes: the sign of (a, b, c) is arbitrary (it depends on
        # the order of the clicked endpoints), so a signed comparison could pick
        # borders that are parallel to the line and divide by zero below.
        if abs(line[0]) < abs(line[1]):
            # mostly horizontal line: intersect with the left/right borders
            borders = ([1, 0, -bx1], [1, 0, -bx2])
        else:
            # mostly vertical line: intersect with the top/bottom borders
            borders = ([0, 1, -by1], [0, 1, -by2])

        pt1 = np.cross(np.array(borders[0]), line)
        pt2 = np.cross(np.array(borders[1]), line)
        # back from homogeneous coordinates
        pt1 = pt1 / pt1[2]
        pt2 = pt2 / pt2[2]
        ax.plot([pt1[0], pt2[0]], [pt1[1], pt2[1]], "g")

    ax.plot(*vp, "ro")
    plt.show()


In [4]:
def get_top_and_bottom_coordinates(im, obj):
    """
    Asks the user to click the top and then the bottom of an object in the image.

    Inputs:
        im: np.ndarray of shape (height, width, 3)
        obj: string, object name (only used in the prompts)
    Returns:
        coord: np.ndarray of shape (3, 2)
            column 0 is the homogeneous coordinate of the top of the object,
            column 1 is the homogeneous coordinate of the bottom
    """

    def _ask_for_click(prompt):
        # show the prompt, then wait for a single click
        print(prompt % obj)
        return plt.ginput(1, timeout=0, show_clicks=True)[0]

    plt.figure()
    plt.imshow(im)

    top_x, top_y = _ask_for_click("Click on the top coordinate of %s")
    # Uncomment this line to enable a vertical line to help align the two coordinates
    # plt.plot([top_x, top_x], [0, im.shape[0]], 'b')
    bottom_x, bottom_y = _ask_for_click("Click on the bottom coordinate of %s")

    # draw the segment joining the two clicks
    xs = [top_x, bottom_x]
    ys = [top_y, bottom_y]
    plt.plot(xs, ys, "b")

    return np.array([xs, ys, [1, 1]])


# Your implementation

In [5]:
def get_vanishing_point(lines):
    """
    Solves for the vanishing point using the user-input lines.

    Every pair of lines is intersected with a cross product; the finite
    intersection points are then averaged.

    Args:
        lines ((N, 3) array): For lines a*x+b*y+c=0, store parameters as (a, b, c).

    Returns:
        (2, ) array: Image coordinates (x, y) of the vanishing point.

    Raises:
        ValueError: If `lines` is not an (N, 3) array with N >= 2, or if no
            pair of lines has a finite intersection.
    """
    # work in floating point so integer-valued line parameters can be divided
    lines = np.asarray(lines, dtype=float)
    if lines.ndim != 2 or lines.shape[1] != 3 or lines.shape[0] < 2:
        raise ValueError("lines must be an (N, 3) array with at least two lines")

    # pairwise intersections, vp[i, j] = lines[i] x lines[j]
    vp = np.cross(lines[:, None, :], lines[None, :, :])
    # only select the upper triangle (without the diagonal, which pairs a line with itself)
    mask = np.triu_indices(vp.shape[0], 1)
    vp = vp[mask]

    # identical or parallel lines meet at infinity (zero last coordinate);
    # dividing by it would inject NaN/inf into the average, so drop those pairs
    magnitude = np.linalg.norm(vp, axis=1)
    finite = np.abs(vp[:, -1]) > np.finfo(float).eps * magnitude
    if not finite.any():
        raise ValueError("no pair of lines has a finite intersection")
    vp = vp[finite]

    # convert back from homogeneous coordinates
    vp = vp[:, :2] / vp[:, 2:]

    # return the average
    return np.mean(vp, axis=0)


In [6]:
def get_horizon_line(vps):
    """
    Calculates the ground horizon line.

    The two vanishing points with the smallest image y are joined by a line.
    The resulting equation is also printed.

    NOTE: `vps` is sorted in place (ascending y), so the caller's list is
    re-ordered after this call.

    Args:
        vps (list of (2, ) arrays): List of vanishing points.

    Returns:
        (3, ) array: Line equation coefficients (a, b, c) of a*x + b*y + c = 0,
            scaled so that a**2 + b**2 = 1.
    """
    # the horizon's vanishing points should sit at a similar height in the image,
    # so order by y and keep the first two (in-place sort, visible to the caller)
    vps.sort(key=lambda point: point[1])
    lowest_two = np.array(vps[:2])

    # lift to homogeneous coordinates
    first, second = np.column_stack([lowest_two, np.ones(2)])

    # the line through two homogeneous points is their cross product
    coef = np.cross(first, second)

    # the problem statement asks for a normalized equation
    coef *= 1 / np.hypot(coef[0], coef[1])
    print(f'{coef[0]:.6f}*x + {coef[1]:.6f}*y + {coef[2]:.6f} = 0')

    return coef
    


In [18]:
def plot_horizon_line(ax, im, line, boundary=20):
    """
    Plots the horizon line.

    The line is drawn as a white dotted segment between its intersections with
    the vertical borders x = -boundary and x = W + boundary. If the line is
    itself vertical, the horizontal borders y = -boundary and y = H + boundary
    are used instead.

    Args:
        ax : The axes to plot the line on.
        im ((H, W, 3) array): An RGB image as shape reference.
        line ((3, ) array): Line equation coefficients (a, b, c) of a*x + b*y + c = 0.
        boundary (float, optional): Margin of the horizon line beyond the image
            (a negative value keeps the segment inside the image).
    """
    ny, nx, _ = im.shape
    a, b, _ = line

    if abs(b) > np.finfo(float).eps * np.hypot(a, b):
        # border lines x = -boundary and x = nx + boundary
        borders = ([1, 0, boundary], [1, 0, -(nx + boundary)])
    else:
        # a vertical line never meets the vertical borders; use
        # y = -boundary and y = ny + boundary instead
        borders = ([0, 1, boundary], [0, 1, -(ny + boundary)])

    # find line boundary: intersection of two lines is their cross product
    p1 = np.cross(borders[0], line)
    p2 = np.cross(borders[1], line)

    # the intersections are homogeneous; divide by the last coordinate to get
    # pixel coordinates (otherwise x and y are scaled by the line's coefficient)
    p1 = p1 / p1[2]
    p2 = p2 / p2[2]

    ax.plot([p1[0], p2[0]], [p1[1], p2[1]], 'w:')
    

In [8]:
def get_camera_parameters(vps):
    """
    Computes the camera parameters. Hint: The SymPy package is suitable for this.

    Assumes zero skew and square pixels, and that the three vanishing points
    correspond to mutually orthogonal directions. The principal point (u, v)
    and the focal length f are printed as they are found.

    Args:
        vps (list of (2, ) arrays): List of vanishing points; the first three
            entries are used.

    Returns:
        (3, 3) array: Calibration matrix K = [[f, 0, u], [0, f, v], [0, 0, 1]].

    Raises:
        ValueError: If the vanishing points do not yield a positive squared
            focal length (they cannot come from orthogonal directions).
    """
    x1, y1 = vps[0]
    x2, y2 = vps[1]
    x3, y3 = vps[2]

    # solve principal center p = (u, v) from two orthogonality constraints
    #   (p - v1) . (v2 - v3) = 0
    #   (p - v3) . (v1 - v2) = 0
    x23, y23 = x2 - x3, y2 - y3
    x12, y12 = x1 - x2, y1 - y2

    A = np.array([[x23, y23], [x12, y12]])
    b = np.array([x1 * x23 + y1 * y23, x3 * x12 + y3 * y12])
    u, v = np.linalg.lstsq(A, b, rcond=None)[0]
    print(f"(u, v) = ({u}, {v})")

    # compute focal length from (v1 - p) . (v2 - p) + f^2 = 0
    f_squared = -(x1 - u) * (x2 - u) - (y1 - v) * (y2 - v)
    if not f_squared > 0:
        # taking the square root here would silently put NaN into K
        raise ValueError(
            "vanishing points are inconsistent with orthogonal directions "
            f"(f^2 = {f_squared})"
        )
    f = np.sqrt(f_squared)
    print(f"f={f}")

    # fill out the calibration matrix
    K = np.array([[f, 0, u], [0, f, v], [0, 0, 1]])

    return K


def get_camera_parameters_hz(vps):
    """
    Computes the camera calibration matrix from orthogonal vanishing points.

    Use implementation from Hartley and Zisserman, algorithm 8.2: every pair of
    vanishing points (u, v) of orthogonal directions gives one linear
    constraint u^T w v = 0 on the image of the absolute conic w, and K is
    recovered from w = inv(K K^T) by Cholesky factorization. The principal
    point and focal length are printed.

    Args:
        vps ((N, 2) array or list of (2, ) arrays): Vanishing points of
            mutually orthogonal directions, N >= 3.

    Returns:
        (3, 3) array: Calibration matrix K, scaled so that K[2, 2] == 1.

    Raises:
        ValueError: If fewer than three vanishing points are given.
        numpy.linalg.LinAlgError: If the estimated conic is not positive
            definite (raised by the Cholesky factorization).
    """
    vps = np.asarray(vps, dtype=float)
    if vps.ndim != 2 or vps.shape[1] != 2 or vps.shape[0] < 3:
        raise ValueError("vps must contain at least three (x, y) vanishing points")

    # all unordered pairs (i < j) of vanishing points
    first, second = np.triu_indices(vps.shape[0], 1)
    p, q = vps[first], vps[second]

    # solving
    #   w = [ w11, w12, w13; w21, w22, w23; w31, w32, w33 ]
    #     = [  w1,  w2,  w4;  w2,  w3,  w5;  w4,  w5,  w6 ]
    # with homogeneous points u = (u1, u2, u3), v = (v1, v2, v3) each row is
    #   [ v1*u1, v1*u2+v2*u1, v2*u2, v1*u3+v3*u1, v2*u3+v3*u2, v3*u3 ]
    #
    # constraints
    #   - zero skew, w12 = w21 = 0
    #   - square pixel, w11 = w22
    # simplify the row to
    #   [ v1*u1+v2*u2, v1*u3+v3*u1, v2*u3+v3*u2, v3*u3 ]
    # for the unknowns of
    #   w = [  w1,   0,  w2;   0,  w1,  w3;  w2,  w3,  w4 ]
    # (u3 = v3 = 1 for finite image points)
    A = np.column_stack(
        [
            np.sum(p * q, axis=1),
            p[:, 0] + q[:, 0],
            p[:, 1] + q[:, 1],
            np.ones(len(p)),
        ]
    )

    # the null vector of A is the last right-singular vector
    _, _, vh = np.linalg.svd(A, full_matrices=True)

    w1, w2, w3, w4 = vh[-1, :]
    # the null vector is only defined up to sign; pick the sign that makes w
    # positive definite, otherwise the Cholesky factorization below fails
    if w1 < 0:
        w1, w2, w3, w4 = -w1, -w2, -w3, -w4
    w = np.array([[w1, 0, w2], [0, w1, w3], [w2, w3, w4]])

    # w = inv(K K.T) = inv(K).T @ inv(K); its lower Cholesky factor is inv(K).T
    KT_inv = np.linalg.cholesky(w)
    K = np.linalg.inv(KT_inv.T)
    K /= K[-1, -1]

    print(f"(u, v) = ({K[0, -1]}, {K[1, -1]})")
    print(f"f={K[0, 0]}")

    return K


In [9]:
def get_rotation_matrix(vps, K):
    """
    Computes the rotation matrix using the camera parameters.

    Each vanishing point is back-projected through inv(K); the three resulting
    directions form the columns of a matrix that is then replaced by the
    orthogonal matrix obtained from its SVD.

    Args:
        vps (list of (2, ) arrays or (3, 2) array): Three vanishing points;
            column i of the result corresponds to vps[i].
        K ((3, 3) array): Camera calibration matrix.

    Returns:
        ((3, 3) array): Normalized rotation matrix.
    """
    # homogeneous vanishing points, one per row
    homogeneous = np.column_stack([vps, np.ones(3)])

    # back-project: column i is inv(K) @ v_i
    directions = np.linalg.inv(K) @ homogeneous.T

    # orthonormalize by discarding the singular values
    left, _, right_t = np.linalg.svd(directions)
    return left @ right_t


# Hard-coded vanishing points used to try out the calibration and rotation
# routines without the interactive line input.
vps1 = np.array(
    [
        [1343.24, 229.16],
        [493.84, 6330.22],
        [-237.51, 214.21],
    ]
)
vps2 = np.array(
    [
        [1299.76, 230.50],
        [515.99, 6820.60],
        [-923.18, 182.04],
    ]
)

print("VPs=")
print(vps1)

# calibration first, then the rotation that depends on it
K = get_camera_parameters(vps1)
R = get_rotation_matrix(vps1, K)
print("R=")
print(R)


VPs=
[[1343.24  229.16]
 [ 493.84 6330.22]
 [-237.51  214.21]]
(u, v) = (550.6446259066291, 323.9382339864041)
f=783.7651831564144
R=
[[ 0.70850006 -0.00937762 -0.70564845]
 [-0.08472215  0.99154967 -0.09824158]
 [ 0.70060676  0.12938822  0.7017185 ]]


In [46]:
from numpy.linalg import norm


def estimate_height(vps, coord_ref, h_ref, coord_obj):
    """
    Estimates height for a specific object using the recorded coordinates. You might need to plot additional images here for
    your report.

    The object's top is transferred onto the reference object's vertical line
    through the horizon, and the height follows from a cross ratio with the
    third vanishing point.

    Args:
        vps (list of (2, ) arrays): Three vanishing points, consumed in order
            as v_x, v_y, v_z. v_x and v_y define the horizon; v_z is used as
            the vanishing point along the measured (height) direction.
        coord_ref ((2, 3) array): Homogeneous image coordinates of the
            reference object, row 0 is its top and row 1 its bottom.
        h_ref (float): Known height of the reference object.
        coord_obj ((2, 3) array): Homogeneous image coordinates of the target
            object, row 0 is its top and row 1 its bottom.

    Returns:
        float: Estimated height of the target object, in the unit of `h_ref`.
    """
    # homogeneous vanishing points (the appended 1 keeps all of them finite)
    vx_h, vy_h, vz_h = np.column_stack([vps, np.ones(3)])

    # the reference object measured against itself
    if (coord_ref == coord_obj).all():
        return h_ref

    # extract coordinates for target/reference objects
    top_obj, bottom_obj = coord_obj
    top_ref, bottom_ref = coord_ref

    # v: where the line through both bottoms meets the horizon
    #   v = (b x b0) x (v_x x v_y)
    horizon = np.cross(vx_h, vy_h)
    ground_line = np.cross(bottom_ref, bottom_obj)
    v = np.cross(ground_line, horizon)

    # t: the target's top transferred onto the reference's line
    #   t = (v x t0) x (r x b)
    transfer_line = np.cross(v, top_obj)
    reference_line = np.cross(top_ref, bottom_ref)
    t = np.cross(transfer_line, reference_line)
    t /= t[-1]

    # calculate cross ratio
    #   |t-b|/|r-b| * |v_z-r|/|v_z-t| = H/R
    len_tb = norm(t - bottom_ref)
    len_rb = norm(top_ref - bottom_ref)
    len_vr = norm(vz_h - top_ref)
    len_vt = norm(vz_h - t)
    ratio = len_tb / len_rb * len_vr / len_vt

    return h_ref * ratio


# Main function

### Use this function to save line info for each VP

We suggest labeling the lines for the VPs from *left* to *right*, so that the horizon can be built directly from $v_0$ and $v_2$.

In [11]:
from skimage.io import imread

# Interactively record the line segments for each of the three vanishing
# points and store them as part3/vp{i}.npy (columns: a, b, c, x1, y1, x2, y2).
overwrite = False

# sanity check: refuse to clobber previously recorded lines
if not overwrite:
    for i in range(3):
        if (data_dir / f"vp{i}.npy").exists():
            raise RuntimeError(f"'vp{i}.npy' exists")

# the image does not change between directions, so read it once
im = imread(data_dir / "CSL.jpg")
for i in range(3):
    lines, points = get_input_lines(im)

    print(f"vp{i}, {len(lines)} lines")
    # keep line equations and their endpoints side by side, one row per segment
    data = np.hstack([lines, points])
    np.save(data_dir / f"vp{i}.npy", data)

RuntimeError: 'vp0.npy' exists

### Use this function to extract line info for each object

In [None]:
# Interactively record the top/bottom image coordinates of every object and
# store them as a dict in part3/height.npy.
overwrite = False

# sanity check: refuse to clobber previously recorded coordinates
if not overwrite and (data_dir / "height.npy").exists():
    raise RuntimeError("'height.npy' exists")

keys = ("person", "CSL building", "the spike statue", "the lamp posts")

# object name -> (3, 2) homogeneous top/bottom coordinates, prompted in order
coords = {key: get_top_and_bottom_coordinates(im, key) for key in keys}
np.save(data_dir / "height.npy", coords)

RuntimeError: 'height.npy' exists

## Part 1
Get vanishing points for each of the directions.

In [40]:
from skimage.io import imread

im = imread(data_dir / "CSL.jpg")

# one vanishing point per recorded direction; the first three columns of each
# file are the line equations
vps = [
    get_vanishing_point(np.load(data_dir / f"vp{index}.npy")[:, :3])
    for index in range(3)
]
print(vps)

# note: this call re-orders the `vps` list in place
horizon_line = get_horizon_line(vps)

[array([-237.51901457,  214.21484887]), array([ 493.84651784, 6330.22134133]), array([1343.24186665,  229.16592685])]
-0.009458*x + 0.999955*y + -216.451659 = 0


Without VPs

In [25]:
# Image with the user-drawn segments and the horizon, without vanishing points.
fig, ax = plt.subplots(figsize=(8, 6))
ax.imshow(im)

# one colour per recorded direction; the file index follows the colour order
for index, colour in enumerate("rgb"):
    segments = np.load(data_dir / f"vp{index}.npy")
    starts, ends = segments[:, 3:5], segments[:, 5:]

    # user input, ground truth
    for start, end in zip(starts, ends):
        xs = [start[0], end[0]]
        ys = [start[1], end[1]]
        ax.plot(xs, ys, colour, linewidth=2)

plot_horizon_line(ax, im, horizon_line, boundary=-5)
fig.savefig("csl_horizon.png", bbox_inches="tight", pad_inches=0)

VPs and horizon

In [51]:
# Tall figure showing the user-drawn segments, all three vanishing points and
# the horizon.
fig, ax = plt.subplots(figsize=(5, 15))
ax.imshow(im)

# vps[index] is drawn in the same colour as the segments of vp{index}.npy
for index, (point, colour) in enumerate(zip(vps, "rgb")):
    segments = np.load(data_dir / f"vp{index}.npy")
    starts, ends = segments[:, 3:5], segments[:, 5:]

    # user input, ground truth
    for start, end in zip(starts, ends):
        xs = [start[0], end[0]]
        ys = [start[1], end[1]]
        ax.plot(xs, ys, colour, linewidth=2)

    ax.scatter(*point, c=colour)

plot_horizon_line(ax, im, horizon_line, boundary=-5)
fig.savefig("csl_vp3+horizon.png", bbox_inches="tight", pad_inches=0)

In [52]:
# Same plot as before, but vanishing points with y > 1000 are not drawn
# (this skips the vertical VP).

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(im)

for index, (point, colour) in enumerate(zip(vps, "rgb")):
    segments = np.load(data_dir / f"vp{index}.npy")
    starts, ends = segments[:, 3:5], segments[:, 5:]

    # user input, ground truth
    for start, end in zip(starts, ends):
        xs = [start[0], end[0]]
        ys = [start[1], end[1]]
        ax.plot(xs, ys, colour, linewidth=2)

    # only mark vanishing points that are not far below the image
    if not point[1] > 1000:
        ax.scatter(*point, c=colour)

plot_horizon_line(ax, im, horizon_line, boundary=-5)

ax.axis("off")
fig.savefig("csl_vp2+horizon.png", bbox_inches="tight", pad_inches=0)

## Part 2 and 3
Solve camera parameters and its rotation matrix.

In [39]:
# this requires VPs created from previous part
K = get_camera_parameters(vps)
print('K=')
print(K)

# reorder VPs to follow problem statement
#   X: right; Y: vertical; Z: left
# NOTE(review): the permutation below is fixed, so the resulting axis
# assignment depends on the order of `vps` on entry -- confirm it matches
# the convention above before trusting R.
print('(before) vps=')
print(vps)
vps = [vps[k] for k in (1, 2, 0)]
print('(after) vps=')
print(vps)

R = get_rotation_matrix(vps, K)
print('R=')
with np.printoptions(precision=3):
    print(R)

(u, v) = (550.6547951018749, 323.9452352558771)
f=783.7705031103384
K=
[[783.77050311   0.         550.6547951 ]
 [  0.         783.77050311 323.94523526]
 [  0.           0.           1.        ]]
(before) vps=
[array([1343.24186665,  229.16592685]), array([ 493.84651784, 6330.22134133]), array([-237.51901457,  214.21484887])]
(after) vps=
[array([ 493.84651784, 6330.22134133]), array([-237.51901457,  214.21484887]), array([1343.24186665,  229.16592685])]
R=
[[-0.009 -0.706  0.708]
 [ 0.992 -0.098 -0.085]
 [ 0.129  0.702  0.701]]


## Part 4
Record image coordinates for each object and store in map. 

In [48]:
# load coordinates from previous session
# the file holds a pickled dict (object name -> (3, 2) coordinates), which is
# why allow_pickle is needed; only load files you recorded yourself
coords = np.load(data_dir / "height.npy", allow_pickle=True)
# np.save wrapped the dict in a 0-d object array; indexing with () unwraps it
coords = coords[()]

obj_ref = "person"
# reference height in metres (the results below are printed in m)
# h_ref = 1.6764  # 5'6" in m
h_ref = 1.8288  # 6 ft in m

# sanity check for VPs: estimate_height consumes them in this order as
# v_x, v_y, v_z, so the last one should be the vertical vanishing point
for axis_name, vp in zip(("x", "y", "z"), vps):
    print(f"v_{axis_name}={vp}")

# estimate height for each object
ref_coord = coords[obj_ref].T
for key, coord in coords.items():
    height = estimate_height(vps, ref_coord, h_ref, coord.T)
    print(f"estimate height of {key} = {height} m")

v_x=[-237.51901457  214.21484887]
v_y=[1343.24186665  229.16592685]
v_z=[ 493.84651784 6330.22134133]
estimate height of person = 1.8288 m
estimate height of CSL building = 24.09905168816902 m
estimate height of the spike statue = 10.038959886281011 m
estimate height of the lamp posts = 5.959964529084401 m


In [43]:
# End-to-end run of all four parts using the previously recorded line and
# coordinate files in `data_dir`.
from skimage.io import imread

im = imread(data_dir / "CSL.jpg")

plt.close("all")

# Part 1
# Get vanishing points for each of the directions
vps = []
fig, ax = plt.subplots(figsize=(8, 6))
ax.imshow(im)
# set to True to also mark each vanishing point on the figure
show_vp = False
for i, color in enumerate(("r", "g", "b")):
    # columns: line equation (a, b, c), then the two clicked endpoints
    data = np.load(data_dir / f"vp{i}.npy")
    lines, p1, p2 = data[:, :3], data[:, 3:5], data[:, 5:]

    vp = get_vanishing_point(lines)
    vps.append(vp)

    # user input, ground truth
    for p1_, p2_ in zip(p1, p2):
        ax.plot([p1_[0], p2_[0]], [p1_[1], p2_[1]], color, linewidth=2)

    if show_vp:
        ax.scatter(*vp, c=color)

# NOTE: get_horizon_line sorts `vps` in place by image y, so every step below
# sees the re-ordered list. estimate_height unpacks it as (v_x, v_y, v_z);
# presumably the vertical vanishing point has the largest y and therefore
# ends up last -- verify for other images.
horizon_line = get_horizon_line(vps)
plot_horizon_line(ax, im, horizon_line, boundary=-5)
fig.savefig("csl_horizon.png", bbox_inches="tight", pad_inches=0)

# Part 2
# <YOUR IMPLEMENTATION> Solve for the camera parameters
K = get_camera_parameters(vps)

# Part 3
# <YOUR IMPLEMENTATION> Solve for the rotation matrix
R = get_rotation_matrix(vps, K)

# Part 4
# Record image coordinates for each object and store in map
coords = np.load(data_dir / f"height.npy", allow_pickle=True)
# unwrap the dict from the 0-d object array returned by np.load
coords = coords[()]

obj_ref = "person"
h_ref = 167.64  # 5'6 in cm

# <YOUR IMPLEMENTATION> Estimate heights
# every object is measured against the reference object's coordinates
for key, coord in coords.items():
    height = estimate_height(vps, coords[obj_ref].T, h_ref, coord.T)
    print(f"estimate height of {key} = {height} cm")


-0.009458*x + 0.999955*y + -216.451659 = 0
(u, v) = (550.6547951018748, 323.9452352558772)
f=783.7705031103379
[-237.51901457  214.21484887    1.        ]
[1.34324187e+03 2.29165927e+02 1.00000000e+00]
[4.93846518e+02 6.33022134e+03 1.00000000e+00]
estimate height of person = 167.64 cm
[-237.51901457  214.21484887    1.        ]
[1.34324187e+03 2.29165927e+02 1.00000000e+00]
[4.93846518e+02 6.33022134e+03 1.00000000e+00]
estimate height of CSL building = 2209.07973808216 cm
[-237.51901457  214.21484887    1.        ]
[1.34324187e+03 2.29165927e+02 1.00000000e+00]
[4.93846518e+02 6.33022134e+03 1.00000000e+00]
estimate height of the spike statue = 920.2379895757593 cm
[-237.51901457  214.21484887    1.        ]
[1.34324187e+03 2.29165927e+02 1.00000000e+00]
[4.93846518e+02 6.33022134e+03 1.00000000e+00]
estimate height of the lamp posts = 546.3300818327368 cm
