In [1]:
import os
import sys
import numpy as np
import cv2

from utils.homography import compute_h, cor_p
from utils.triangulation import triangulation
from pathlib import Path
from shapely.geometry import Polygon

# Triangulation

In [2]:
num_cams = 4
cam_poses = {} # key: cami, value: 4x4 pose matrix
for i in range(num_cams):
    # A pose file must contain 16 numbers in total (they are reshaped to 4x4 below);
    # how they are spread over lines does not matter.
    with open(f'./camera_poses/{i:05d}.txt', 'r') as f:
        # split() without an argument tolerates tabs, repeated spaces and trailing
        # whitespace; blank lines are skipped instead of crashing float('').
        pose = [list(map(float, line.split())) for line in f if line.strip()]
    cam_poses[f'cam{i}'] = np.array(pose).reshape(4, 4)

yolo_outputs = {} # key: cami, value: yolo_output

# Detector class id -> layout category used by the rest of the notebook.
# NOTE(review): the original comment calls id 11 "bench", but in the standard
# COCO-80 label map 11 is "stop sign" and "bench" is 13 -- confirm against the
# label map of the model that produced these files before changing it.
class_to_category = {
    56: 'chair',   # chair
    11: 'chair',   # "bench" according to the original comment (see note above)
    60: 'table',   # table
    0: 'person',   # person
}

for i in range(num_cams):

    # outputs of one camera:
    yolo_output = {'chair': [], 'table': [], 'person': []} # key: class, value: list of boxes

    with open(f'./runs/detect/layout/cam{i}/labels/00000.txt', 'r') as f:
        for line in f:
            # Whitespace-agnostic split; skip empty lines rather than failing on float('').
            fields = line.split()
            if not fields:
                continue
            data = list(map(float, fields))

            # data[0] is the class id; the next four values describe the box
            # (presumably YOLO's normalised x-centre, y-centre, width, height -- confirm).
            category = class_to_category.get(data[0])
            if category is not None:
                yolo_output[category].append(np.array(data[1:5]))
    yolo_outputs[f'cam{i}'] = yolo_output

In [10]:
# Image size used throughout the notebook (presumably height, width in pixels -- confirm).
h, w = 1456, 1928

# Keep the first three rows (3x4) of every camera's 4x4 pose matrix.
poses = [cam_poses[f'cam{i}'][:3, :] for i in range(num_cams)]

# 3x3 matrix inverted later and applied to pixel coordinates
# (presumably the shared camera intrinsics -- confirm).
K = np.array([
        [975.813843, 0, 960.973816],
        [0, 975.475220, 729.893921],
        [0, 0, 1]
    ])

# Triangulate every corresponding point from its per-camera observations.
triangulated = []
for idx in range(len(cor_p[0])):
    # The idx-th correspondence as seen by each camera, split into its two coordinates.
    observations = [cor_p[i][idx] for i in range(num_cams)]
    X = [obs[0] for obs in observations]
    Y = [obs[1] for obs in observations]
    triangulated.append(triangulation(poses, X, Y))
triangulated = np.array(triangulated)
print(triangulated)

[[-0.64439422  0.64644906  1.9002724 ]
 [-2.03783136 -0.37931437  3.51591321]
 [ 1.63704086  0.30526341  2.87154828]
 [-1.25637634 -0.80773087  4.60602474]
 [-0.4540996  -0.19573119  3.51672827]
 [ 1.31323541 -0.66075585  4.70064608]]


In [11]:
# Homogeneous coordinates: append a column of ones to the triangulated points.
ones_column = np.ones((len(triangulated), 1))
cords = np.hstack((triangulated, ones_column))

# np.linalg.svd returns singular values in descending order, so the last row of
# the third factor is the direction with the smallest singular value; it is
# taken as the plane coefficients (A, B, C, D).
_, sigma, V = np.linalg.svd(cords)
plane = V[-1]
print(plane)

[ 0.04389121 -0.49583658 -0.25795586  0.82805701]


In [12]:
def distToPlane(point, plane):
    """Return the distance from a 3D point to the plane A*x + B*y + C*z + D = 0.

    point: sequence (x, y, z).
    plane: sequence (A, B, C, D) of plane coefficients.
    Returns |A*x + B*y + C*z + D| divided by the length of the normal (A, B, C).
    """
    x, y, z = point
    A, B, C, D = plane
    residual = A*x+B*y+C*z+D
    normal_length = np.sqrt(np.sum(np.power([A, B, C], 2)))
    return np.abs(residual) / normal_length

# Print how far each triangulated point lies from the fitted plane.
for point in triangulated:
    residual = distToPlane(point, plane)
    print(residual)

0.019523480317964182
0.03521248863441822
0.013939752563476765
0.026283020684255493
0.0035386899106955544
0.0013631166298566893


In [13]:
# The back-projection check below uses the first camera only.
pose = poses[0]
# 4x4 system: the plane coefficients as first row, the 3x4 pose underneath.
M = np.vstack((plane, pose))
Kinv = np.linalg.inv(K)


def pixelToCord(pixel, Kinv, M):
    """Map a homogeneous pixel to a 3D point via the 4x4 system M.

    pixel: length-3 homogeneous pixel.
    Kinv:  3x3 matrix applied to the pixel (the notebook passes inv(K)).
    M:     invertible 4x4 matrix (the notebook stacks the plane row on a 3x4 pose).

    Solves M @ X = [0, Kinv @ pixel] and returns the first three components of X
    divided by its last component.
    """
    ray = Kinv @ pixel
    # The leading 0 pairs with the first row of M (the plane row in this notebook).
    rhs = np.hstack(([0], ray))
    homogeneous = np.linalg.inv(M) @ rhs
    scale = homogeneous[-1]
    return homogeneous[:-1] / scale

# Sanity check: back-project the first correspondence of camera 0 onto the plane
# and compare it with the triangulated point.
pixel = cor_p[0][0] + [1]  # append 1 to make the pixel homogeneous
print("Pixel: ", pixel)
res = pixelToCord(pixel, Kinv, M)
for label, value in (
    ("Triangulated: ", triangulated[0]),
    ("PixelToCord of Pixel: ", res),
    ("Distance to Plane: ", distToPlane(res, plane)),
):
    print(label, value)

Pixel:  [609, 1025, 1]
Triangulated:  [-0.64439422  0.64644906  1.9002724 ]
PixelToCord of Pixel:  [-0.7047781   0.59111358  1.95393009]
Distance to Plane:  1.9802641679796378e-16


# Layout

In [3]:
# Image size read as module-level globals by yoloToBoxes
# (presumably height and width in pixels -- confirm).
h = 1456
w = 1928

# First three rows (3x4) of each camera's 4x4 pose matrix.
poses = [cam_poses[f'cam{cam}'][:3] for cam in range(num_cams)]

# Converts the 'table' detections stored for cam{id} into integer pixel boxes.
# Input:  id - camera index, used to look up yolo_outputs[f'cam{id}']
# Output: a list with one [center, bbox] pair per detected table
#   center  : np.array [horizontal, vertical] pixel values
#   bbox    : np.array of the 4 box corners, each [horizontal, vertical]
# Reads the module-level image size `w` and `h` to scale the stored values
# (which are presumably fractions of the image size -- confirm).
def yoloToBoxes(id):
    # (vertical sign, horizontal sign) of each corner relative to the centre.
    corner_signs = [[-1, -1], [-1, 1], [1, 1], [1, -1]]

    bboxes = []
    for table in yolo_outputs[f'cam{id}']['table']:
        # First two values: horizontal / vertical centre; last two: horizontal / vertical extent.
        horiz_c, vert_c, horiz_size, vert_size = table

        center = np.array([int(horiz_c * w), int(vert_c * h)])   # (horizontal, vertical)
        bbox = np.array([
            [
                int((horiz_c + sign_h * horiz_size / 2) * w),
                int((vert_c + sign_v * vert_size / 2) * h),
            ]
            for sign_v, sign_h in corner_signs
        ])
        bboxes.append([center, bbox])

    return bboxes

In [9]:
def distance(p1, p2, weight=None):
    """Weighted Euclidean distance between two equally sized vectors.

    p1, p2: array-likes of the same length (lists or numpy arrays of any numeric dtype).
    weight: optional per-component factors applied to the difference before the
            norm is taken. When omitted (or empty) every component is scaled by
            1/len(p1).
    Returns the norm of (p1 - p2) * weight.
    """
    # Work in float64: subtracting two unsigned arrays (e.g. uint8 pixels) would
    # otherwise wrap around instead of going negative.
    a = np.asarray(p1, dtype=np.float64)
    b = np.asarray(p2, dtype=np.float64)
    # None replaces the former mutable `[]` default; an explicitly passed empty
    # sequence still selects the uniform weights, as before.
    if weight is None or len(weight) == 0:
        weight = np.ones(len(a)) / len(a)
    return np.linalg.norm((a - b) * np.asarray(weight, dtype=np.float64))

In [10]:
# Palette of 27 reference colours: every combination of {0, 100, 200} per channel.
superPixels = np.array([
    [r, g, b]
    for r in [0, 100, 200]
    for g in [0, 100, 200]
    for b in [0, 100, 200]
])

# cv2.imwrite does not create missing directories; make sure the target exists.
os.makedirs('./runs/discretize', exist_ok=True)

# Signed copy of the palette so the per-pixel differences below cannot wrap around.
palette = superPixels.astype(np.int64)

for cam_id in range(num_cams):  # `cam_id` rather than `id`, which would shadow the builtin
    src_path = f'./data/layout/cam{cam_id}/00000.jpg'
    img = cv2.imread(src_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f'could not read image: {src_path}')

    res = np.full(img.shape, 255, dtype=np.uint8)

    # Do NOT unpack img.shape into `w`/`h`: those module-level names hold the image
    # size used by yoloToBoxes, and img.shape is ordered (rows, cols, channels).
    n_rows = img.shape[0]
    for row in range(n_rows):
        # Squared distance from every pixel of this row to every palette colour.
        # The nearest colour is the same as with the uniformly weighted `distance`
        # helper, and np.argmin likewise picks the first palette entry on ties.
        diff = img[row].astype(np.int64)[:, None, :] - palette[None, :, :]
        sq_dist = np.sum(diff * diff, axis=2)
        res[row] = superPixels[np.argmin(sq_dist, axis=1)]

    cv2.imwrite(f"./runs/discretize/cam{cam_id}.jpg", res)
    # NOTE(review): only the first camera is processed because of this break
    # (kept from the original cell); remove it to discretize every camera.
    break
