In [1]:
import sys
import numpy as np
import cv2


import os
import scipy.misc
from scipy.optimize import least_squares
import math
from copy import deepcopy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from triangulation import *


In [2]:
image_data_dir = '../data/statue/'
image_paths = [os.path.join(image_data_dir, 'images', x) for x in sorted(os.listdir('../data/statue/images')) if '.jpg' in x]

In [3]:

matches_subset = np.load(os.path.join(image_data_dir,'matches_subset.npy'), allow_pickle=True,encoding='latin1')[0,:]
fundamental_matrices = np.load(os.path.join(image_data_dir,'fundamental_matrices.npy'),allow_pickle=True,encoding='latin1')[0,:]
image_points = matches_subset[0][:,0].reshape(2,2)

In [4]:
im0 = cv2.imread(image_paths[0])
im_height, im_width, _ = im0.shape

* Nous pouvons calculer la matrice essentielle E soit directement à partir de sa relation avec la matrice fondamentale F et la matrice intrinsèque K :
$$E=K^TFK$$

* Ici, K est donné par

$$K=\begin{bmatrix}
f & 0 &0 \\
 0&f  &0 \\
0 & 0 & 1
\end{bmatrix}$$

In [5]:
focal_length = 719.5459
K = np.eye(3)
K[0,0] = K[1,1] = focal_length
E = K.T.dot(fundamental_matrices[0]).dot(K)

* Nous pouvons réécrire la matrice essentielle comme suit :

$$E=[R \mid t]=Udiag(1,1,0)(WU^TR)$$
où
$$W=\begin{bmatrix}
0 &  -1&0 \\
 1& 0 & 0\\
 0&0  & 1
\end{bmatrix},Z=\begin{bmatrix}
0 &  1&0 \\
 -1& 0 & 0\\
 0&0  & 0
\end{bmatrix}$$

* En examinant attentivement cette expression, nous constatons qu'elle ressemble beaucoup à la décomposition en valeurs singulières $E = UΣV^T$ , on obtient alors les factorisations suivantes de E :

$$t=\pm u_3,\quad R=det(UWV^T)UWV^T \text{ ou } det(UW^TV^T)UW^TV^T$$

In [6]:
Z = np.array([[0, 1, 0],
                  [-1, 0, 0],
                  [0, 0, 0]])
W = np.array([[0, -1, 0],
                  [1, 0, 0],
                  [0, 0, 1]])

In [7]:
U, s, VT = np.linalg.svd(E)

In [8]:

Q1 = U.dot(W).dot(VT)
R1 = np.linalg.det(Q1) * 1.0 * Q1

Q2 = U.dot(W.T).dot(VT)
R2 = np.linalg.det(Q2) * 1.0 * Q2

In [9]:
M = U.dot(Z).dot(U.T)

In [10]:
# compute T
T1 = U[:, 2].reshape(-1, 1)
T2 = -U[:, 2].reshape(-1, 1)

R_set = [R1, R2]
T_set = [T1, T2]
RT_set = []
for i in range(len(R_set)):
        for j in range(len(T_set)):
            RT_set.append(np.hstack((R_set[i], T_set[j])))

estimated_RT = np.zeros((4, 3, 4))
for i in range(estimated_RT.shape[0]):
        estimated_RT[i, :, :] = RT_set[i]

In [11]:

print("Estimated RT:\n", estimated_RT)

Estimated RT:
 [[[ 0.98305251 -0.11787055 -0.14040758  0.99941228]
  [-0.11925737 -0.99286228 -0.00147453 -0.00886961]
  [-0.13923158  0.01819418 -0.99009269  0.03311219]]

 [[ 0.98305251 -0.11787055 -0.14040758 -0.99941228]
  [-0.11925737 -0.99286228 -0.00147453  0.00886961]
  [-0.13923158  0.01819418 -0.99009269 -0.03311219]]

 [[ 0.97364135 -0.09878708 -0.20558119  0.99941228]
  [ 0.10189204  0.99478508  0.00454512 -0.00886961]
  [ 0.2040601  -0.02537241  0.97862951  0.03311219]]

 [[ 0.97364135 -0.09878708 -0.20558119 -0.99941228]
  [ 0.10189204  0.99478508  0.00454512  0.00886961]
  [ 0.2040601  -0.02537241  0.97862951 -0.03311219]]]


In [12]:

example_RT = np.array([[0.9736, -0.0988, -0.2056, 0.9994],
        [0.1019, 0.9948, 0.0045, -0.0089],
        [0.2041, -0.0254, 0.9786, 0.0331]])
print("Example RT:\n", example_RT)

Example RT:
 [[ 0.9736 -0.0988 -0.2056  0.9994]
 [ 0.1019  0.9948  0.0045 -0.0089]
 [ 0.2041 -0.0254  0.9786  0.0331]]


* Il existe quatre solutions possibles pour extraire de la matrice Essential la rotation R et la translation t relatives de la caméra. rotation relative de la caméra R et la translation t de la matrice Essential. Cependant, seul le point reconstruit en (a), le point reconstruit se trouve devant les deux caméras.

<img src="https://drive.google.com/uc?export=view&id=1iAsvgrEzyt-J3SonDi29xVYrW1lIZWy0">

* Nous considérons des paires de caméras séquentielles pour déterminer les matrices de caméras

$$M_1=K[I\mid 0]$$ et $$M_2=K[R \mid t]$$  où

In [13]:
def camera1tocamera2(P, RT):
    P_homo = np.array([P[0], P[1], P[2], 1.0])
    A = np.zeros((4, 4))
    A[0:3, :] = RT
    A[3, :] = np.array([0.0, 0.0, 0.0, 1.0])
    P_prime_homo = A.dot(P_homo.T)
    P_prime_homo /= P_prime_homo[3]
    P_prime = P_prime_homo[0:3]
    return P_prime

In [14]:
unit_test_image_matches = np.load('../data/unit_test_image_matches.npy')

In [15]:
image_points=np.expand_dims(unit_test_image_matches[:2,:], axis=0)

In [16]:

count = np.zeros((1, 4))
I0 = np.array([[1.0, 0.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0, 0.0],
                   [0.0, 0.0, 1.0, 0.0]])
M1 = K.dot(I0)

camera_matrices = np.zeros((2, 3, 4))
camera_matrices[0] = M1
for i in range(estimated_RT.shape[0]):
        RTi = estimated_RT[i] # 3x4 matrix
        M2i = K.dot(RTi)
        camera_matrices[1] = M2i
        for j in range(image_points.shape[0]):
            pointj_3d = nonlinear_estimate_3d_point(image_points[j], camera_matrices)
            Pj = np.vstack((pointj_3d.reshape(3, 1), [1]))
            Pj_prime = camera1tocamera2(Pj, RTi)
            if Pj[2] > 0 and Pj_prime[2] > 0:
                count[0, i] += 1

maxIndex = np.argmax(count)
maxRT = estimated_RT[maxIndex]

  P_homo = np.array([P[0], P[1], P[2], 1.0])


In [17]:
count

array([[0., 0., 1., 0.]])

In [18]:
print(example_RT)

[[ 0.9736 -0.0988 -0.2056  0.9994]
 [ 0.1019  0.9948  0.0045 -0.0089]
 [ 0.2041 -0.0254  0.9786  0.0331]]


In [19]:
print(maxRT)

[[ 0.97364135 -0.09878708 -0.20558119  0.99941228]
 [ 0.10189204  0.99478508  0.00454512 -0.00886961]
 [ 0.2040601  -0.02537241  0.97862951  0.03311219]]
