## Question 2.

Write a MATLAB/Python script to find the real world dimensions (e.g. diameter of a ball, side length of a cube) of an object using perspective projection equations. Validate using an experiment where you image an object using your camera from a specific distance (choose any distance but ensure you are able to measure it accurately) between the object and camera.


### Solution

**NOTE:** <br />
please run q1.ipynb before running q2.ipynb 
q1.ipynb saves all the important camera information 
(camera, rotational, and translational matrices) which 
are mandatory for q2.ipynb to work!


In [1]:
import numpy as np
import cv2 as cv
from scipy.spatial.transform import Rotation
from math import cos, sin, radians

In [45]:
# reading the rotational vectors from saved files

rot_v = []

with open('images/left/rotat_vector.txt', 'r') as f:
    for line in f :
        rot_v.append([[float(num)] for num in line.split(' ')])

np_rot_v = np.array(rot_v)

# print("Rotation vector:")    
# print(np_rot_v)

Rotation vector:
[[[ 0.19758]
  [-0.00655]
  [ 0.0063 ]]

 [[ 0.20569]
  [ 0.03023]
  [ 0.00369]]

 [[ 0.10432]
  [ 0.03595]
  [ 0.02063]]

 [[ 0.1286 ]
  [-0.00654]
  [ 0.01703]]

 [[ 0.10085]
  [ 0.01447]
  [ 0.01957]]

 [[ 0.11166]
  [ 0.0606 ]
  [ 0.02276]]

 [[ 0.14803]
  [ 0.07052]
  [ 0.0118 ]]

 [[ 0.15267]
  [-0.01626]
  [ 0.01514]]

 [[ 0.1488 ]
  [ 0.08836]
  [ 0.00804]]

 [[ 0.17478]
  [-0.00165]
  [ 0.0112 ]]]
Transation vector:
[[[-1.83611]
  [-3.89652]
  [13.4579 ]]

 [[-2.14455]
  [-3.89652]
  [13.47443]]

 [[-1.30849]
  [-4.02351]
  [14.27926]]

 [[-1.38264]
  [-3.96693]
  [13.92483]]

 [[-1.30123]
  [-3.99523]
  [14.17617]]

 [[-1.14546]
  [-4.09198]
  [14.49543]]

 [[-2.39812]
  [-3.94567]
  [13.88608]]

 [[-1.5367 ]
  [-3.97522]
  [13.74941]]

 [[-2.61243]
  [-3.87431]
  [14.01971]]

 [[-1.69338]
  [-3.94921]
  [13.66769]]]


In [49]:
r_obj = Rotation.from_rotvec(np.array(rot_v[0]).reshape(1,3))
rot_matrix = r_obj.as_matrix()

print("ROTATIONAL MATRIX")
print(rot_matrix)

ROTATIONAL MATRIX
[[[ 0.99995884 -0.00690397 -0.00588703]
  [ 0.00561404  0.98052484 -0.19631485]
  [ 0.00712773  0.19627372  0.98052324]]]


In [47]:
# Creation of rotation matrix from rotation vector
xc, xs = cos(radians(np_rot_v[0][0][0])), sin(radians(np_rot_v[0][0][0]))
yc, ys = cos(radians(np_rot_v[0][1][0])), sin(radians(np_rot_v[0][1][0]))
zc, zs = cos(radians(np_rot_v[0][2][0])), sin(radians(np_rot_v[0][2][0]))

In [48]:
#Creation of  rotation matrices along x,y and z
rot_x_mtx = np.array([[1,0,0,0],[0,xc,-xs,0],[0,xs,-xc,0],[0,0,0,1]])
rot_y_mtx = np.array([[yc,0,ys,0],[0,1,0,0],[-ys,0,yc,0],[0,0,0,1]])
rot_z_mtx = np.array([[zc,-zs,0,0],[zs,zc,0,0],[0,0,1,0],[0,0,0,1]])

In [51]:
# reading the translation vectors from saved files

tra_v = []

with open('images/left/trans_vector.txt', 'r') as f:
    for line in f :
        tra_v.append([[float(num)] for num in line.split(' ')])
        
np_tra_v = np.array(tra_v)

# print("Transation vector:")    
# print(np_tra_v)

In [50]:
# Translated matrix
tx = np_tra_v[0][0][0]
ty = np_tra_v[0][1][0]
tz = np_tra_v[0][2][0]
trans_mtx = np.array([[1,0,0,tx],[0,1,0,ty],[0,0,1,tz],[0,0,0,1]])

print("TRANSLATIONAL MATRIX")
print(trans_mtx)

TRANSLATIONAL MATRIX
[[ 1.       0.       0.      -1.83611]
 [ 0.       1.       0.      -3.89652]
 [ 0.       0.       1.      13.4579 ]
 [ 0.       0.       0.       1.     ]]


In [60]:
# reading the intrinsic camera matrix from saved files

c_mtx = []
 
with open('images/left/camera_matrix.txt', 'r') as f:
    for line in f :
        c_mtx.append([float(num) for num in line.split(' ')])

print("CAMERA INTRINSIC MATRIX:")
print(c_mtx)

CAMERA INTRINSIC MATRIX:
[[753.75451, 0.0, 313.42094], [0.0, 745.91179, 307.95401], [0.0, 0.0, 1.0]]


In [61]:
# Intrinsic matrix

int_mtx = np.append(np.append(c_mtx, [[0],[0],[1]], axis=1), [np.array([0,0,0,1])], axis=0)
print('Intrinsinc mtx:')
print(int_mtx)

Intrinsinc mtx:
[[753.75451   0.      313.42094   0.     ]
 [  0.      745.91179 307.95401   0.     ]
 [  0.        0.        1.        1.     ]
 [  0.        0.        0.        1.     ]]


In [62]:
# Extrinsic matrix

ext_mtx = np.dot(rot_z_mtx, np.dot(rot_y_mtx, np.dot(rot_x_mtx, trans_mtx)))
print('Extrinsinc mtx')
print(ext_mtx)

Extrinsinc mtx
[[ 9.99999987e-01 -1.10349308e-04  1.14697558e-04 -1.83413641e+00]
 [ 1.09955742e-04  9.99994048e-01 -3.44840211e-03 -3.94310695e+00]
 [ 1.14319066e-04  3.44841468e-03 -9.99994048e-01 -1.34714666e+01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]


In [63]:
# Camera Matrix

camera_matrix = np.dot(int_mtx, ext_mtx)
print("CAMERA MATRIX: ")
print(camera_matrix)

CAMERA MATRIX: 
[[ 7.53790331e+02  9.97629081e-01 -3.13332621e+02 -5.60472832e+03]
 [ 1.17222299e-01  7.46969304e+02 -3.10524381e+02 -7.08980213e+03]
 [ 1.14319066e-04  3.44841468e-03 -9.99994048e-01 -1.24714666e+01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]


In [102]:
inverse_mat = -np.linalg.inv(camera_matrix) 

project_points1 = np.array([[5],[10],[30],[1]])
project_points2 = np.array([[100],[90],[30],[1]])

print(inverse_mat)

real_dim_p1 = inverse_mat.dot(project_points1)
real_dim_p2 = inverse_mat.dot(project_points2)

[[-1.32669188e-03 -1.47411186e-07  4.15744093e-01 -2.25185409e+00]
 [ 1.45356975e-07 -1.34066500e-03  4.16266102e-01 -4.31278610e+00]
 [-1.51165825e-07 -4.62321324e-06  1.00148895e+00  1.24564111e+01]
 [-0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -1.00000000e+00]]


In [103]:
print("Here are the dimension")
print("Point 1")

print("x axis length - ", real_dim_p1[0][0])
print("y axis length - ", real_dim_p1[1][0])
print("z axis length - ", real_dim_p1[2][0])

print("Point 2")
print("x axis length - ", real_dim_p2[0][0])
print("y axis length - ", real_dim_p2[1][0])
print("z axis length - ", real_dim_p2[2][0])

Here are the dimension
Point 1
x axis length -  10.213833755027938
y axis length -  8.161791044605714
z axis length -  42.50103246954882
Point 2
x axis length -  10.087786233597294
y axis length -  8.054551653324141
z axis length -  42.50064825173587


In [107]:
# distance between them 
from math import sqrt 

dist = sqrt((real_dim_p2[0][0]-real_dim_p1[0][0])**2 +
           (real_dim_p2[1][0]-real_dim_p1[1][0])**2 +
           (real_dim_p2[2][0]-real_dim_p1[2][0])**2 )

print(dist*100, "cm")

16.54944480173842 cm
