In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# Corners of the reference square on the ground in front of the camera, in metres,
# given as one (x, y) pair per corner and then transposed so that row 0 holds the
# x values and row 1 the y values. The 0.6 m robot-origin offset is not applied
# here; it is added later, when the graph is built.
source = np.array(
    [[0, 0.5], [1, 0.5], [1, -0.5], [0, -0.5]],
    dtype=np.float64,
).T

# Where the same four corners appear in the image, in pixels.
target_px = np.array(
    [[14.32, 140.71], [120.78, 95.5], [199.49, 96.84], [302.7, 140.71]],
    dtype=np.float64,
)

# Move the pixels into the (not yet scaled) camera frame: row 0 is 160 minus the
# second pixel coordinate, row 1 is the first pixel coordinate unchanged.
# NOTE(review): 160 is presumably the image height in pixels -- confirm.
target = np.array([160 - target_px[:, 1], target_px[:, 0]])

# NOTE: the sensor scale factor is not set in this cell; it is estimated as the ScaleFactor variable in the next cell.

In [3]:
# Build the TF1 graph of the projection model that maps the ground square
# (source, 2x4) onto its image (target, 2x4), together with the loss that is
# minimised to calibrate the model parameters.
T = tf.constant(target)
S = tf.constant(source)

# Trainable offset (initial value 0.6) added to the first source coordinate of
# every point; the second coordinate is passed through unchanged.
CameraOffset = tf.Variable(0.6, dtype=tf.float64)
S = tf.stack([S[0, :] + CameraOffset, S[1, :]])

# Trainable divisor (initial value 2000) applied to the target coordinates, so T
# below is the scaled target rather than the raw array from the data cell.
# NOTE(review): presumably a pixels-per-metre sensor scale -- confirm.
ScaleFactor = tf.Variable(2000.0, dtype=tf.float64)
T = T / ScaleFactor

# C* parameters. Cx is fixed at 0, Cy is derived from ScaleFactor, and Cz is
# trainable (later cells report it as the camera height).
# NOTE(review): 320 and 160 (below) look like the image width and height in
# pixels, halved to get the image centre -- confirm.
Cx = 0
Cy = 320 / (2 * ScaleFactor)
Cz = tf.Variable(0.5, dtype=tf.float64)

# E* parameters. Ex and Ey are derived from ScaleFactor (Ey is tied to Cy).
# Ez is optimised through logEz: Ez = -exp(logEz) is always negative, and
# later cells report -Ez as the view depth.
Ex = 160 / (2 * ScaleFactor)
Ey = Cy
logEz = tf.Variable(np.log(0.04), dtype=tf.float64)
Ez = -tf.exp(logEz)

# Per-point coordinate rows of the offset source and the scaled target.
Sx, Sy = S[0, :], S[1, :]
Tx, Ty = T[0, :], T[1, :]

# Residuals of the projection equations with the common denominator
# (Cx + Ez - Sx) multiplied out, i.e.
#   lossX == (Tx - ForwardTx) * (Cx + Ez - Sx)
#   lossY == (Ty - ForwardTy) * (Cx + Ez - Sx)
# so they vanish exactly when the forward prediction matches the target.
# Because Ey is Cy, the factor (-Cy + Ey + Sy) is numerically just Sy.
lossX = -Ex*(Cx + Ez - Sx) + Ez*(Cz + Ex) + Tx*(Cx + Ez - Sx)
lossY = -Ey*(Cx + Ez - Sx) + Ez*(-Cy + Ey + Sy) + Ty*(Cx + Ez - Sx)

# Forward model: predicted scaled image coordinates for each source point,
# in the same units as T.
ForwardTx = (Cx*Ex - Cz*Ez - Ex*Sx)/(Cx + Ez - Sx)
ForwardTy = (Ey*(Cx + Ez - Sx) - Ez*(-Cy + Ey + Sy))/(Cx + Ez - Sx)

# Scalar objective: squared residuals summed over both coordinates and
# averaged over the four points.
loss = tf.reduce_mean(lossX**2 + lossY**2)

# Step size shared by every training op below. It is a hyper-parameter, not a
# model parameter, so it is created with trainable=False: that keeps it out of
# the trainable-variables collection that optimizer.minimize(loss) walks, instead
# of having it picked up there as a candidate to optimise. It remains a Variable
# (default float32, same value as before) and is still initialised by
# tf.global_variables_initializer().
learning_rate = tf.Variable(0.000001, trainable=False)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)

# One update op per model parameter, so each can be refined on its own while
# the others are held fixed. Each pair is compute_gradients restricted to a
# single variable followed by apply_gradients.

# CameraOffset only.
offset_grad = optimizer.compute_gradients(loss, [CameraOffset])
offset_train = optimizer.apply_gradients(offset_grad)

# ScaleFactor only.
scale_grad = optimizer.compute_gradients(loss, [ScaleFactor])
scale_train = optimizer.apply_gradients(scale_grad)

# logEz only (Ez, the view depth, is derived from it).
view_depth_grad = optimizer.compute_gradients(loss, [logEz])
view_depth_train = optimizer.apply_gradients(view_depth_grad)

# Cz only.
height_grad = optimizer.compute_gradients(loss, [Cz])
height_train = optimizer.apply_gradients(height_grad)

# Joint update of all trainable variables in the default graph; in this
# notebook those are CameraOffset, ScaleFactor, Cz and logEz.
train_op = optimizer.minimize(loss)

In [4]:
# Open a TF1 session and give every global variable in the graph its initial value.
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

In [5]:
# Global training: every step applies the joint update op (train_op).
iter_range = 2000
for step in range(iter_range):
    loss_val, _ = sess.run([loss, train_op])
# Report only the loss fetched by the final step.
print('loss:', loss_val)

loss: 0.000454365989491


In [5]:
# Refine the view depth only: every step applies the logEz-only update op.
iter_range = 2000
for step in range(iter_range):
    loss_val, Ez_val, _ = sess.run([loss, Ez, view_depth_train])
# Report the values fetched by the final step; Ez is negative by construction,
# so its sign is flipped to print a positive depth.
print('loss and depth:', loss_val, -Ez_val)

loss and depth: 0.000454374284969 0.040000036895


In [6]:
# Refine the camera height only: every step applies the Cz-only update op.
# This loop runs ten times as many steps as the other per-parameter loops.
iter_range = 20000
for step in range(iter_range):
    loss_val, Cz_val, _ = sess.run([loss, Cz, height_train])
# Report the values fetched by the final step.
print('loss and height:', loss_val, Cz_val)

loss and height: 0.000454370748116 0.499991589511


In [7]:
# Refine the scale factor only: every step applies the ScaleFactor-only update op.
iter_range = 2000
for step in range(iter_range):
    loss_val, ScaleFactor_val, _ = sess.run([loss, ScaleFactor, scale_train])
# Report the values fetched by the final step.
print('loss and scale:', loss_val, ScaleFactor_val)

loss and scale: 0.000454370747939 2000.0


In [8]:
# Refine the camera offset only: every step applies the CameraOffset-only update op.
iter_range = 2000
for step in range(iter_range):
    loss_val, CameraOffset_val, _ = sess.run([loss, CameraOffset, offset_train])
# Report the values fetched by the final step.
print('loss and camera offset:', loss_val, CameraOffset_val)

loss and camera offset: 0.000454362805894 0.599996013518


In [6]:
# Validate the forward model: evaluate the predicted first image coordinate
# (ForwardTx) for the four source points with the current parameter values.
# The result is in the same scaled units as row 0 of T.
sess.run(ForwardTx)

array([ 0.00624981,  0.02682924,  0.02682924,  0.00624981])

In [7]:
# Predicted second image coordinate (ForwardTy) for the four source points,
# in the same scaled units as row 1 of T.
sess.run(ForwardTy)

array([ 0.04874978,  0.06780484,  0.09219516,  0.11125022])

In [8]:
# Scaled targets (target / ScaleFactor) that the forward predictions should reproduce.
sess.run(T)

array([[ 0.009645,  0.03225 ,  0.03158 ,  0.009645],
       [ 0.00716 ,  0.06039 ,  0.099745,  0.15135 ]])

In [11]:
# Current values of Cy (derived from ScaleFactor) and the trainable Cz.
sess.run([Cy, Cz])

[0.079999999999945434, 0.49999915892264074]

In [12]:
# Current values of Ex and Ey (derived from ScaleFactor) and of Ez = -exp(logEz).
sess.run([Ex, Ey, Ez])

[0.039999999999972717, 0.079999999999945434, -0.040000036913171277]

In [14]:
# Forward predictions multiplied back by ScaleFactor, i.e. expressed in the units
# of the unscaled target array rather than those of T.
sess.run([ScaleFactor*ForwardTx, ScaleFactor*ForwardTy])

[array([ 12.49962629,  53.65848987,  53.65848987,  12.49962629]),
 array([  97.49955662,  135.60967485,  184.39032515,  222.50044338])]

In [18]:
# Scaled targets (target / ScaleFactor) once more. Multiply by ScaleFactor before
# comparing with the unscaled forward predictions.
sess.run(T)

array([[ 0.009645,  0.03225 ,  0.03158 ,  0.009645],
       [ 0.00716 ,  0.06039 ,  0.099745,  0.15135 ]])