In [1]:
import tensorflow as tf
"""
Ax = b
shape of A matrix: (n,m)
shape of x vector: (m, 1)
shape of b vector: (n, 1)

1) solution1: pseudo-inverse

x = pseudo_inverse(A) * b

2) solution2: gradient descent optimization

f(x) = Ax - b
x := x - epsilon * gradient(f(x), x)

"""

'\nAx = b\nshape of A matrix: (n,m)\nshape of x vector: (m, 1)\nshape of b vector: (n, 1)\n\n1) solution1: pseudo-inverse\n\nx = pseudo_inverse(A) * b\n\n2) solution2: gradient descent optimization\n\nf(x) = Ax - b\nx := x - epsilon * gradient(f(x), x)\n\n'

In [2]:
def pseudo_inverse(A):
    """
    pseudo_inverse_A = inverse(A_transpose * A) * A_transpose
    condition: (A_transpose * A) is invertible matrix  
    """
    A_trans = tf.matrix_transpose(A, name='trans_A')
    A_trans_A = tf.matmul(A_trans, A)
    #same operation: tf.matmul(A, A, transpose_a=True, transpose_b=False)
    
    A_trans_A_inv = tf.matrix_inverse(A_trans_A)

    pseudo_inv_A = tf.matmul(A_trans_A_inv, A_trans)
    
    return pseudo_inv_A

In [3]:
def pseudo_inverse_with_svd(A):
    """
    pseudo_inverse_A = V * diag(lambda_plus) * U_transpose
    note1: SVD(A) = U * diag(lambda) * V_transpose
    note2: lambda_plus = pow(lambda, -1)
    """
    
    s, u, v = tf.svd(A)
    lamda_plus = tf.diag(tf.pow(s, -1))
    v_lamda_plus = tf.matmul(v, lamda_plus)
    u_trans = tf.matrix_transpose(u)
    pseudo_inv_A_approx = tf.matmul(v_lamda_plus, u_trans)
    
    return pseudo_inv_A_approx

In [4]:
# input
A = tf.placeholder(tf.float32)
b = tf.placeholder(tf.float32)

# create computational graph at default graph
# 1) sol_1: pseudo inverse
x = tf.matmul(pseudo_inverse(A), b)

# 2) sol_2: pseudo inverse with SVD
x_svd = tf.matmul(pseudo_inverse_with_svd(A), b)

# 3) sol_3: gradient descent optimize
init_val = [[20.]]
var_x = tf.Variable(init_val, name='x')
f_x = tf.abs(tf.matmul(A, var_x) - b)

optimizer = tf.train.GradientDescentOptimizer(1e-3).minimize(f_x)

In [5]:
# create TF session
sess = tf.Session()

# TF variable have to initialize before optimization
init = tf.global_variables_initializer()
sess.run(init)

In [6]:
# data
"""
matrix_A = [[1,-1],
            [1,2],
            [5,6]]

vector_b = [[1],
            [2],
            [3]]

"""
matrix_A = [[1],
            [2],
            [3],
            [5],
            [7],
            [20]]

vector_b = [[1.2],
            [1.9],
            [3.1],
            [5.8],
            [6.7],
            [19.]]

print '1) solution using pseudo_inverse'
print(sess.run(x, {A:matrix_A, b:vector_b}))
print '\n2) solution using pseudo_inverse with SVD'
print(sess.run(x_svd, {A:matrix_A, b:vector_b}))

1) solution using pseudo_inverse
[[ 0.96352446]]

2) solution using pseudo_inverse with SVD
[[ 0.9635247]]


In [7]:
print '\n3) solution using gradient descent optimize'
for i in range(1000):
    sess.run(optimizer, {A:matrix_A, b:vector_b})
    if i%50==0:
        now_var_x = sess.run(var_x)
        print('step: %d,  x: %f' % (i, now_var_x[0,0]))


3) solution using gradient descent optimize
step: 0,  x: 19.962000
step: 50,  x: 18.061995
step: 100,  x: 16.161989
step: 150,  x: 14.261984
step: 200,  x: 12.361979
step: 250,  x: 10.461973
step: 300,  x: 8.561968
step: 350,  x: 6.661963
step: 400,  x: 4.761957
step: 450,  x: 2.861952
step: 500,  x: 1.023950
step: 550,  x: 0.979950
step: 600,  x: 0.949951
step: 650,  x: 0.963951
step: 700,  x: 0.977951
step: 750,  x: 0.947952
step: 800,  x: 0.961952
step: 850,  x: 0.975953
step: 900,  x: 0.945953
step: 950,  x: 0.959953
