In [1]:
%matplotlib notebook
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
import numpy as np
import random
import math
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import seaborn as sns

In [2]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise by NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(x, floor)

def linear(x):
    """Identity activation: hand the argument back unchanged."""
    result = x
    return result

In [3]:
# Grid of weight pairs: both axes run over [-5, 5) in steps of 0.25.
grid_axis_w1 = np.arange(-5, 5, 0.25)
grid_axis_w2 = np.arange(-5, 5, 0.25)
w1, w2 = np.meshgrid(grid_axis_w1, grid_axis_w2)

x1 = 1.
y1 = 0
x2 = -1
y2 = 1

# (input, target) pairs whose squared errors are added up, in this order.
samples = [(x1, y1), (x2, y2), (0.4, 1), (-0.4, 0)]

# Squared-error surface of sigmoid(sigmoid(x * w1) * w2) over the weight grid.
# Replacing the inner sigmoid with relu or linear gives the other hidden
# activations defined above.
f = sum((sigmoid(sigmoid(xi * w1) * w2) - yi) ** 2 for xi, yi in samples)

In [4]:
fig = plt.figure()
# Figure.gca() stopped accepting projection keywords in newer Matplotlib
# releases; add_subplot creates the 3D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')


# Plot the surface.
surf = ax.plot_surface(w1, w2, f, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Customize the z axis: ten evenly spaced ticks, two decimals each.
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('w1')
ax.set_ylabel('w2')
plt.show()


<IPython.core.display.Javascript object>

In [44]:
# Build the TF1 static graph of a tiny sigmoid network with one input, one
# hidden unit and one output. Both weights live in a single flat `parameters`
# vector so that a gradient and a Hessian can be taken with respect to it.
tf.reset_default_graph()

# Inputs and targets: one column each, any number of rows.
x = tf.placeholder(tf.float32, [None, 1])
y_ = tf.placeholder(tf.float32, [None, 1])


# NOTE(review): these four names are not referenced by any cell visible in
# this notebook; the layer sizes below are hard-coded to 1.
n_input = 1
n_hidden = 1
n_output = 1
lmd = 0


# Two weights, initialised uniformly in [-rang, rang) with a fixed op seed.
rang = 5.
parameters = tf.Variable(tf.concat([
                                    tf.random_uniform([2], minval= -rang, maxval= rang, seed=1),
                                   ], 0))

# parameters[0] is the input->hidden weight; the hidden bias is fixed at +1.
weights1 = tf.reshape(tf.slice(parameters, begin=[0], size=[1]), [1, 1])
hidden = tf.nn.sigmoid(tf.matmul(x, weights1) + 1)

# parameters[1] is the hidden->output weight; the output bias is fixed at -1.
weights2 = tf.reshape(tf.slice(parameters, begin=[1], size=[1]), [1, 1])
y = tf.nn.sigmoid(tf.matmul(hidden, weights2)-1)

# Squared error summed over axis 1 of each row, then averaged over the rows.
loss = tf.reduce_mean(tf.reduce_sum(tf.pow(y_ - y, 2), reduction_indices=[1])) # tf.nn.l2_loss(y_ - y) was also tried here

# Gradient-descent training ops. NOTE(review): `lr`, `optimizer`,
# `grads_and_vars` and `train_step` are not run by any cell visible here.
lr = tf.placeholder(tf.float32, shape=[])
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
grads_and_vars = optimizer.compute_gradients(loss)
# Gradient and Hessian of the loss with respect to the flat parameter vector;
# later cells fetch both and use element 0 of each fetched result.
grad = tf.gradients(loss, parameters)
hess = tf.hessians(loss, parameters)
train_step = optimizer.apply_gradients(grads_and_vars)

In [76]:
def cg(A, b, it, x=None):
    """Run at most ``it`` conjugate-gradient iterations on ``A x = b``.

    Parameters
    ----------
    A : 2-D array-like
        Symmetric matrix. It does not have to be positive definite: a search
        direction with non-positive curvature ends the iteration early.
    b : 1-D array-like
        Right-hand side.
    it : int
        Maximum number of iterations.
    x : 1-D array-like, optional
        Starting point; a vector of ones when omitted. The caller's object
        is copied and never modified.

    Returns
    -------
    (vector, status)
        ``(x, -1)`` with the current iterate when the loop ends normally
        (iteration budget used up, or squared residual below ``1e-5``), or
        ``(p, 1)`` with the current search direction when ``p . A p <= 0``.
    """
    A = np.asarray(A)
    b = np.asarray(b)
    n = len(b)
    if x is None:
        x = np.ones(n)
    else:
        # `not x` is ambiguous for arrays; also work on a private float copy
        # so the in-place updates below never touch the caller's data.
        x = np.array(x, dtype=float)

    r = np.dot(A, x) - b
    p = -r
    r_k_norm = np.dot(r, r)
    if r_k_norm == 0:
        # The start point already solves the system; iterating would
        # evaluate 0 / 0 and fill x with NaN.
        return x, -1

    for _ in range(it):
        Ap = np.dot(A, p)
        curvature = np.dot(p, Ap)
        if curvature <= 0:
            # Zero or negative curvature along p: no finite CG step exists,
            # so report the direction itself.
            return p, 1
        alpha = r_k_norm / curvature
        x += alpha * p
        r += alpha * Ap
        r_kplus1_norm = np.dot(r, r)
        beta = r_kplus1_norm / r_k_norm
        r_k_norm = r_kplus1_norm
        if r_kplus1_norm < 1e-5:
            break
        p = beta * p - r
    return x, -1

In [77]:
def get_accuracy():
    """Fraction of the first ``2 * n`` samples classified on the right side of 0.5.

    Runs the network output ``y`` on the module-level ``data``/``label`` with
    the module-level ``sess``; a sample counts as correct when prediction and
    label lie on the same side of 0.5.
    """
    feed = {x: data, y_: label}
    pred = sess.run(y, feed_dict=feed)
    hits = 0
    for idx in range(n*2):
        same_side = (pred[idx] - 0.5) * (label[idx] - 0.5) > 0
        hits = hits + same_side
    acc = hits*1./2/n
    return acc[0]

def get_norm_grad():
    """Euclidean norm of the loss gradient over every tensor in ``grad``.

    Each gradient tensor is evaluated on the module-level ``data``/``label``
    with the module-level ``sess``; the squared norms are added up and the
    square root of the total is returned.
    """
    feed = {x: data, y_: label}
    squared_norms = (np.linalg.norm(sess.run(tensor, feed_dict=feed)) ** 2
                     for tensor in grad)
    return np.sqrt(sum(squared_norms, 0.))
    
     
def displayH(a):
    """Print the matrix stored in ``a[0]`` row by row.

    Every entry is formatted with ``%0.2g`` and followed by a space; one
    blank line is printed after the last row.
    """
    matrix = np.array(a[0])
    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]
    for row in range(n_rows):
        line = "".join("%0.2g " % matrix[row, col] for col in range(n_cols))
        print(line)
    print()

In [78]:
# Toy 1-D dataset: n points labelled 1 followed by n points labelled 0.
n = 6
points_label_1 = [-1, -0.6, -0.2, 0.2, 0.6, 1]
points_label_0 = [-0.8, -0.4, 0, 0.4, 0.8, 0.9]
data = np.array(points_label_1 + points_label_0).reshape(-1, 1)
label = np.array([1] * n + [0] * n).reshape(-1, 1)

# Show both groups on one horizontal line: label 1 in red, label 0 in green.
fig = plt.figure()
baseline = [0]*n
plt.scatter(data[:n], baseline, color=['red'])
plt.scatter(data[n:], baseline, color=['green'])
plt.show()

<IPython.core.display.Javascript object>

In [79]:
# Evaluate the TensorFlow loss on a grid of (w1, w2) pairs by feeding each
# pair directly into `parameters`.
sess = tf.InteractiveSession()
try:
    w1 = np.arange(-5, 5, 0.25)
    w2 = np.arange(-5, 5, 0.25)
    # Rows follow w2 and columns follow w1 so that f lines up with the arrays
    # np.meshgrid(w1, w2) returns; the previous (len(w1), len(w2)) shape only
    # worked because both axes happen to have the same length.
    f = np.zeros((len(w2), len(w1)))
    for i in range(len(w1)):
        for j in range(len(w2)):
            v = sess.run(loss, feed_dict={x: data, y_: label, parameters: [w1[i], w2[j]]})
            f[j, i] = v
    w1, w2 = np.meshgrid(w1, w2)
finally:
    # Release the session even when a run call fails.
    sess.close()

In [80]:
fig = plt.figure()
# Figure.gca() stopped accepting projection keywords in newer Matplotlib
# releases; add_subplot creates the 3D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')


# Plot the surface.
surf = ax.plot_surface(w1, w2, f, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)
# Customize the z axis: ten evenly spaced ticks, two decimals each.
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('w1')
ax.set_ylabel('w2')
plt.show()

<IPython.core.display.Javascript object>

In [81]:
# From nr freshly initialised starting points, compare the loss decrease of a
# backtracked gradient-descent step with that of a backtracked CG step and
# print the ratio (GD decrease / CG decrease).
sess = tf.InteractiveSession()
dic = {}
nr = 10
cgIt = 1
stepsize = [1e1, 1e-0, 1e-1, 1e-2]

rts = []
try:
    # Build the initialiser once instead of adding a new op to the graph on
    # every iteration.
    init_op = tf.global_variables_initializer()
    for r in range(nr):
        # Re-initialise `parameters` from its random_uniform initialiser.
        sess.run(init_op)
        v0, g, H, w = sess.run([loss, grad, hess, parameters], feed_dict={x: data, y_: label})
        w0 = w
        d, status = cg(H[0], g[0], cgIt)

        # Line search along the CG result. The step is halved and its sign
        # flipped after every failed trial, so both directions along d are
        # probed. The search is the same whatever status cg returned.
        # NOTE(review): no iteration cap; the loop only ends once a strictly
        # lower loss is found.
        ss = 1.
        while True:
            w_cg = w0 - ss * d
            v_cg = sess.run(loss, feed_dict={x: data, y_: label, parameters: w_cg})
            if v_cg >= v0:
                ss = -ss * 0.5
            else:
                break

        # Plain backtracking along the negative gradient (step halved only).
        ss = 1.
        while True:
            w_gd = w0 - ss * g[0]
            v_gd = sess.run(loss, feed_dict={x: data, y_: label, parameters: w_gd})
            if v_gd >= v0:
                ss = ss * 0.5
            else:
                break

        rt = (v_gd-v0) / (v_cg-v0)
        rts.append(rt)
        print (rt)
finally:
    # Release the session even when an iteration fails.
    sess.close()

1.23739
1.62845
0.93437
0.753352
0.0754045
0.0581693
1.57241
1.28521
1.04512
0.9976


In [82]:
fig = plt.figure()
# Figure.gca() stopped accepting projection keywords in newer Matplotlib
# releases; add_subplot creates the 3D axes on old and new versions alike.
ax = fig.add_subplot(111, projection='3d')

# Plot the surface.
surf = ax.plot_surface(w1, w2, f, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)


# Overlay, for nr freshly initialised starting points, the accepted CG step
# and the accepted gradient-descent step as line segments on the surface.
sess = tf.InteractiveSession()
cgIt = 2

nr = 100
try:
    # Build the initialiser once instead of adding a new op to the graph on
    # every iteration.
    init_op = tf.global_variables_initializer()
    for r in range(nr):
        # Re-initialise `parameters` from its random_uniform initialiser.
        sess.run(init_op)
        v0, g, H, w = sess.run([loss, grad, hess, parameters], feed_dict={x: data, y_: label})
        w0 = w

        d, status = cg(H[0], g[0], cgIt)

        # Line search along the CG result. The step is halved and its sign
        # flipped after every failed trial, so both directions along d are
        # probed.
        # NOTE(review): no iteration cap; the loop only ends once a strictly
        # lower loss is found.
        ss = 10.
        while True:
            w_cg = w0 - ss * d
            v_cg = sess.run(loss, feed_dict={x: data, y_: label, parameters: w_cg})
            if v_cg >= v0:
                ss = -ss * 0.5
            else:
                break
        # Red segment when cg returned a positive status, black otherwise.
        cg_style = '-ro' if status > 0 else '-ko'
        ax.plot([w[0], w_cg[0]], [w[1], w_cg[1]], [v0, v_cg], cg_style, markersize=2)

        # Plain backtracking along the negative gradient, drawn in blue.
        ss = 10.
        while True:
            w_gd = w0 - ss * g[0]
            v_gd = sess.run(loss, feed_dict={x: data, y_: label, parameters: w_gd})
            if v_gd >= v0:
                ss = ss * 0.5
            else:
                break
        ax.plot([w[0], w_gd[0]], [w[1], w_gd[1]], [v0, v_gd], '-bo', markersize=2)

        rt = (v_gd-v0) / (v_cg-v0)
finally:
    # Release the session even when an iteration fails.
    sess.close()


ax.set_xlim(-5, 5)
ax.set_ylim(-5, 5)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_xlabel('w1')
ax.set_ylabel('w2')
plt.show()


<IPython.core.display.Javascript object>