In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Pin both random number generators so the experiment is repeatable.
# Whether the estimate oscillates at a given step size depends on the
# function being estimated and on how the weights are initialized; with a
# different initialization the estimate may converge to a different
# solution and not oscillate at all.
SEED = 963
np.random.seed(SEED)
tf.set_random_seed(SEED)

In [None]:
# One session is shared by every cell below. Later cells call `y.eval(...)`
# and `train_step.run(...)` without passing a session, which relies on this
# InteractiveSession having installed itself as the default session.
sess = tf.InteractiveSession()

In [None]:
def weight_variable(shape):
    """Create a TensorFlow variable with a random-normal initial value.

    Args:
        shape: Shape of the variable; forwarded to ``tf.random_normal``.

    Returns:
        A ``tf.Variable`` whose initial value is sampled by
        ``tf.random_normal`` with ``stddev=1.0``.
    """
    return tf.Variable(tf.random_normal(shape, stddev=1.0))

def bias_vector(size):
    """Sample a bias array from a normal distribution (mean 0, std 1).

    Args:
        size: Output shape; forwarded to ``np.random.normal``.

    Returns:
        A NumPy array of the requested shape. Note that this is a plain
        array, not a TensorFlow variable.
    """
    return np.random.normal(loc=0, scale=1.0, size=size)

In [None]:
# Sample points: a regular grid starting at 0 with spacing 0.001, stopping
# before 1.
x_dat = np.arange(0, 1, 0.001)
N = x_dat.shape[0]

# Parameters of the one-hidden-layer network. The statements are kept in this
# order (W, V, b) so the sequence of random draws is unchanged.
d = 20  # number of nodes in the hidden layer
W = weight_variable([d, 1])   # hidden -> output weights
V = weight_variable([1, d])   # input -> hidden weights
# NOTE(review): b comes from bias_vector, which returns a NumPy array rather
# than a tf.Variable, so presumably it stays fixed while W and V are trained
# and re-initialized -- confirm this is intended.
b = bias_vector([1, d])

# Full-batch placeholders: all N samples are fed at once.
x = tf.placeholder(tf.float32, shape=[N, 1])
y_ = tf.placeholder(tf.float32, shape=[N, 1])

# Network output: y = relu(x V - b) W
hidden = tf.nn.relu(tf.matmul(x, V) - b)
y = tf.matmul(hidden, W)

In [None]:
# Training objective: squared error between the network output `y` and the
# target `y_`, summed (not averaged) over all entries.
train_loss = tf.reduce_sum(tf.square(y-y_))

In [None]:
# Build the target function by evaluating the freshly initialized network on
# the grid, so the function to be estimated can be represented by the network.
init_op = tf.global_variables_initializer()
sess.run(init_op)
y_dat = sess.run(y, feed_dict={x: np.expand_dims(x_dat, axis=1)})

In [None]:
# Set up estimation with the larger step size: build the gradient-descent
# update for `train_loss`, then re-run the variable initializers so training
# does not start from the weights that generated the target.
delta_large = 3e-4
optimizer_large = tf.train.GradientDescentOptimizer(delta_large)
train_step = optimizer_large.minimize(train_loss)
sess.run(tf.global_variables_initializer())

In [None]:
# Full-batch gradient descent with the larger step size. The step count is
# odd, so the snapshot taken right after this loop is the "odd" estimate.
n_steps = 20001

# The training data never changes between steps, so build the feed once
# instead of re-creating the reshaped array and the dict on every iteration.
train_feed = {x: np.expand_dims(x_dat, 1), y_: y_dat}
for i in range(n_steps):
    train_step.run(feed_dict=train_feed)

In [None]:
# Record the current estimate on the grid: this is the estimated function
# after an odd number of iterations of GD.
x_col = np.expand_dims(x_dat, axis=1)
z1_dat = sess.run(y, feed_dict={x: x_col})

In [None]:
# Take exactly one more gradient step and record the estimate again: this is
# the estimated function after an even number of iterations of GD.
x_col = np.expand_dims(x_dat, axis=1)
sess.run(train_step, feed_dict={x: x_col, y_: y_dat})
z2_dat = sess.run(y, feed_dict={x: x_col})

In [None]:
# Plot the target against the estimates taken after an odd and after an even
# number of gradient-descent steps, for the larger step size.
plt.rc('text', usetex=True)  # labels below are typeset through LaTeX
plt.figure(figsize=(6,5))
plt.plot(x_dat, y_dat,color='seagreen', label='$f(x)$', linewidth=1.5)
plt.plot(x_dat, z1_dat,'-.', color='maroon', label=r'$\hat{f}_{\mathrm{odd}}(x)$', linewidth=2.0)
# `\hat{f}` (braced) to match the other estimate labels in this notebook.
plt.plot(x_dat, z2_dat,'--', color='maroon', label=r'$\hat{f}_{\mathrm{even}}(x)$', linewidth=1.5)
plt.legend(loc='upper left', fontsize=16, frameon=True)
plt.xlabel('$x$',fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
# Raw string: '\d' is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning. The resulting text is unchanged.
plt.title(r'$\delta = {0:.1e}$'.format(delta_large), fontsize=16)
plt.show()

In [None]:
# Re-initialize and re-train the network with the smaller step size, then
# record the estimate after the training loop (z3_dat) and after one
# additional gradient step (z4_dat).
delta_small = 1e-4
train_step = tf.train.GradientDescentOptimizer(delta_small).minimize(train_loss)
sess.run(tf.global_variables_initializer())

# The inputs never change between steps, so build the feeds once instead of
# re-creating the reshaped array and the dict on every iteration.
x_col = np.expand_dims(x_dat, 1)
train_feed = {x: x_col, y_: y_dat}
for i in range(20001):
    train_step.run(feed_dict=train_feed)
z3_dat = sess.run(y, feed_dict={x: x_col})
train_step.run(feed_dict=train_feed)
z4_dat = sess.run(y, feed_dict={x: x_col})

In [None]:
# Plot the target against the estimates taken after an odd and after an even
# number of gradient-descent steps, for the smaller step size.
plt.rc('text', usetex=True)  # labels below are typeset through LaTeX
plt.figure(figsize=(6,5))
plt.plot(x_dat, y_dat,color='seagreen', label='$f(x)$', linewidth=1.5)
plt.plot(x_dat, z3_dat,'-.', color='maroon', label=r'$\hat{f}_{\mathrm{odd}}(x)$', linewidth=2.0)
plt.plot(x_dat, z4_dat,'--', color='maroon', label=r'$\hat{f}_{\mathrm{even}}(x)$', linewidth=1.5)
plt.legend(loc='upper left',fontsize=16)
plt.xlabel('$x$',fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
# Raw string: '\d' is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning. The resulting text is unchanged.
plt.title(r'$\delta = {0:.1e}$'.format(delta_small), fontsize=16)
plt.show()