In [17]:
### Cell 0 ###
### Imports ###

import numpy as np
import tensorflow as tf

In [18]:
### Cell 1 ###
### Data generation and normalization ###

train_size = 3000
valid_size = 1000
test_size = 1000

# y = k * x + b
k = 2
b = 5

# It is important to properly shuffle the training data -- failure to do so may result in SGD diverging.

# From 0 to 10, uniformly distributed values.
train_x = (10 * np.random.rand(train_size)).astype(np.float32).reshape((train_size, -1))
train_y = (k * train_x + b + 0.01 * np.random.randn()).reshape((train_size, -1))
valid_x = (10 * np.random.rand(valid_size)).astype(np.float32).reshape((valid_size, -1))
valid_y = (k * valid_x + b + 0.01 * np.random.randn()).reshape((valid_size, -1))
test_x = (10 * np.random.rand(test_size)).astype(np.float32).reshape((test_size, -1))
test_y = (k * test_x + b + 0.01 * np.random.randn()).reshape((test_size, -1))

# Rescale to [-1; 1]
def normalize(arr):
    return (arr - np.mean(arr)) / (np.max(arr) - np.min(arr))
train_x = normalize(train_x)
train_y = normalize(train_y)
valid_x = normalize(valid_x)
valid_y = normalize(valid_y)
test_x = normalize(test_x)
test_y = normalize(test_y)

In [27]:
### Cell 2 ###
### TensorFlow Graph definition ###

batch_size = 300
learning_rate = 0.00005
hidden_layer_size = 64 # RELU layer.

graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, train_x.shape[1]))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, train_y.shape[1]))
    tf_valid_dataset = tf.constant(valid_x)
    tf_test_dataset = tf.constant(test_x)
    
    # Hidden layer.
    weights_1 = tf.Variable(tf.truncated_normal([train_x.shape[1], hidden_layer_size]))
    biases_1 = tf.Variable(tf.zeros([train_y.shape[1]]))
    hidden = tf.nn.relu(tf.matmul(tf_train_dataset, weights_1) + biases_1)
    
    # Output layer.
    weights_2 = tf.Variable(tf.truncated_normal([hidden_layer_size, train_y.shape[1]]))
    biases_2 = tf.Variable(tf.zeros([train_y.shape[1]]))
  
    # Training step computation.
    logits = tf.matmul(hidden, weights_2) + biases_2
    loss = tf.nn.l2_loss(logits - tf_train_labels) / batch_size
  
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    train_hidden = tf.nn.relu(tf.matmul(tf_train_dataset, weights_1) + biases_1)
    train_prediction = tf.matmul(train_hidden, weights_2) + biases_2
    
    valid_hidden = tf.nn.relu(tf.matmul(tf_valid_dataset, weights_1) + biases_1)
    valid_prediction = tf.matmul(valid_hidden, weights_2) + biases_2
    
    test_hidden = tf.nn.relu(tf.matmul(tf_test_dataset, weights_1) + biases_1)
    test_prediction = tf.matmul(test_hidden, weights_2) + biases_2

In [28]:
### Cell 3 ###
### Run and evaluate ###

num_steps = 10000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    for step in range(num_steps):
        # Training data are randomized.
        offset = (step * batch_size) % (train_y.shape[0] - batch_size)
        batch_data = train_x[offset:(offset + batch_size), :]
        batch_labels = train_y[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 20 == 0):
            print("Batch loss at step %d: %.8f" % (step, l))
            print("Validation loss at step %d: %.8f\n" % (step, ((sum((valid_prediction.eval() - valid_y) ** 2)) / valid_size / 2.)))
    print("Test loss: %.8f" % (sum((test_prediction.eval() - test_y) ** 2) / test_size / 2.))

Batch loss at step 0: 0.06820954
Validation loss at step 0: 0.06943816

Batch loss at step 20: 0.06817070
Validation loss at step 20: 0.06892876

Batch loss at step 40: 0.06810138
Validation loss at step 40: 0.06842513

Batch loss at step 60: 0.06739894
Validation loss at step 60: 0.06792445

Batch loss at step 80: 0.06507642
Validation loss at step 80: 0.06742749

Batch loss at step 100: 0.07037343
Validation loss at step 100: 0.06693118

Batch loss at step 120: 0.06330893
Validation loss at step 120: 0.06644075

Batch loss at step 140: 0.06220790
Validation loss at step 140: 0.06595333

Batch loss at step 160: 0.06212242
Validation loss at step 160: 0.06546915

Batch loss at step 180: 0.06378719
Validation loss at step 180: 0.06498804

Batch loss at step 200: 0.06378284
Validation loss at step 200: 0.06450843

Batch loss at step 220: 0.06371924
Validation loss at step 220: 0.06403475

Batch loss at step 240: 0.06306570
Validation loss at step 240: 0.06356420

Batch loss at step 260: 