In [1]:
# Load packages
import os
import sys
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_boston
tf.__version__

'1.12.0'

## Load Data

In [2]:
boston = load_boston()

features = np.array(boston.data)
labels = np.array(boston.target)

features.shape, labels.shape

((506, 13), (506,))

In [3]:
labels = labels.reshape((len(labels), 1)) # reshape rank 1 array to have 1 dim
labels.shape

(506, 1)

In [4]:
features[0]

array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00])

In [5]:
labels[0]

array([24.])

### Data normalization

In [6]:
mean = np.mean(features, axis=0) # Feature wise mean
std = np.std(features, axis=0) # Feature wise standard deviation

# Normalize now
features = (features - mean) / std

features.shape

(506, 13)

In [7]:
features[0]

array([-0.41978194,  0.28482986, -1.2879095 , -0.27259857, -0.14421743,
        0.41367189, -0.12001342,  0.1402136 , -0.98284286, -0.66660821,
       -1.45900038,  0.44105193, -1.0755623 ])

### Random sampling of data

In [8]:
random_indices = np.random.rand(len(features)) < 0.80 # Random indexes for splitting dataset

# New train set
train_x = features[random_indices]
train_y = labels[random_indices]

# New validation set
val_x = features[~random_indices]
val_y = labels[~random_indices]

In [10]:
train_x.shape, train_y.shape

((402, 13), (402, 1))

In [11]:
val_x.shape, val_y.shape

((104, 13), (104, 1))

## Multi-variate linear regression model

In [12]:
num_features = features.shape[1] # number of features
num_features

13

In [13]:
# create placeholders for inputs
x = tf.placeholder(dtype=tf.float32, shape=(None, num_features), name="feature_matrix") # num_samples x num_features
y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name="target_vector") # num_sampels x 1

In [14]:
w = tf.Variable(tf.zeros(dtype=tf.float32, shape=(1, num_features))) # Initialize weights with all zeros

In [15]:
b = tf.Variable(tf.zeros(dtype=tf.float32, shape=(1, num_features))) # Set bias to all zeros

In [16]:
# linear regression model
linear_model = tf.add(tf.multiply(w, x), b)

In [17]:
# Set loss function
loss_function = tf.reduce_mean(tf.square(linear_model - y))

### Using inbuilt optimizer for minimizing loss

In [18]:
trainer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss_function)

In [19]:
# Create new session
with tf.Session() as sess:
    epochs = 10000 # number of iterations over the whole train set
    sess.run(tf.global_variables_initializer()) # Initialize global variables
    print("Training...")

    # Run the optimizer
    for i in range(epochs+1):
        # Update weights and bais
        sess.run(trainer, feed_dict={x:train_x, y:train_y})
        # print loss at each 1000th epoch
        if i % 1000 == 0:
            # Get training loss value at each 1000th epoch
            c = sess.run(loss_function, feed_dict={x:train_x, y:train_y})
            print("At step {}, train_loss is {}".format(i, c))
    # Get loss on the validation set
    print("Finally, val_loss is {}".format(sess.run(loss_function, feed_dict={x:val_x, y:val_y})))

Training...
At step 0, train_loss is 575.0603637695312
At step 1000, train_loss is 238.51412963867188
At step 2000, train_loss is 104.8044662475586
At step 3000, train_loss is 66.54669952392578
At step 4000, train_loss is 61.44092559814453
At step 5000, train_loss is 61.2686882019043
At step 6000, train_loss is 61.26815414428711
At step 7000, train_loss is 61.26813507080078
At step 8000, train_loss is 61.268150329589844
At step 9000, train_loss is 61.26815414428711
At step 10000, train_loss is 61.26817321777344
Finally, val_loss is 89.43696594238281


### Implement gradient descent

In [20]:
grad_w, grad_b = tf.gradients(xs=[w, b], ys=loss_function) # Get gradients

# Adjust weights and bias
new_w = w.assign(w - 0.01 * grad_w)
new_b = b.assign(b - 0.01 * grad_b)

In [21]:
# Create new session
with tf.Session() as sess:
    epochs = 10000
    # Initialize global variables
    sess.run(tf.global_variables_initializer())
    print("Training...")
    
    # Train
    for i in range(epochs+1):
        _, _, c = sess.run([new_w, new_b, loss_function], feed_dict={x:train_x, y:train_y})
        # At each 1000th epoch prompt training loss
        if i % 1000 == 0:
            print("At step {}, train_loss is {}".format(i, c))
    # Get loss on the validation set
    print("Finally, val_loss is {}".format(sess.run(loss_function, feed_dict={x:val_x, y:val_y})))

Training...
At step 0, train_loss is 575.5902709960938
At step 1000, train_loss is 84.65240478515625
At step 2000, train_loss is 62.35044860839844
At step 3000, train_loss is 61.319297790527344
At step 4000, train_loss is 61.27061080932617
At step 5000, train_loss is 61.26823806762695
At step 6000, train_loss is 61.268123626708984
At step 7000, train_loss is 61.26814270019531
At step 8000, train_loss is 61.26813507080078
At step 9000, train_loss is 61.26814270019531
At step 10000, train_loss is 61.26814270019531
Finally, val_loss is 89.43856811523438
