In [1]:
import tensorflow as tf

## Creating a first graph 

In [4]:
x = tf.Variable(3, name='x')
y = tf.Variable(4, name='y')

f = x * x * y + y + 2

In [5]:
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)
sess.close()

42


In [6]:
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    
    result = f.eval()
    
print(result)

42


In [7]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    result = f.eval()

## Managing multiple graphs

In [12]:
with tf.Session() as sess:
    x1 = tf.Variable(1)
    print(x1.graph is tf.get_default_graph())

True


In [8]:
with tf.Session() as sess:
    graph = tf.Graph()
    with graph.as_default():
        x2 = tf.Variable(2)
    print(x2.graph is tf.get_default_graph())

False


## Lifecycle of node values

In [9]:
w = tf.constant(3)
x = w + 2
y = x + 5
z = x * 3



In [11]:
with tf.Session() as sess:
    # x is recomputed for each call - not efficient
    print(y.eval())
    print(z.eval())

10
15


In [17]:
with tf.Session() as sess:
    # x is reused for each evaluation - efficient
    y_val, z_val = sess.run([y,z])
    print(y_val)
    print(z_val)

10
15


## Linear regression with TF

In [37]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

In [38]:
housing = fetch_california_housing()
m, n = housing.data.shape

scaler = StandardScaler()
housing_data_plus_bias = np.c_[np.ones((m,1)), housing.data]
scaled_housing_data_plus_bias = scaler.fit_transform(housing_data_plus_bias)

In [44]:
housing_data_plus_bias.shape, scaled_housing_data_plus_bias.shape

((20640, 9), (20640, 9))

In [48]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
XT = tf.transpose(X)
# theta = (Xt X)' XT y
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [49]:
y_pred = tf.matmul(X, theta)
rmse = tf.sqrt(tf.math.reduce_sum(tf.math.square(tf.math.subtract(y_pred, y))))

In [50]:
with tf.Session() as sess:
    theta_val, rmse_val = sess.run([theta, rmse])
    print('theta: ', theta_val)
    print('rmse : ', rmse_val)

theta:  [[-3.63148041e+01]
 [ 4.38327283e-01]
 [ 9.54509154e-03]
 [-1.09111905e-01]
 [ 6.51177227e-01]
 [-3.67034363e-06]
 [-3.80339054e-03]
 [-4.14844275e-01]
 [-4.27279472e-01]]
rmse :  104.0312


## Batch Gradient Descent Linear Regression TF

In [51]:
n_epochs = 1000
learning_rate = 0.0001

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradients = tf.gradients(mse, [theta])[0]
# gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta - learning_rate * gradients)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
    best_error = error.eval()
    print(best_theta,best_error)

Epoch 0 MSE =  10.159446
Epoch 100 MSE =  9.908106
Epoch 200 MSE =  9.669728
Epoch 300 MSE =  9.443579
Epoch 400 MSE =  9.228973
Epoch 500 MSE =  9.025283
Epoch 600 MSE =  8.831894
Epoch 700 MSE =  8.648241
Epoch 800 MSE =  8.473796
Epoch 900 MSE =  8.308061
[[ 0.49345446]
 [-0.4639052 ]
 [ 0.5981647 ]
 [-0.549965  ]
 [ 0.66150874]
 [-0.04208593]
 [ 0.6210185 ]
 [ 0.32942343]
 [-0.01300348]] [[-5.099454  ]
 [-5.11695   ]
 [-3.5277438 ]
 ...
 [-0.2962908 ]
 [-0.17037159]
 [-0.440858  ]]


## Using an optimiser

In [52]:
n_epochs = 1000
learning_rate = 0.01

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name='X')
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name='y')
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradients = tf.gradients(mse, [theta])[0]
optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimiser.minimize(mse)


init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE = ", mse.eval())
        sess.run(training_op)
        
    best_theta = theta.eval()
    best_error = error.eval()
    print(best_theta,best_error)


Epoch 0 MSE =  10.585614
Epoch 100 MSE =  4.9146767
Epoch 200 MSE =  4.8499827
Epoch 300 MSE =  4.8375635
Epoch 400 MSE =  4.828945
Epoch 500 MSE =  4.822579
Epoch 600 MSE =  4.817858
Epoch 700 MSE =  4.814344
Epoch 800 MSE =  4.8117156
Epoch 900 MSE =  4.8097425
[[ 0.20732474]
 [ 0.8546698 ]
 [ 0.14351124]
 [-0.27533066]
 [ 0.29730058]
 [ 0.00417901]
 [-0.04195486]
 [-0.69587284]
 [-0.6676284 ]] [[-2.4477556]
 [-1.6824293]
 [-1.8989267]
 ...
 [-2.678736 ]
 [-2.4642355]
 [-2.30393  ]]


### Placeholder nodes

In [53]:
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1,2,3]]})
    B_val_2 = B.eval(feed_dict={A: [[4,5,6], [7,8,9]]})

B_val_1, B_val_1.shape, B_val_2, B_val_2.shape

(array([[6., 7., 8.]], dtype=float32), (1, 3), array([[ 9., 10., 11.],
        [12., 13., 14.]], dtype=float32), (2, 3))

### Minibatch Gradient Descent

In [62]:
housing_data_plus_bias[0:100,:].shape

(100, 9)

In [56]:
n_epochs = 1000
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m/batch_size))

X = tf.placeholder(tf.float32, shape=(None, n + 1), name='X')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name='theta')
y_pred = tf.matmul(X, theta, name='predictions')

error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name='mse')
gradients = tf.gradients(mse, [theta])[0]
optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimiser.minimize(mse)

def fetch_batch(epoch, i, batch_size):
    X_batch = scaled_housing_data_plus_bias[i * batch_size: min(m, (i+1) * batch_size), :]
    y_batch = housing.target.reshape(-1,1)[i * batch_size: min(m, (i+1) * batch_size), :]
    return X_batch, y_batch

init = tf.global_variables_initializer()
saver = tf.train.Saver()
saving = True

with tf.Session() as sess:
    if saving:
        try:
            saver.restore(sess, '/tmp/model.ckpt')
        except ValueError:
            print('no saved model. Training from scratch')
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(
                training_op, 
                 feed_dict={ X: X_batch, y: y_batch }
            )
            
        if epoch % 100 == 0:
            mse_val = sess.run([mse], feed_dict={
                X: housing_data_plus_bias,
                y: housing.target.reshape(-1,1)
            })
            print("Epoch", epoch, "MSE = ", mse_val)
            if saving:
                saver.save(sess, '/tmp/model.ckpt')
        
    best_theta, best_error = sess.run(
        [theta, error],
        feed_dict={
                X: housing_data_plus_bias,
                y: housing.target.reshape(-1,1)
            }
    )
    print(best_theta,best_error)


no saved model. Training from scratch
Epoch 0 MSE =  [25858.63]
Epoch 100 MSE =  [28317.373]
Epoch 200 MSE =  [28317.373]
Epoch 300 MSE =  [28317.373]
Epoch 400 MSE =  [28317.373]
Epoch 500 MSE =  [28317.373]
Epoch 600 MSE =  [28317.373]
Epoch 700 MSE =  [28317.373]
Epoch 800 MSE =  [28317.373]
Epoch 900 MSE =  [28317.373]
[[ 8.1041384e-01]
 [ 9.9553329e-01]
 [ 9.7243279e-02]
 [-3.6800289e-01]
 [ 3.5647058e-01]
 [-3.5688521e-03]
 [ 9.6779538e-04]
 [-2.3816462e-01]
 [-1.4885532e+00]] [[178.13167]
 [169.92992]
 [178.08241]
 ...
 [169.17659]
 [170.58711]
 [168.47897]]


### Visualising the graph and training curves using Tensorboard

In [59]:
from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

'20190205113346'

In [101]:
def perform_minibatch_gradient_descent(data, labels, graph=None, learning_rate=0.01, batch_size=100, n_epochs=1000, saving=False, logging=False, log_dir=None, save_dir=None):
    """
    Performs minibatch gradient descent to train an OLS model over the provided data
    
    Parameters
    ~~~~~~~~~~~
    
    :param data: the training data 
    
    :param labels: the training labels
    
    :param graph: if provided, the model will be placed into the provided graph.
    
    :param learning_rate: the learning rate with which to train the mse optimiser
    
    :param batch_size: the number of batches to use in minibatch gradient descent.
    
    :param n_epochs: the number of epochs with which to train.
    
    :param saving: whether the best model seen should be saved, if true, then the save_dir must also be provided
    
    :param save_dir: directory to save models to
    
    :param logging: whether the training progress should be logged. If true then the log_dir must also be provided
    
    """
    m, n = data.shape
    
    # parameter checking
    assert labels.shape[0] == m, "Invalid labels shape"
    assert labels.shape[1] == 1, "OLS requires a single continuous output label"
    assert not logging or log_dir
    assert not saving or save_dir       
    
    
    # if no graph is provided, just create a new one
    if not graph:
        graph = tf.Graph()
    
    with graph.as_default():
    
        # calculate the number of batches
        n_batches = int(np.ceil(m/batch_size))

        # set up placeholders for inputs
        X = tf.placeholder(tf.float32, shape=(None, n), name='X')
        y = tf.placeholder(tf.float32, shape=(None,1), name='y')

        # model parameters to be learned
        theta = tf.Variable(tf.random_uniform([n,1], -1.0, 1.0), name='theta')

        # predictions
        y_pred = tf.matmul(X, theta, name='predictions')


        with tf.name_scope('loss') as scope:
            # error function and optimiser for learning
            error = y_pred - y
            mse = tf.reduce_mean(tf.square(error), name='mse')
            
        optimiser = tf.train.AdamOptimizer(learning_rate=learning_rate)
        training_op = optimiser.minimize(mse)


        # additional configurations before training
        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")

        if logging:
            log_dir = '{}/run-{}/'.format(log_dir, now)


        saver = None 
        file_writer = None
        mse_summary = tf.summary.scalar('MSE', mse)

        if saving:
            print('INFO: Saving Enabled. Saving models to {}'.format(save_dir))
            saver = tf.train.Saver()
        if logging:
            print('INFO: Logging Enabled. Logging training to {}'.format(log_dir))
            file_writer = tf.summary.FileWriter(log_dir, graph)

        init = tf.global_variables_initializer()
        last_mse = None


        def fetch_next_batch(batch_index):
            lower_bound = batch_index * batch_size
            upper_bound = min((batch_index + 1) * batch_size, m)
            return data[lower_bound:upper_bound, :], labels[lower_bound:upper_bound, :]

        with tf.Session() as sess:
            # initialise all variables
            if saving:
                # try loading prior model
                try:
                    saver.restore(sess, save_dir)
                except ValueError:
                    print('INFO: Could not load previously saved model - must train from scratch.')
                    sess.run(init)
            else:
                sess.run(init)


            for epoch in range(n_epochs):
                for batch_index in range(n_batches):
                    X_batch, y_batch = fetch_next_batch(batch_index)
                    
                    sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

                    if logging and batch_index % 10 == 0:
                        summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})
                        step = epoch * n_batches + batch_index
                        file_writer.add_summary(summary_str, step)

                current_mse = sess.run(mse, feed_dict={X: data, y: labels})

                if epoch % 100 == 0:
                    print('EPOCH', epoch, ', MSE = ', current_mse)

                    if saving and (last_mse is None or last_mse > current_mse):
                        print('INFO: Saving model to save dir - old: {} -> new: {}'.format(last_mse, current_mse))
                        last_mse = current_mse
                        saver.save(sess, save_dir)

            # having trained, evaluate the model again
            current_mse = sess.run(mse, feed_dict={X: data, y: labels})
            if saving and (last_mse is None or last_mse > current_mse):
                print('INFO: Saving final model to save dir - new: {}'.format(current_mse))
                last_mse = current_mse
                saver.save(sess, save_dir)       

In [102]:
perform_minibatch_gradient_descent(scaled_housing_data_plus_bias, np.reshape(housing.target, (-1,1)), saving=True, save_dir='learning_models/saves/saved_model', logging=True, log_dir='learning_models/logs')

INFO: Saving Enabled. Saving models to learning_models/saves/saved_model
INFO: Logging Enabled. Logging training to learning_models/logs/run-20190205124812/
INFO:tensorflow:Restoring parameters from learning_models/saves/saved_model
EPOCH 0 , MSE =  4.9263
INFO: Saving model to save dir - old: None -> new: 4.926300048828125
EPOCH 100 , MSE =  4.9263
EPOCH 200 , MSE =  4.9263
EPOCH 300 , MSE =  4.9263
EPOCH 400 , MSE =  4.9263
EPOCH 500 , MSE =  4.9263
EPOCH 600 , MSE =  4.9263
EPOCH 700 , MSE =  4.9263
EPOCH 800 , MSE =  4.9263
EPOCH 900 , MSE =  4.9263


In [99]:
help(perform_minibatch_gradient_descent)

Help on function perform_minibatch_gradient_descent in module __main__:

perform_minibatch_gradient_descent(data, labels, graph=None, learning_rate=0.01, batch_size=100, n_epochs=1000, saving=False, logging=False, log_dir=None, save_dir=None)
    Performs minibatch gradient descent to train an OLS model over the provided data
    
    Parameters
    ~~~~~~~~~~~
    
    :param data: the training data 
    
    :param labels: the training labels
    
    :param graph: if provided, the model will be placed into the provided graph.
    
    :param learning_rate: the learning rate with which to train the mse optimiser
    
    :param batch_size: the number of batches to use in minibatch gradient descent.
    
    :param n_epochs: the number of epochs with which to train.
    
    :param saving: whether the best model seen should be saved, if true, then the save_dir must also be provided
    
    :param save_dir: directory to save models to
    
    :param logging: whether the training prog