## 1. What is a TensorFlow graph and how do you define one?

A graph defines a computation without actually performing it. The computation is carried out later, in a session.

In [1]:
import os

import tensorflow as tf

# Build a dedicated graph so that nothing is added to the default graph.
graph1 = tf.Graph()
with graph1.as_default():
    # Two float32 variables; they hold no value until a session initialises them.
    var = tf.Variable(32, dtype=tf.float32, name="var")
    var2 = tf.Variable(34, dtype=tf.float32, name="var2")
    # Node that adds the two variables when it is run.
    total = tf.add(var, var2, name="total")
    # Node that initialises every variable of this graph when it is run.
    init = tf.global_variables_initializer()

## 2. What is a TensorFlow session and how do you run one?

A session is the execution phase where you run some or all of the graph. You first have to initialize all the variables and run the relevant operations. Then you can evaluate the outcomes.

In [2]:
# Execute graph1: the variables must be initialised before any node reads them.
with tf.Session(graph=graph1) as sess:
    sess.run(init)
    # A single run both executes the addition and returns its value, so the
    # graph is not evaluated a second time just to fetch the result.
    fin_total = sess.run(total)

fin_total

66.0

## 3. A linear Regression with GradientDescent optimizer. 

In [3]:
from sklearn.datasets import fetch_california_housing
# Load the California housing dataset through scikit-learn's fetch helper.
housing = fetch_california_housing()
# Show which fields the returned object provides.
print(housing.keys())


dict_keys(['data', 'target', 'feature_names', 'DESCR'])


In [4]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Raw feature matrix, and the targets turned into a single column.
housing_data = housing.data
housing_targets = housing.target[:, np.newaxis]

# Standardise every feature; the scaler is fitted on the full dataset.
scaler = StandardScaler()
scaler.fit(housing_data)
scaled_housing_data_reg = scaler.transform(housing_data)

# Prepend a column of ones so that the first weight acts as the intercept.
# Note that m and n describe the data BEFORE the extra column is added.
m, n = scaled_housing_data_reg.shape
scaled_housing_data_reg = np.hstack([np.ones((m, 1)), scaled_housing_data_reg])

print('scaled_housing_shape: ', scaled_housing_data_reg.shape)
print('targets_sahpe: ', housing_targets.shape)

scaled_housing_shape:  (20640, 9)
targets_sahpe:  (20640, 1)


In [5]:
# Batch gradient-descent linear regression, built in its own graph.
lin_reg = tf.Graph()
with lin_reg.as_default():
    n_epoch = 1000

    # The dataset is fixed input, so it is stored as constants. Declared as
    # tf.Variable it would be trainable, and optimizer.minimize() would apply
    # gradient updates to the data itself as well as to theta.
    X = tf.constant(scaled_housing_data_reg, dtype=tf.float32, name="X")
    y = tf.constant(housing_targets, dtype=tf.float32, name="y")

    # One weight per column of X (bias column included). The count is read from
    # the data instead of the global n, which later cells reassign.
    theta = tf.Variable(
        tf.random_uniform([scaled_housing_data_reg.shape[1], 1], -1.0, 1.0),
        dtype=tf.float32,
        name='theta',
    )
    y_preds = tf.matmul(X, theta, name="y_preds")

    # Loss: mean of the squared residuals.
    errors = y - y_preds
    mse = tf.reduce_mean(tf.square(errors), name="mse")

    # Each run of training_op performs one gradient-descent step on theta.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    training_op = optimizer.minimize(mse)

    init = tf.global_variables_initializer()
    

In [6]:
# Train: run n_epoch gradient-descent steps and report the loss every 100 epochs.
with tf.Session(graph=lin_reg) as sess:
    sess.run(init)

    for epoch in range(n_epoch):
        if not epoch % 100:
            print("MSE: ", sess.run(mse))
        sess.run(training_op)
    # Read the learned weights while the session is still open.
    fin_theta = sess.run(theta)

MSE:  10.101544
MSE:  0.8903759
MSE:  0.67388684
MSE:  0.63307995
MSE:  0.60537
MSE:  0.5846682
MSE:  0.5690334
MSE:  0.5571304
MSE:  0.54798543
MSE:  0.5408885


# 4. How would you use Mini-batch Gradient Descent for Linear Regression
Write a linear regression model for the California housing data set using mini-batch gradient descent. This is useful for distributed computing, or if the dataset is too big to fit in RAM.

In [7]:
# Confirm the array shapes before building the mini-batch model.
print('scaled_housing_shape: ', np.shape(scaled_housing_data_reg))
print('targets_shape: ', np.shape(housing_targets))

scaled_housing_shape:  (20640, 9)
targets_shape:  (20640, 1)


In [8]:
# Start from an empty default graph.
tf.reset_default_graph()

# m rows and n columns; n includes the bias column here.
m, n = scaled_housing_data_reg.shape

n_epochs = 10
batch_size = 100
# Ceiling division: a final partial batch still counts as a batch.
n_batches = -(-m // batch_size)

In [9]:
# Placeholders receive one mini-batch per run; None leaves the batch size open.
X = tf.placeholder(dtype=tf.float32, shape=[None, n], name="X")
y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")

In [10]:

# Linear model: predictions are X @ theta, with theta drawn uniformly from [-1, 1).
initial_theta = tf.random_uniform(shape=[n, 1], minval=-1.0, maxval=1.0)
theta = tf.Variable(initial_theta, dtype=tf.float32, name='theta')
y_preds = tf.matmul(X, theta, name="y_preds")

# Loss: mean of the squared residuals.
errors = y_preds - y
squared_errors = tf.square(errors)
mse = tf.reduce_mean(squared_errors, name="mse")

# Each run of training_op performs one gradient-descent step on the loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()
   

In [11]:
def fetch_batch(epoch, batch_size, batch_index, features=None, targets=None):
    """Return mini-batch number ``batch_index`` of epoch ``epoch``.

    The rows are shuffled once per epoch, with the shuffle seeded by ``epoch``,
    and the shuffled order is then sliced by ``batch_index``. Stepping
    ``batch_index`` through every batch of an epoch therefore visits each row
    exactly once, and re-running the notebook produces the same batches.

    Args:
        epoch: Non-negative epoch number; used as the seed of the shuffle.
        batch_size: Maximum number of rows in a batch.
        batch_index: Zero-based position of the batch inside the epoch.
        features: 2-D array of inputs. Defaults to the notebook's
            ``scaled_housing_data_reg``.
        targets: Array of targets with one row per row of ``features``.
            Defaults to the notebook's ``housing_targets``.

    Returns:
        ``(X_batch, y_batch)``. The last batch of an epoch holds fewer than
        ``batch_size`` rows when the row count is not a multiple of
        ``batch_size``.
    """
    if features is None:
        features = scaled_housing_data_reg
    if targets is None:
        targets = housing_targets

    # A private RandomState keeps NumPy's global random state untouched.
    order = np.random.RandomState(epoch).permutation(len(features))
    start = batch_index * batch_size
    indices = order[start:start + batch_size]
    return features[indices], targets[indices]

In [12]:
# Train with mini-batches: every step feeds one batch through the placeholders.
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_size, batch)
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
    # Read the trained weights while the session is still open.
    best_theta = sess.run(theta)

print(best_theta)

[[ 2.0582917 ]
 [ 0.871823  ]
 [ 0.11908898]
 [-0.32261235]
 [ 0.36294264]
 [ 0.00272483]
 [-0.08213932]
 [-0.7787991 ]
 [-0.7721297 ]]


# 4. Saving the model
***Lets copy the Gradient Descent Model but save it***
* The saver object is a tf.train.Saver
* The syntax for saving a session is `saver_obj.save(sess, 'path/to/model.cpkt')`

In [13]:
tf.reset_default_graph()

# Same batch gradient-descent model as before, rebuilt in the default graph.
n_epoch = 1000

# The dataset is fixed input, so it is stored as constants. Declared as
# tf.Variable it would be trainable, and optimizer.minimize() would apply
# gradient updates to the data itself as well as to theta.
X = tf.constant(scaled_housing_data_reg, dtype=tf.float32, name="X")
y = tf.constant(housing_targets, dtype=tf.float32, name="y")

# One weight per column of X (bias column included); the count is read from
# the data so that it does not depend on the current value of the global n.
theta = tf.Variable(
    tf.random_uniform([scaled_housing_data_reg.shape[1], 1], -1.0, 1.0),
    dtype=tf.float32,
    name='theta',
)
y_preds = tf.matmul(X, theta, name="y_preds")

# Loss: mean of the squared residuals.
errors = y - y_preds
mse = tf.reduce_mean(tf.square(errors), name="mse")

# Each run of training_op performs one gradient-descent step on theta.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()


In [14]:
# Train the model, then write its variables to a checkpoint.
checkpoint_path = 'models/tensorflow/model.cpkt'
# Create the checkpoint folder up front so that saving does not depend on the
# folder already being present (for example on a fresh checkout).
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

#------- Saver -------------
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epoch):
        # Report the loss every 100 epochs.
        if epoch % 100 == 0:
            print("MSE: ", mse.eval())
        sess.run(training_op)
    # Save while the session is open: the variable values live in the session.
    saver.save(sess, checkpoint_path)
    fin_theta = theta.eval()

MSE:  9.8185625
MSE:  0.72300214
MSE:  0.58435875
MSE:  0.5677064
MSE:  0.556749
MSE:  0.54830384
MSE:  0.54168737
MSE:  0.53643996
MSE:  0.5322242
MSE:  0.5287885


## To restore the model and variables 

In [15]:

# A fresh session: restoring loads the saved values, so init is not run here.
with tf.Session() as sess:
    saver.restore(sess, "models/tensorflow/model.cpkt")
    print('MSE: ', sess.run(mse))

INFO:tensorflow:Restoring parameters from models/tensorflow/model.cpkt
MSE:  0.5259459


## 5. Visualize a Graph and Training Curve for the mini batch gradient descent model in TensorBoard.

TensorBoard is a visualisation tool for statistics that your model writes out. The model must write the values you want to inspect to a log directory, which TensorBoard then reads. If every run writes to the same log directory, the runs get mixed together in the visualisations, so it is useful to include a timestamp in the log directory name.

***Steps***
1. add a tf.summary.scalar to give it the parameter to output to the log
2. create a tf.summary.FileWriter and give it the log directory and graph 
3. in the execution phase. every few epochs evaluate the tf.summary.scalar
4. Write the result of step 3 to the log. 

In [16]:
from datetime import datetime

tf.reset_default_graph()

# A UTC timestamp in the folder name gives every run its own log directory.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "models/tensorflow/tensorboard/tf_logs"
logdir = root_logdir + "/run-" + now

In [17]:
# m rows and n columns; n includes the bias column.
m, n = scaled_housing_data_reg.shape

n_epochs = 10
batch_size = 100
# Round up so that a final partial batch still counts as a batch.
n_batches = (m + batch_size - 1) // batch_size

In [18]:
# Inputs are fed one mini-batch at a time; the leading None leaves the batch size open.
X = tf.placeholder(dtype=tf.float32, shape=[None, n], name="X")
y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")

In [19]:

# Linear model and its loss.
initial_theta = tf.random_uniform(shape=[n, 1], minval=-1.0, maxval=1.0)
theta = tf.Variable(initial_theta, dtype=tf.float32, name='theta')
y_preds = tf.matmul(X, theta, name="y_preds")
errors = y_preds - y
squared_errors = tf.square(errors)
mse = tf.reduce_mean(squared_errors, name="mse")

# Each run of training_op performs one gradient-descent step on the loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
training_op = optimizer.minimize(mse)

#-------- steps 1 and 2 ------------------
# Step 1: a summary node that serialises the current value of mse for the log.
mse_summary = tf.summary.scalar('mse', mse)
# Step 2: a writer bound to the log directory; it also records the graph itself.
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())

init = tf.global_variables_initializer()

In [20]:
def fetch_batch(epoch, batch_size, batch_index, features=None, targets=None):
    """Return mini-batch number ``batch_index`` of epoch ``epoch``.

    The rows are shuffled once per epoch, with the shuffle seeded by ``epoch``,
    and the shuffled order is then sliced by ``batch_index``. Stepping
    ``batch_index`` through every batch of an epoch therefore visits each row
    exactly once, and re-running the notebook produces the same batches.

    Args:
        epoch: Non-negative epoch number; used as the seed of the shuffle.
        batch_size: Maximum number of rows in a batch.
        batch_index: Zero-based position of the batch inside the epoch.
        features: 2-D array of inputs. Defaults to the notebook's
            ``scaled_housing_data_reg``.
        targets: Array of targets with one row per row of ``features``.
            Defaults to the notebook's ``housing_targets``.

    Returns:
        ``(X_batch, y_batch)``. The last batch of an epoch holds fewer than
        ``batch_size`` rows when the row count is not a multiple of
        ``batch_size``.
    """
    if features is None:
        features = scaled_housing_data_reg
    if targets is None:
        targets = housing_targets

    # A private RandomState keeps NumPy's global random state untouched.
    order = np.random.RandomState(epoch).permutation(len(features))
    start = batch_index * batch_size
    indices = order[start:start + batch_size]
    return features[indices], targets[indices]

In [21]:

#--- Execution ------------
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_size, batch)
            batch_feed = {X: X_batch, y: y_batch}
            # Steps 3 and 4: every 10th batch, evaluate the summary on this batch
            # (before training on it) and write it to the log.
            if not batch % 10:
                step = epoch * n_batches + batch  # batches processed so far
                summary_str = sess.run(mse_summary, feed_dict=batch_feed)
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict=batch_feed)
    # Read the trained weights while the session is still open.
    best_theta = sess.run(theta)

# Close the writer once training has finished.
file_writer.close()

print(best_theta)

[[ 2.0735905 ]
 [ 0.8425267 ]
 [ 0.12584218]
 [-0.3047448 ]
 [ 0.29672837]
 [-0.00408026]
 [ 0.00807172]
 [-0.7907821 ]
 [-0.77579284]]


Check your directory to see if it has worked.

use the following command in your command line: 
* `tensorboard --logdir models/tensorflow/tensorboard/tf_logs`

<img src="models/tensorflow/tensorboard/model_graphs/california_housing.png">

# 6. Why would you create a name scope and how do you do it for a model? 

In complex neural networks, for example when thousands of nodes are present, the graph can look quite complicated. To avoid this, it is beneficial to group related operations under a single name scope to declutter the graph.

Let's create one for the model above.

In [27]:
from datetime import datetime

tf.reset_default_graph()

# Each run logs into its own folder, named after the current UTC time.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "models/tensorflow/tensorboard/tf_logs"
logdir = "%s/run-%s" % (root_logdir, now)

In [28]:
# m rows and n columns; n includes the bias column.
m, n = scaled_housing_data_reg.shape

n_epochs = 10
batch_size = 100
# Number of batches per epoch, rounded up to include a final partial batch.
n_batches = -(-m // batch_size)

In [29]:
# Mini-batch inputs; the first dimension is left open for any batch size.
X = tf.placeholder(dtype=tf.float32, shape=[None, n], name="X")
y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y")

In [30]:

# Linear model: predictions are X @ theta.
initial_theta = tf.random_uniform(shape=[n, 1], minval=-1.0, maxval=1.0)
theta = tf.Variable(initial_theta, dtype=tf.float32, name='theta')
y_preds = tf.matmul(X, theta, name="y_preds")

# Everything created inside this block is grouped under the "loss" name scope.
with tf.name_scope("loss") as scope:
    errors = y_preds - y
    squared_errors = tf.square(errors)
    mse = tf.reduce_mean(squared_errors, name="mse")

# The optimizer is created outside the scope, so its ops are not grouped under "loss".
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
training_op = optimizer.minimize(mse)

# Summary node for the loss, and a writer that also records the graph.
mse_summary = tf.summary.scalar('mse', mse)
file_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())

init = tf.global_variables_initializer()

In [31]:
def fetch_batch(epoch, batch_size, batch_index, features=None, targets=None):
    """Return mini-batch number ``batch_index`` of epoch ``epoch``.

    The rows are shuffled once per epoch, with the shuffle seeded by ``epoch``,
    and the shuffled order is then sliced by ``batch_index``. Stepping
    ``batch_index`` through every batch of an epoch therefore visits each row
    exactly once, and re-running the notebook produces the same batches.

    Args:
        epoch: Non-negative epoch number; used as the seed of the shuffle.
        batch_size: Maximum number of rows in a batch.
        batch_index: Zero-based position of the batch inside the epoch.
        features: 2-D array of inputs. Defaults to the notebook's
            ``scaled_housing_data_reg``.
        targets: Array of targets with one row per row of ``features``.
            Defaults to the notebook's ``housing_targets``.

    Returns:
        ``(X_batch, y_batch)``. The last batch of an epoch holds fewer than
        ``batch_size`` rows when the row count is not a multiple of
        ``batch_size``.
    """
    if features is None:
        features = scaled_housing_data_reg
    if targets is None:
        targets = housing_targets

    # A private RandomState keeps NumPy's global random state untouched.
    order = np.random.RandomState(epoch).permutation(len(features))
    start = batch_index * batch_size
    indices = order[start:start + batch_size]
    return features[indices], targets[indices]

In [35]:

#--- Execution ------------
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        for batch in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_size, batch)
            batch_feed = {X: X_batch, y: y_batch}
            # Every 10th batch: log the loss on this batch before training on it.
            if not batch % 10:
                step = epoch * n_batches + batch  # batches processed so far
                summary_str = sess.run(mse_summary, feed_dict=batch_feed)
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict=batch_feed)
    # Read the trained weights while the session is still open.
    best_theta = sess.run(theta)

# Close the writer once training has finished.
file_writer.close()

print(best_theta)

loss/sub
[[ 2.0649602e+00]
 [ 8.5481781e-01]
 [ 1.3767937e-01]
 [-3.1671110e-01]
 [ 3.1873599e-01]
 [ 7.8556128e-04]
 [-4.6501223e-02]
 [-8.0104780e-01]
 [-7.8397709e-01]]


***lets have a look at the new graph with the name scope***

<img src="models/tensorflow/tensorboard/model_graphs/california_housing_name_scope.png">

***As you can see compared to the graph above the mse and error operation nodes are grouped under Loss***

In [36]:
print(errors.op.name) # every op created inside the scope gets the prefix "loss/" in its name

loss/sub


In [37]:
# mse was created inside the same name scope, so its op name carries the prefix too.
print(mse.op.name)

loss/mse


# 7. suppose you wanted to create a graph that outputs the sum of 2 rectified linear units. How would you do it? 

A rectified linear unit computes the result of a linear equation and returns either that result or 0, whichever is larger (hence "rectified"). Writing out the same sub-graph separately for every unit would be repetitive, so you can build it with a function instead.


In [81]:
tf.reset_default_graph()

def relu(X):
    """Build one rectified linear unit on top of X and return its output tensor.

    Every call creates its own weight column and bias variable, so repeated
    calls yield independent units. The second dimension of X must be known,
    because it fixes the number of weights.
    """
    with tf.name_scope("relu"):
        n_inputs = int(X.get_shape()[1])
        w = tf.Variable(tf.random_normal((n_inputs, 1)), name="w")
        b = tf.Variable(0.0, name="bias")
        linear = tf.matmul(X, w)
        z = tf.add(linear, b, name="z")
        # Element-wise max with 0 clips negative activations.
        return tf.maximum(z, 0., name="relu")

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five units that all read the same input; add_n sums their outputs element-wise.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")

In [82]:
# Write only the graph definition to the log folder, then close the writer.
relu_logdir = "models/tensorflow/tensorboard/relu3"
file_writer = tf.summary.FileWriter(relu_logdir, graph=tf.get_default_graph())
file_writer.close()


The resulting graph contains five copies of the same ReLU sub-graph, all built by one function, which shows the modularity.

<img src="models/tensorflow/tensorboard/model_graphs/modularity_relu.png">

# Sharing Variables