In [19]:
# To support both python 2 and python 3
from __future__ import division,print_function,unicode_literals 

import numpy as np
import os 

# to make this notebook's output stable across runs 
def reset_graph(seed=42):
    """Start from a fresh default TensorFlow graph with reproducible randomness.

    The same ``seed`` is applied to NumPy's global generator and to
    TensorFlow's random seed.

    Note: ``tf`` is imported in a later cell of this notebook, so this
    function can only be called once that import has run.
    """
    np.random.seed(seed)
    # Replace the default graph first, then seed it.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    
#to plot pretty figures
%matplotlib inline 
import matplotlib 
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize']=14
plt.rcParams['xtick.labelsize']=12
plt.rcParams['ytick.labelsize']=12

# where to save the figures 
PROJECT_ROOT_DIR = "." 
CHAPTER_ID = "tensorflow" 

def save_fig(fig_id,tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to
    ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``; the target
    directory is created if it does not exist yet.

    Args:
        fig_id: File name (without extension) for the figure.
        tight_layout: If true, call ``plt.tight_layout()`` before saving.
    """
    path = os.path.join(PROJECT_ROOT_DIR,"images",CHAPTER_ID,fig_id + ".png")
    target_dir = os.path.dirname(path)
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    print("Saving figure",fig_id)
    if tight_layout:
        # Was `plt.tight_layoutout()`, which raises AttributeError.
        plt.tight_layout()
    plt.savefig(path,format='png',dpi=300)

## Creating and running a graph

In [20]:
import tensorflow as tf 

# Two integer variables and a tensor f = x*x*y + y + 2 built from them.
# This only defines graph nodes; the value of f is obtained later by
# running it in a session.
x = tf.Variable(3,name="x")
y = tf.Variable(4,name="y")
f = x*x*y+y+2

In [21]:
f

<tf.Tensor 'add_7:0' shape=() dtype=int32>

In [22]:
# Open a session explicitly, initialize each variable, then evaluate f.
# The session is closed by hand in a later cell (sess.close()).
sess = tf.Session()
sess.run(x.initializer)
sess.run(y.initializer)
result = sess.run(f)
print(result)

42


In [23]:
sess.close()

In [24]:
# Same computation with the session used as a context manager: inside the
# block the initializers and f are run without passing `sess` explicitly.
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()
    

In [25]:
result

42

In [26]:
# A single initializer op replaces the per-variable initializer calls
# used in the previous cells.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    result = f.eval()

In [27]:
result

42

In [28]:
# y and z both depend on x (and therefore on w).
w = tf.constant(3)
x = w + 2
y = x + 5
z = x*3
with tf.Session() as sess:
    # Fetch both tensors in a single run so the shared nodes w and x are
    # evaluated once; separate y.eval() / z.eval() calls each run the graph
    # again and recompute them.
    y_val,z_val = sess.run([y,z])
    print(y_val)
    print(z_val)

10
15


## Linear Regression 
### Using the Normal Equation 

In [37]:
import numpy as np 
from sklearn.datasets import fetch_california_housing

reset_graph()

# Load the data and prepend a column of ones (bias term) to the features.
housing = fetch_california_housing()
m, n = housing.data.shape
bias_column = np.ones((m, 1))
housing_data_plus_bias = np.hstack([bias_column, housing.data])

X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
XT = tf.transpose(X)

# Normal equation, theta = (X^T X)^-1 X^T y, built one product at a time.
gram = tf.matmul(XT, X)
gram_inverse = tf.matrix_inverse(gram)
pseudo_inverse = tf.matmul(gram_inverse, XT)
theta = tf.matmul(pseudo_inverse, y)

with tf.Session() as sess:
    theta_value = sess.run(theta)

In [38]:
theta_value

array([[-3.7171074e+01],
       [ 4.3633682e-01],
       [ 9.3871783e-03],
       [-1.0717344e-01],
       [ 6.4540231e-01],
       [-4.1238391e-06],
       [-3.7809242e-03],
       [-4.2373490e-01],
       [-4.3720812e-01]], dtype=float32)

Compare with pure NumPy:

In [42]:
# Same normal equation, evaluated directly with NumPy.
# NOTE: X and y now name NumPy arrays, replacing the tensors defined above.
X = housing_data_plus_bias
y = housing.target.reshape(-1, 1)

X_transposed = X.T
xtx_inverse = np.linalg.inv(np.dot(X_transposed, X))
theta_numpy = np.dot(np.dot(xtx_inverse, X_transposed), y)

print(theta_numpy)

[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


Compare with Scikit-Learn 

In [46]:
from sklearn.linear_model import LinearRegression 
# Fit a scikit-learn LinearRegression on the raw features, then stack the
# intercept on top of the coefficients as one column for comparison.
lin_reg = LinearRegression()
targets_column = housing.target.reshape(-1, 1)
lin_reg.fit(housing.data, targets_column)

intercept_column = lin_reg.intercept_.reshape(-1, 1)
coefficient_column = lin_reg.coef_.T
print(np.vstack([intercept_column, coefficient_column]))

[[-3.69419202e+01]
 [ 4.36693293e-01]
 [ 9.43577803e-03]
 [-1.07322041e-01]
 [ 6.45065694e-01]
 [-3.97638942e-06]
 [-3.78654265e-03]
 [-4.21314378e-01]
 [-4.34513755e-01]]


## Using Batch Gradient Descent 

Gradient Descent requires scaling the feature vectors first. We could do this using TF, but let's just use Scikit-Learn for now.

In [47]:
from sklearn.preprocessing import StandardScaler
# Scale the features with StandardScaler, then prepend the bias column of ones.
scaler = StandardScaler()
scaler.fit(housing.data)
scaled_housing_data = scaler.transform(housing.data)
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

In [53]:
print(scaled_housing_data_plus_bias.mean(axis=0))
print(scaled_housing_data_plus_bias.mean(axis=1)) 
print(scaled_housing_data_plus_bias.mean())
print(scaled_housing_data_plus_bias.shape)

[ 1.00000000e+00  6.60969987e-17  5.50808322e-18  6.60969987e-17
 -1.06030602e-16 -1.10161664e-17  3.44255201e-18 -1.07958431e-15
 -8.52651283e-15]
[ 0.38915536  0.36424355  0.5116157  ... -0.06612179 -0.06360587
  0.01359031]
0.11111111111111005
(20640, 9)


## Manually computing gradients

In [57]:
reset_graph()

n_epochs = 1000
learning_rate = 0.01

# Model: linear predictions from the scaled features, scored with the MSE.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")
initial_theta = tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42)
theta = tf.Variable(initial_theta, name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")

# Hand-written gradient of the MSE with respect to theta: (2/m) * X^T * error.
gradients = 2 / m * tf.matmul(tf.transpose(X), error)
# One gradient-descent step: theta <- theta - learning_rate * gradients.
updated_theta = theta - learning_rate * gradients
training_op = tf.assign(theta, updated_theta)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)

    best_theta = sess.run(theta)

Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.71450055
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.55557173
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.53962904
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473


In [58]:
best_theta

array([[ 2.0685523 ],
       [ 0.8874027 ],
       [ 0.14401656],
       [-0.34770885],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.66145283],
       [-0.6375278 ]], dtype=float32)

## Using autodiff 
Same as above, except for the `gradients = ...` line:

In [59]:
# Rebuild the same graph as in the manual-gradient cell, up to the MSE.
reset_graph() 

n_epochs = 1000 
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y = tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
# theta has n+1 rows (one per column of X) and starts from seeded uniform
# random values between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X,theta,name="predictions") 
error = y_pred-y
mse = tf.reduce_mean(tf.square(error),name="mse") 

In [65]:
# Let TensorFlow derive the gradient of mse with respect to theta.
# tf.gradients is given a one-element list, so [0] picks out that single result.
gradients = tf.gradients(mse,[theta])[0]
gradients

<tf.Tensor 'gradients_5/predictions_grad/MatMul_1:0' shape=(9, 1) dtype=float32>

In [66]:
# One gradient-descent step: theta <- theta - learning_rate * gradients.
theta_after_step = theta - learning_rate * gradients
training_op = tf.assign(theta, theta_after_step)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE=", sess.run(mse))
        sess.run(training_op)

    best_theta = sess.run(theta)

print("Best theta:")
print(best_theta)

Epoch 0 MSE= 9.161542
Epoch 100 MSE= 0.7145004
Epoch 200 MSE= 0.56670487
Epoch 300 MSE= 0.55557173
Epoch 400 MSE= 0.5488112
Epoch 500 MSE= 0.5436363
Epoch 600 MSE= 0.53962904
Epoch 700 MSE= 0.5365092
Epoch 800 MSE= 0.53406775
Epoch 900 MSE= 0.5321473
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


## Using a GradientDescentOptimizer 

In [67]:
# Rebuild the same graph as in the previous sections, up to the MSE; the
# training op is created from an optimizer in the next cell.
reset_graph() 

n_epochs = 1000 
learning_rate = 0.01 

X = tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y = tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
# theta has n+1 rows (one per column of X) and starts from seeded uniform
# random values between -1.0 and 1.0.
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X,theta,name="predictions") 
error = y_pred-y
mse = tf.reduce_mean(tf.square(error),name="mse") 

In [68]:
# The optimizer builds the training op that minimizes mse, replacing the
# explicit gradients / tf.assign pair used in the earlier sections.
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse) 

In [69]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        # Report the loss every 100 epochs, before applying that epoch's update.
        if not epoch % 100:
            print("Epoch", epoch, "MSE =", sess.run(mse))
        sess.run(training_op)
    best_theta = sess.run(theta)

print("Best theta:")
print(best_theta)

Epoch 0 MSE = 9.161542
Epoch 100 MSE = 0.7145004
Epoch 200 MSE = 0.56670487
Epoch 300 MSE = 0.55557173
Epoch 400 MSE = 0.5488112
Epoch 500 MSE = 0.5436363
Epoch 600 MSE = 0.53962904
Epoch 700 MSE = 0.5365092
Epoch 800 MSE = 0.53406775
Epoch 900 MSE = 0.5321473
Best theta:
[[ 2.0685525 ]
 [ 0.8874027 ]
 [ 0.14401658]
 [-0.34770882]
 [ 0.36178368]
 [ 0.00393811]
 [-0.04269556]
 [-0.6614528 ]
 [-0.6375277 ]]


## Feeding data to the training algorithm

### Placeholder nodes 

In [75]:
reset_graph()

# Placeholder with 3 columns and an unspecified (None) number of rows.
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5

# Feed the same placeholder once with one row and once with two rows.
single_row = [[1, 2, 3]]
two_rows = [[4, 5, 6], [7, 8, 9]]
with tf.Session() as sess:
    B_val_1 = sess.run(B, feed_dict={A: single_row})
    B_val_2 = sess.run(B, feed_dict={A: two_rows})

print(B_val_1)

[[6. 7. 8.]]


In [71]:
print(B_val_2)

[[ 9. 10. 11.]
 [12. 13. 14.]]


### Mini-batch Gradient Descent 

In [72]:
n_epochs = 1000
learning_rate = 0.01 

In [82]:
reset_graph() 

X = tf.placeholder(tf.float32,shape=(None,n+1),name="X")
y = tf.placeholder(tf.float32,shape=(None,1),name="y")


In [84]:
# Same model as before, but X and y are the placeholders defined in the
# previous cell, so every run of training_op must be fed a batch.
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X,theta,name="predictions")
error = y_pred-y
mse = tf.reduce_mean(tf.square(error),name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate) 
training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer()


In [85]:
n_epochs = 10

In [86]:
batch_size = 100 
n_batches = int(np.ceil(m/batch_size)) 

In [88]:
def fetch_batch(epoch,batch_index,batch_size):
    """Draw one random mini-batch of scaled features and targets.

    NumPy's global generator is re-seeded from ``epoch`` and ``batch_index``,
    so the same pair always selects the same rows. Row indices are drawn with
    ``np.random.randint`` and may therefore repeat within a batch.

    Reads the notebook-level names ``n_batches``, ``m``,
    ``scaled_housing_data_plus_bias`` and ``housing``.

    Returns:
        ``(X_batch, y_batch)``: the selected rows of
        ``scaled_housing_data_plus_bias`` and the matching targets reshaped
        to a single column.
    """
    batch_seed = epoch * n_batches + batch_index
    np.random.seed(batch_seed)
    row_ids = np.random.randint(m, size=batch_size)
    targets_column = housing.target.reshape(-1, 1)
    return scaled_housing_data_plus_bias[row_ids], targets_column[row_ids]

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            # Feed one mini-batch per training step.
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
    best_theta = sess.run(theta)

In [89]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

## Visualizing the graph
### Inside Jupyter

In [91]:
from tensorflow_graph_in_jupyter import show_graph

ModuleNotFoundError: No module named 'tensorflow_graph_in_jupyter'

### Using TensorBoard 

In [92]:
reset_graph() 

from datetime import datetime 
now = datetime.utcnow().strftime("%Y%m%d%H%M%S") 
root_logdir = "tf_logs" 
logdir = "{}/run-{}/".format(root_logdir,now) 

In [94]:
# NOTE: n_epochs is reassigned to 10 in a later cell before training starts.
n_epochs = 1000
learning_rate = 0.01

# Mini-batch model: X and y are placeholders fed one batch at a time.
X = tf.placeholder(tf.float32,shape=(None,n+1),name="X")
y = tf.placeholder(tf.float32, shape=(None,1),name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0,seed=42),name="theta")
# Node name fixed from "predications" to "predictions", matching the other
# cells, so the graph logged for TensorBoard is labelled correctly.
y_pred = tf.matmul(X,theta,name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error),name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()


In [95]:
mse_summary = tf.summary.scalar('MSE',mse) 
file_writer = tf.summary.FileWriter(logdir,tf.get_default_graph()) 

In [96]:
n_epochs = 10 
batch_size = 100 
n_batches = int(np.ceil(m/batch_size)) 

In [98]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            batch_feed = {X: X_batch, y: y_batch}
            # Log the MSE of every 10th mini-batch, measured before the update.
            if not batch_index % 10:
                summary_str = sess.run(mse_summary, feed_dict=batch_feed)
                step = epoch * n_batches + batch_index
                file_writer.add_summary(summary_str, step)
            sess.run(training_op, feed_dict=batch_feed)
    best_theta = sess.run(theta)

In [99]:
file_writer.close()

In [100]:
best_theta

array([[ 2.0703337 ],
       [ 0.8637145 ],
       [ 0.12255151],
       [-0.31211874],
       [ 0.38510373],
       [ 0.00434168],
       [-0.01232954],
       [-0.83376896],
       [-0.8030471 ]], dtype=float32)

## Name Scopes
When dealing with more complex models such as neural networks, the graph can easily become cluttered with thousands of nodes. To avoid this, we can create name scopes to group related nodes. 

In [101]:
reset_graph() 
# Timestamped log directory, so each run writes to its own folder
# under tf_logs.
now = datetime.utcnow().strftime('%Y%m%d%H%M%S') 
root_logdir = "tf_logs" 
logdir = "{}/run-{}/".format(root_logdir,now) 

# NOTE: n_epochs is reassigned to 10 in a later cell before training starts.
n_epochs = 1000
learning_rate = 0.01

# Placeholders and model; the loss ops are defined in the next cell.
X = tf.placeholder(tf.float32,shape = (None,n+1),name="X")
y = tf.placeholder(tf.float32,shape = (None,1),name="y")
theta = tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0,seed=42),name="theta")
y_pred = tf.matmul(X,theta,name="predictions")

In [102]:
# Create the loss ops inside the "loss" name scope; their op names are
# prefixed with "loss/" (printed further down as loss/sub and loss/mse).
with tf.name_scope("loss") as scope: 
    error = y_pred-y
    mse = tf.reduce_mean(tf.square(error),name="mse")
    

In [103]:
# Training op that minimizes the scoped mse.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse) 

init = tf.global_variables_initializer() 

# Scalar summary of the MSE, and a writer that logs to `logdir`
# (the default graph is passed to the writer as well).
mse_summary = tf.summary.scalar('MSE',mse) 
file_writer = tf.summary.FileWriter(logdir,tf.get_default_graph())

In [104]:
n_epochs = 10
batch_size = 100
n_batches = int(np.ceil(m/batch_size))

try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch,y_batch = fetch_batch(epoch,batch_index,batch_size)
                # Log the MSE of every 10th mini-batch, measured before the update.
                if batch_index % 10 == 0:
                    summary_str = mse_summary.eval(feed_dict={X:X_batch,y:y_batch})
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str,step)
                sess.run(training_op,feed_dict={X:X_batch,y:y_batch})
        best_theta = theta.eval()
finally:
    # Flush and close the event file even if training fails part-way, so the
    # summaries written so far are kept and the writer is not left open.
    file_writer.flush()
    file_writer.close()

print("Best Theta:")
print(best_theta)

Best Theta:
[[ 2.0703337 ]
 [ 0.8637145 ]
 [ 0.12255151]
 [-0.31211874]
 [ 0.38510373]
 [ 0.00434168]
 [-0.01232954]
 [-0.83376896]
 [-0.8030471 ]]


In [105]:
print(error.op.name)

loss/sub


In [106]:
print(mse.op.name)

loss/mse
