## TensorFlow

In [1]:
import tensorflow as tf

#create a computation graph
x = tf.Variable(3, name="x")
y = tf.Variable(4, name="y")
f = x*x*y +y +2

  from ._conv import register_converters as _register_converters


Instructions for updating:
Colocations handled automatically by placer.


In [2]:
#open tensorflow session
sess = tf.Session()

#initialize the variables
sess.run(x.initializer)
sess.run(y.initializer)

#evaluate f
result = sess.run(f)
print(result)

sess.close()

42


In [3]:
#repeating sess.run() is cumbersome so there is a better way
with tf.Session() as sess:
    x.initializer.run()
    y.initializer.run()
    result = f.eval()

In [4]:
#instead of manually running the initializer for every single variable use this function
init = tf.global_variables_initializer() #prepare an init node

with tf.Session() as sess:
    init.run() #actually initializes all the variables
    result=f.eval()

In [9]:
#you can also create an interactive session which automatically sets itself as the default 
#(don't need a block but need to manually close)
sess = tf.InteractiveSession()
init.run()
result = f.eval()
print(result)
sess.close()

42


### Linear Regression w/ TensorFlow

In [10]:
import numpy as np
from sklearn.datasets import fetch_california_housing

In [11]:
housing = fetch_california_housing()
m, n = housing.data.shape
housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data]

In [12]:
X = tf.constant(housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
XT = tf.transpose(X)
theta = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), y)

In [15]:
with tf.Session() as sess:
    theta_value = theta.eval()
theta_value

array([[-3.6959320e+01],
       [ 4.3698898e-01],
       [ 9.4245886e-03],
       [-1.0791138e-01],
       [ 6.4842808e-01],
       [-3.9986235e-06],
       [-3.7866351e-03],
       [-4.2142656e-01],
       [-4.3467718e-01]], dtype=float32)

### Gradient Descent

In [37]:
n_epochs = 1000
learning_rate = .01

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_housing_data_plus_bias = scaler.fit_transform(housing_data_plus_bias)

In [53]:
#initialize constants
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
#first just use a random theta for equation: (theta - nMSE(theta))
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred-y
mse = tf.reduce_mean(tf.square(error), name="mse")
gradients = 2/m * tf.matmul(tf.transpose(X), error)
training_op = tf.assign(theta, theta-learning_rate*gradients)

init = tf.global_variables_initializer()

In [64]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 9.428481
Epoch 100 MSE = 4.88447
Epoch 200 MSE = 4.807394
Epoch 300 MSE = 4.8039184
Epoch 400 MSE = 4.803636
Epoch 500 MSE = 4.803559
Epoch 600 MSE = 4.8035054
Epoch 700 MSE = 4.8034625
Epoch 800 MSE = 4.803427
Epoch 900 MSE = 4.803398


In [65]:
#closer look
with tf.Session() as sess:
    sess.run(init)
    for i in range(0,1000):
        print(mse.eval())
        sess.run(training_op)

8.288693
8.109467
7.940908
7.782323
7.633064
7.492534
7.3601747
7.23546
7.117909
7.007069
6.90252
6.8038654
6.710742
6.622807
6.539744
6.461252
6.387055
6.3168926
6.250522
6.1877174
6.1282663
6.0719695
6.018645
5.968116
5.9202213
5.874808
5.8317337
5.790865
5.752079
5.715254
5.680283
5.6470613
5.6154923
5.585486
5.5569534
5.529817
5.5040007
5.4794326
5.456047
5.4337797
5.412571
5.3923674
5.3731136
5.3547616
5.3372645
5.3205786
5.304661
5.289473
5.2749786
5.2611413
5.24793
5.235311
5.2232575
5.2117386
5.200731
5.190208
5.1801453
5.170522
5.1613164
5.1525083
5.1440797
5.13601
5.128285
5.1208854
5.1138
5.10701
5.100505
5.094269
5.088291
5.082559
5.077061
5.071787
5.0667276
5.0618715
5.0572104
5.052736
5.048437
5.0443096
5.040344
5.0365324
5.0328703
5.0293484
5.025964
5.022707
5.019574
5.0165596
5.013658
5.010866
5.008177
5.0055866
5.0030904
5.0006876
4.998369
4.996135
4.993981
4.9919033
4.989898
4.987963
4.986096
4.984292
4.982551
4.980868
4.9792423
4.9776707
4.9761505
4.9746804
4.9732594

4.82969
4.8296404
4.829591
4.829541
4.829492
4.829443
4.829394
4.8293447
4.829296
4.829247
4.829198
4.8291492
4.8291006
4.8290524
4.829004
4.8289557
4.8289075
4.8288593
4.8288107
4.8287635
4.8287153
4.828668
4.8286195
4.8285723
4.828525


### Using autodiff/optimizer to automatically calculate gradient

In [80]:
#previous requires you to mathematically derive gradients from the cost function MSE
#linear regression isn't too bad but it is a headache with more advanced algorithms
#initialize constants
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1,1), dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred-y
mse = tf.reduce_mean(tf.square(error), name="mse")
#auto************************************
gradients = tf.gradients(mse, [theta])[0]

training_op = tf.assign(theta, theta-learning_rate*gradients)

init = tf.global_variables_initializer()

In [77]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 9.918679
Epoch 100 MSE = 5.0377765
Epoch 200 MSE = 4.93862
Epoch 300 MSE = 4.9015946
Epoch 400 MSE = 4.875283
Epoch 500 MSE = 4.8561325
Epoch 600 MSE = 4.8421664
Epoch 700 MSE = 4.831963
Epoch 800 MSE = 4.8244977
Epoch 900 MSE = 4.8190227


In [78]:
#optimizer
theta = tf.Variable(tf.random_uniform([n+1, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred-y
mse = tf.reduce_mean(tf.square(error), name="mse")
#auto************************************
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#could also use momentum optimizer
#optimizer2 = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=.9)

training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

In [79]:
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch", epoch, "MSE =", mse.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MSE = 6.415772
Epoch 100 MSE = 4.989398
Epoch 200 MSE = 4.92045
Epoch 300 MSE = 4.8883715
Epoch 400 MSE = 4.865686
Epoch 500 MSE = 4.8491817
Epoch 600 MSE = 4.8371296
Epoch 700 MSE = 4.8283124
Epoch 800 MSE = 4.821848
Epoch 900 MSE = 4.817099


### Minibatch Gradient Descent

In [87]:
#use placeholder nodes to replace X/y at every iteration
A = tf.placeholder(tf.float32, shape=(None, 3))
B = A + 5
with tf.Session() as sess:
    B_val_1 = B.eval(feed_dict={A: [[1,2,3]]})
    B_val_2 = B.eval(feed_dict={A: [[4,5,6], [7,8,9]]})
    
    print(B_val_1)
    print(B_val_2)

[[6. 7. 8.]]
[[ 9. 10. 11.]
 [12. 13. 14.]]


In [89]:
#make X,y placeholder nodes
X = tf.placeholder(tf.float32, shape=(None, n+1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

In [90]:
#define the batch size and compute the total number of batches
batch_size=100
n_batches=int(np.ceil(m/batch_size))

In [92]:
#fetch the mini batches one by one, then provide the value of X and y via feed_dict when evaluating a node
def fetch_batch(epoch, batch_index, batch_size):
    [...] #load data from disk
    return X_batch, y_batch

with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
    best_theta = theta.eval()