# TensorFlow

### Neural Net

In [None]:
'''V1: weights are NumPy arrays fed through placeholders (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))  # input data
y = tf.placeholder(tf.float32, shape=(N, D))  # regression targets
w1 = tf.placeholder(tf.float32, shape=(D, H))  # first-layer weights, fed on every run
w2 = tf.placeholder(tf.float32, shape=(H, D))  # second-layer weights, fed on every run

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# squared L2 distance between y_pred and y per row, averaged over the N rows
loss = tf.reduce_mean(tf.reduce_sum(diff**2, axis=1))

# gradient of loss / no computation. just build.
# One gradient per weight matrix: the second entry must be w2, otherwise
# grad_w2 is just a second copy of the (D, H)-shaped gradient of w1.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        w1: np.random.randn(D, H),
        w2: np.random.randn(H, D),
        y: np.random.randn(N, D),
    }
    learning_rate = 1e-5
    # train the network
    for t in range(50):
        # run the graph; outputs come back as numpy arrays
        loss_val, grad_w1_val, grad_w2_val = sess.run(
            [loss, grad_w1, grad_w2], feed_dict=values)

        # gradient-descent step done in NumPy, outside the graph
        values[w1] -= learning_rate * grad_w1_val
        values[w2] -= learning_rate * grad_w2_val

'''
Problem: the weights are fed in and the gradients fetched back as NumPy arrays,
so they are copied between CPU and GPU on every step
'''


In [None]:
'''V2: weights become tf.Variables that live inside the graph (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))  # create placeholders
y = tf.placeholder(tf.float32, shape=(N, D))

# CHANGED in V2: create Variables instead of placeholders.
# tf.Variable takes an initial *value* (a tensor), not a dtype, so the
# weights are initialized from a random-normal tensor of the right shape.
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# squared L2 distance between y_pred and y per row, averaged over the N rows
loss = tf.reduce_mean(tf.reduce_sum(diff**2, axis=1))

# gradient of loss / no computation. just build.
# One gradient per weight matrix: the second entry must be w2, not w1 again.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# CHANGED in V2: the weight update is expressed as assign ops in the graph.
learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    # CHANGED in V2: run the graph once to initialize w1, w2
    sess.run(tf.global_variables_initializer())
    # numpy arrays to fill placeholders (weights are no longer fed)
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # train the network
    for t in range(50):
        # Only `loss` is fetched here. new_w1 / new_w2 are never requested,
        # so the assign ops do not execute and the weights keep their
        # initial values -- this is the flaw this version demonstrates.
        loss_val, = sess.run([loss], feed_dict=values)
        

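'''
Problem: the loss does not go down, because the assign ops (new_w1, new_w2)
are never passed to sess.run, so the weights are never updated
'''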


In [None]:
'''V3: group the assign ops into one node and run it every step (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))  # create placeholders
y = tf.placeholder(tf.float32, shape=(N, D))
# create Variables; tf.Variable needs an initial value tensor, not a dtype
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# squared L2 distance between y_pred and y per row, averaged over the N rows
loss = tf.reduce_mean(tf.reduce_sum(diff**2, axis=1))

# gradient of loss / no computation. just build.
# One gradient per weight matrix: the second entry must be w2, not w1 again.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# CHANGED in V3: add a dummy graph node that depends on both assign ops,
# so fetching it forces both weight updates to run.
updates = tf.group(new_w1, new_w2)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # run graph once to initialize w1, w2
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # train the network
    for t in range(50):
        # CHANGED in V3: fetch the dummy node along with the loss; the
        # group op returns nothing, hence the throwaway `_`.
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        



### Optimization

In [None]:
'''original: hand-written gradients and assign ops, before using an optimizer (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
# tf.Variable needs an initial value tensor, not a dtype
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# squared L2 distance between y_pred and y per row, averaged over the N rows
loss = tf.reduce_mean(tf.reduce_sum(diff**2, axis=1))

# gradient of loss / no computation. just build.
# One gradient per weight matrix: the second entry must be w2, not w1 again.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# manual gradient-descent step expressed as assign ops
learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# dummy graph node that depends on both assign ops
updates = tf.group(new_w1, new_w2)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # run graph once to initialize w1, w2
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    # train the network
    for t in range(50):
        # run the graph & compute the dummy node (it returns nothing)
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        

In [None]:
'''optimization: let a tf.train optimizer compute gradients and update the weights (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
# tf.Variable needs an initial value tensor, not a dtype
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# squared L2 distance between y_pred and y per row, averaged over the N rows
loss = tf.reduce_mean(tf.reduce_sum(diff**2, axis=1))

# CHANGED: the optimizer replaces the manual tf.gradients / assign / group
# code; minimize() builds the op that computes gradients and updates W.
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # run graph once to initialize w1, w2
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    losses = []  # loss value recorded at every training step
    # train the network
    for t in range(50):
        # run the graph & exec optimizer
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        losses.append(loss_val)


### Loss

In [None]:
'''Loss: use a predefined loss from tf.losses instead of writing it by hand (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
# tf.Variable needs an initial value tensor, not a dtype
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# forward pass / no computation. just build.
h = tf.maximum(tf.matmul(x, w1), 0)  # ReLU hidden layer
y_pred = tf.matmul(h, w2)

# CHANGED: predefined loss. Keywords are used because the positional order
# is (labels, predictions); passing (y_pred, y) positionally swaps the roles.
loss = tf.losses.mean_squared_error(labels=y, predictions=y_pred)

# optimizer: compute grad & update W
optimizer = tf.train.GradientDescentOptimizer(1e-3)
updates = optimizer.minimize(loss)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # run graph once to initialize w1, w2
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    losses = []  # loss value recorded at every training step
    # train the network
    for t in range(50):
        # run the graph & exec optimizer
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        losses.append(loss_val)


### Layers

In [None]:
'''Layers: let tf.layers create and initialize the weights (TensorFlow 1.x graph API).'''
import numpy as np
import tensorflow as tf

# 1. define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))

# CHANGED: forward pass built from tf.layers, which sets up the weight
# variables automatically / no computation. just build.
# The explicit matmul lines on w1 / w2 are gone: those names are not defined
# in this cell, and they would have overwritten the dense-layer outputs.
init = tf.contrib.layers.xavier_initializer()  # Xavier initializer
h = tf.layers.dense(inputs=x, units=H, activation=tf.nn.relu, kernel_initializer=init)
y_pred = tf.layers.dense(inputs=h, units=D, kernel_initializer=init)

# predefined loss; keywords make the (labels, predictions) roles explicit
loss = tf.losses.mean_squared_error(labels=y, predictions=y_pred)

# optimizer: compute grad & update W
optimizer = tf.train.GradientDescentOptimizer(1e-3)
updates = optimizer.minimize(loss)

# 2. run the graph many times with feeding data
with tf.Session() as sess:
    # run graph once to initialize the variables created by the dense layers
    sess.run(tf.global_variables_initializer())
    # numpy arrays to fill placeholders
    values = {
        x: np.random.randn(N, D),
        y: np.random.randn(N, D),
    }

    losses = []  # loss value recorded at every training step
    # train the network
    for t in range(50):
        # run the graph & exec optimizer
        loss_val, _ = sess.run([loss, updates], feed_dict=values)
        losses.append(loss_val)
