# Regression with Automatic Differentiation in TensorFlow

# Task 1: TensorFlow

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Report the installed TensorFlow version and the physical devices it can see.
print(f'Using TensorFlow version: {tf.__version__}')
print(f'Devices available: {tf.config.list_physical_devices()}')

# Task 2: Constants

In [None]:
# Build a constant tensor from a nested Python list (one row, three columns).
# Left as a bare expression so the notebook displays the resulting tensor.
tf.constant([[1, 2, 3]])

In [None]:
# Convert the same nested list to a tensor, letting TensorFlow pick the dtype.
tf.convert_to_tensor([[1, 2, 3]])

In [None]:
# Same conversion, but with the element type explicitly requested as float32.
tf.convert_to_tensor([[1, 2, 3]], dtype=tf.float32)

In [None]:
# Round trip: build the tensor, then hand its contents back as a NumPy array.
tf.convert_to_tensor([[1, 2, 3]]).numpy()

# Task 3: Variables

In [None]:
# Wrap the nested list (one row, three columns) in a tf.Variable.
# Left as a bare expression so the notebook displays the variable.
tf.Variable([[1, 2, 3]])

In [None]:
# A scalar variable whose stored value is replaced in place by assign().
v = tf.Variable(1)
print(f'Initial value: {v.numpy()}')

v.assign(2)
print(f'New value: {v.numpy()}')

In [None]:
# Multiply a (2, 3) tensor by a (3, 1) variable, both filled with random
# values drawn by NumPy; the product therefore has shape (2, 1).
c = tf.convert_to_tensor(np.random.randn(2, 3))
v = tf.Variable(np.random.randn(3, 1))

product = tf.matmul(c, v)
print(product)

# Task 4: Automatic Differentiation

Let's take a simple equation as an example:
\begin{equation}
y = x^3 ; \frac{dy}{dx} = 3x^2
\end{equation}

In [None]:
# Differentiate y = x^3 at x = 3.
x = tf.Variable(3.0)

# The computation of y runs inside the tape context so that the tape can be
# asked for dy/dx afterwards.
with tf.GradientTape() as tape:
    y = x ** 3

dy_dx = tape.gradient(y, x)
print(f'gradient at x={x.numpy()} is {dy_dx.numpy()}')

What about higher-order gradients?

\begin{equation}
y = x^3 ; \frac{dy}{dx} = 3x^2 ; \frac{d^2 y}{dx^2} = 6x
\end{equation}

In [None]:
x = tf.Variable(3.0)

# The inner tape produces dy/dx. That gradient call itself runs inside the
# outer tape's context, so the outer tape can differentiate dy/dx once more.
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        y = x ** 3
    dy_dx = inner_tape.gradient(y, x)
d2y_dx2 = outer_tape.gradient(dy_dx, x)

print(f'2nd order gradient at x={x.numpy()} is {d2y_dx2.numpy()}')

# Task 5: Watching Tensors

In [None]:
# Here x is a constant and is never registered with tape.watch().
x = tf.constant(3.0)

with tf.GradientTape() as tape:
    y = x ** 3
dy_dx = tape.gradient(y, x)

# Tapes only track trainable variables automatically, so for an unwatched
# constant the requested gradient is expected to come back as None.
print(dy_dx)

In [None]:
x = tf.constant(3.0)

with tf.GradientTape() as tape:
    # Explicitly ask the tape to track the constant before it is used.
    tape.watch(x)
    y = x ** 3
dy_dx = tape.gradient(y, x)

print(dy_dx)

# Task 6: Persistent Tape

\begin{equation}
y = x^3 ; z = 2y ; \frac{dz}{dx} = \frac{dz}{dy} \cdot \frac{dy}{dx}
\end{equation}

In [None]:
x = tf.Variable(3.0)

# persistent=True lets gradient() be called on this tape more than once.
with tf.GradientTape(persistent=True) as tape:
    y = x ** 3
    z = 2 * y

dz_dy = tape.gradient(z, y)
dy_dx = tape.gradient(y, x)
dz_dx = tape.gradient(z, x)

# Drop the reference to the persistent tape once all gradients are taken.
del tape

print(f'dz_dy = {dz_dy.numpy()}')
print(f'dy_dx = {dy_dx.numpy()}')
print(f'dz_dx = {dz_dx.numpy()}')

# Chain-rule check: the product of the two partial results is printed under
# the same label as the directly computed dz/dx for comparison.
print(f'dz_dx = {dy_dx.numpy() * dz_dy.numpy()}')

# Task 7: Generating Data for Linear Regression

Generate noisy samples from a simple linear model whose parameters we will then recover:

\begin{equation}
y = wx + b
\end{equation}

In [None]:
# Slope and intercept of the line the synthetic data is generated from.
true_w, true_b = 7.0, 4.0


def create_batch(batch_size=64):
    """Draw one random batch from the noisy line y = true_w * x + true_b.

    Args:
        batch_size: Number of samples in the batch.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays, each of shape ``(batch_size, 1)``.
        ``x`` is drawn with ``np.random.randn`` and ``y`` is the line
        evaluated at ``x`` plus a second, independent ``randn`` draw as noise.
    """
    shape = (batch_size, 1)
    # Inputs are drawn first and noise second; this order fixes the results
    # obtained under a given NumPy seed.
    x = np.random.randn(*shape)
    noise = np.random.randn(*shape)
    y = noise + true_w * x + true_b

    return x, y

In [None]:
# Sample one batch with the default batch size.
x, y = create_batch()

# The '.' format string draws the samples as individual point markers; the
# trailing semicolon keeps the notebook from echoing plt.plot's return value.
plt.plot(x, y, '.');

# Task 8: Linear Regression

In [None]:
# Fit y = w * x + b by mini-batch gradient descent on the mean squared error.
iterations = 100
lr = 0.03  # step size applied to each gradient

# Starting guesses for the slope and the intercept.
w = tf.Variable(10.0)
b = tf.Variable(1.0)

# Parameter values recorded after every update, kept for plotting.
param_history = {'w': [], 'b': []}

for i in range(iterations):
    # Draw a fresh batch; create_batch returns NumPy arrays, which are turned
    # into float32 tensors for the arithmetic with w and b.
    x_batch, y_batch = create_batch()
    x_batch = tf.constant(x_batch, dtype=tf.float32)
    y_batch = tf.constant(y_batch, dtype=tf.float32)

    with tf.GradientTape() as tape:
        # Named y_pred so the notebook-level `y` data array is not overwritten.
        y_pred = b + w * x_batch
        loss = tf.reduce_mean(tf.square(y_pred - y_batch))

    # Both gradients come from a single call, so the tape does not need to be
    # persistent and no manual `del tape` is required.
    dw, db = tape.gradient(loss, [w, b])

    w.assign_sub(lr * dw)
    b.assign_sub(lr * db)

    param_history['w'].append(w.numpy())
    param_history['b'].append(b.numpy())

    if i % 10 == 0:
        print(f'At iter {i}, w={w.numpy()}, b={b.numpy()}')

In [None]:
# Plot the learned parameter trajectories against the constant true values.
steps = range(iterations)
curves = (
    ('Learned W', param_history['w']),
    ('Learned b', param_history['b']),
    ('True W', [true_w] * iterations),
    ('True b', [true_b] * iterations),
)

plt.figure(figsize=(6, 6))
# Curves are drawn in the listed order, which also fixes the legend order.
for curve_label, curve_values in curves:
    plt.plot(steps, curve_values, label=curve_label)
plt.xlabel('Training Iterations')
plt.ylabel('Value')
plt.legend()
plt.show()