# Recreating Gradient Descent Algorithm with TensorFlow 2.0

#### All needed imports

In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

## We will simulate the following formula: 2*X -7*W + Bias

### Generating data

In [2]:
observations = 25000

X = np.random.uniform(low = -10, high = 10, size = (observations, 1))
W = np.random.uniform(low = -10, high = 10, size = (observations, 1))

INPUTS = np.column_stack((X, W))

noise = np.random.uniform(low = -1, high = 1, size = (observations, 1))

In [3]:
X, X.shape

(array([[-2.62299213],
        [ 4.64955799],
        [ 4.28726575],
        ...,
        [ 2.33593824],
        [-3.0926153 ],
        [ 3.29312211]]), (25000, 1))

In [4]:
W, W.shape

(array([[ 9.26464939],
        [-5.94606681],
        [ 2.30662044],
        ...,
        [ 6.36647994],
        [ 0.52967216],
        [-9.45511758]]), (25000, 1))

In [5]:
INPUTS, INPUTS.shape

(array([[-2.62299213,  9.26464939],
        [ 4.64955799, -5.94606681],
        [ 4.28726575,  2.30662044],
        ...,
        [ 2.33593824,  6.36647994],
        [-3.0926153 ,  0.52967216],
        [ 3.29312211, -9.45511758]]), (25000, 2))

In [6]:
noise, noise.shape

(array([[-0.13851105],
        [-0.06652155],
        [ 0.27689163],
        ...,
        [-0.00527711],
        [ 0.25470859],
        [-0.58796814]]), (25000, 1))

In [7]:
TARGETS = 2*X - 7*W + 10 + noise
TARGETS

array([[-60.23704107],
       [ 60.85506211],
       [  2.70508009],
       ...,
       [-29.8987602 ],
       [  0.36177286],
       [ 82.18409913]])

#### Saving generated Data

In [8]:
np.savez('TF_INTRO', inputs=INPUTS, targets=TARGETS)

### Building the Model

#### Loading Data

In [9]:
training_data = np.load('TF_INTRO.npz')

#### Variables that measure the size of our inputs and outputs

In [10]:
input_size = 2
output_size = 1

#### Building the Model

1. <i>Sequential</i> specifies how the model will be laid down (stacked)


2. <i>Dense</i> layer takes the inputs provided to the model and calculates the dot product of the inputs and the weights and adds bias. 

FROM TENSORFLOW DOCS: `Dense` implements the operation:
`output = activation(dot(input, kernel) + bias)`
where `activation` is the element-wise activation function
passed as the `activation` argument, `kernel` is a weights matrix
created by the layer, and `bias` is a bias vector created by the layer

In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(output_size)
])

# configuring the model for training
model.compile(optimizer='SGD', loss='mean_squared_error')

model.fit(training_data['inputs'], training_data['targets'], epochs = 100, verbose = 0)

<tensorflow.python.keras.callbacks.History at 0x7f8d9f2831d0>

Setting <i>verbose = 0</i> makes the model "silent", thus no output is given

Set <i>verbose</i> to "2" to obtain one-line-output

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  3         
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.fit(training_data['inputs'], training_data['targets'], epochs = 100, verbose = 1)

Train on 25000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/10

Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f8d9e6b6b10>

### Extracting weights and biases

Output is a tensor with two arrays - one for the weights, one for the biases

In [27]:
model.layers[0].get_weights()

[array([[ 1.9937725],
        [-7.0247025]], dtype=float32), array([10.006209], dtype=float32)]

### It worked great!

### Extracting the outputs, making predictions

In [28]:
model.predict_on_batch(training_data['inputs'])

array([[ -0.4158182 ],
       [ 69.27829   ],
       [  0.45665264],
       ...,
       [ 22.525946  ],
       [ 21.836842  ],
       [-48.28674   ]], dtype=float32)

In [30]:
training_data['targets']

array([[  0.09531858],
       [ 69.86728164],
       [  1.26979627],
       ...,
       [ 23.57069651],
       [ 21.64203904],
       [-47.23952255]])

In [31]:
model.evaluate(training_data['inputs'])



0.0

## Customizing the Model

In [39]:
keras.backend.clear_session()

In [40]:
model_ctd = tf.keras.Sequential([
    tf.keras.layers.Dense(output_size, kernel_initializer = tf.random_uniform_initializer(-0.1, 0.1),
                         bias_initializer = tf.random_uniform_initializer(-0.1, 0.1))
])

custom_optimizer = tf.keras.optimizers.SGD(learning_rate=0.002)

model_ctd.compile(optimizer=custom_optimizer, loss='huber_loss')

model_ctd.fit(training_data['inputs'], training_data['targets'], epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fb2ddf04a90>

In [41]:
model.layers[0].get_weights()

[array([[ 1.9937725],
        [-7.0247025]], dtype=float32), array([10.006209], dtype=float32)]

### Loss is much smaller!