### 3.3.1 Simple Neural Network

- Author: Phanxuan Phuc
- Project: https://github.com/phanxuanphucnd/TensorFlow-2.0-Tutorial



#### Neural Network Overview

<img src="http://cs231n.github.io/assets/nn1/neural_net2.jpeg" alt="nn" style="width: 400px;"/>

#### MNIST Dataset Overview

This example uses the MNIST handwritten digits dataset, which contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in fixed-size images (28x28 pixels) with pixel values from 0 to 255.

In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28).

![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png)

In [1]:
import numpy as np
import tensorflow as tf

2021-07-08 22:54:21.138842: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-07-08 22:54:21.138868: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Dataset parameters: MNIST
num_classes = 10  # number of output classes (one per digit)
num_features = 784  # length of a flattened image (28 * 28 pixels)

# Training parameters
learning_rate = 0.001  # step size passed to the SGD optimizer
batch_size = 256  # number of examples per training batch
training_steps = 3000  # number of batches taken from the training dataset
display_step = 500  # report loss/accuracy every this many steps

# Network parameters
n_hidden_1 = 128    # number of neurons in the 1st hidden layer
n_hidden_2 = 256    # number of neurons in the 2nd hidden layer

In [3]:
# Prepare MNIST Dataset

from tensorflow.keras.datasets import mnist

# Download (or load from the local cache) the train/test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# For each split, in order: cast pixels to float32, flatten every image to a
# 1-D vector of num_features values (28 * 28), then rescale from [0, 255]
# to [0, 1].
x_train = np.array(x_train, np.float32).reshape([-1, num_features]) / 255
x_test = np.array(x_test, np.float32).reshape([-1, num_features]) / 255

In [4]:
# Build the training input pipeline with the tf.data API
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .repeat()           # no count given: keep cycling through the data
    .shuffle(5000)      # shuffle using a 5000-element buffer
    .batch(batch_size)  # group consecutive examples into batches
    .prefetch(1)        # prepare one batch ahead of the consumer
)

2021-07-08 22:54:22.425627: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-07-08 22:54:22.425655: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-07-08 22:54:22.425675: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (phucphan-ThinkPad): /proc/driver/nvidia/version does not exist
2021-07-08 22:54:22.425975: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Trainable parameters of the network: one weight matrix and one bias vector
# per layer, kept in dictionaries keyed by layer name.

# Random value generator used to initialize the weights
random_normal = tf.initializers.RandomNormal()

# Weight matrices, shaped [inputs, outputs] for each layer
weights = dict(
    h1=tf.Variable(random_normal([num_features, n_hidden_1])),
    h2=tf.Variable(random_normal([n_hidden_1, n_hidden_2])),
    out=tf.Variable(random_normal([n_hidden_2, num_classes])),
)

# Bias vectors, initialized to zero, one entry per layer output
biases = dict(
    b1=tf.Variable(tf.zeros([n_hidden_1])),
    b2=tf.Variable(tf.zeros([n_hidden_2])),
    out=tf.Variable(tf.zeros([num_classes])),
)

In [6]:
# Create the model

def NeuralNet(x):
    """Run a forward pass through the two-hidden-layer network.

    Reads the module-level ``weights`` and ``biases`` dictionaries.

    Args:
        x: Batch of inputs; it is matrix-multiplied with ``weights['h1']``,
            so its last dimension must match that matrix's first dimension.

    Returns:
        The softmax of the output layer, i.e. a probability distribution
        over the classes for every input row.
    """
    # First hidden fully connected layer, followed by a sigmoid non-linearity
    hidden_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1']))

    # Second hidden fully connected layer, followed by a sigmoid non-linearity
    hidden_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2'])
    )

    # Output fully connected layer with one neuron (logit) per class
    logits = tf.add(tf.matmul(hidden_2, weights['out']), biases['out'])

    # Softmax turns the logits into a probability distribution
    return tf.nn.softmax(logits)

In [7]:
# Cross-entropy loss function
def cross_entropy(y_pred, y_true):
    """Compute the cross-entropy between predictions and integer labels.

    Args:
        y_pred: Predicted class probabilities, one row per example.
        y_true: Integer class labels; they are one-hot encoded with
            ``num_classes`` entries before being compared to ``y_pred``.

    Returns:
        A scalar tensor. ``tf.reduce_sum`` is called without an axis, so it
        already sums over every element of the batch; the enclosing
        ``tf.reduce_mean`` therefore receives a scalar and leaves it
        unchanged. The value is the cross-entropy summed over the batch,
        not averaged per example.
    """
    # Encode the labels as one-hot vectors
    one_hot_labels = tf.one_hot(y_true, depth=num_classes)

    # Clip the predictions to avoid taking log(0)
    clipped_pred = tf.clip_by_value(y_pred, 1e-9, 1.)

    # Negative log-likelihood accumulated over all examples and classes
    summed_nll = -tf.reduce_sum(one_hot_labels * tf.math.log(clipped_pred))

    return tf.reduce_mean(summed_nll)


# Metric accuracy
def accuracy(y_pred, y_true):
    """Return the fraction of predictions whose top class equals the label.

    Args:
        y_pred: Per-class scores, one row per example; the predicted class
            is the argmax along axis 1.
        y_true: Integer class labels; cast to int64 to match the argmax
            output before comparison.

    Returns:
        The mean, along the last axis, of the 0/1 correctness indicators.
    """
    predicted_labels = tf.argmax(y_pred, 1)
    expected_labels = tf.cast(y_true, tf.int64)

    # 1.0 where the prediction matches the label, 0.0 otherwise
    hits = tf.cast(tf.equal(predicted_labels, expected_labels), tf.float32)

    return tf.reduce_mean(hits, axis=-1)

# Stochastic gradient descent optimizer using the configured learning rate
optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

In [8]:
# Optimization process
def run_optimization(x, y):
    """Perform one optimization step on a single batch.

    Runs the forward pass and loss under a ``tf.GradientTape``, computes the
    gradients of the loss with respect to every entry of the module-level
    ``weights`` and ``biases`` dictionaries, and applies them with the
    module-level ``optimizer``.

    Args:
        x: Batch of inputs passed to ``NeuralNet``.
        y: Labels for the batch, passed to ``cross_entropy``.
    """
    # Variables to update: all weight matrices followed by all bias vectors
    variables = [*weights.values(), *biases.values()]

    # Record the forward computation so it can be differentiated
    with tf.GradientTape() as tape:
        loss = cross_entropy(NeuralNet(x), y)

    # Gradients come back in the same order as `variables`
    grads = tape.gradient(loss, variables)

    # Update W and b following the gradients
    optimizer.apply_gradients(zip(grads, variables))

In [9]:
# Training loop: consume `training_steps` batches, counting steps from 1
for step, (batch_x, batch_y) in enumerate(train_dataset.take(training_steps), 1):
    # Update W and b using the current batch
    run_optimization(batch_x, batch_y)

    # Only report progress every `display_step` steps
    if step % display_step != 0:
        continue

    # Re-evaluate the just-updated model on the same batch for reporting
    pred = NeuralNet(batch_x)
    loss = cross_entropy(pred, batch_y)
    acc = accuracy(pred, batch_y)

    print(f"Step: {step}, loss: {loss}, accuracy: {acc}")

Step: 500, loss: 150.621826171875, accuracy: 0.8125
Step: 1000, loss: 84.77633666992188, accuracy: 0.90625
Step: 1500, loss: 67.48261260986328, accuracy: 0.9296875
Step: 2000, loss: 68.82249450683594, accuracy: 0.93359375
Step: 2500, loss: 43.670745849609375, accuracy: 0.93359375
Step: 3000, loss: 50.74599838256836, accuracy: 0.9453125
