#### Single LTU(linear threshold unit) network using perceptrons 

<em>The decision boundary of each output neuron is linear, so Perceptrons are incapable
of learning complex patterns (just like Logistic Regression classifiers)

In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from warnings import filterwarnings

filterwarnings(action='ignore')

In [None]:
iris = load_iris()
X = iris.data[:, (2, 3)] # petal length, petal width
y = (iris.target == 0).astype(np.int) # Iris Setosa?

per_clf = Perceptron(random_state=42)
per_clf.fit(X, y)
y_pred = per_clf.predict([[2, 0.5]])
y_pred

#### Training an MLP with TensorFlow’s High-Level API

<em>The following
code trains a DNN for classification with two hidden layers (one with 300
neurons, and the other with 100 neurons) and a softmax output layer with 10
neurons:

In [None]:
import tensorflow as tf

# (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [None]:
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)
# dnn_clf.train(input_fn=input_fn)

In [None]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test}, y=y_test, shuffle=False)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

In [None]:
eval_results

## Using plain TensorFlow

#### Construction phase

In [32]:
import tensorflow as tf 
import numpy as np

tf.reset_default_graph()

n_inputs = 28*28 
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [33]:
# Setting placeholders for X and y 

X = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(dtype=tf.int32, shape=(None), name='y')

In [34]:
# function that we will use to create one layer at a time.

def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        std_dev = 2/np.sqrt(n_inputs)
        init = tf.random.truncated_normal(shape=(n_inputs, n_neurons), stddev=std_dev)
        W = tf.Variable(init, name='weights')
        b = tf.Variable(tf.zeros([n_neurons]), name='biases')
        z = tf.matmul(X, W) + b 
        
        if activation == 'relu':
            return tf.nn.relu(z)
        else:
            return z

In [None]:
# Create the DNN (Deep neural network)

with tf.name_scope('dnn'):
    hidden1 = neuron_layer(X, n_hidden1, 'hidden1', activation='relu')
    hidden2 = neuron_layer(hidden1, n_hidden2, 'hidden2', activation='relu')
    output  = neuron_layer(hidden2, n_outputs, 'outputs')

<em>TensorFlow’s fully_connected() function creates a fully connected layer, where all the inputs are connected to
all the neurons in the layer. It takes care of creating the weights and biases variables,
with the proper initialization strategy, and it uses the ReLU activation function by
default

In [35]:
# Using the fully connected instead of the created construction function 

from tensorflow.contrib.layers import fully_connected

with tf.name_scope('dnn'):
    hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, scope="outputs",activation_fn=None)
    
# logits are the output of the network before going through the softmax activation function

In [36]:
# We are using cross-entropy as out loss function 

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [37]:
# define a GradientDescentOptimizer that will tweak the model parameters to minimize the cost function.

learning_rate = 0.01 

with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

In [38]:
# Specify how to evaluate the model.

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [41]:
# Writing log_directories for tensorboard 

from datetime import datetime

now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

cost_summary = tf.summary.scalar('Cross_entropy_cost', tensor=loss)
fileWriter = tf.summary.FileWriter(logdir=logdir, graph=tf.get_default_graph())

In [42]:
# create a node to initialize all variables, and we will also create a Saver to save our trained model parameters to disk

init = tf.global_variables_initializer()
saver = tf.train.Saver()

### Execution Phase

In [43]:
# Prepairing the data

from scipy.io import loadmat

mnist_org = loadmat('mnist-original.mat')
data = mnist_org['data'].T
targets = mnist_org['label'].T
X_train, X_test, y_train, y_test = data[:60000], data[60000:], targets[:60000], targets[60000:]

shuffle_index = np.random.permutation(60000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
y_train, y_test = y_train.ravel(), y_test.ravel()

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [44]:
n_epochs = 20
batch_size = 50
n_batches = X_train.shape[0]//batch_size

In [21]:
# Next batch function to quickly extract mini-batches 

def next_batch(size, X, y):
    index = np.random.randint(X.shape[0], size=size)
    X_batch = X[index]
    y_batch = y[index]
    return X_batch, y_batch

In [45]:
# Training the model 

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = next_batch(batch_size, X_train, y_train)
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
            
            if iteration % 10 == 0:
                summary_str = cost_summary.eval(feed_dict={X: X_batch, y: y_batch})
                step = epoch * n_batches + iteration 
                fileWriter.add_summary(summary_str, step)
                
        acc_train = accuracy.eval(feed_dict = {X: X_train, y: y_train})
        acc_test = accuracy.eval(feed_dict = {X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
        
    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Train accuracy: 0.9296833 Test accuracy: 0.9225
1 Train accuracy: 0.9497167 Test accuracy: 0.9406
2 Train accuracy: 0.9601 Test accuracy: 0.9492
3 Train accuracy: 0.96635 Test accuracy: 0.9532
4 Train accuracy: 0.97175 Test accuracy: 0.9583
5 Train accuracy: 0.975 Test accuracy: 0.96
6 Train accuracy: 0.97851664 Test accuracy: 0.9608
7 Train accuracy: 0.9809333 Test accuracy: 0.964
8 Train accuracy: 0.983 Test accuracy: 0.9649
9 Train accuracy: 0.98503333 Test accuracy: 0.967
10 Train accuracy: 0.98658335 Test accuracy: 0.968
11 Train accuracy: 0.9881667 Test accuracy: 0.9687
12 Train accuracy: 0.98898333 Test accuracy: 0.9691
13 Train accuracy: 0.99053335 Test accuracy: 0.9707
14 Train accuracy: 0.99121666 Test accuracy: 0.9702
15 Train accuracy: 0.99256665 Test accuracy: 0.9713
16 Train accuracy: 0.9931333 Test accuracy: 0.9721
17 Train accuracy: 0.9942167 Test accuracy: 0.9712
18 Train accuracy: 0.99475 Test accuracy: 0.9729
19 Train accuracy: 0.99516666 Test accuracy: 0.9727


In [None]:
fileWriter.close()

#### Using the Neural Network

In [27]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
    Z = logits.eval(feed_dict={X: X_test})
    y_pred = np.argmax(Z, axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


<em>If you wanted to know all the estimated class probabilities, you would need to apply
the softmax() function to the logits, but if you just want to predict a class, you can
simply pick the class that has the highest logit value (using the argmax() function
does the trick).