In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow_graph_in_jupyter import show_graph

In [2]:
#Construction Phase

In [3]:
#set number of inputs and outputs and the number of hidden neurons in each layers
n_inputs = 28*28 
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [4]:
#set the placeholders for the training and target data
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [5]:
#creating the neuron layer construction function
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2/np.sqrt(n_inputs + n_neurons) 
        #initialize the weights by using truncated normal to speed up the process
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")#weights
        b = tf.Variable(tf.zeros([n_neurons]), name="bias") #bias parametes (1 per each neuron)
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [6]:
#creating the actual deep neural network
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    y_proba = tf.nn.softmax(logits)
    #outputs of the network before going to softmax activation function fro optimization reasons

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.


In [7]:
#define the cost function as cross entropy
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss") #compute the mean cross entropy cross all instances

In [8]:
#minimize the cost function
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [9]:
#evaluate the model accuracy
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [10]:
#initialize all variables and save the model to local machine
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [11]:
#Excition Pahse

In [12]:
#loading the data using tensorFlow helper function
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [13]:
n_epochs = 40
batch_size = 50

In [14]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [15]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Val accuracy:", acc_val)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.88 Val accuracy: 0.9084
1 Batch accuracy: 0.92 Val accuracy: 0.923
2 Batch accuracy: 0.96 Val accuracy: 0.9352
3 Batch accuracy: 0.98 Val accuracy: 0.9422
4 Batch accuracy: 0.98 Val accuracy: 0.9474
5 Batch accuracy: 0.9 Val accuracy: 0.9532
6 Batch accuracy: 1.0 Val accuracy: 0.9544
7 Batch accuracy: 1.0 Val accuracy: 0.9588
8 Batch accuracy: 0.94 Val accuracy: 0.9622
9 Batch accuracy: 0.98 Val accuracy: 0.9616
10 Batch accuracy: 0.98 Val accuracy: 0.9644
11 Batch accuracy: 1.0 Val accuracy: 0.967
12 Batch accuracy: 0.96 Val accuracy: 0.9676
13 Batch accuracy: 0.98 Val accuracy: 0.9676
14 Batch accuracy: 1.0 Val accuracy: 0.9686
15 Batch accuracy: 0.96 Val accuracy: 0.971
16 Batch accuracy: 0.98 Val accuracy: 0.9726
17 Batch accuracy: 1.0 Val accuracy: 0.972
18 Batch accuracy: 0.98 Val accuracy: 0.9728
19 Batch accuracy: 1.0 Val accuracy: 0.9756
20 Batch accuracy: 1.0 Val accuracy: 0.975
21 Batch accuracy: 1.0 Val accuracy: 0.9752
22 Batch accuracy: 1.0 Val accurac

In [16]:
with tf.Session() as sess:
    saver.restore(sess, save_path) 
    X_new_scaled = X_test[:20]
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [17]:
y_proba

<tf.Tensor 'dnn/Softmax:0' shape=(?, 10) dtype=float32>

In [18]:
print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

Predicted classes: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [19]:
show_graph(tf.get_default_graph())

In [20]:
softmax(Z)

NameError: name 'softmax' is not defined