# Tensorflow Tutorial

## Importing MNIST

In [1]:
import tensorflow as tf

  return f(*args, **kwds)


In [5]:
from tensorflow.examples.tutorials.mnist import input_data

In [15]:
import numpy as np

In [23]:
def strass(A, B, steps):

    #Check Dimensions
    # tensor.get_shape().as_list()
    (m, n) = A.get_shape().as_list()
    (nn, p) = B.get_shape().as_list()

    #old code case m, n, nn, and p as ints

    if n != nn: raise ValueError("incompatible dimensions")
    C = tf.zeros([m,p])

    #Base case
    if steps == 0 or m ==1 or n ==1 or p == 1:
        C = tf.matmul(A,B)
        return C

    #Dynamic peeling
    # *****************
    if m % 2 == 1:
        #C[:m-1, :]
        Cmat= strass(A[:m-1,:],B, steps)
        #C[m-1,:], need to expand the dims b/c tf.matmul doesn't work for 1D vectors
        Crow = tf.matmul(tf.expand_dims(A[m-1,:],0),B)
        return tf.concat([Cmat, Crow], 0)
    if n % 2 == 1:
        Cmat = strass(A[:, :n-1], B[:n-1,:], steps)
        C = tf.add(Cmat,  tf.matmul(tf.expand_dims(A[:,n-1],1),tf.expand_dims(B[n-1,:],0)))
        return C
    if p % 2 == 1:
        #C[:, :p-1]
        Cmat = strass(A, B[:,:p-1], steps)
        #C[:,p-1]
        Ccol = tf.matmul(A,tf.expand_dims(B[:,p-1],1))
        return tf.concat([Cmat, Ccol], 1)

    # divide when m, n and p are all even
    m2 = int(m/2)
    n2 = int(n/2)
    p2 = int(p/2)
    A11 = A[:m2,:n2]
    A12 = A[:m2,n2:]
    A21 = A[m2:,:n2]
    A22 = A[m2:,n2:]
    B11 = B[:n2,:p2]
    B12 = B[:n2,p2:]
    B21 = B[n2:,:p2]
    B22 = B[n2:,p2:]

    # conquer
    M1 = strass(A11, tf.subtract(B12,B22)   ,steps-1)
    M2 = strass(tf.add(A11,A12), B22   ,steps-1)
    M3 = strass(tf.add(A21,A22),B11    ,steps-1)
    M4 = strass(A22    ,tf.subtract(B21,B11),steps-1)
    M5 = strass(tf.add(A11, A22), tf.add(B11, B22),steps-1)
    M6 = strass( tf.subtract(A12,A22), tf.add(B21,B22),steps-1)
    M7 = strass(tf.subtract(A11,A21), tf.add(B11, B12),steps-1)

    # conquer
    #C[:m2,:p2]
    C11 = tf.add(tf.subtract(tf.add(M5, M4), M2), M6)
    #C[:m2,p2:]
    C12 = tf.add(M1, M2)
    #C[m2:,:p2]
    C21 = tf.add(M3,M4)
    #C[m2:,p2:]
    C22 = tf.subtract(tf.subtract(tf.add(M1,M5), M3), M7)

    # nation building
    C1 = tf.concat([C11, C12], 1)
    C2 = tf.concat([C21,C22], 1)
    C = tf.concat([C1,C2], 0)

    return C

In [6]:
mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [9]:
X_train = mnist.train.images
X_test = mnist.test.images
y_train = mnist.train.labels.astype("int")
y_test = mnist.test.labels.astype("int")

## Using tf.layers.dense()

In [24]:
tf.reset_default_graph()

In [25]:
n_inputs = 28*28  # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

Placeholder nodes represent the training data and targets. The shape of X and Y is only partailly defined because we know it will be 2D but we don't know how many instances each training batch will contain

In [26]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

In [27]:
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        
        Z = strass(X, W, 1) + b #tf.matmul(X, W) + b 
        
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [28]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

ValueError: Cannot convert a partially known TensorShape to a Tensor: (?, 300)

note that logits is the output of the network before going through the output activation funtion (for optimization reasons)

The random initilaztions of the weights are already built into dense()

In [17]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

tf.nn.sparse_softmax_cross_entropy_with_logits() function is equivalent to applying the softmax activation function and then computing the cross entropy. This is why there is no activation function in the last layer of dnn

Now we define a GradientDescentOptimizer that will tweak the model parameters to minimize the cost function

In [18]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

We now need to specify how to evaluate the model. We will use accuracy as our performance measure. To determine if the network's prediction is correct we check whether or not the highest logit corresponds to the target class. We do this with in_top_k(). This returns a vector of boolean values, we need to cast theses to floats and then compute the average. This will give us the network's overall accuracy.

In [19]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

Now a node to initalize all the variables and create a saver

In [20]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [21]:
n_epochs = 10
batch_size = 50

In [22]:
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                            y: mnist.test.labels})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

    #save_path = saver.save(sess, "./my_model_final.ckpt")
    #use the above line if you want to save and reuse the network later on

0 Train accuracy: 0.96 Test accuracy: 0.9103
1 Train accuracy: 0.92 Test accuracy: 0.9284
2 Train accuracy: 0.94 Test accuracy: 0.9354
3 Train accuracy: 0.96 Test accuracy: 0.9438
4 Train accuracy: 0.96 Test accuracy: 0.9489
5 Train accuracy: 1.0 Test accuracy: 0.9531
6 Train accuracy: 0.96 Test accuracy: 0.9563
7 Train accuracy: 0.96 Test accuracy: 0.9579
8 Train accuracy: 1.0 Test accuracy: 0.9591
9 Train accuracy: 0.98 Test accuracy: 0.9628
