In [None]:
# Sanity check: list the compute devices TensorFlow can see in this process
# before any graph is built. The returned list is displayed as the cell output.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import time
import itertools as it
import os.path as osp
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.utils import shuffle
import time

## Load the MNIST dataset

In [None]:
# Load MNIST; reshape=False keeps every image as a 3-D array instead of a flat vector.
mnist = input_data.read_data_sets("MNIST_data/", reshape=False)

X_train = mnist.train.images
y_train = mnist.train.labels
X_validation = mnist.validation.images
y_validation = mnist.validation.labels
X_test = mnist.test.images
y_test = mnist.test.labels

# Every split must come with exactly one label per image.
assert len(X_train) == len(y_train)
assert len(X_validation) == len(y_validation)
assert len(X_test) == len(y_test)

print()
print("Image Shape: {}".format(X_train[0].shape))
print()
print("Training Set:   {} samples".format(len(X_train)))
print("Validation Set: {} samples".format(len(X_validation)))
print("Test Set:       {} samples".format(len(X_test)))

# Pad two constant (zero) pixels on each side of the height and width axes;
# the batch and channel axes are left untouched. The network below is fed
# 32x32x1 inputs.
lenet_padding = ((0, 0), (2, 2), (2, 2), (0, 0))
X_train, X_validation, X_test = [
    np.pad(split, lenet_padding, 'constant')
    for split in (X_train, X_validation, X_test)
]

print("Updated Image Shape: {}".format(X_train[0].shape))

# Shuffle images and labels together so the pairs stay aligned.
X_train, y_train = shuffle(X_train, y_train)

# Training configuration shared by the cells below.
EPOCHS = 10
BATCH_SIZE = 128

## Define LeNet. Notice that at the end of each layer we define a name for the layer (e.g. name="conv1")

In [None]:
from tensorflow.contrib.layers import flatten

def LeNet(x):
    """Build the LeNet forward pass and return the class logits.

    Args:
        x: float image tensor. The layer sizes below assume a
            (batch, 32, 32, 1) input.

    Returns:
        Tensor of unnormalised class scores with one column per class (10).

    Notes:
        * The pooling outputs and fully connected activations carry explicit
          names ("conv1", "conv2", "fc1", "fc2", "fc3") inside the "convo" and
          "fully_connected" name scopes so they can be fetched later with
          ``graph.get_tensor_by_name``.
        * "fc3" names the final matmul, i.e. the value *before* the bias is
          added; the returned logits include the bias.
        * Variables are created without explicit names, so their
          auto-generated names (and checkpoint keys) depend on creation
          order. Keep the order of the statements below when editing.
    """
    # Parameters of the truncated-normal weight initialiser.
    mu = 0
    sigma = 0.1

    # Output depth of every learned layer; all shapes below are derived from it.
    layer_depth = {
        'layer_1': 6,
        'layer_2': 16,
        'layer_3': 120,
        'layer_f1': 84,
    }
    num_classes = 10
    # Size of the flattened 5x5 feature map produced by the second pooling layer.
    flat_size = 5 * 5 * layer_depth['layer_2']

    with tf.name_scope('convo'):
        # Layer 1: convolution. Input = 32x32x1. Output = 28x28x6.
        conv1_w = tf.Variable(tf.truncated_normal(
            shape=[5, 5, 1, layer_depth['layer_1']], mean=mu, stddev=sigma))
        conv1_b = tf.Variable(tf.zeros(layer_depth['layer_1']))
        conv1 = tf.nn.conv2d(x, conv1_w, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
        conv1 = tf.nn.relu(conv1)

        # Pooling. Input = 28x28x6. Output = 14x14x6.
        pool_1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding='VALID', name="conv1")

        # Layer 2: convolution. Output = 10x10x16.
        conv2_w = tf.Variable(tf.truncated_normal(
            shape=[5, 5, layer_depth['layer_1'], layer_depth['layer_2']],
            mean=mu, stddev=sigma))
        conv2_b = tf.Variable(tf.zeros(layer_depth['layer_2']))
        conv2 = tf.nn.conv2d(pool_1, conv2_w, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
        conv2 = tf.nn.relu(conv2)

        # Pooling. Input = 10x10x16. Output = 5x5x16.
        pool_2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding='VALID', name="conv2")

    with tf.name_scope('fully_connected'):
        # Flatten. Input = 5x5x16. Output = 400.
        fc1 = flatten(pool_2)

        # Layer 3: fully connected. Input = 400. Output = 120.
        fc1_w = tf.Variable(tf.truncated_normal(
            shape=(flat_size, layer_depth['layer_3']), mean=mu, stddev=sigma))
        fc1_b = tf.Variable(tf.zeros(layer_depth['layer_3']))
        fc1 = tf.matmul(fc1, fc1_w) + fc1_b
        fc1 = tf.nn.relu(fc1, name="fc1")

        # Layer 4: fully connected. Input = 120. Output = 84.
        fc2_w = tf.Variable(tf.truncated_normal(
            shape=(layer_depth['layer_3'], layer_depth['layer_f1']),
            mean=mu, stddev=sigma))
        fc2_b = tf.Variable(tf.zeros(layer_depth['layer_f1']))
        fc2 = tf.matmul(fc1, fc2_w) + fc2_b
        fc2 = tf.nn.relu(fc2, name="fc2")

        # Layer 5: fully connected. Input = 84. Output = 10.
        fc3_w = tf.Variable(tf.truncated_normal(
            shape=(layer_depth['layer_f1'], num_classes), mean=mu, stddev=sigma))
        fc3_b = tf.Variable(tf.zeros(num_classes))
        logits = tf.matmul(fc2, fc3_w, name="fc3") + fc3_b
    return logits

In [None]:
# Build the training graph for the original LeNet in its own tf.Graph so it can
# be saved and later re-imported independently of anything else in the notebook.
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope('inputs'):
        # Named placeholders: later cells feed the images by name ("inputs/images:0").
        x = tf.placeholder(tf.float32, (None, 32, 32, 1), name="images")
        # `(None)` is just `None` (a completely unknown shape), not a 1-tuple;
        # `[None]` declares the intended 1-D batch of integer class ids.
        y = tf.placeholder(tf.int32, [None], name="labels")
        one_hot_y = tf.one_hot(y, 10)

    # Learning rate for the Adam optimiser.
    rate = 0.001

    logits = LeNet(x)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)
    loss_operation = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer(learning_rate = rate)
    training_operation = optimizer.minimize(loss_operation)

    # Fraction of examples in the fed batch whose arg-max logit matches the label.
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
    accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Created last so that it covers every variable defined above.
    saver = tf.train.Saver()

In [None]:
def evaluate(X_data, y_data):
    """Return the mean accuracy of the model over a whole dataset.

    The data is pushed through ``accuracy_operation`` in slices of
    ``BATCH_SIZE``. Each batch accuracy is weighted by the number of examples
    in that batch, so a shorter final batch is accounted for correctly.

    Must be called while a TensorFlow session is installed as the default
    session (e.g. inside ``with tf.Session(...) as sess:``).

    Args:
        X_data: sequence of input images.
        y_data: sequence of labels aligned with ``X_data``.

    Returns:
        Accuracy over all examples of ``X_data``.
    """
    session = tf.get_default_session()
    sample_count = len(X_data)
    weighted_hits = 0
    for first in range(0, sample_count, BATCH_SIZE):
        last = first + BATCH_SIZE
        images = X_data[first:last]
        labels = y_data[first:last]
        batch_accuracy = session.run(accuracy_operation,
                                     feed_dict={x: images, y: labels})
        # Weight by the real batch length: the last slice may be shorter.
        weighted_hits += batch_accuracy * len(images)
    return weighted_hits / sample_count

## Please notice the accuracy achieved by running the original LeNet (it takes about 4 minutes to train 10 epochs)

In [None]:
# Train the original LeNet for EPOCHS epochs, report the validation accuracy
# after every epoch, save a checkpoint and print the total wall-clock time.

# NOTE(review): `config` is built here but never passed to tf.Session below,
# so it has no effect on this run. Confirm the intent of {'CPU': 0} before
# wiring it in.
config = tf.ConfigProto(
        device_count = {'CPU': 0}
    )
# Wall-clock start for the runtime report printed at the end of the cell.
start = time.time()

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)
    
    print("Training...")
    print()
    for i in range(EPOCHS):
        # Reshuffle every epoch so the mini-batches differ between epochs.
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            # `end` is the exclusive slice bound of the current batch (an index, not a time).
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y})
            
        # evaluate() picks up `sess` as the default session of this `with` block.
        validation_accuracy = evaluate(X_validation, y_validation)
        print("EPOCH {} ...".format(i+1))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()
        
    # Writes the './lenet' checkpoint files; './lenet.meta' is imported by a later cell.
    saver.save(sess, './lenet')
    print("Model saved")
#your code here    
# Elapsed time in minutes (time.time() is in seconds).
print('runtime= ',(time.time() - start)/60)

## Try to figure out the changes that happen to the original LeNet

In [None]:
# Re-create the saved LeNet graph structure inside a fresh, separate tf.Graph.
old_lenet_graph = tf.Graph()
with old_lenet_graph.as_default():
    # Importing the meta graph populates old_lenet_graph with the saved graph
    # definition and returns a Saver for it (this rebinds the global `saver`).
    # NOTE(review): no saver.restore(...) call is visible in this cell, so the
    # trained weights are presumably not loaded at this point -- confirm.
    saver = tf.train.import_meta_graph("./lenet.meta")

## Check the previous configuration of LeNet

In [None]:
# Look up each named layer output of the imported LeNet and print the tensor
# (its printed form includes the name, shape and dtype).
for layer_tensor_name in (
    "convo/conv1:0",
    "convo/conv2:0",
    "fully_connected/fc1:0",
    "fully_connected/fc2:0",
    "fully_connected/fc3:0",
):
    print(old_lenet_graph.get_tensor_by_name(layer_tensor_name))

# Exercise
## Try to understand and explain the next class and what it does to the original LeNet

In [None]:
class TransferedLeNet:
    """Transfer-learning head attached to a previously imported LeNet graph.

    The original network is reused up to the tensor ``convo/conv2:0`` (the
    output of its second pooling layer). Its three fully connected layers are
    replaced by a single new 5x5 'VALID' convolution with one output channel
    per target class. Only the variables of that new layer are trained; the
    reused layers are left out of the optimiser's variable list.

    The instance owns a ``tf.Session`` on the given graph; call ``close()``
    when done with it.

    Note: methods name their instance parameter ``s`` instead of ``self``;
    this is kept from the original code.
    """

    def __init__(s, init_graph, num_tgt_classes, checkpoint_path=None):
        """Add the new head, loss, optimiser and metrics to ``init_graph``.

        Args:
            init_graph: ``tf.Graph`` already populated with the original LeNet.
                It must contain the tensor ``"convo/conv2:0"``. New ops are
                added to this graph in place.
            num_tgt_classes: number of classes of the new task.
            checkpoint_path: optional checkpoint prefix (e.g. ``"./lenet"``)
                holding trained values for the variables that already exist in
                ``init_graph``. When given, those values are restored after
                initialisation, so the reused layers really are pretrained.
                When ``None`` (default, the original behaviour) every
                variable -- including the imported ones -- only receives the
                random values of its initialiser.
        """
        with init_graph.as_default():
            # Variables present before anything is added belong to the
            # imported network; remember them for the optional restore.
            pretrained_vars = tf.global_variables()

            with tf.name_scope('inputs'):
                # Only the label placeholder is new; images are fed through the
                # original graph's image placeholder.
                s.labels = tf.placeholder(tf.int32, shape=[None], name='labels')

            with tf.name_scope('hyperparams'):
                s.learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            s.one_hot_labels = tf.one_hot(s.labels, num_tgt_classes)

            # Rewire the output of the old second pooling layer into the new
            # layer. Look it up in the graph we were given rather than in a
            # notebook global, so the class works with any graph passed in.
            orig_convo2 = init_graph.get_tensor_by_name("convo/conv2:0")
            with tf.name_scope('new_convo_layer'):
                # One convolution instead of the three fully connected layers.
                conv3_w = tf.Variable(tf.truncated_normal(
                    shape=[5, 5, 16, num_tgt_classes], mean=0, stddev=0.1))
                conv3_b = tf.Variable(tf.zeros(num_tgt_classes))
                convo3 = tf.nn.conv2d(orig_convo2, conv3_w,
                                      strides=[1, 1, 1, 1], padding='VALID') + conv3_b
                # (batch, 1, 1, classes) -> (batch, classes). Squeeze only the
                # two spatial axes: an unrestricted squeeze would also drop the
                # batch axis for a batch of one (or the class axis for a
                # single-class head).
                s.logits = tf.squeeze(convo3, axis=[1, 2])

            with tf.name_scope('loss'):
                smce = tf.nn.softmax_cross_entropy_with_logits
                s.loss = tf.reduce_mean(smce(logits=s.logits, labels=s.one_hot_labels),
                                        name="loss")

            with tf.name_scope('global_step'):
                global_step = tf.Variable(0, trainable=False, name='global_step')
                # Incremented explicitly in fit(); the optimiser is not given
                # the step variable.
                s.inc_step = tf.assign_add(global_step, 1, name='inc_step')

            # Train the new layer only. Referencing the variables directly
            # (rather than filtering the trainable collection by scope name)
            # stays correct even if the scope name gets uniquified.
            # Use to_train_vars = None to train all trainable variables,
            # including those of the original network.
            to_train_vars = [conv3_w, conv3_b]

            with tf.name_scope('train'):
                decayed_rate = tf.train.exponential_decay(s.learning_rate, global_step,
                                                          600, 0.998, True)
                momopt = tf.train.MomentumOptimizer
                s.train = momopt(decayed_rate, 0.9).minimize(s.loss, var_list=to_train_vars)

            # Prediction ops of the new head. They are attributes of this
            # class and distinct from the accuracy ops of the original model.
            with tf.name_scope('new_prediction'):
                s.softmax = tf.nn.softmax(s.logits, name="softmax")
                s.prediction = tf.cast(tf.argmax(s.softmax, 1), tf.int32)

                s.pred_correct = tf.equal(s.labels, s.prediction)
                s.pred_accuracy = tf.reduce_mean(tf.cast(s.pred_correct, tf.float32))

            restorer = None
            if checkpoint_path is not None:
                restorer = tf.train.Saver(var_list=pretrained_vars)

            # Initialises *every* variable in the graph, imported ones included.
            init = tf.global_variables_initializer()
        s.session = tf.Session(graph=init_graph)
        s.session.run(init)
        if restorer is not None:
            # Overwrite the random initial values of the reused layers with
            # the trained ones.
            restorer.restore(s.session, checkpoint_path)

    def fit(s, train_dict):
        """Run one optimisation step on the batch described by ``train_dict``.

        Args:
            train_dict: feed dict providing the images, ``s.labels`` and
                ``s.learning_rate``.

        Returns:
            Tuple ``(loss, step, accuracy)`` for this batch, where ``step`` is
            the global step value after incrementing.
        """
        tr_loss, step, tr_acc, _ = s.session.run(
            [s.loss, s.inc_step, s.pred_accuracy, s.train],
            feed_dict=train_dict)
        return tr_loss, step, tr_acc

    def predict(s, test_dict):
        """Evaluate the head on the batch described by ``test_dict``.

        Args:
            test_dict: feed dict providing the images and ``s.labels``.

        Returns:
            Tuple ``(correct, predictions)``: a per-example boolean array
            telling whether the prediction equals the label, and the predicted
            class ids.
        """
        ct_correct, preds = s.session.run([s.pred_correct, s.prediction],
                                          feed_dict=test_dict)
        return ct_correct, preds

    def close(s):
        """Release the TensorFlow session owned by this instance."""
        s.session.close()

In [None]:
# Attach the new 10-class head to the imported LeNet graph; the constructor
# also opens a session on that graph and runs the global variable initializer.
# NOTE(review): the name `new_alex` looks like a leftover from an AlexNet
# variant of this exercise -- it holds a LeNet-based model.
new_alex = TransferedLeNet(old_lenet_graph, num_tgt_classes=10) 

# Exercise
## (a) Run this training and comment on the accuracy of this new training compared to the original one
## (b) Investigate the differences between the class TransferedLeNet and the original code, and recommend a way to improve the accuracy of the new class

In [None]:
# Train the new head of the transferred network for 10 epochs and print one
# progress line per epoch. Loss and accuracy are those of the last mini-batch
# of the epoch, not an average over the epoch.
num_examples = len(X_train)
# Timer for this training run only. The previous version printed
# (end - start) / 60, where `end` is the last batch slice index and `start`
# the timestamp of the earlier LeNet training cell, i.e. not a duration.
transfer_start = time.time()
for epoch in range(10):
    X_train, y_train = shuffle(X_train, y_train)
    for offset in range(0, num_examples, BATCH_SIZE):
        end = offset + BATCH_SIZE
        batch_x, batch_y = X_train[offset:end], y_train[offset:end]
        # Images are fed by tensor name into the placeholder of the imported
        # graph; labels and learning rate go to the placeholders of the new head.
        train_dict = {'inputs/images:0':batch_x, 
                      new_alex.labels : batch_y, 
                      new_alex.learning_rate : 0.01}
        tr_loss, step, tr_acc = new_alex.fit(train_dict)
    elapsed_minutes = (time.time() - transfer_start) / 60.0
    info_update = "Epoch: {:2d} Step: {:5d} Loss: {:8.2f} Acc: {:5.2f} Time: {:5.2f}"
    print(info_update.format(epoch, step, tr_loss, tr_acc, elapsed_minutes))