# Self-Driving Car Engineer Nanodegree

## Deep Learning



---
## Step 0: Load The Data

In [None]:
from keras.datasets import cifar10
# Fetch CIFAR-10 through Keras as (train images, train labels), (test images, test labels).
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# The label arrays come back 2-D, e.g. (50000, 1) for the training labels.
# Keras copes with that, but flattening both to 1-D keeps later indexing simple.
y_train, y_test = y_train.reshape(-1), y_test.reshape(-1)


In [None]:
# Shapes, one per line: train images, train labels, test images, test labels.
print(*(data.shape for data in (X_train, y_train, X_test, y_test)), sep="\n")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training examples as a validation set; the fixed
# random_state makes the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=0,
)

In [None]:
# A bare `type(X_train)` in the middle of a cell is evaluated and thrown away
# (only a cell's last expression is displayed), so print it explicitly.
print(type(X_train))

# Shapes after the train/validation split: images then labels for each split.
print(X_train.shape)
print(y_train.shape)
print(X_valid.shape)
print(y_valid.shape)
print(X_test.shape)
print(y_test.shape)

# First training label, as a quick check that labels are plain scalars.
print(y_train[0])

---

## Step 1: Dataset Summary & Exploration

The pickled data is a dictionary with 4 key/value pairs:

- `'features'` is a 4D array containing raw pixel data of the traffic sign images, (num examples, width, height, channels).
- `'labels'` is a 1D array containing the label/class id of the traffic sign. The file `signnames.csv` contains id -> name mappings for each id.
- `'sizes'` is a list containing tuples, (width, height) representing the original width and height of the image.
- `'coords'` is a list containing tuples, (x1, y1, x2, y2) representing coordinates of a bounding box around the sign in the image. **THESE COORDINATES ASSUME THE ORIGINAL IMAGE. THE PICKLED DATA CONTAINS RESIZED VERSIONS (32 by 32) OF THESE IMAGES**

Complete the basic data summary below. Use python, numpy and/or pandas methods to calculate the data summary rather than hard coding the results. For example, the [pandas shape method](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.shape.html) might be useful for calculating some of the summary results. 

In [None]:

### Data exploration visualization code goes here.
### Feel free to use as many code cells as needed.
import random
import matplotlib.pyplot as plt
# Visualizations will be shown in the notebook.
%matplotlib inline

# Show two randomly chosen training images, each in its own small figure.
# random.randint(a, b) includes b, so randint(0, len(X_train)) can return
# len(X_train) and index one past the end; randrange excludes the upper bound.
for fig_size in ((1, 1), (1, 2)):
    index = random.randrange(len(X_train))
    image = X_train[index]
    label = y_train[index]  # class id of the displayed image, kept for inspection

    plt.figure(figsize=fig_size)
    plt.imshow(image)



In [None]:
import numpy as np
import matplotlib.pyplot as plt

### Provide a Basic Summary of the Data Set Using Python, Numpy and/or Pandas

### Include an exploratory visualization of the dataset

In [None]:
# Distinct class ids in the training labels, the index of each one's first
# occurrence, and how many times each id appears.
unique, index, counts = np.unique(y_train, return_counts=True, return_index=True)

# Bar chart of examples per class (bar width 1).
plt.bar(unique, counts, width=1)
plt.xlabel('Class')
plt.ylabel('Frequency')
plt.title('Train Class Frequency')
plt.show()

----

## Step 2: Design and Test a Model Architecture

Design and implement a deep learning model that learns to recognize traffic signs. 

### Pre-process the Data Set (normalization, grayscale, etc.)

Use the code cell (or multiple code cells, if necessary) to implement the first step of your project.

In [None]:
import numpy as np
from __future__ import division
import cv2

In [None]:
### Preprocess the data here. Preprocessing steps could include normalization, converting to grayscale, etc.
### Feel free to use as many code cells as needed.

def normalize(X):
    """Histogram-equalize and standardize every channel of every image.

    For each image ``i`` and channel ``ch`` the channel is passed through
    ``cv2.equalizeHist`` and then shifted/scaled to zero mean and unit
    standard deviation, using that single channel's own statistics.

    Args:
        X: 4-D array indexed as ``X[i, :, :, ch]`` (image, row, col, channel).
            It is only read; the caller's array is left untouched.

    Returns:
        A float32 array with the same shape as ``X``.
    """
    Y = np.zeros_like(X, dtype=np.float32)
    for i in range(X.shape[0]):
        for ch in range(X.shape[3]):
            # Keep the equalized channel in a local instead of writing it back
            # into X, so normalizing does not silently modify the input.
            equalized = cv2.equalizeHist(X[i, :, :, ch])
            mu = np.mean(equalized)
            std = np.std(equalized)
            if std == 0:
                # Constant channel: every pixel equals the mean. Dividing by 1
                # yields all zeros instead of 0/0 = NaN.
                std = 1.0
            Y[i, :, :, ch] = (equalized - mu) / std
    return Y


In [None]:

import math
import numpy as np
import random

# use opencv to do geometric perturbations
# Thanks to hengcherkeng (another Udacity SDCND student) for this function
def perturb(image, keep, angle_limit=2*15, scale_limit=0.1, translate_limit=3, distort_limit=3, illumin_limit=0.7):
    """Randomly apply a small perspective warp to one image.

    A uniform draw ``u`` in [0, 1) decides whether to warp: when ``u > keep``
    the image is warped, otherwise the very same ``image`` object is returned.
    The warp maps the four image corners through a random rotation + scale
    about the image centre, a random translation, and per-corner Gaussian
    noise, then resamples with bilinear interpolation and replicated borders.

    Args:
        image: Image array whose first two axes are (rows, cols); an optional
            trailing channel axis is allowed.
        keep: Threshold for the uniform draw; larger values leave more images
            unchanged.
        angle_limit: Maximum rotation magnitude, in degrees.
        scale_limit: Maximum relative deviation of the scale factor from 1.
        translate_limit: Maximum translation magnitude per axis, in pixels.
        distort_limit: Standard deviation of the per-corner noise, in pixels.
        illumin_limit: Accepted for interface compatibility; not used here.

    Returns:
        The warped image (same width and height as the input), or ``image``
        itself when no warp was applied.
    """
    u = np.random.uniform()
    if u <= keep:
        return image

    # numpy images are indexed (row, col[, channel]) = (height, width[, channel]).
    # Slicing the first two axes also accepts 2-D grayscale images.
    H, W = image.shape[:2]
    center = np.array([W / 2., H / 2.])

    # Random rotation angle (radians) and isotropic scale factor.
    da = np.random.uniform(low=-1, high=1) * angle_limit/180. * math.pi
    scale = np.random.uniform(low=-1, high=1) * scale_limit + 1

    cc = scale*math.cos(da)
    ss = scale*math.sin(da)
    rotation    = np.array([[cc, ss],[-ss,cc]])
    translation = np.random.uniform(low=-1, high=1, size=(1,2)) * translate_limit
    distort     = np.random.standard_normal(size=(4,2)) * distort_limit

    # Corner points as (x, y): top-left, bottom-left, bottom-right, top-right.
    pts1 = np.array([[0., 0.], [0., H], [W, H], [W, 0.]])
    # Rotate/scale about the centre, shift, then jitter each corner
    # independently to add perspective noise.
    pts2 = np.matmul(pts1-center, rotation) + center  + translation
    pts2 = pts2 + distort

    matrix = cv2.getPerspectiveTransform(pts1.astype(np.float32), pts2.astype(np.float32))
    # dsize is (width, height), so the output keeps the input's spatial size.
    warped = cv2.warpPerspective(image, matrix, (W, H), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE)

    return warped
    
    
def make_perturb_images(images, keep ):
    """Run ``perturb`` over every image and collect the results as uint8.

    Args:
        images: Array of images; iterated along its first axis.
        keep: Forwarded to ``perturb`` as its ``keep`` threshold.

    Returns:
        A uint8 array with the same shape as ``images``.
    """
    result = np.zeros(images.shape, dtype=np.uint8)
    for idx, single_image in enumerate(images):
        result[idx] = perturb(single_image, keep=keep)
    return result

def make_perturb_images_float(images, keep ):
    """Run ``perturb`` over every image and collect the results as float32.

    Args:
        images: Array of images; iterated along its first axis.
        keep: Forwarded to ``perturb`` as its ``keep`` threshold.

    Returns:
        A float32 array with the same shape as ``images``.
    """
    result = np.zeros(images.shape, dtype=np.float32)
    for idx, single_image in enumerate(images):
        result[idx] = perturb(single_image, keep=keep)
    return result

# Labels are unchanged by the warp, so the perturbed set reuses a copy of them.
y_train_perturb = y_train.copy()
# Perturbed copy of the training images (keep=0.1 is the threshold passed to perturb).
X_train_perturb = make_perturb_images(X_train, keep=0.1)

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Pick one training example at random. random.randint(a, b) includes b, so
# randint(0, len(X_train)) could index one past the end; randrange cannot.
index = random.randrange(len(X_train))

# Original image and its label.
image = X_train[index].squeeze()
plt.figure(figsize=(1,1))
plt.imshow(image)
print(y_train[index])

# The image at the same index in the perturbed set, with the same label.
image = X_train_perturb[index].squeeze()
plt.figure(figsize=(1,2))
plt.imshow(image)
print(y_train[index])

In [None]:
# Shape of the training image array before normalization.
print(np.shape(X_train))

In [None]:
# Run the same equalize-and-standardize step over the train, validation and
# test images; each name is rebound to the float32 result.
X_train, X_valid, X_test = [normalize(split) for split in (X_train, X_valid, X_test)]
# Channel count used for the input placeholder and the first conv filters.
n_channels = 3

### Model Architecture

In [None]:
import tensorflow as tf
from tensorflow.contrib.layers import flatten

In [None]:
# Number of target classes: used as the one-hot depth for the labels and as
# the width of each network's final logits layer.
nb_classes = 10

In [None]:
 # Xavier-Glorot initialization
# [-sqrt(6)/sqrt(size), sqrt(6)/sqrt(size)]

# where size is the total size of the weight matrix being initialized. This is how I wrote it in Tensorflow:

# shp = (3, 3, 32, 32)
# sz = shp[0] * shp[1] * shp[2] * shp[3]

# conv2_W = tf.Variable(tf.random_uniform(shape=shp, minval= -2.45/np.sqrt(sz), maxval=2.45/np.sqrt(sz)))

def XavierInit(shape, name):
    """Create a variable drawn uniformly from [-limit, +limit].

    ``limit`` is ``sqrt(6) / sqrt(n)`` where ``n`` is the product of all
    entries of ``shape`` (the total number of weights).

    Args:
        shape: Sequence of integer dimensions for the weight tensor.
        name: Name given to the created ``tf.Variable``.

    Returns:
        The newly created ``tf.Variable``.
    """
    n = 1
    for dim in shape:
        n *= dim
    limit = np.sqrt(6)/np.sqrt(n)
    initial = tf.random_uniform(shape=shape, minval=-limit, maxval=limit)
    return tf.Variable(initial, name=name)


In [None]:
### Define your architecture here.
### Feel free to use as many code cells as needed.

def LeNetImproved(x, keep_prob):
    """Build a three-stage, multi-scale convolutional classifier graph.

    A 1x1 convolution is followed by three conv -> ReLU -> 2x2 max-pool stages
    (all three with 'VALID' padding). The pooled outputs of all three stages
    are flattened and concatenated, then fed through three fully connected
    layers with dropout after the first two.

    The shape comments below assume a 32x32 input with ``n_channels``
    channels, which is how the ``x`` placeholder is declared in this notebook.

    Args:
        x: Input image batch tensor.
        keep_prob: Dropout keep probability applied after fc1 and fc2.

    Returns:
        Tuple ``(logits, regularizer, conv1r)``: the class logits, the sum of
        the L2 losses of the conv1, conv2 and fully connected weights, and the
        ReLU activation of the first convolution (before pooling).
    """
    # All weights start from a truncated normal with this mean / stddev.
    mu = 0
    sigma = 0.05

    n_filt1 = 48   # filters in conv1
    n_filt2 = 128  # filters in conv2
    n_filt3 = 128  # filters in conv3
    n_fc1 = 1024   # units in fc1
    n_fc2 = 84     # units in fc2

    n_output = nb_classes

    # Layer 0: 1x1 convolution, SAME padding, n_channels -> n_channels.
    # 32x32xn_channels -> 32x32xn_channels.

    conv0_W = tf.Variable(tf.truncated_normal(shape=(1, 1, n_channels, n_channels), mean = mu, stddev = sigma), name='conv0W')
    conv0_b = tf.Variable(tf.zeros(n_channels))
    conv0   = tf.nn.conv2d(x, conv0_W, strides=[1, 1, 1, 1], padding='SAME') + conv0_b

    # Layer 1: 5x5 convolution, VALID. 32x32xn_channels -> 28x28x48.

    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, n_channels, n_filt1), mean = mu, stddev = sigma), name='conv1W')
    conv1_b = tf.Variable(tf.zeros(n_filt1))
    conv1   = tf.nn.conv2d(conv0, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
    # Kept under its own name because it is returned to the caller.
    conv1r = tf.nn.relu(conv1)

    # 2x2 max pooling. 28x28x48 -> 14x14x48.
    conv1 = tf.nn.max_pool(conv1r, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')


    # Layer 2: 3x3 convolution, VALID. 14x14x48 -> 12x12x128.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt1, n_filt2), mean = mu, stddev = sigma), name='conv2W')
    conv2_b = tf.Variable(tf.zeros(n_filt2))
    conv2   = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
    conv2 = tf.nn.relu(conv2)

    # 2x2 max pooling. 12x12x128 -> 6x6x128.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layer 3: 3x3 convolution, VALID. 6x6x128 -> 4x4x128.
    conv3_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt2, n_filt3), mean = mu, stddev = sigma), name='conv3W')
    conv3_b = tf.Variable(tf.zeros(n_filt3))
    conv3   = tf.nn.conv2d(conv2, conv3_W, strides=[1, 1, 1, 1], padding='VALID') + conv3_b
    conv3 = tf.nn.relu(conv3)

    # 2x2 max pooling. 4x4x128 -> 2x2x128.
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Register the first two conv kernels in the 'vars' collection so they
    # can be looked up later for visualization.
    tf.add_to_collection('vars', conv1_W)
    tf.add_to_collection('vars', conv2_W)

    # Flatten all three pooled stages and concatenate them feature-wise.
    # The prints show the three pooled tensors at graph-construction time.
    print(conv1)
    print(conv2)
    print(conv3)
    conv1_f = flatten(conv1)
    conv2_f = flatten(conv2)
    conv3_f = flatten(conv3)
    # The first argument (1) is the concat dimension, i.e. the feature axis.
    fc0 = tf.concat(1,[conv1_f,conv2_f])
    fc0 = tf.concat(1,[fc0,conv3_f])

    #print(fc0.get_shape().as_list())

    # Alternative (disabled): feed only the last stage into the classifier.
    #fc0   = flatten(conv3)

    # Layer 4: fully connected. Input width is read from fc0's static shape
    # (14*14*48 + 6*6*128 + 2*2*128 = 14528 for a 32x32 input). Output = 1024.
    #fc1_W = tf.Variable(tf.truncated_normal(shape=(2*2*n_filt3, n_fc1), mean = mu, stddev = sigma), name='FC1W')
    fc1_W = tf.Variable(tf.truncated_normal(shape=(fc0.get_shape().as_list()[-1], n_fc1), mean = mu, stddev = sigma), name='FC1W')
    fc1_b = tf.Variable(tf.zeros(n_fc1))
    fc1   = tf.matmul(fc0, fc1_W) + fc1_b

    #fc1 = tf.contrib.layers.batch_norm(fc1, center=True, scale=True, is_training=phase)
    fc1    = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob)

    # Layer 5: fully connected. Input = 1024. Output = 84.
    fc2_W  = tf.Variable(tf.truncated_normal(shape=(n_fc1, n_fc2), mean = mu, stddev = sigma), name='FC2W')
    fc2_b  = tf.Variable(tf.zeros(n_fc2))
    fc2    = tf.matmul(fc1, fc2_W) + fc2_b

    #fc2 = tf.contrib.layers.batch_norm(fc2, center=True, scale=True, is_training=phase)
    fc2    = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2, keep_prob)

    # Layer 6: fully connected. Input = 84. Output = nb_classes logits.
    fc3_W  = tf.Variable(tf.truncated_normal(shape=(n_fc2, n_output), mean = mu, stddev = sigma), name='FC3W')
    fc3_b  = tf.Variable(tf.zeros(n_output))
    logits = tf.matmul(fc2, fc3_W) + fc3_b

    # L2 penalty over conv1, conv2 and the three fully connected weight
    # matrices; conv0 and conv3 weights are not included.
    regularizer = tf.nn.l2_loss(conv1_W) + tf.nn.l2_loss(conv2_W) + tf.nn.l2_loss(fc1_W) + tf.nn.l2_loss(fc2_W) + tf.nn.l2_loss(fc3_W)

    return (logits, regularizer, conv1r)

In [None]:

def LeNetX(x, keep_prob, keep_prob1, keep_prob2, keep_prob3):
    """Build a deeper multi-scale convolutional classifier graph.

    A 1x1 convolution is followed by three stages, each made of two
    'SAME'-padded conv + ReLU layers, a 2x2 max-pool and dropout. The pooled
    outputs of all three stages are flattened and concatenated, then fed
    through three fully connected layers with dropout after the first two.

    The shape comments below assume a 32x32 input with ``n_channels``
    channels, which is how the ``x`` placeholder is declared in this notebook.

    Args:
        x: Input image batch tensor.
        keep_prob: Dropout keep probability applied after fc1 and fc2.
        keep_prob1: Dropout keep probability after the first pooled stage.
        keep_prob2: Dropout keep probability after the second pooled stage.
        keep_prob3: Dropout keep probability after the third pooled stage.

    Returns:
        Tuple ``(logits, regularizer, conv1r)``: the class logits, the sum of
        the L2 losses of the three fully connected weight matrices, and the
        ReLU activation of the first 5x5 convolution.
    """
    # All weights start from a truncated normal with this mean / stddev.
    mu = 0
    sigma = 0.05

    n_filt1 = 48   # filters in the stage-1 convolutions
    n_filt2 = 128  # filters in the stage-2 convolutions
    n_filt3 = 128  # filters in the stage-3 convolutions
    n_fc1 = 1024   # units in fc1
    n_fc2 = 84     # units in fc2

    n_output = nb_classes

    # Layer 0: 1x1 convolution, SAME padding, n_channels -> n_channels.
    # 32x32xn_channels -> 32x32xn_channels.

    conv0_W = tf.Variable(tf.truncated_normal(shape=(1, 1, n_channels, n_channels), mean = mu, stddev = sigma), name='conv0W')
    conv0_b = tf.Variable(tf.zeros(n_channels))
    conv0   = tf.nn.conv2d(x, conv0_W, strides=[1, 1, 1, 1], padding='SAME') + conv0_b

    # Stage 1a: 5x5 convolution, SAME. 32x32xn_channels -> 32x32x48.

    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, n_channels, n_filt1), mean = mu, stddev = sigma), name='conv1W')
    conv1_b = tf.Variable(tf.zeros(n_filt1))
    conv1   = tf.nn.conv2d(conv0, conv1_W, strides=[1, 1, 1, 1], padding='SAME') + conv1_b
    conv1 = tf.nn.relu(conv1)

    conv1r = conv1 # first-layer activation, returned to the caller for visualization

    # Stage 1b: 5x5 convolution, SAME. 32x32x48 -> 32x32x48.
    conv1b_W = tf.Variable(tf.truncated_normal(shape=(5, 5, n_filt1, n_filt1), mean = mu, stddev = sigma))
    conv1b_b = tf.Variable(tf.zeros(n_filt1))
    conv1b   = tf.nn.conv2d(conv1, conv1b_W, strides=[1, 1, 1, 1], padding='SAME') + conv1b_b
    conv1b = tf.nn.relu(conv1b)

    # 2x2 max pooling, then dropout. 32x32x48 -> 16x16x48.
    conv1 = tf.nn.max_pool(conv1b, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    conv1 = tf.nn.dropout(conv1, keep_prob1)

    # Stage 2a: 3x3 convolution, SAME. 16x16x48 -> 16x16x128.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt1, n_filt2), mean = mu, stddev = sigma), name='conv2W')
    conv2_b = tf.Variable(tf.zeros(n_filt2))
    conv2   = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='SAME') + conv2_b
    conv2 = tf.nn.relu(conv2)

    # Stage 2b: 3x3 convolution, SAME. 16x16x128 -> 16x16x128.
    conv2b_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt2, n_filt2), mean = mu, stddev = sigma))
    conv2b_b = tf.Variable(tf.zeros(n_filt2))
    conv2b   = tf.nn.conv2d(conv2, conv2b_W, strides=[1, 1, 1, 1], padding='SAME') + conv2b_b
    conv2b = tf.nn.relu(conv2b)

    # 2x2 max pooling, then dropout. 16x16x128 -> 8x8x128.
    conv2 = tf.nn.max_pool(conv2b, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    conv2 = tf.nn.dropout(conv2, keep_prob2)

    # Stage 3a: 3x3 convolution, SAME. 8x8x128 -> 8x8x128.
    conv3_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt2, n_filt3), mean = mu, stddev = sigma), name='conv3W')
    conv3_b = tf.Variable(tf.zeros(n_filt3))
    conv3   = tf.nn.conv2d(conv2, conv3_W, strides=[1, 1, 1, 1], padding='SAME') + conv3_b
    conv3 = tf.nn.relu(conv3)

    # Stage 3b: 3x3 convolution, SAME. 8x8x128 -> 8x8x128.
    conv3b_W = tf.Variable(tf.truncated_normal(shape=(3, 3, n_filt3, n_filt3), mean = mu, stddev = sigma))
    conv3b_b = tf.Variable(tf.zeros(n_filt3))
    conv3b   = tf.nn.conv2d(conv3, conv3b_W, strides=[1, 1, 1, 1], padding='SAME') + conv3b_b
    conv3b = tf.nn.relu(conv3b)

    # 2x2 max pooling, then dropout. 8x8x128 -> 4x4x128.
    conv3 = tf.nn.max_pool(conv3b, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    conv3 = tf.nn.dropout(conv3, keep_prob3)

    # Register the first kernel of stages 1 and 2 in the 'vars' collection so
    # they can be looked up later for visualization.
    tf.add_to_collection('vars', conv1_W)
    tf.add_to_collection('vars', conv2_W)

    # Flatten all three pooled stages and concatenate them feature-wise.
    # The prints show the three pooled tensors at graph-construction time.
    print(conv1)
    print(conv2)
    print(conv3)
    conv1_f = flatten(conv1)
    conv2_f = flatten(conv2)
    conv3_f = flatten(conv3)
    # The first argument (1) is the concat dimension, i.e. the feature axis.
    fc0 = tf.concat(1,[conv1_f,conv2_f])
    fc0 = tf.concat(1,[fc0,conv3_f])

    #print(fc0.get_shape().as_list())

    # Alternative (disabled): feed only the last stage into the classifier.
    #fc0   = flatten(conv3)

    # Layer 4: fully connected. Input width is read from fc0's static shape
    # (16*16*48 + 8*8*128 + 4*4*128 = 22528 for a 32x32 input). Output = 1024.
    #fc1_W = tf.Variable(tf.truncated_normal(shape=(2*2*n_filt3, n_fc1), mean = mu, stddev = sigma), name='FC1W')
    fc1_W = tf.Variable(tf.truncated_normal(shape=(fc0.get_shape().as_list()[-1], n_fc1), mean = mu, stddev = sigma), name='FC1W')
    fc1_b = tf.Variable(tf.zeros(n_fc1))
    fc1   = tf.matmul(fc0, fc1_W) + fc1_b

    #fc1 = tf.contrib.layers.batch_norm(fc1, center=True, scale=True, is_training=phase)
    fc1    = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob)

    # Layer 5: fully connected. Input = 1024. Output = 84.
    fc2_W  = tf.Variable(tf.truncated_normal(shape=(n_fc1, n_fc2), mean = mu, stddev = sigma), name='FC2W')
    fc2_b  = tf.Variable(tf.zeros(n_fc2))
    fc2    = tf.matmul(fc1, fc2_W) + fc2_b

    #fc2 = tf.contrib.layers.batch_norm(fc2, center=True, scale=True, is_training=phase)
    fc2    = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2, keep_prob)

    # Layer 6: fully connected. Input = 84. Output = nb_classes logits.
    fc3_W  = tf.Variable(tf.truncated_normal(shape=(n_fc2, n_output), mean = mu, stddev = sigma), name='FC3W')
    fc3_b  = tf.Variable(tf.zeros(n_output))
    logits = tf.matmul(fc2, fc3_W) + fc3_b

    # L2 penalty over the fully connected weights only (no conv kernels).
    regularizer = tf.nn.l2_loss(fc1_W) + tf.nn.l2_loss(fc2_W) + tf.nn.l2_loss(fc3_W)

    return (logits, regularizer, conv1r)

### Train, Validate and Test the Model

In [None]:
# Input image batch: any batch size, 32x32 pixels, n_channels channels.
x = tf.placeholder(tf.float32, (None, 32, 32, n_channels))
# Integer class labels. Note that (None) is just None, not a 1-tuple, so the
# placeholder's shape is left unspecified.
y = tf.placeholder(tf.int32, (None))
keep_prob = tf.placeholder(tf.float32) # dropout keep probability for the fully connected layers
keep_prob1 = tf.placeholder(tf.float32) # dropout keep probability after the first conv stage (LeNetX)
keep_prob2 = tf.placeholder(tf.float32) # dropout keep probability after the second conv stage (LeNetX)
keep_prob3 = tf.placeholder(tf.float32) # dropout keep probability after the third conv stage (LeNetX)
#k_p_conv = tf.placeholder( tf.float32)
#is_training = tf.placeholder(tf.bool)
# Labels expanded to one-hot vectors of length nb_classes for the loss.
one_hot_y = tf.one_hot(y, nb_classes)

# Learning rate is fed at run time so it can change between epochs.
rate = tf.placeholder( tf.float32, name='rate')


In [None]:
EPOCHS = 50       # number of passes over the training set
BATCH_SIZE = 128  # examples per optimizer step and per evaluation batch

# Weight for the L2 regularization term; only used if the commented-out
# line below the loss is re-enabled.
reg_rate = 1e-4

# Build the network. The simpler LeNetImproved variant is kept for reference.
#logits, regularizer, conv1_activation = LeNetImproved(x, keep_prob)
logits, regularizer, conv1_activation = LeNetX(x, keep_prob, keep_prob1, keep_prob2, keep_prob3)

# Pass logits and labels by keyword: the positional order of this function's
# arguments differs between TensorFlow releases, so keywords are unambiguous.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y)

# Mean cross-entropy over the batch. L2 regularization is currently disabled.
loss_operation = tf.reduce_mean(cross_entropy)
#loss_operation = loss_operation + reg_rate * regularizer

# Adam with the learning rate supplied through the `rate` placeholder.
optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)

In [None]:
# A prediction is correct when the arg-max logit equals the arg-max of the
# one-hot label.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
# Fraction of correct predictions in the fed batch.
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Saver used to checkpoint and later restore the model variables.
saver = tf.train.Saver()

def evaluate(X_data, y_data):
    """Compute accuracy over a dataset, batch by batch, in the default session.

    Every dropout keep probability is fed as 1.0, so dropout is disabled.
    Per-batch accuracies are weighted by batch length so a shorter final
    batch does not skew the mean.

    Args:
        X_data: Sequence of input examples.
        y_data: Sequence of labels aligned with ``X_data``.

    Returns:
        The accuracy over all examples.
    """
    session = tf.get_default_session()
    n_total = len(X_data)
    weighted_correct = 0
    for start in range(0, n_total, BATCH_SIZE):
        stop = start + BATCH_SIZE
        inputs = X_data[start:stop]
        labels = y_data[start:stop]
        feed = {x: inputs, y: labels,
                keep_prob1: 1.0, keep_prob2: 1.0, keep_prob3: 1.0, keep_prob: 1.0}
        batch_accuracy = session.run(accuracy_operation, feed_dict=feed)
        weighted_correct += batch_accuracy * len(inputs)
    return weighted_correct / n_total

A validation set can be used to assess how well the model is performing. Low accuracy on both the training and validation
sets implies underfitting. High accuracy on the training set but low accuracy on the validation set implies overfitting.

In [None]:
### Train your model here.
### Calculate and report the accuracy on the training and validation set.
### Once a final model architecture is selected, 
### the accuracy on the test set should be calculated and reported as well.
### Feel free to use as many code cells as needed.

from sklearn.utils import shuffle

N_AUGMENT = 1      # number of perturbed copies of the training set to train on
X_train_perturb = [None] * N_AUGMENT
y_train_perturb = [None] * N_AUGMENT

TRAIN_AUGMENT = 8  # regenerate the perturbed copies every this many epochs

LR=0.001           # initial learning rate; lowered inside the epoch loop


def _train_one_pass(sess, X_data, y_data, learning_rate):
    """Run one optimizer step per mini-batch over (X_data, y_data).

    Uses the training-time dropout keep probabilities (0.9 / 0.8 / 0.7 for the
    three conv stages, 0.5 for the fully connected layers).
    """
    for offset in range(0, len(X_data), BATCH_SIZE):
        end = offset + BATCH_SIZE
        sess.run(training_operation, feed_dict={x: X_data[offset:end], y: y_data[offset:end],
                                                keep_prob1: 0.9, keep_prob2: 0.8,
                                                keep_prob3: 0.7, keep_prob: 0.5,
                                                rate: learning_rate})


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print("Training...")
    print()
    best_accuracy = 0.0
    for i in range(EPOCHS):

        # Drop the learning rate by 10x from epoch index 25 onwards.
        if i >= 25:
            LR = 0.0001

        # Pass 1: the (normalized) training images in a fresh random order.
        X_train, y_train = shuffle(X_train, y_train)
        _train_one_pass(sess, X_train, y_train, LR)

        # Every TRAIN_AUGMENT epochs (including the first) draw new perturbed
        # copies of the current training set; labels are copied alongside.
        if i%TRAIN_AUGMENT == 0:
            print("New perturb")
            for n in range(N_AUGMENT):
                X_train_perturb[n] = make_perturb_images_float(X_train, keep=0.05)
                y_train_perturb[n] = np.copy(y_train)

        # Pass 2: one more sweep per perturbed copy, each reshuffled.
        for n in range(N_AUGMENT):
            X_train_perturb[n], y_train_perturb[n] = shuffle(X_train_perturb[n], y_train_perturb[n])
            _train_one_pass(sess, X_train_perturb[n], y_train_perturb[n], LR)

        training_accuracy = evaluate(X_train, y_train)
        print("EPOCH {} ...".format(i+1))
        print("Training Accuracy = {:.3f}".format(training_accuracy))

        valid_accuracy = evaluate(X_valid, y_valid)
        print("Validation Accuracy = {:.4f}".format(valid_accuracy))
        print()
        # Checkpoint only when validation accuracy improves, so the saved
        # model is the best one seen so far.
        if (valid_accuracy > best_accuracy):
            saver.save(sess, './lenet')
            print("Model saved")
            best_accuracy = valid_accuracy

    print("Stored model for best validation Accuracy = {:.4f}".format(best_accuracy))

In [None]:
# Restore the most recent checkpoint found in the current directory and
# report accuracy on the held-out test set.
with tf.Session() as sess:
    checkpoint_path = tf.train.latest_checkpoint('.')
    saver.restore(sess, checkpoint_path)

    test_accuracy = evaluate(X_test, y_test)
    print("Test Accuracy = {:.3f}".format(test_accuracy))