In [None]:
import os
import random
import tarfile
import sys
import cv2
import time
import tensorflow as tf
from IPython.display import clear_output
from scipy import ndimage
import numpy as np
from six.moves.urllib.request import urlretrieve
import matplotlib.pyplot as plt
%matplotlib inline

class VideoCamera(object):
    """Small wrapper around ``cv2.VideoCapture`` for grabbing single frames.

    The capture device is opened in the constructor and released when the
    object is garbage collected.
    """

    def __init__(self, index=0):
        """Open the capture device with the given ``index``.

        Stores the capture handle in ``self.video``, the index in
        ``self.index`` and the result of ``isOpened()`` in ``self.is_opened``,
        which is also printed.
        """
        self.video = cv2.VideoCapture(index)
        self.index = index
        self.is_opened = self.video.isOpened()
        print(self.is_opened)

    def __del__(self):
        # __init__ may have raised before ``video`` was assigned, so do not
        # assume the attribute exists.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def get_frame(self, in_grayscale=False):
        """Read one frame from the capture device.

        Args:
            in_grayscale: when true, convert the frame from BGR to grayscale.

        Returns:
            The frame, or ``None`` when the device did not deliver one.
        """
        ok, frame = self.video.read()
        # A failed read must not reach cvtColor, which cannot handle None.
        if not ok or frame is None:
            return None
        if in_grayscale:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        return frame
    
class FaceDetector(object):
    """Face detector backed by a ``cv2.CascadeClassifier``."""

    def __init__(self, xml_path):
        """Load the cascade description found at ``xml_path``."""
        self.classifier = cv2.CascadeClassifier(xml_path)

    def detect(self, image, biggest_only=True):
        """Run the cascade on ``image`` and return the detections.

        Args:
            image: image passed straight to ``detectMultiScale``.
            biggest_only: when true, use the FIND_BIGGEST_OBJECT and
                DO_ROUGH_SEARCH flags; otherwise use SCALE_IMAGE.

        Returns:
            Whatever ``detectMultiScale`` returns; callers in this notebook
            unpack each entry as ``(x, y, w, h)``.
        """
        scale_factor = 1.2
        min_neighbors = 5
        min_size = (30, 30)
        # The argument is honoured as passed; it used to be overwritten with
        # True, which made biggest_only=False impossible to request.
        if biggest_only:
            flags = cv2.CASCADE_FIND_BIGGEST_OBJECT | cv2.CASCADE_DO_ROUGH_SEARCH
        else:
            flags = cv2.CASCADE_SCALE_IMAGE
        return self.classifier.detectMultiScale(image,
                                                scaleFactor=scale_factor,
                                                minNeighbors=min_neighbors,
                                                minSize=min_size,
                                                flags=flags)

def draw_rectangle(image, coords):
    """Draw one rectangle on ``image`` per ``(x, y, w, h)`` entry of ``coords``.

    Each rectangle is narrowed by 10% of its width on the left and on the
    right before being handed to ``cv2.rectangle``. Nothing is returned.
    """
    colour = (150, 150, 0)
    thickness = 8
    for (x, y, w, h) in coords:
        margin = int(0.2 * w / 2)
        top_left = (x + margin, y)
        bottom_right = (x + w - margin, y + h)
        cv2.rectangle(image, top_left, bottom_right, colour, thickness)
        
def cut_faces(image, faces_coord):
    """Crop ``image`` once per ``(x, y, w, h)`` entry of ``faces_coord``.

    Every crop keeps the full height ``h`` and drops 10% of the width ``w``
    on each side. Returns the crops as a list, in input order.
    """
    def crop(x, y, w, h):
        margin = int(0.2 * w / 2)
        return image[y: y + h, x + margin: x + w - margin]

    return [crop(x, y, w, h) for (x, y, w, h) in faces_coord]

def resize(images, size=80):
    """Resize every image in ``images`` to a ``size`` x ``size`` square.

    Args:
        images: iterable of arrays whose first two dimensions are height
            and width.
        size: side length of the output images.

    Returns:
        A list with one resized image per input image, in input order.
    """
    images_norm = []
    for image in images:
        # Compare the actual dimensions to the target; the shape tuple itself
        # cannot be meaningfully compared to an int.
        height, width = image.shape[:2]
        if height < size or width < size:
            # Enlarging along at least one axis.
            interpolation = cv2.INTER_CUBIC
        else:
            # Shrinking (or same size).
            interpolation = cv2.INTER_AREA
        images_norm.append(cv2.resize(image, (size, size),
                                      interpolation=interpolation))
    return images_norm
        
def plt_show(image, title=""):
    """Display a BGR ``image`` with matplotlib, axes hidden, under ``title``."""
    # Convert BGR to RGB before handing the image to imshow.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.axis("off")
    plt.title(title)
    plt.imshow(rgb_image)
    plt.show()
    
def normalize_faces(frame, faces_coord, img_size=80):
    """Crop every detection out of ``frame`` and resize the crops.

    Chains ``cut_faces`` and ``resize``; returns the list produced by
    ``resize`` called with ``img_size``.
    """
    return resize(cut_faces(frame, faces_coord), img_size)
        
# Open a new thread to manage the external cv2 interaction.
# This runs once, before the capture cell creates its cv2 window.
cv2.startWindowThread()

### Taking Pictures

In [None]:
# The loop below waits 250 ms per frame, so at most ~4 pictures are saved
# every second.
num_pictures = 50  # pictures to save for the person
img_size = 100     # side length of the saved face crops

webcam = VideoCamera()
if webcam.is_opened:
    detector = FaceDetector("xml/frontal_face.xml")

    pictures_folder = os.path.join('data', 'pictures_tf')
    person = raw_input('Person: ').strip().lower() # input name
    folder = os.path.join(pictures_folder, person)

    if not person:
        # An empty name would resolve to the pictures root itself.
        print("A name is required.")
    elif os.path.exists(folder):
        print("This name already exists.")
    else:
        # makedirs also creates data/pictures_tf on the first run.
        os.makedirs(folder)
        # The window is only created when pictures are taken, and the
        # finally clause guarantees it is closed again.
        cv2.namedWindow("PyData Tutorial", cv2.WINDOW_AUTOSIZE)
        try:
            counter = 1
            while counter <= num_pictures:
                frame = webcam.get_frame()
                if frame is None:
                    print("Could not read a frame from the webcam.")
                    break
                faces_coord = detector.detect(frame) # detect
                if len(faces_coord):
                    faces = normalize_faces(frame, faces_coord, img_size) # norm pipeline
                    cv2.imwrite(os.path.join(folder, str(counter) + '.png'), faces[0])
                    clear_output(wait=True)
                    print(counter) # number of the picture just saved
                    counter += 1
                draw_rectangle(frame, faces_coord) # rectangle around face
                cv2.imshow("PyData Tutorial", frame) # live feed in external
                cv2.waitKey(250)
        finally:
            cv2.destroyAllWindows()
del webcam

### Reviewing Data

In [None]:
def print_images(folders, per_folder=3):
    """Show randomly chosen images from each folder side by side.

    Args:
        folders: paths of the folders to sample from.
        per_folder: how many images to draw (with replacement) per folder.

    Prints the maximum pixel value and the shape of the first image shown.
    Folders without files and files that cannot be read are reported and
    skipped.
    """
    images = []
    for folder in folders:
        images_path = os.listdir(folder)
        if not images_path:
            print('No images found in ' + folder)
            continue
        for _ in range(per_folder):
            image_path = os.path.join(folder, random.choice(images_path))
            frame = cv2.imread(image_path)
            if frame is None:
                # Not a readable image; skip it.
                print('Unable to read image: ' + image_path)
                continue
            images.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not images:
        print('No images to display.')
        return
    plt.figure(figsize=(20, 2))
    plt.imshow(np.hstack(images))
    plt.axis('off')
    plt.show()
    print('Maximum pixel intensity value: %.2f' % np.amax(images[0]))
    print('Shape of images ' + str(images[0].shape))

# One sub-folder per person. os.listdir returns names in arbitrary order, so
# sort them to keep folder positions (used later as class indices) stable
# between runs, and skip entries that are not directories.
pictures_root = os.path.join('data', 'pictures_tf')
data_folders = [os.path.join(pictures_root, name)
                for name in sorted(os.listdir(pictures_root))
                if os.path.isdir(os.path.join(pictures_root, name))]
print_images(data_folders)

### Building the dataset

In [None]:
# Dimensions used to size the arrays built in the cells below.
img_size = 100 # pixel size: loaded images must be img_size x img_size
num_imgs = 50 # images per class
num_channels = 3 # last dimension expected of every loaded image
num_classes = 2 # number of classes the dataset arrays are sized for

def number_of_images(folders):
    """Load up to ``num_imgs`` images from each class folder.

    Reads the module-level ``num_imgs``, ``num_classes``, ``img_size`` and
    ``num_channels``. The position of a folder in ``folders`` is its class
    index. Accepted images are packed one after another into ``dataset``.

    Args:
        folders: class folder paths; folders beyond ``num_classes`` are
            reported and ignored.

    Returns:
        ``(num_per_class, dataset, labels)`` where ``num_per_class`` maps a
        class index to the number of images loaded, ``dataset`` has shape
        ``(num_imgs * num_classes, img_size, img_size, num_channels)`` and
        ``labels`` has shape ``(num_imgs * num_classes,)``. Slots that never
        received an image stay zero.
    """
    capacity = num_imgs * num_classes
    expected_shape = (img_size, img_size, num_channels)
    # Zero-filled so unused slots hold defined values rather than whatever
    # happened to be in memory.
    dataset = np.zeros((capacity,) + expected_shape, dtype=np.int32)
    labels = np.zeros(capacity, dtype=np.int32)
    num_per_class = {}
    counter = 0
    for image_class, folder in enumerate(folders):
        if image_class >= num_classes:
            # The arrays have no room for more classes.
            print('Ignoring folder beyond num_classes: ' + folder)
            continue
        per_class_counter = 0
        for image_name in sorted(os.listdir(folder)):
            if per_class_counter >= num_imgs:
                break
            image_path = os.path.join(folder, image_name)
            image = cv2.imread(image_path)
            if image is None:
                print('Unable to use image: could not read ' + image_path)
                continue
            if image.shape != expected_shape:
                print('Unable to use image: Unexpected image shape')
                continue
            dataset[counter] = image
            labels[counter] = image_class
            counter += 1
            per_class_counter += 1
        num_per_class[image_class] = per_class_counter
    return num_per_class, dataset, labels

num_per_class, dataset, labels = number_of_images(data_folders)
# Count what was actually loaded; dataset.shape[0] is only the array capacity.
print('Total number of images: %d' % sum(num_per_class.values()))
print('Number of images per class:')
print(num_per_class)
images_per_class = min(num_per_class.values())
print('We take %d images per class' % images_per_class)
if images_per_class < num_imgs:
    # The split cell slices the dataset in fixed strides of num_imgs per class.
    print('Warning: at least one class has fewer than %d images' % num_imgs)
print('Images Shape:' + str(dataset[0].shape))
print('Dataset shape: ' + str(dataset.shape))

In [None]:
# Per-class split sizes: 65% train, 20% validation, the remainder test.
train_size = int(num_imgs * .65)
print(train_size)
valid_size = int(num_imgs * .2)
print(valid_size)
test_size = num_imgs - train_size - valid_size

image_shape = (img_size, img_size, num_channels)

train_ds = np.empty((train_size * num_classes,) + image_shape, dtype=np.float32)
train_lb = np.empty(train_size * num_classes, dtype=np.int32)

valid_ds = np.empty((valid_size * num_classes,) + image_shape, dtype=np.float32)
valid_lb = np.empty(valid_size * num_classes, dtype=np.int32)

test_ds = np.empty((test_size * num_classes,) + image_shape, dtype=np.float32)
test_lb = np.empty(test_size * num_classes, dtype=np.int32)

valid_stop = train_size + valid_size
for i in range(num_classes):
    # Class i is expected to occupy rows [i * num_imgs, (i + 1) * num_imgs).
    letter_set = dataset[i * num_imgs:(i + 1) * num_imgs]
    # The slice is a view, so this shuffles those rows of `dataset` in place.
    np.random.shuffle(letter_set)

    train_rows = slice(i * train_size, (i + 1) * train_size)
    valid_rows = slice(i * valid_size, (i + 1) * valid_size)
    test_rows = slice(i * test_size, (i + 1) * test_size)

    train_ds[train_rows] = letter_set[:train_size]
    train_lb[train_rows] = i
    valid_ds[valid_rows] = letter_set[train_size:valid_stop]
    valid_lb[valid_rows] = i
    test_ds[test_rows] = letter_set[valid_stop:valid_stop + test_size]
    test_lb[test_rows] = i

print("Train Shapes --> Dataset: %s   Labels: %s" %(train_ds.shape, train_lb.shape))
print("Valid Shapes --> Dataset: %s    Labels: %s" %(valid_ds.shape, valid_lb.shape))
print("Test Shapes  --> Dataset: %s    Labels: %s" %(test_ds.shape, test_lb.shape))

In [None]:
def randomize(dataset, labels):
    """Return ``dataset`` and ``labels`` reordered by one shared random permutation.

    The permutation length is ``labels.shape[0]``; both inputs are indexed
    with it, so rows and labels stay paired.
    """
    order = np.random.permutation(labels.shape[0])
    return dataset[order], labels[order]

# Shuffle each split on its own; every call draws a separate permutation and
# applies it to both the images and their labels.
# NOTE(review): no random seed is set in the visible cells, so the order
# presumably differs between runs; confirm whether that is intended.
train_ds, train_lb = randomize(train_ds, train_lb)
test_ds, test_lb = randomize(test_ds, test_lb)
valid_ds, valid_lb = randomize(valid_ds, valid_lb)

In [None]:
def reformat(dataset, labels):
    """Reshape images to 4-D float32 and one-hot encode the labels.

    Reads the module-level ``img_size``, ``num_channels`` and ``num_classes``.

    Returns:
        ``(images, one_hot)``: ``images`` is ``dataset`` reshaped to
        ``(-1, img_size, img_size, num_channels)`` as float32; ``one_hot`` is
        a float32 matrix with one row per label and ``num_classes`` columns,
        e.g. 0 -> [1.0, 0.0, ...] and 1 -> [0.0, 1.0, ...].
    """
    batch_shape = (-1, img_size, img_size, num_channels)
    # The astype call is a safeguard in case the input is not float32 yet.
    images = dataset.reshape(batch_shape).astype(np.float32)
    class_ids = np.arange(num_classes)
    # Comparing the class ids with a column of labels broadcasts to one row
    # per label with a single True at the label's index.
    one_hot = (class_ids == labels[:, None]).astype(np.float32)
    return images, one_hot

# Replace every split with its reformatted version (float32 images, one-hot
# labels). The names are reused, so running this cell a second time would
# pass the already one-hot labels back into reformat.
train_ds, train_lb = reformat(train_ds, train_lb)
valid_ds, valid_lb = reformat(valid_ds, valid_lb)
test_ds, test_lb = reformat(test_ds, test_lb)

# Shapes after reformatting.
print("Train Shapes --> Dataset: %s   Labels: %s" %(train_ds.shape, train_lb.shape))
print("Valid Shapes --> Dataset: %s    Labels: %s" %(valid_ds.shape, valid_lb.shape))
print("Test Shapes  --> Dataset: %s    Labels: %s" %(test_ds.shape, test_lb.shape))

In [None]:
def accuracy(predictions, labels):
    """Percentage of rows whose argmax agrees between ``predictions`` and ``labels``.

    Both arguments are reduced with ``np.argmax`` along axis 1; the number of
    matches is divided by ``predictions.shape[0]`` and scaled to 0-100.
    """
    predicted_class = np.argmax(predictions, 1)
    true_class = np.argmax(labels, 1)
    hits = np.sum(predicted_class == true_class)
    return 100.0 * hits / predictions.shape[0]

In [None]:
# Hyperparameters. patch_size and depth1 are only referenced by the disabled
# convolution layers further down.
batch_size = 50
patch_size = 5
depth1 = 2
# depth2 = 16
# num_hidden = 4

graph = tf.Graph()

with graph.as_default():
    
    # Input data: training batches are fed through placeholders, while the
    # validation and test sets are baked into the graph as constants.
    tf_train_ds = tf.placeholder(tf.float32, shape=(batch_size, img_size, img_size, num_channels))
    tf_train_lb = tf.placeholder(tf.float32, shape=(batch_size, num_classes))
    tf_valid_ds = tf.constant(valid_ds)
    tf_test_ds = tf.constant(test_ds)
    
    # Variables.
#     patch1_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth1], stddev=0.1))
#     patch1_biases = tf.Variable(tf.zeros([depth1]))
    
#     patch2_weights = tf.Variable(tf.truncated_normal([patch_size, patch_size, depth1, depth2], stddev=0.1))
#     patch2_biases = tf.Variable(tf.constant(1.0, shape=[depth2]))
    
    # divided by four because that is the size once the patches have scanned the image
#     layer1_weights = tf.Variable(tf.truncated_normal(
#                                  [img_size // 4 * img_size // 4 * depth1, num_classes], stddev=0.1))
    # Single fully connected layer from the flattened pixels to the classes.
    layer1_weights = tf.Variable(tf.truncated_normal(
                                 [img_size * img_size * num_channels, num_classes], stddev=0.1))
    layer1_biases = tf.Variable(tf.constant(1.0, shape=[num_classes]))
    
    
#     layer2_weights = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1))
#     layer2_biases = tf.Variable(tf.constant(1.0, shape=[num_classes]))
    
    # Model
    def model(data, training):
        """Return the logits of the linear classifier for ``data``.

        ``training`` is only consulted by the disabled dropout code below, so
        it currently has no effect.
        """
        # first convolution layer. Stride only matter in two elements in the middle
#         conv = tf.nn.conv2d(data, patch1_weights, [1, 4, 4, 1], padding="SAME")
#         conv = tf.nn.max_pool(conv1 + patch1_biases, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME" )
#         conv = tf.nn.relu(conv)
        
        # second convolution layer
#         conv = tf.nn.conv2d(conv, patch2_weights, [1, 2, 2, 1], padding="SAME")
#         conv = tf.nn.max_pool(conv + patch2_biases, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME" )
#         conv = tf.nn.relu(conv)

        # reshape to apply fully connected layer
#         shape_conv = conv.get_shape().as_list()
#         input_hidden = tf.reshape(conv, [shape_conv[0], shape_conv[1] * shape_conv[2] * shape_conv[3]])
        input_hidden = tf.reshape(data, [-1, img_size * img_size * num_channels])
#         hidden_layer = tf.nn.relu(tf.matmul(input_hidden, layer1_weights) + layer1_biases)
        
        # adding dropout layer
#         if training:
#             hidden_layer = tf.nn.dropout(hidden_layer, 0.6)
        
        return tf.matmul(input_hidden, layer1_weights) + layer1_biases
#         return tf.matmul(hidden_layer, layer2_weights) + layer2_biases
    
    # training computation
    logits = model(tf_train_ds, True)
    regularization = 0 #tf.nn.l2_loss(layer1_weights) #+ tf.nn.l2_loss(layer2_weights)
    # Keyword arguments keep the call valid whichever positional order the
    # installed TensorFlow uses for logits and labels.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
               logits=logits, labels=tf_train_lb)) + \
           .0005 * regularization
    
    # Optimizer
    # The step counter is bookkeeping, not a model parameter.
    global_step = tf.Variable(0, trainable=False)
#     learning_rate = tf.train.exponential_decay(0.05, global_step, 200, 0.95, staircase = True)
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    optimizer = tf.train.GradientDescentOptimizer(.05).minimize(loss, global_step=global_step)


    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_ds, False))
    test_prediction = tf.nn.softmax(model(tf_test_ds, False))

In [None]:
num_steps = 101

num_train = train_lb.shape[0]
if num_train < batch_size:
    # The training placeholders have a fixed batch dimension of batch_size.
    raise ValueError('Need at least %d training examples, got %d'
                     % (batch_size, num_train))
# Modulus for the batch start offset. When the training set holds exactly one
# batch the difference is zero, so fall back to 1 (offset is then always 0).
offset_modulus = max(num_train - batch_size, 1)

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in range(num_steps):
        # Step through the training data deterministically, wrapping around.
        offset = (step * batch_size) % offset_modulus
        batch_ds = train_ds[offset:(offset + batch_size)]
        batch_lb = train_lb[offset:(offset + batch_size)]
        
        feed_dict = {tf_train_ds : batch_ds, tf_train_lb : batch_lb}
        _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
        # Report progress every 100 steps.
        if (step % 100 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_lb))
            print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_lb))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_lb))