In [1]:
import numpy as np
import tensorflow as tf
import os
import time
slim = tf.contrib.slim 
from scipy.io import loadmat 
from scipy.misc import imread
from scipy.misc import imresize
from random import shuffle
from datetime import timedelta
from layers import * # bunch of wrapped layers that are we use to run the network
from PIL import Image
import from_image_to_floats

In [2]:
# Display the TensorFlow version this notebook was run with (the cells below
# use the 1.x API: tf.contrib, tf.placeholder, sessions).
tf.__version__

'1.0.1'

In [3]:
# Load the IMDB face dataset. Before training the network, download it from:
# https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/
# and put the cropped images together with the imdb.mat mapping file under
# ../datasets/imdb_crop/ (or point the two paths below at your own folder).

dataset_name = 'imdb'
images_path = '../datasets/imdb_crop/'
dataset_path = '../datasets/imdb_crop/imdb.mat'
face_score_treshold = 3

# Pull the per-image columns out of the .mat structure.
dataset = loadmat(dataset_path)
image_names_array = dataset['imdb']['full_path'][0, 0][0]
gender_classes = dataset['imdb']['gender'][0, 0][0]
face_score = dataset['imdb']['face_score'][0, 0][0]
second_face_score = dataset['imdb']['second_face_score'][0, 0][0]

# Keep an image only when (a) its face score is above the threshold,
# (b) its second-face score is NaN and (c) its gender label is not NaN.
# Note that `unknown_gender_mask` is True where the gender IS known.
face_score_mask = face_score > face_score_treshold
second_face_score_mask = np.isnan(second_face_score)
unknown_gender_mask = ~np.isnan(gender_classes)
mask = np.logical_and.reduce(
    [face_score_mask, second_face_score_mask, unknown_gender_mask])

image_names_array = image_names_array[mask]
gender_classes = gender_classes[mask].tolist()

# Each entry of the path column is a one-element array wrapping the path.
image_names = [name_entry[0] for name_entry in image_names_array]

# Relative image path -> gender label.
ground_truth_data = dict(zip(image_names, gender_classes))

In [12]:
# Split the labelled data into a training set and a validation set.

def split_data(ground_truth_data, training_ratio=.8, do_shuffle=False):
    """Partition the keys of ``ground_truth_data`` into train/validation lists.

    The keys are sorted first, so without shuffling the split is fully
    deterministic. With ``do_shuffle`` the sorted keys are shuffled in place
    (``random.shuffle``) before being cut.

    Args:
        ground_truth_data: mapping whose keys identify the samples.
        training_ratio: fraction of the keys, in [0, 1], assigned to training.
        do_shuffle: when truthy, shuffle the keys before splitting.

    Returns:
        ``(train_keys, validation_keys)`` - two lists that together contain
        every key exactly once.

    Raises:
        ValueError: if ``training_ratio`` is outside [0, 1]; such a value
            would otherwise silently produce a wrongly sized split.
    """
    if not 0 <= training_ratio <= 1:
        raise ValueError(
            'training_ratio must be within [0, 1], got {!r}'.format(training_ratio))

    ground_truth_keys = sorted(ground_truth_data.keys())
    if do_shuffle:
        shuffle(ground_truth_keys)

    num_train = int(round(training_ratio * len(ground_truth_keys)))
    train_keys = ground_truth_keys[:num_train]
    validation_keys = ground_truth_keys[num_train:]
    return train_keys, validation_keys

In [4]:
# Labels have to be one-hot encoded before they are fed to the network.

def to_categorical(integer_classes, num_classes=3):
    """One-hot encode a 1-D sequence of class indices.

    Args:
        integer_classes: sequence of class labels; it is converted to an
            integer array, so whole-number floats are accepted.
        num_classes: width of the encoding (number of columns).

    Returns:
        A ``(len(integer_classes), num_classes)`` array of zeros with a single
        1 per row, placed in the column given by that row's label.
    """
    labels = np.asarray(integer_classes, dtype='int')
    sample_count = labels.shape[0]
    one_hot = np.zeros((sample_count, num_classes))
    row_indices = np.arange(sample_count)
    # Row r gets its 1 in column labels[r].
    one_hot[row_indices, labels] = 1
    return one_hot

In [5]:
# Helper that assembles one validation batch.

def generate_val_batch(j, test_batch_size=256):
    """Build a validation batch starting at position ``j`` of ``val_keys``.

    Images are read from ``images_path``, resized to ``img_size`` x
    ``img_size`` and converted to float32. Images that are not 3-D arrays with
    exactly ``num_channels`` channels (e.g. grayscale files) are skipped,
    because they cannot be stacked into the network's input tensor.

    Args:
        j: index into ``val_keys`` of the first image to consider.
        test_batch_size: maximum number of images in the batch.

    Returns:
        ``(x_batch, y_true_batch, i)`` where ``x_batch`` is a float32 array of
        shape ``(n, img_size, img_size, num_channels)``, ``y_true_batch`` is
        the matching float32 one-hot label array of shape ``(n, 2)`` and ``i``
        is the index of the first key that has not been consumed yet. ``n``
        can be smaller than ``test_batch_size`` (even 0) when the keys run out.
    """
    x_batch = []
    y_true_batch = []
    i = j
    while len(y_true_batch) < test_batch_size and i < len(val_keys):
        key = val_keys[i]
        i += 1

        image_array = imread(images_path + key)
        # Reject unusable images before paying for the resize.
        if image_array.ndim != 3 or image_array.shape[2] != num_channels:
            continue

        image_array = imresize(image_array, [img_size, img_size])
        x_batch.append(image_array.astype('float32'))
        y_true_batch.append(ground_truth_data[key])

    y_true_batch = to_categorical(y_true_batch, num_classes=2)
    y_true_batch = np.asarray(y_true_batch, dtype='float32')
    # The reshape is a no-op for a non-empty batch; it only gives an empty
    # batch the same 4-D layout instead of shape (0,).
    x_batch = np.asarray(x_batch, dtype='float32').reshape(
        -1, img_size, img_size, num_channels)

    return x_batch, y_true_batch, i

In [7]:
# Hook for augmenting training images. Slightly perturbing the inputs can
# sometimes improve accuracy, but augmentation is skipped here for simplicity.

def preprocess_image(image, training=True):
    """Return ``image`` unchanged.

    Args:
        image: a single image.
        training: accepted for the augmentation hook but currently ignored.

    Returns:
        The very same ``image`` object that was passed in.
    """
    # Disabled augmentation pipeline, kept for reference:
    #
    # if training:
    #     image = tf.image.random_flip_left_right(image)
    #
    #     image = tf.image.random_hue(image, max_delta=0.05)
    #     image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
    #     image = tf.image.random_brightness(image, max_delta=0.2)
    #     image = tf.image.random_saturation(image, lower=0.0, upper=2.0)
    #
    #     image = tf.minimum(image, 255.0)
    #     image = tf.maximum(image, 0.0)
    return image

In [8]:
# Wrapper that applies preprocess_image to every image of a batch.
def pre_process(images, training):
    """Map ``preprocess_image`` over ``images`` with ``tf.map_fn``.

    Args:
        images: batch of images handed to ``tf.map_fn``.
        training: forwarded to ``preprocess_image`` for each image.

    Returns:
        The result of ``tf.map_fn`` over the batch.
    """
    def _apply_to_single(single_image):
        return preprocess_image(single_image, training=training)

    return tf.map_fn(_apply_to_single, images)

In [9]:
# Generate one training batch.

def random_batch():
    """Assemble a random training batch without duplicate images.

    Training indices are visited in a fresh random permutation, so every image
    appears at most once per batch. (The previous per-image
    ``np.random.choice(num_images, replace=False)`` drew a single index each
    time, which made ``replace=False`` a no-op and allowed duplicates.)
    Images that are not 3-D arrays with exactly ``num_channels`` channels are
    skipped.

    Returns:
        ``(x_batch, y_true_batch)``: a float32 array of shape
        ``(n, img_size, img_size, num_channels)`` and the matching float32
        one-hot labels of shape ``(n, 2)``. ``n`` equals ``train_batch_size``
        unless the training set holds fewer usable images, in which case the
        batch is smaller instead of looping forever.
    """
    x_batch = []
    y_true_batch = []

    # Permuting all indices is cheap next to reading the images from disk.
    for idx in np.random.permutation(num_images):
        if len(x_batch) >= train_batch_size:
            break

        key = train_keys[idx]
        image_array = imread(images_path + key)
        # Reject unusable images before paying for the resize.
        if image_array.ndim != 3 or image_array.shape[2] != num_channels:
            continue

        image_array = imresize(image_array, [img_size, img_size])
        x_batch.append(image_array.astype('float32'))
        y_true_batch.append(ground_truth_data[key])

    y_true_batch = to_categorical(y_true_batch, num_classes=2)
    y_true_batch = np.asarray(y_true_batch, dtype='float32')
    # The reshape is a no-op for a non-empty batch; it only gives an empty
    # batch the same 4-D layout instead of shape (0,).
    x_batch = np.asarray(x_batch, dtype='float32').reshape(
        -1, img_size, img_size, num_channels)

    return x_batch, y_true_batch

In [10]:
def print_test_accuracy():
    """Evaluate the network on the whole validation set and print the result.

    The validation keys are processed batch by batch via
    ``generate_val_batch``. The overall accuracy is the mean over all
    evaluated images: each batch accuracy is weighted by its batch size, so a
    short final batch does not distort the result. The aggregation is done in
    Python so that repeated calls do not add new ops to the TensorFlow graph.
    """
    num_images = len(val_keys)
    i = 0
    correct_total = 0.0   # sum of (batch accuracy * batch size)
    evaluated_total = 0   # number of images actually fed to the network
    start_time = time.time()
    msg = "Checked {0:>6} pictures, Validation Accuracy of this block is: {1:>6.1%}"

    while i < num_images:
        # `i` becomes the index of the first key not yet consumed, i.e. the
        # number of validation keys processed so far.
        x_batch, y_true_batch, i = generate_val_batch(i)
        batch_count = len(x_batch)
        if batch_count == 0:
            # Every remaining image was skipped; nothing to evaluate.
            continue

        acc = session.run(accuracy, feed_dict={x: x_batch,
                                               y_true: y_true_batch})
        correct_total += float(acc) * batch_count
        evaluated_total += batch_count
        print(msg.format(i, acc))

    if evaluated_total:
        total_acc = correct_total / evaluated_total
    else:
        total_acc = float('nan')
    time_dif = time.time() - start_time

    print("Validation accuracy: {0:>6%}".format(total_acc))
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

In [13]:
# split_data is called with its defaults: 80% of the sorted keys go to
# training, the rest to validation, without shuffling.
train_keys, val_keys = split_data(ground_truth_data)
# Size of the training set; random_batch draws its indices from this range.
num_images = len(train_keys)
# Number of images per training step.
train_batch_size = 64

In [14]:
# Side length (in pixels) every image is resized to before entering the network.
img_size = 48
# Channels per input image; used as the last dimension of the input placeholder.
num_channels = 3
# Number of output classes (labels are one-hot encoded with two columns).
num_classes = 2 

In [15]:
# Network input: a batch of img_size x img_size images with num_channels channels.
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels], name='x')
# One-hot ground-truth labels and the class index derived from them.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1, name='y_true_cls')
    
# Block 1 (48x48 input): two 5x5 convolutions, 3 -> 16 -> 64 channels, then pooling.
# conv_layer / batch_norm / max_pool / flatten_layer / fc_layer are presumably
# provided by the star import from the local `layers` module - confirm there.
# NOTE(review): this block uses `batch_norm` while the later blocks use
# `slim.batch_norm` - confirm the difference is intentional.
conv1_1 = conv_layer(inputs=x, W_shape=[5, 5, 3, 16], b_shape=16)
conv1_1 = batch_norm(conv1_1)
conv1_1 = tf.nn.relu(conv1_1, name='conv1_1')
conv1_2 = conv_layer(conv1_1, W_shape=[5, 5, 16, 64], b_shape=64)
conv1_2 = batch_norm(conv1_2)
conv1_2 = tf.nn.relu(conv1_2, name='conv1_2')
max_pool1 = max_pool(conv1_2)
    
# Block 2 (24x24 per the original notes, i.e. max_pool presumably halves the
# spatial size): two 3x3 convolutions, 64 -> 128 -> 256 channels, then pooling.
# NOTE(review): slim.batch_norm is called with its defaults; check whether
# is_training / the moving-average update ops need handling for inference.
conv2_1 = conv_layer(max_pool1, W_shape=[3, 3, 64, 128], b_shape=128)
conv2_1 = slim.batch_norm(conv2_1)
conv2_1 = tf.nn.relu(conv2_1, name='conv2_1')
conv2_2 = conv_layer(conv2_1, W_shape=[3, 3, 128, 256], b_shape=256)
conv2_2 = slim.batch_norm(conv2_2)
conv2_2 = tf.nn.relu(conv2_2, name='conv2_2')
max_pool2 = max_pool(conv2_2)
    
# Block 3 (12x12 per the original notes): two 3x3 convolutions,
# 256 -> 256 -> 512 channels, then pooling.
conv3_1 = conv_layer(max_pool2, W_shape=[3, 3, 256, 256], b_shape=256)
conv3_1 = slim.batch_norm(conv3_1)
conv3_1 = tf.nn.relu(conv3_1, name='conv3_1')

conv3_2 = conv_layer(conv3_1, W_shape=[3, 3, 256, 512], b_shape=512)
conv3_2 = slim.batch_norm(conv3_2)
conv3_2 = tf.nn.relu(conv3_2, name='conv3_2')
max_pool3 = max_pool(conv3_2)
    
# Flatten the pooled feature map (6x6 per the original notes) into one vector
# per image; flatten_layer also returns the length of that vector.
flatten, num_features = flatten_layer(max_pool3)
    
# Classifier head: a 256-unit fully connected layer (fc1 is also what
# get_floats exports) followed by a num_classes-way layer without ReLU,
# whose outputs are used as logits.
fc1 = fc_layer(flatten, num_features, 256, name="fc_1")
fc2 = fc_layer(fc1, 256, num_classes, use_relu=False)
y_pred = tf.nn.softmax(fc2)

# Predicted class index; "y_pred_cls" is the output node name used when the
# graph is frozen further down.
y_pred_cls = tf.argmax(y_pred, dimension=1, name="y_pred_cls")
# The loss is computed from the raw logits (fc2), not from the softmax output.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=fc2,
                                                        labels=y_true)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
# Accuracy: fraction of samples whose predicted class equals the true class.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [16]:
# Create the session shared by the training / evaluation helpers in this
# notebook and initialise the graph's variables.
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())

In [17]:
def optimize(num_iterations):
    """Run ``num_iterations`` training steps on random batches.

    Each step draws a batch with ``random_batch`` and runs the optimizer on
    it. On every 100th iteration (starting with iteration 0) the accuracy on
    that same training batch is printed. The elapsed wall-clock time is
    printed once at the end.

    Args:
        num_iterations: number of optimizer steps to perform.
    """
    started_at = time.time()
    report_template = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"

    for step in range(num_iterations):
        images, labels = random_batch()
        feed_dict_train = {x: images, y_true: labels}
        session.run(optimizer, feed_dict_train)

        if step % 100 != 0:
            continue
        # Progress report, measured on the batch that was just trained on.
        acc = session.run(accuracy, feed_dict=feed_dict_train)
        print(report_template.format(step, acc))

    elapsed = time.time() - started_at
    print("Time usage: " + str(timedelta(seconds=int(round(elapsed)))))

In [18]:
# Directory for model checkpoints; created if it does not exist yet.
save_dir = 'checkpoint/'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
    
# Checkpoint path prefix that is later handed to saver.save.
save_path = os.path.join(save_dir, 'gender_recognition')

In [19]:
# 201 iterations, so the progress line is printed at iterations 0, 100 and 200.
optimize(num_iterations=201)

Optimization Iteration:      0, Training Accuracy:  60.9%
Optimization Iteration:    100, Training Accuracy:  78.1%
Optimization Iteration:    200, Training Accuracy:  78.1%
Time usage: 0:34:19


In [20]:
def get_layer_weights(name):
    """Return the current value of the ``weights`` variable in scope ``name``.

    The previous version ignored ``name`` and always looked up
    ``conv1_1/weights`` with a hard-coded shape. The scope is now taken from
    the argument, and no shape is passed because an existing variable is
    reused as it is.

    Args:
        name: variable scope that holds the layer's ``weights`` variable,
            e.g. ``'conv1_1'``.
            NOTE(review): this assumes the layer helpers create their weights
            as ``<name>/weights`` - confirm against the ``layers`` module.

    Returns:
        The variable's value as evaluated by ``session.run``.

    Raises:
        ValueError: raised by ``tf.get_variable`` if the variable does not
            exist in the requested scope.
    """
    with tf.variable_scope(name, reuse=True):
        weights = tf.get_variable('weights')
    return session.run(weights)

In [21]:
# Saver used below to write the trained variables to a checkpoint.
saver = tf.train.Saver()

In [22]:
# Write the session's variables to the checkpoint path prefix.
saver.save(session, save_path=save_path)
# To load this checkpoint instead of retraining, uncomment the line below.
#saver.restore(sess=session, save_path=save_path)

'checkpoint/gender_recognition'

In [23]:
from tensorflow.python.framework.graph_util import convert_variables_to_constants

In [24]:
# Freeze the model: replace the graph's variables with constants holding
# their current session values, with "y_pred_cls" as the requested output node.
minimal_graph = convert_variables_to_constants(session, session.graph_def, ["y_pred_cls"])

INFO:tensorflow:Froze 22 variables.
Converted 22 variables to const ops.


In [25]:
# Serialise the frozen graph to ./graphs/tinyNet.pb; the final False is the
# as_text flag, so the file is written in binary form.
tf.train.write_graph(minimal_graph, './graphs', 'tinyNet.pb', False)

'./graphs/tinyNet.pb'

In [26]:
def get_filepath(filepath):
    """Return the last two components of ``filepath`` without the extension.

    Example: ``'../datasets/imdb_crop/01/name.jpg'`` -> ``'01/name'``.

    The previous version removed a fixed four characters from the end, which
    is only right for three-letter extensions, and recognised only ``'/'`` as
    a separator although the caller builds paths with ``os.sep``. The real
    extension is now stripped and ``os.sep`` is normalised to ``'/'`` first.

    Args:
        filepath: path to a file, using ``'/'`` or ``os.sep`` as separator.

    Returns:
        ``'<parent folder>/<file name without extension>'`` joined with
        ``'/'``. A path with fewer than two separators is returned whole,
        minus its extension.
    """
    normalized = filepath.replace(os.sep, '/')
    # Keep at most the last two '/'-separated components.
    tail = '/'.join(normalized.rsplit('/', 2)[-2:])
    stem, _extension = os.path.splitext(tail)
    return stem

In [27]:
def get_floats(images_path, save_dir='./floating_signatures/'):
    """Save the ``fc1`` activations of every ``.jpg`` image under a folder.

    Walks ``images_path`` recursively. Each ``.jpg`` that is a 3-D array with
    ``num_channels`` channels is resized to ``img_size`` x ``img_size``, run
    through the network, and the squeezed ``fc1`` output is written with
    ``np.save`` to ``save_dir/<parent folder>/<file name>``.

    Fixes over the previous version: the file path is built with
    ``os.path.join`` (plain concatenation produced ``//`` for files directly
    inside ``images_path``, which turned the relative save path into an
    absolute one); the output folder is taken from the real save path instead
    of a fixed three-character prefix; and the resize target follows
    ``img_size`` rather than a literal 48.

    Args:
        images_path: root folder that is searched for ``.jpg`` files.
        save_dir: root folder the activation files are written to.
    """
    for subdir, dirs, files in os.walk(images_path):
        for file_name in files:
            if not file_name.endswith(".jpg"):
                continue

            filepath = os.path.join(subdir, file_name)
            img = imread(filepath)
            # Skip images the input placeholder cannot accept (e.g. grayscale).
            if img.ndim != 3 or img.shape[2] != num_channels:
                continue

            img = imresize(img, [img_size, img_size])
            values = np.squeeze(session.run(fc1, feed_dict={x: [img]}))

            output_path = os.path.join(save_dir, get_filepath(filepath))
            output_dir = os.path.dirname(output_path)
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)
            np.save(output_path, values)

In [51]:
# Export the fc1 activations for every .jpg found under the dataset folder.
get_floats(images_path=images_path)