In [2]:
import os
import glob
import time

import numpy as np

from PIL import Image, ImageFilter, ImageEnhance

In [3]:
img_w, img_h = 58, 40
bg_w, bg_h = 60, 60
channel = 3

offsets = [((bg_w-img_w)/2, (bg_h-img_h)/2)]

# http://blog.csdn.net/icamera0/article/details/50708888
filters = [None, ImageFilter.GaussianBlur, ImageFilter.BLUR, ImageFilter.EDGE_ENHANCE_MORE, ImageFilter.DETAIL, ImageFilter.SHARPEN]

angles = range(0, 360, 45)

X_images = []
Y_images = []

def scan_files():
    for filepath in glob.glob("HPEImages/*"):
        filename = os.path.basename(filepath)
        y = filepath[filepath.find("/")+1:filepath.find(".")-1]
        
        img = Image.open(filepath)        
        ori_size = img.size
        is_horizontal = True if ori_size[0] > ori_size[1] else False
        
        if is_horizontal:
            img = img.resize((img_w, img_h), Image.ANTIALIAS)
        else:
            img = img.resize((img_h, img_w), Image.ANTIALIAS)
            
        for filter in filters:
            for offset in offsets:
                new_img = Image.new("RGB", (bg_w, bg_h), (255, 255, 255))
                new_img.paste(img.filter(filter) if filter else img, (offset[0], offset[1]) if is_horizontal else (offset[1], offset[0]))
                for angle in angles:
                    X_images.append(new_img.rotate(angle))
                    Y_images.append(y)

    print len(X_images), len(Y_images)

In [4]:
scan_files()

1968 1968


In [5]:
names = {}

def one_hot(y):
    for sub_y in y:
        if sub_y not in names:
            names[sub_y] = len(names)
            
    ret = np.zeros(shape=(len(y), len(names)), dtype=np.int32)
    for idx, sub_y in enumerate(y):
        ret[idx, names[sub_y]] = 1
    
    return np.array(ret)

In [7]:
X = []
for x, y in zip(X_images, Y_images):
    weight, height = x.size
    pixels = x.load()
    
    for w in range(weight):
        for h in range(height):
            X.append(pixels[w, h])
            
X = (np.array(X, dtype=np.float32)/255).reshape((-1, bg_w, bg_h, channel))
print X.shape

(1968, 60, 60, 3)


In [8]:
Y = one_hot(Y_images)
print Y.shape

(1968, 8)


# Split the dataset to Training/Testing

In [12]:
#from sklearn.model_selection import ShuffleSplit

#rs = ShuffleSplit(n_splits=1, test_size=.25, random_state=0)

#train_idx, test_idx = None, None
#for training_idx, testing_idx in rs.split(X):
#    pass

# The real images are 'TESTING' dataset; the fake(generated by programes) are 'TRAINING' dataset

training_idx = [idx for idx in range(len(X)) if idx%(len(offsets)*len(angles)*len(filters))!=0]
testing_idx = [idx for idx in range(len(X)) if idx%(len(offsets)*len(angles)*len(filters))==0]

dataset_training_x, dataset_training_y = X[training_idx], Y[training_idx]
dataset_testing_x, dataset_testing_y = X[testing_idx], Y[testing_idx]

print dataset_training_x.shape, dataset_testing_x.shape

(1927, 60, 60, 3) (41, 60, 60, 3)


In [13]:
import tensorflow as tf

In [14]:
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

def get_weight(shape):
    return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1))

def get_bias(shape, value=0.1):
    return tf.Variable(tf.constant(value, shape=shape))

def get_conv2d(x, shape, value=0.1, padding="SAME"):
    with tf.name_scope("conv_weight") as scope:
        w = get_weight(shape)
        variable_summaries(w)
        
    with tf.name_scope("conv_bias") as scope:
        b = get_bias([shape[3]]) 
        variable_summaries(b)
    
    return tf.nn.relu(tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding=padding) + b)

def get_max_pooling2x2(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME"):
    return tf.nn.max_pool(conv, 
                          ksize=ksize, 
                          strides=strides, 
                          padding=padding)

def get_full_layer(data, shape):
    w = get_weight(shape)
    b = get_bias([shape[-1]])
    
    return tf.matmul(data, w) + b

# Get the images based on the batch-size

In [15]:
def next_batch(dataset_x, dataset_y, batch_size=8):
    from random import shuffle
    
    ret = range(0, len(dataset_y))
    shuffle(ret)
    ret = np.array(ret)
    
    for idx in range((len(dataset_y)/batch_size)+1):
        start_idx = idx*batch_size
        end_idx = min((idx+1)*batch_size, len(dataset_y))
        
        yield dataset_x[ret[start_idx:end_idx]], dataset_y[ret[start_idx:end_idx]]

In [16]:
dim_input = bg_w
dim_dense = 1024
dim_output = len(names)

batch_size=32
nepoch = 256
printing_epoch = 16

# 1.Baseline

In [479]:
tf.reset_default_graph()

_x = tf.placeholder(tf.float32, shape=[None, dim_input, dim_input, channel])
_y = tf.placeholder(tf.float32, shape=[None, dim_output])
keep_prob = tf.placeholder(tf.float32)

conv_pixel = 5
conv_padding = "SAME"

conv1_feature_map = 32
conv1 = get_conv2d(_x, [conv_pixel, conv_pixel, channel, conv1_feature_map])
conv1_pooling = get_max_pooling2x2(conv1)
conv1_drop = tf.nn.dropout(conv1_pooling, keep_prob=keep_prob)

conv2_feature_map = 64
conv2 = get_conv2d(conv1_drop, [conv_pixel, conv_pixel, conv1_feature_map, conv2_feature_map])
conv2_pooling = get_max_pooling2x2(conv2)
conv2_flat = tf.reshape(conv2_pooling, [-1, bg_w / 2 / 2 * bg_w / 2 / 2 * conv2_feature_map])
conv2_drop = tf.nn.dropout(conv2_flat, keep_prob=keep_prob)

full_layer = tf.nn.relu(get_full_layer(conv2_drop, [bg_w / 2 / 2 * bg_w / 2 / 2 * conv2_feature_map, dim_dense]))
full_drop = tf.nn.dropout(full_layer, keep_prob=keep_prob)

final_layer = get_full_layer(full_drop, [dim_dense, dim_output])

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=final_layer, labels=_y)
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(final_layer, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    timestamp_start = time.time()
    for idx in range(nepoch):
        for train_x, train_y in next_batch(dataset_training_x, dataset_training_y, batch_size=batch_size):
            sess.run([train_step], feed_dict={_x: train_x, _y: train_y, keep_prob: 0.5})

        if idx%printing_epoch == 0 or idx==nepoch-1:
            print "epoch at {}".format(idx+1)
            acc_training = accuracy.eval(feed_dict={_x: dataset_training_x, 
                                                    _y: dataset_training_y, 
                                                    keep_prob:1.0})
            acc_testing = accuracy.eval(feed_dict={_x: dataset_testing_x, 
                                                   _y: dataset_testing_y, 
                                                   keep_prob:1.0})
            print "\tTraining/Testing Accuracy: {}%/{}%, {} seconds".format(\
                round(acc_training,4), round(acc_testing,4), round(time.time()-timestamp_start, 4))

epoch at 1
	Training/Testing Accuracy: 12.6103%/14.6341%, 30.5936 seconds
epoch at 17
	Training/Testing Accuracy: 29.3202%/34.1463%, 405.5412 seconds
epoch at 33
	Training/Testing Accuracy: 33.5755%/29.2683%, 791.2239 seconds
epoch at 49
	Training/Testing Accuracy: 47.1199%/56.0976%, 1150.7532 seconds
epoch at 65
	Training/Testing Accuracy: 56.0976%/56.0976%, 1512.2828 seconds
epoch at 81
	Training/Testing Accuracy: 71.8734%/82.9268%, 1874.5922 seconds
epoch at 97
	Training/Testing Accuracy: 82.9268%/90.2439%, 2236.2906 seconds
epoch at 113
	Training/Testing Accuracy: 90.8147%/92.6829%, 2598.061 seconds
epoch at 129
	Training/Testing Accuracy: 95.0701%/97.561%, 2959.8502 seconds
epoch at 145
	Training/Testing Accuracy: 96.6788%/100.0%, 3323.3047 seconds
epoch at 161
	Training/Testing Accuracy: 98.547%/100.0%, 3686.8538 seconds
epoch at 177
	Training/Testing Accuracy: 98.0799%/100.0%, 4051.7833 seconds
epoch at 193
	Training/Testing Accuracy: 98.9102%/100.0%, 4416.2333 seconds
epoch at 

# 2. LRN after Pooling

In [480]:
tf.reset_default_graph()

_x = tf.placeholder(tf.float32, shape=[None, dim_input, dim_input, channel])
_y = tf.placeholder(tf.float32, shape=[None, dim_output])
keep_prob = tf.placeholder(tf.float32)

conv_pixel = 5
conv_padding = "SAME"

conv1_feature_map = 32
conv1 = get_conv2d(_x, [conv_pixel, conv_pixel, channel, conv1_feature_map])
conv1_pooling = get_max_pooling2x2(conv1)
conv1_norm = tf.nn.lrn(conv1_pooling, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75)
conv1_drop = tf.nn.dropout(conv1_norm, keep_prob=keep_prob)

conv2_feature_map = 64
conv2 = get_conv2d(conv1_drop, [conv_pixel, conv_pixel, conv1_feature_map, conv2_feature_map])
conv2_pooling = get_max_pooling2x2(conv2)
conv2_norm = tf.nn.lrn(conv2_pooling, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75)
conv2_flat = tf.reshape(conv2_norm, [-1, bg_w/2/2*bg_w/2/2*conv2_feature_map])
conv2_drop = tf.nn.dropout(conv2_flat, keep_prob=keep_prob)

full_layer = tf.nn.relu(get_full_layer(conv2_drop, [bg_w/2/2*bg_w/2/2*conv2_feature_map, dim_dense]))
full_drop = tf.nn.dropout(full_layer, keep_prob=keep_prob)

final_layer = get_full_layer(full_drop, [dim_dense, dim_output])

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=final_layer, labels=_y)
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(final_layer, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    timestamp_start = time.time()
    for idx in range(nepoch):
        for train_x, train_y in next_batch(dataset_training_x, dataset_training_y, batch_size=batch_size):
            sess.run([train_step], feed_dict={_x: train_x, _y: train_y, keep_prob: 0.5})

        if idx%printing_epoch == 0 or idx==nepoch-1:
            print "epoch at {}".format(idx+1)
            acc_training = accuracy.eval(feed_dict={_x: dataset_training_x, 
                                                    _y: dataset_training_y, 
                                                    keep_prob:1.0})
            acc_testing = accuracy.eval(feed_dict={_x: dataset_testing_x, 
                                                   _y: dataset_testing_y, 
                                                   keep_prob:1.0})
            print "\tTraining/Testing Accuracy: {}%/{}%, {} seconds".format(\
                round(acc_training,4), round(acc_testing,4), round(time.time()-timestamp_start, 4))

epoch at 1
	Training/Testing Accuracy: 16.2948%/17.0732%, 32.7216 seconds
epoch at 17
	Training/Testing Accuracy: 39.8547%/31.7073%, 430.8147 seconds
epoch at 33
	Training/Testing Accuracy: 59.5745%/63.4146%, 826.3669 seconds
epoch at 49
	Training/Testing Accuracy: 70.109%/80.4878%, 1219.3636 seconds
epoch at 65
	Training/Testing Accuracy: 90.3477%/95.1219%, 1612.0352 seconds
epoch at 81
	Training/Testing Accuracy: 94.9144%/97.561%, 2005.1507 seconds
epoch at 97
	Training/Testing Accuracy: 98.0799%/100.0%, 2396.9534 seconds
epoch at 113
	Training/Testing Accuracy: 99.4292%/100.0%, 2785.9088 seconds
epoch at 129
	Training/Testing Accuracy: 99.9481%/100.0%, 3173.7932 seconds
epoch at 145
	Training/Testing Accuracy: 99.8962%/100.0%, 3560.9024 seconds
epoch at 161
	Training/Testing Accuracy: 99.8443%/100.0%, 3946.9674 seconds
epoch at 177
	Training/Testing Accuracy: 99.9481%/100.0%, 4332.7239 seconds
epoch at 193
	Training/Testing Accuracy: 99.9481%/100.0%, 4717.4697 seconds
epoch at 209
	

# 3. Batch Normalization

In [481]:
tf.reset_default_graph()

_x = tf.placeholder(tf.float32, shape=[None, dim_input, dim_input, channel])
_y = tf.placeholder(tf.float32, shape=[None, dim_output])
is_training = tf.placeholder(tf.bool, name="is_training")
keep_prob = tf.placeholder(tf.float32)

conv_pixel = 5
conv_padding = "SAME"

conv1_feature_map = 32
conv1 = get_conv2d(_x, [conv_pixel, conv_pixel, channel, conv1_feature_map])

#http://ruishu.io/2016/12/27/batchnorm/
conv1_norm = tf.contrib.layers.batch_norm(conv1, center=True, scale=True, is_training=is_training)
conv1_pooling = get_max_pooling2x2(conv1_norm)
conv1_drop = tf.nn.dropout(conv1_pooling, keep_prob=keep_prob)

conv2_feature_map = 64
conv2 = get_conv2d(conv1_drop, [conv_pixel, conv_pixel, conv1_feature_map, conv2_feature_map])

#http://ruishu.io/2016/12/27/batchnorm/
conv2_norm = tf.contrib.layers.batch_norm(conv2, center=True, scale=True, is_training=is_training)
conv2_pooling = get_max_pooling2x2(conv2_norm)
conv2_flat = tf.reshape(conv2_pooling, [-1, bg_w/2/2*bg_w/2/2*conv2_feature_map])
conv2_drop = tf.nn.dropout(conv2_flat, keep_prob=keep_prob)

full_layer = tf.nn.relu(get_full_layer(conv2_drop, [bg_w/2/2*bg_w/2/2*conv2_feature_map, dim_dense]))
full_drop = tf.nn.dropout(full_layer, keep_prob=keep_prob)

final_layer = get_full_layer(full_drop, [dim_dense, dim_output])

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=final_layer, labels=_y)
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(final_layer, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100


init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    timestamp_start = time.time()
    for idx in range(nepoch):
        for train_x, train_y in next_batch(dataset_training_x, dataset_training_y, batch_size=batch_size):
            sess.run([train_step], feed_dict={_x: train_x, _y: train_y, keep_prob: 0.5, is_training: True})

        if idx%printing_epoch == 0 or idx==nepoch-1:
            print "epoch at {}".format(idx+1)
            acc_training = accuracy.eval(feed_dict={_x: dataset_training_x, 
                                                    _y: dataset_training_y, 
                                                    keep_prob:1.0, 
                                                    is_training: False})
            acc_testing = accuracy.eval(feed_dict={_x: dataset_testing_x, 
                                                   _y: dataset_testing_y, 
                                                   keep_prob:1.0, 
                                                   is_training: False})
            print "\tTraining/Testing Accuracy: {}%/{}%, {} seconds".format(\
                round(acc_training,4), round(acc_testing,4), round(time.time()-timestamp_start, 4))

epoch at 1
	Training/Testing Accuracy: 24.2346%/36.5854%, 43.9442 seconds
epoch at 17
	Training/Testing Accuracy: 22.1588%/24.3902%, 574.3647 seconds
epoch at 33
	Training/Testing Accuracy: 16.5542%/17.0732%, 1102.7844 seconds
epoch at 49
	Training/Testing Accuracy: 15.3607%/14.6341%, 1630.9512 seconds
epoch at 65
	Training/Testing Accuracy: 14.6341%/14.6341%, 2157.9161 seconds
epoch at 81
	Training/Testing Accuracy: 14.6341%/14.6341%, 2684.1291 seconds
epoch at 97
	Training/Testing Accuracy: 14.6341%/14.6341%, 3210.3743 seconds
epoch at 113
	Training/Testing Accuracy: 14.6341%/14.6341%, 3736.9272 seconds
epoch at 129
	Training/Testing Accuracy: 14.6341%/14.6341%, 4263.2824 seconds
epoch at 145
	Training/Testing Accuracy: 14.6341%/14.6341%, 4790.0198 seconds
epoch at 161
	Training/Testing Accuracy: 14.6341%/14.6341%, 5316.8031 seconds
epoch at 177
	Training/Testing Accuracy: 15.6201%/17.0732%, 5843.0808 seconds
epoch at 193
	Training/Testing Accuracy: 15.205%/17.0732%, 6369.7467 second

In [None]:
tf.reset_default_graph()

_x = tf.placeholder(tf.float32, shape=[None, dim_input, dim_input, channel])
_y = tf.placeholder(tf.float32, shape=[None, dim_output])
is_training = tf.placeholder(tf.bool, name="is_training")
keep_prob = tf.placeholder(tf.float32)

conv_pixel = 5
conv_padding = "SAME"

conv1_feature_map = 32
conv1 = get_conv2d(_x, [conv_pixel, conv_pixel, channel, conv1_feature_map])

#http://ruishu.io/2016/12/27/batchnorm/

conv1_pooling = get_max_pooling2x2(conv1)
conv1_norm = tf.contrib.layers.batch_norm(conv1_pooling, center=True, scale=True, is_training=is_training)
conv1_drop = tf.nn.dropout(conv1_norm, keep_prob=keep_prob)

conv2_feature_map = 64
conv2 = get_conv2d(conv1_drop, [conv_pixel, conv_pixel, conv1_feature_map, conv2_feature_map])

#http://ruishu.io/2016/12/27/batchnorm/

conv2_pooling = get_max_pooling2x2(conv2)
conv2_norm = tf.contrib.layers.batch_norm(conv2_pooling, center=True, scale=True, is_training=is_training)
conv2_flat = tf.reshape(conv2_norm, [-1, bg_w/2/2*bg_w/2/2*conv2_feature_map])
conv2_drop = tf.nn.dropout(conv2_flat, keep_prob=keep_prob)

full_layer = tf.nn.relu(get_full_layer(conv2_drop, [bg_w/2/2*bg_w/2/2*conv2_feature_map, dim_dense]))
full_drop = tf.nn.dropout(full_layer, keep_prob=keep_prob)

final_layer = get_full_layer(full_drop, [dim_dense, dim_output])

bn_final_layer = tf.contrib.layers.batch_norm(final_layer, center=True, scale=True, is_training=is_training)

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=bn_final_layer, labels=_y)
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(final_layer, 1), tf.argmax(_y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100


init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    timestamp_start = time.time()
    for idx in range(nepoch):
        for train_x, train_y in next_batch(dataset_training_x, dataset_training_y, batch_size=batch_size):
            sess.run([train_step], feed_dict={_x: train_x, _y: train_y, keep_prob: 0.5, is_training: True})

        if idx%printing_epoch == 0 or idx==nepoch-1:
            print "epoch at {}".format(idx+1)
            acc_training = accuracy.eval(feed_dict={_x: dataset_training_x, 
                                                    _y: dataset_training_y, 
                                                    keep_prob:1.0, 
                                                    is_training: False})
            acc_testing = accuracy.eval(feed_dict={_x: dataset_testing_x, 
                                                   _y: dataset_testing_y, 
                                                   keep_prob:1.0, 
                                                   is_training: False})
            print "\tTraining/Testing Accuracy: {}%/{}%, {} seconds".format(\
                round(acc_training,4), round(acc_testing,4), round(time.time()-timestamp_start, 4))

epoch at 1
	Training/Testing Accuracy: 12.1951%/12.1951%, 73.3212 seconds
epoch at 17
	Training/Testing Accuracy: 20.9652%/19.5122%, 815.3655 seconds
epoch at 33
	Training/Testing Accuracy: 60.9756%/63.4146%, 1549.7709 seconds
