In [1]:
import os
import sys
import tarfile
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from six.moves import urllib

sess = tf.Session()

In [2]:
batch_size = 128
output_every = 50
generations = 20000
eval_every = 500
image_height = 32
image_width = 32
crop_height = 24
crop_width = 24
num_channels = 3
num_targets = 10

data_dir = 'CIFAR10/'
extract_folder = 'cifar-10-batches-bin'
# root_dir = os.getcwd()

In [3]:
learning_rate = 0.1
lr_decay = 0.9
num_gens_to_wait = 250

In [4]:
image_vec_length = image_height * image_width * num_channels
record_length = 1+ image_vec_length

In [5]:
# 用队列读取文件的方式

# 1. tf.train.string_input_producer 建立文件队列
#   filename_queue = tf.train.string_input_producer(filenames, num_epochs, shuffle)

#  2. tf.reader读取数据
#   tf.WholeFileReader() 直接读取整个文件
#   tf.FixedLengthRecordReader 二进制部分读取文件
#   reader = tf.FixedLengthRecordReader(record_bytes = result._record_bytes)
#   result.key, value = reader.read(filename_queue)

# 3. tf.train.start_queue_runners启动队列

def read_cifar_files(filename_queue, distort_image=True):
    
    reader = tf.FixedLengthRecordReader(record_bytes=record_length)
    key, record_string = reader.read(filename_queue)
    # tf.dtypes.DType（tf.uint8→8-bit unsigned integer.）
    record_bytes = tf.decode_raw(record_string, tf.uint8)
    print(record_bytes)
    
    #extract label
    #tf.slice: https://www.jianshu.com/p/71e6ef6c121b
    image_label = tf.cast(tf.slice(record_bytes,[0],[1]), tf.int32)
    print(image_label)
    
    #extract image
    image_extracted = tf.reshape(tf.slice(record_bytes,[1],[image_vec_length]), [num_channels, image_height, image_width])
    print(image_extracted)
    
    #reshape image
    image_unit8image = tf.transpose(image_extracted, [1,2,0])
    print(image_unit8image)
    reshaped_image = tf.cast(image_unit8image, tf.float32)
    print(reshaped_image)
    
    #randomly crop image
    # tf.image.resize_with_crop_or_pad(image, target_height, target_width)
    #Resizes an image to a target width and height by either centrally cropping the image or padding it evenly with zeros.
    final_image = tf.image.resize_image_with_crop_or_pad(reshaped_image, crop_width, crop_height)
    if distort_image:
        # tf.image.random_flip_left_right(image, seed=None)
        # Randomly flip an image horizontally (left to right)
        final_image = tf.image.random_flip_left_right(final_image)
        # tf.image.random_brightness(image, max_delta, seed=None)
        #Adjust the brightness of images by a random factor.
        final_image = tf.image.random_brightness(final_image, max_delta=63)
        # Adjust the contrast of an image or images by a random factor.
        # tf.image.random_contrast(image, lower, upper, seed=None)
        final_image = tf.image.random_contrast(final_image, lower=0.2, upper=1.8)
    
    #normalize whitening
    # tf.image.per_image_standardization(image)
    # Linearly scales each image in image to have mean 0 and variance 1.
    final_image = tf.image.per_image_standardization(final_image)
        
    return (final_image, image_label)

In [6]:
# tf.train.shuffle_batch
# tf.train.shuffle_batch() 将队列中数据打乱后再读取出来．
# 函数是先将队列中数据打乱，然后再从队列里读取出来，因此队列中剩下的数据也是乱序的．
# tensors：排列的张量或词典．
# batch_size：从队列中提取新的批量大小．
# capacity：队列中元素的最大数量．
# min_after_dequeue：出队后队列中元素的最小数量，用于确保元素的混合级别．
# num_threads：线程数量．
# seed：队列内随机乱序的种子值．
# enqueue_many：tensors中的张量是否都是一个例子．
# shapes：每个示例的形状．(可选项）
# allow_smaller_final_batch：为True时，若队列中没有足够的项目，则允许最终批次更小．(可选项）
# shared_name：如果设置，则队列将在多个会话中以给定名称共享．(可选项）
# name：操作的名称．(可选项）

def input_pipeline(batch_size, train_logical=True):
    if train_logical:
        files = [os.path.join(data_dir, extract_folder, 'data_batch_{}.bin'.format(i))  for i in range(1,6)]
    else:
        files = [os.path.join(data_dir, extract_folder,'test_batch_{}.bin')]
    
    filename_queue = tf.train.string_input_producer(files)
    image,label = read_cifar_files(filename_queue)
    
    # min_after_dequeue: (threads+error margin)*batch_size
    min_after_dequeue =100
    capacity = min_after_dequeue + 3*batch_size
    
    example_batch, label_batch = tf.train.shuffle_batch([image,label], batch_size, capacity, min_after_dequeue)
    return (example_batch,label_batch)
    

In [7]:
def cifar_cnn_model(input_images, batch_size, train_logical=True):
    
    def truncated_normal_var(name, shape, dtype):
        return (tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.truncated_normal_initializer(stddev=0.5)))
    def zero_var(name, shape, dtype):
        return (tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0)))

    # First Conv Layer
    with tf.variable_scope('conv1') as scope:
        conv1_kernel = truncated_normal_var(name='conv_kernel1', shape=[5,5,3,64], dtype=tf.float32)
        conv1 = tf.nn.conv2d(input_images, conv1_kernel, [1,1,1,1], padding='SAME')
        conv_bias = zero_var(name='conv1_bias', shape=[64],dtype=tf.float32)
        conv1_add_bias = tf.nn.bias_add(conv1, conv_bias)
        relu_conv1 = tf.nn.relu(conv1_add_bias)
        #max pooling
        pool1 = tf.nn.max_pool(relu_conv1, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME', name='pool_layer1')
        #normalization
        norm1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')
        print(norm1)
        
    #Second Conv Layer
    with tf.variable_scope('conv2') as scope:
        conv2_kernel = truncated_normal_var(name='conv_kernel2', shape=[5,5,64,64], dtype=tf.float32)
        conv2 = tf.nn.conv2d(norm1, conv2_kernel, [1,1,1,1], padding='SAME')
        conv2_bias = zero_var(name='conv2_bias', shape=[64],dtype=tf.float32)
        relu_conv2 = tf.nn.bias_add(conv2, conv2_bias)
        #max pooling
        pool2 = tf.nn.max_pool(relu_conv2, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME', name='pool_layer2')
        norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0,alpha=1e-3, beta=0.75, name='norm2')
        reshaped_output = tf.reshape(norm2, [batch_size,-1])
        reshaped_dim = reshaped_output.get_shape()[1].value
        print(reshaped_dim)
        
    #First Fully Connected Layer
    with tf.variable_scope('full1') as scope:
        full1_weight1 = truncated_normal_var(name='full_mult1', shape=[reshaped_dim,384], dtype=tf.float32)
        full1_bias1 = zero_var(name='full_bias1', shape=[384],dtype=tf.float32)
        full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output,full1_weight1),full1_bias1))
        print(full_layer1)

    #Second Fully Connected Layer
    with tf.variable_scope('full2') as scope:
        full1_weight2 = truncated_normal_var(name='full_mult2', shape=[384,192], dtype=tf.float32)
        full1_bias2 = zero_var(name='full_bias2', shape=[192],dtype=tf.float32)
        full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1,full1_weight2),full1_bias2))
        print(full_layer2)
        
    #Final Fully Connected Layer
    with tf.variable_scope('full3') as scope:
        full1_weight3 = truncated_normal_var(name='full_mult3', shape=[192,num_targets], dtype=tf.float32)
        full1_bias3 = zero_var(name='full_bias3', shape=[num_targets],dtype=tf.float32)
        final_output = tf.nn.relu(tf.add(tf.matmul(full_layer2,full1_weight3),full1_bias3))
        print(final_output)
    
    return (final_output)

In [8]:
def cifar_loss(logits, targets):
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels =targets, logits=logits)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    return (cross_entropy_mean)

In [9]:
def train_step(loss_value, generation_num):
    model_learning_rate = tf.train.exponential_decay(learning_rate, generation_num, num_gens_to_wait, lr_decay, staircase=True)
    my_optimizer = tf.train.GradientDescentOptimizer(model_learning_rate)
    train_step = my_optimizer.minimize(loss_value)
    return (train_step)

In [10]:
def accuracy_of_batch(logits, targets):
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    batch_predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
    predicted_correctly = tf.equal(batch_predictions, targets)
    accuracy = tf.reduce_mean(tf.cast(predicted_correctly, tf.float32))
    return (accuracy)

In [11]:
images, targets = input_pipeline(batch_size, train_logical=True)
test_images, test_targets = input_pipeline(batch_size, train_logical=True)

Tensor("DecodeRaw:0", shape=(?,), dtype=uint8)
Tensor("Cast:0", shape=(1,), dtype=int32)
Tensor("Reshape:0", shape=(3, 32, 32), dtype=uint8)
Tensor("transpose:0", shape=(32, 32, 3), dtype=uint8)
Tensor("Cast_1:0", shape=(32, 32, 3), dtype=float32)
Tensor("DecodeRaw_1:0", shape=(?,), dtype=uint8)
Tensor("Cast_4:0", shape=(1,), dtype=int32)
Tensor("Reshape_2:0", shape=(3, 32, 32), dtype=uint8)
Tensor("transpose_1:0", shape=(32, 32, 3), dtype=uint8)
Tensor("Cast_5:0", shape=(32, 32, 3), dtype=float32)


In [12]:
with tf.variable_scope('model_definition') as scope:
    model_output = cifar_cnn_model(images, batch_size)
    scope.reuse_variables()
    test_output = cifar_cnn_model(test_images, batch_size)

Tensor("model_definition/conv1/norm1:0", shape=(128, 12, 12, 64), dtype=float32)
2304
Tensor("model_definition/full1/Relu:0", shape=(128, 384), dtype=float32)
Tensor("model_definition/full2/Relu:0", shape=(128, 192), dtype=float32)
Tensor("model_definition/full3/Relu:0", shape=(128, 10), dtype=float32)
Tensor("model_definition/conv1_1/norm1:0", shape=(128, 12, 12, 64), dtype=float32)
2304
Tensor("model_definition/full1_1/Relu:0", shape=(128, 384), dtype=float32)
Tensor("model_definition/full2_1/Relu:0", shape=(128, 192), dtype=float32)
Tensor("model_definition/full3_1/Relu:0", shape=(128, 10), dtype=float32)


In [13]:
loss = cifar_loss(model_output, targets)
accuracy = accuracy_of_batch(test_output, test_targets)
generation_num = tf.Variable(0, trainable=False)
train_op = train_step(loss, generation_num)

In [14]:
init = tf.global_variables_initializer()
sess.run(init)
tf.train.start_queue_runners(sess=sess)

[<Thread(Thread-4, started daemon 123145600851968)>,
 <Thread(Thread-5, started daemon 123145606107136)>,
 <Thread(Thread-6, started daemon 123145611362304)>,
 <Thread(Thread-7, started daemon 123145616617472)>]

In [None]:
train_loss = []
test_accuracy = []
for i in range(generations):
    _, loss_value = sess.run([train_op, loss])
    if (i+1)% output_every == 0:
        train_loss.append(loss_value)
        output = 'Generation {}: Loss = {:5f}'.format((i+1), loss_value)
        print(output)
    if(i+1)%eval_every == 0:
        [temp_accuracy]=sess.run([accuracy])
        test_accuracy.append(temp_accuracy)
        acc_output = '--- Test Accuracy ={:.2f}% '.format(100.*temp_accuracy)
        print(acc_output)

Generation 50: Loss = 2.303088
Generation 100: Loss = 2.302793
Generation 150: Loss = 2.302259
Generation 200: Loss = 2.302094
Generation 250: Loss = 2.303295
Generation 300: Loss = 2.303544
Generation 350: Loss = 2.302510
Generation 400: Loss = 2.302585
Generation 450: Loss = 2.302585
Generation 500: Loss = 2.302585
--- Test Accuracy =7.81 
Generation 550: Loss = 2.302585
Generation 600: Loss = 2.302585
Generation 650: Loss = 2.302585
Generation 700: Loss = 2.302585
Generation 750: Loss = 2.302585
Generation 800: Loss = 2.302585
Generation 850: Loss = 2.302585
Generation 900: Loss = 2.302585
Generation 950: Loss = 2.302585
Generation 1000: Loss = 2.302585
--- Test Accuracy =12.50 
Generation 1050: Loss = 2.302585
Generation 1100: Loss = 2.302585
Generation 1150: Loss = 2.302585
Generation 1200: Loss = 2.302585
Generation 1250: Loss = 2.302585
Generation 1300: Loss = 2.302585
Generation 1350: Loss = 2.302585
Generation 1400: Loss = 2.302585
Generation 1450: Loss = 2.302585
Generation 1