In [1]:
import os
import tensorflow as tf
import numpy as np
import time
import vgg_preprocess

class Vgg16:
    def __init__(self, images):
        self.parameters = [] #在类的初始化时加入全局列表，将所需共享的参数加载到复用类中
        self.images = images
        self.conv_layers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc8) #输出每个属于各个类别的概率值

    def saver(self):
        return tf.train.Saver()

    def load_weights(self, weight_file, sess):
        weights = np.load(weight_file)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            if i not in [30, 31]: #排除不需要载入的层（只保留最后一个全连阶层fc8）
                sess.run(self.parameters[i].assign(weights[k]))
        print("-----weighes loaded-----")

    def conv(self, name, input_data, out_channel, trainable=False):
        in_channel = input_data.get_shape()[-1]
        with tf.variable_scope(name):
            kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel], dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable("biases", [out_channel], dtype=tf.float32, trainable=trainable)
            conv_res = tf.nn.conv2d(input_data, kernel, [1,1,1,1], padding="SAME")
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name=name)
        self.parameters += [kernel, biases] #将卷积层定义的参数（kernel, biases）加入列表
        return out

    def fullconn(self, name, input_data, out_channel, trainable=True):
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            size = shape[-3] * shape[-2] * shape[-1] #获得输入数据各个维度的维数
        else:
            size = shape[1]
        input_data_flat = tf.reshape(input_data, [-1,size])
        with tf.variable_scope(name):
            weights = tf.get_variable(name="weights", shape=[size, out_channel], dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel], dtype=tf.float32, trainable=trainable)
            res = tf.matmul(input_data_flat, weights)
            out = tf.nn.relu(tf.nn.bias_add(res, biases))
        self.parameters += [weights, biases] #将全连接层定义的参数（weights, biases）加入列表
        return out

    def conv_layers(self):
        #conv1
        self.conv1_1 = self.conv("conv1re_1", self.images, 64, trainable=False)
        self.conv1_2 = self.conv("conv1_2", self.conv1_1, 64, trainable=False)
        self.pool1 = self.maxpool("poolre1", self.conv1_2)
        #conv2
        self.conv2_1 = self.conv("conv2_1", self.pool1, 128, trainable=False)
        self.conv2_2 = self.conv("convwe2_2", self.conv2_1, 128, trainable=False)
        self.pool2 = self.maxpool("pool2", self.conv2_2)
        #conv3
        self.conv3_1 = self.conv("conv3_1", self.pool2, 256, trainable=False)
        self.conv3_2 = self.conv("convrwe3_2", self.conv3_1, 256, trainable=False)
        self.conv3_3 = self.conv("convrwe3_3", self.conv3_2, 256, trainable=False)
        self.pool3 = self.maxpool("poolre3", self.conv3_3)
        #conv4
        self.conv4_1 = self.conv("conv4_1", self.pool3, 512, trainable=False)
        self.conv4_2 = self.conv("convrwe4_2", self.conv4_1, 512, trainable=False)
        self.conv4_3 = self.conv("convrwe4_3", self.conv4_2, 512, trainable=False)
        self.pool4 = self.maxpool("pool4", self.conv4_3)
        #conv5
        self.conv5_1 = self.conv("conv5_1", self.pool4, 512, trainable=False)
        self.conv5_2 = self.conv("convrwe5_2", self.conv5_1, 512, trainable=False)
        self.conv5_3 = self.conv("conv5_3", self.conv5_2, 512, trainable=False)
        self.pool5 = self.maxpool("poolrwe5", self.conv5_3)

    n_class = 2
    def fc_layers(self):
        global n_class
        self.fc6 = self.fullconn("fc1", self.pool5, 4096, trainable=False)
        self.fc7 = self.fullconn("fc2", self.fc6, 4096, trainable=False)
        self.fc8 = self.fullconn("fc3", self.fc7, self.n_class, trainable=True)

    def maxpool(self, name, input_data):
        out = tf.nn.max_pool(input_data, [1,2,2,1], [1,2,2,1], padding="SAME", name=name)
        return out

def get_file(file_dir):
    '''
    Args:
        file_dir: file directory
    Returns:
        list of images and labels
    '''
    cats = []
    dogs = []
    image_list = []
    label_list = []

    for root, sub_folders, files in os.walk(file_dir): #遍历file_dir下的所有目录和文件
        for name in files: #images中保存的是file_dir下各级子文件夹下的文件(完整路径)
            image_list.append(os.path.join(root, name))
            category = name.split(sep='.') #文件名按.分割
            if category[0] == 'cat': #如果是cat，标签为0，dog为1
                cats.append(os.path.join(root, name))
                label_list.append(0)
            elif category[0] == 'dog':
                dogs.append(os.path.join(root, name))
                label_list.append(1)
            else:
                print("unrecognized category")
        #for file in sub_folders: #temp中保存的是file_dir下各级子文件夹(完整路径)
        #    temp.append(os.path.join(root, file))
    print('There are %d cats\nThere are %d dogs' %(len(cats), len(dogs))) #打印猫和狗的数量
    print("image_list is")
    #print(image_list)
    print("label_list is")
    #print(label_list)
    label_list = [int(i) for i in label_list] #将label_list中的数据类型转为int型
    #print("label_list is")
    #print(label_list)
    
    return image_list, label_list

#get_file(r"E:\shiyan")
#get_file(r"E:\AI-learn\kaggle\train")

def get_batch(image_list, label_list, image_width, image_height, batch_size, capacity): #通过读取列表来批量载入图片以及对应标签
    image = tf.cast(image_list, tf.string) #将image_list的数据格式转化成string
    label = tf.cast(label_list, tf.int32) #将label_list的数据格式转化成int32
    input_queue = tf.train.slice_input_producer([image, label]) #每次从一个tensor列表中按顺序或者随机抽取出一个tensor放入文件名队列
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0]) #读取图片
    
    image = tf.image.decode_jpeg(image_contents, channels=3) #解码JPEG格式图像
    image = vgg_preprocess.preprocess_for_train(image, 224, 224)
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    
    return image_batch, label_batch

def onehot(labels):
    n_sample = len(labels)
    n_class = max(labels)+1
    onehot_labels = np.zeros((n_sample, n_class)) #n_sample行, n_class列
    onehot_labels[np.arange(n_sample), labels] = 1 #对应的行,对应的列赋为1,很巧妙的方法

    return onehot_labels

tf.reset_default_graph()

start_time = time.time() #计算每次迭代的时间
batch_size =32 #批处理样本的大小
capacity = 256 #内存中存储的最大数据容量
means = [123.68, 116.799, 103.939] #VGG训练时图像预处理所减均值(RGB三通道)

image_list, label_list = get_file(r"E:\AI-learn\kaggle\train") #获取图像列表和标签列表
#image_list, label_list = get_file(r"E:\AI-learn\train") #获取图像列表和标签列表
image_batch, label_batch = get_batch(image_list, label_list, 224, 224, batch_size, capacity) #通过读取列表来批量载入图片及标签
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = tf.placeholder(tf.int32, [None, 2]) #对"猫"和狗两个类别进行判定

vgg = Vgg16(x)

fc8_finetuining = vgg.probs
loss_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc8_finetuining, labels=y)) #损失函数
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss_function)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
vgg.load_weights(r"E:\AI-learn\vgg16_weights.npz", sess) #通过npz格式的文件获取Vgg的相应权重参数,从而实现权重复用
saver = tf.train.Saver()

#启动线程
coord = tf.train.Coordinator() #使用协调器来管理线程
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

epoch_start_time = time.time()
for i in range(1000):
    images, labels = sess.run([image_batch, label_batch])
    labels = onehot(labels) #用one-hot形式对标签进行编码    
    sess.run(optimizer, feed_dict={x:images, y:labels})
    loss = sess.run(loss_function, feed_dict={x:images, y:labels})
    print("Epoch %d, the loss is %f" %(i, loss))
    epoch_end_time = time.time()
    print("Current epoch takes: ", (epoch_end_time-epoch_start_time))
    epoch_start_time = epoch_end_time

    if (i+1)%500 == 0:
        saver.save(sess, os.path.join("./model/", "epoch_{:06d}.ckpt".format(i)))
    print("-------Epoch %d is finished.-------" %i)

saver.save(sess, "./model/")
print("Optimization Finished.")

duration = time.time() - start_time
print("The train process takes:", "{:.2f}".format(duration))

#关闭线程
coord.request_stop() #通知其他线程关闭
coord.join(threads) #等待其他线程结束,其他所有线程关闭之后,这一函数才能返回



There are 12500 cats
There are 12500 dogs
image_list is
label_list is
-----weighes loaded-----
Epoch 0, the loss is 0.683989
Current epoch takes:  49.4779999256134
-------Epoch 0 is finished.-------
Epoch 1, the loss is 0.715494
Current epoch takes:  27.31000018119812
-------Epoch 1 is finished.-------
Epoch 2, the loss is 0.716406
Current epoch takes:  26.90499997138977
-------Epoch 2 is finished.-------
Epoch 3, the loss is 0.724763
Current epoch takes:  27.621000051498413
-------Epoch 3 is finished.-------
Epoch 4, the loss is 0.669271
Current epoch takes:  26.84999990463257
-------Epoch 4 is finished.-------
Epoch 5, the loss is 0.640444
Current epoch takes:  27.134999990463257
-------Epoch 5 is finished.-------
Epoch 6, the loss is 0.666984
Current epoch takes:  27.869999885559082
-------Epoch 6 is finished.-------
Epoch 7, the loss is 0.674446
Current epoch takes:  27.633000135421753
-------Epoch 7 is finished.-------
Epoch 8, the loss is 0.697619
Current epoch takes:  27.6340000