In [None]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
import pandas as pd
import vgg16
import utils
import pickle
from PIL import Image
import os 
from sklearn.model_selection import StratifiedShuffleSplit

import csv

## 数据读取，调整大小

In [None]:

f=pd.read_csv('.\TrainSet\TrainSetLabels.csv')
ne=f.sample(frac=0.3) # 分配比例 

In [None]:


batch=()
labels=[]
cout=0
for i in ne.iterrows():
    img=Image.open(r'.\TrainSet\\' +i[1]['Name'][1:-1],'r')
    if img.mode!='RGB':
        img = img.convert("RGB")
    img=img.resize((224,224))
    matrix = np.array(img).reshape((1,224,224,3))
    batch=batch + (matrix,)
    labels.append(i[1]['Label'])
    cout=cout+1
    if cout==100:
        print('d')
        cout=0
batch = np.concatenate(batch, 0)
print(batch.shape)
print(len(labels))

## 将图片转化为特征向量

In [None]:
codes=None

os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
with tf.device('/cpu:0'):
    with tf.Session() as sess:
    
        vgg = vgg16.Vgg16()
        input_ = tf.placeholder("float", [None,224, 224, 3])
        with tf.name_scope("content_vgg"):
            vgg.build(input_)
        turn=len(batch)//64
        for i in range(turn+1):
            if i==turn:
                feed_dict = {input_: batch[turn*64:]}
            else:
                feed_dict = {input_: batch[i*64:i*64+64]}
            codes_batch = sess.run(vgg.relu6, feed_dict=feed_dict)
            if codes is None:
                    codes = codes_batch
            else:
                    codes = np.concatenate((codes, codes_batch))
            print(i)

### 保存特征值

In [None]:
with open('codes', 'w') as f:
    codes.tofile(f)
    

with open('labels', 'w') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)

### 读取特征值

In [None]:
labels = []
with open('labels','r') as f:
    label = csv.reader(f, delimiter = '\n')
    for i in label:
        if i != []:
            labels.append(int(i[0]))

with open('codes','r') as f:
    codes = np.fromfile(f, dtype = np.int32)
    step = 4096
    b = [codes[i : i + step] for i in range(0, len(codes), step)]
    codes = np.array(b)

## 划分训练集

In [None]:
lb = LabelBinarizer()
lb.fit(labels)

labels_vecs = lb.transform(labels)

ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)

train_idx, val_idx = next(ss.split(codes, labels))

half_val_len = int(len(val_idx)/2)
val_idx, test_idx = val_idx[:half_val_len], val_idx[half_val_len:]

train_x, train_y = codes[train_idx], labels_vecs[train_idx]
val_x, val_y = codes[val_idx], labels_vecs[val_idx]
test_x, test_y = codes[test_idx], labels_vecs[test_idx]

print("Train shapes (x, y):", train_x.shape, train_y.shape)
print("Validation shapes (x, y):", val_x.shape, val_y.shape)
print("Test shapes (x, y):", test_x.shape, test_y.shape)

## 添加全连接网络

In [None]:
# 输入数据的维度
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]])
# 标签数据的维度
labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])

# 加入一个256维的全连接的层  （这里可能要改）
#fc = tf.contrib.layers.fully_connected(inputs_, 4096)

# 加入一个257维的全连接层
#logits = tf.contrib.layers.fully_connected(fc, labels_vecs.shape[1], activation_fn=None)
logits = tf.contrib.layers.fully_connected(inputs_, labels_vecs.shape[1], activation_fn=None)

# 计算cross entropy值
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels_, logits=logits)

# 计算损失函数
cost = tf.reduce_mean(cross_entropy)

# 采用用得最广泛的AdamOptimizer优化器
optimizer = tf.train.AdamOptimizer().minimize(cost)

# 得到最后的预测分布
predicted = tf.nn.softmax(logits)

# 计算准确度
correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## 训练添加的全连接层网络

In [None]:
def get_batches(x, y, n_batches=10):
    """ 这是一个生成器函数，按照n_batches的大小将数据划分了小块 """
    batch_size = len(x)//n_batches
    
    for ii in range(0, n_batches*batch_size, batch_size):
        # 如果不是最后一个batch，那么这个batch中应该有batch_size个数据
        if ii != (n_batches-1)*batch_size:
            X, Y = x[ii: ii+batch_size], y[ii: ii+batch_size] 
        # 否则的话，那剩余的不够batch_size的数据都凑入到一个batch中
        else:
            X, Y = x[ii:], y[ii:]
        # 生成器语法，返回X和Y
        yield X, Y
        

In [71]:
# 运行多少轮次
epochs = 100
# 统计训练效果的频率
iteration = 0
# 保存模型的保存器
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        for x, y in get_batches(train_x, train_y):
            feed = {inputs_: x,
                    labels_: y}
            # 训练模型
            loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            print("Epoch: {}/{}".format(e+1, epochs),
                  "Iteration: {}".format(iteration),
                  "Training loss: {:.5f}".format(loss))
            iteration += 1
            
            if loss == 0:
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Acc: {:.4f}".format(val_acc))
                break
            
            if iteration % 5 == 0:
                feed = {inputs_: val_x,
                        labels_: val_y}
                val_acc = sess.run(accuracy, feed_dict=feed)
                # 输出用验证机验证训练进度
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Acc: {:.4f}".format(val_acc))
    # 保存模型
    saver.save(sess, "checkpoints/cif.ckpt")

Epoch: 1/100 Iteration: 0 Training loss: 1617180416.00000
Epoch: 1/100 Iteration: 1 Training loss: 4783244800.00000
Epoch: 1/100 Iteration: 2 Training loss: 4730234880.00000
Epoch: 1/100 Iteration: 3 Training loss: 5579386368.00000
Epoch: 1/100 Iteration: 4 Training loss: 6286001664.00000
Epoch: 0/100 Iteration: 5 Validation Acc: 0.0690
Epoch: 1/100 Iteration: 5 Training loss: 6766119936.00000
Epoch: 1/100 Iteration: 6 Training loss: 7732555776.00000
Epoch: 1/100 Iteration: 7 Training loss: 8106216448.00000
Epoch: 1/100 Iteration: 8 Training loss: 7714605568.00000
Epoch: 1/100 Iteration: 9 Training loss: 7498455040.00000
Epoch: 0/100 Iteration: 10 Validation Acc: 0.0975
Epoch: 2/100 Iteration: 10 Training loss: 7997742592.00000
Epoch: 2/100 Iteration: 11 Training loss: 7059925504.00000
Epoch: 2/100 Iteration: 12 Training loss: 7893856256.00000
Epoch: 2/100 Iteration: 13 Training loss: 6838800384.00000
Epoch: 2/100 Iteration: 14 Training loss: 6855182848.00000
Epoch: 1/100 Iteration: 15

Epoch: 13/100 Iteration: 124 Training loss: 18208.51562
Epoch: 12/100 Iteration: 125 Validation Acc: 0.1225
Epoch: 13/100 Iteration: 125 Training loss: 23784.29102
Epoch: 13/100 Iteration: 126 Training loss: 31415.44336
Epoch: 13/100 Iteration: 127 Training loss: 60567.23828
Epoch: 13/100 Iteration: 128 Training loss: 30341.66016
Epoch: 13/100 Iteration: 129 Training loss: 22924.78711
Epoch: 12/100 Iteration: 130 Validation Acc: 0.1189
Epoch: 14/100 Iteration: 130 Training loss: 69264.41406
Epoch: 14/100 Iteration: 131 Training loss: 48121.96484
Epoch: 14/100 Iteration: 132 Training loss: 32528.59766
Epoch: 14/100 Iteration: 133 Training loss: 29210.36328
Epoch: 14/100 Iteration: 134 Training loss: 20086.38867
Epoch: 13/100 Iteration: 135 Validation Acc: 0.1177
Epoch: 14/100 Iteration: 135 Training loss: 12030.87500
Epoch: 14/100 Iteration: 136 Training loss: 27092.88477
Epoch: 14/100 Iteration: 137 Training loss: 46141.78125
Epoch: 14/100 Iteration: 138 Training loss: 16130.53906
Epoc

Epoch: 25/100 Iteration: 249 Training loss: 2690.36450
Epoch: 24/100 Iteration: 250 Validation Acc: 0.1225
Epoch: 26/100 Iteration: 250 Training loss: 9979.15430
Epoch: 26/100 Iteration: 251 Training loss: 4579.48291
Epoch: 26/100 Iteration: 252 Training loss: 2114.94067
Epoch: 26/100 Iteration: 253 Training loss: 2971.35107
Epoch: 26/100 Iteration: 254 Training loss: 6539.33838
Epoch: 25/100 Iteration: 255 Validation Acc: 0.1177
Epoch: 26/100 Iteration: 255 Training loss: 6629.78125
Epoch: 26/100 Iteration: 256 Training loss: 4864.03369
Epoch: 26/100 Iteration: 257 Training loss: 2137.80127
Epoch: 26/100 Iteration: 258 Training loss: 3850.91895
Epoch: 26/100 Iteration: 259 Training loss: 1680.93066
Epoch: 25/100 Iteration: 260 Validation Acc: 0.1213
Epoch: 27/100 Iteration: 260 Training loss: 8966.18945
Epoch: 27/100 Iteration: 261 Training loss: 9507.51758
Epoch: 27/100 Iteration: 262 Training loss: 1963.27161
Epoch: 27/100 Iteration: 263 Training loss: 4198.17432
Epoch: 27/100 Itera

Epoch: 37/100 Iteration: 375 Validation Acc: 0.1189
Epoch: 38/100 Iteration: 375 Training loss: 2466.60278
Epoch: 38/100 Iteration: 376 Training loss: 2073.40283
Epoch: 38/100 Iteration: 377 Training loss: 662.21375
Epoch: 38/100 Iteration: 378 Training loss: 1795.15405
Epoch: 38/100 Iteration: 379 Training loss: 2680.69312
Epoch: 37/100 Iteration: 380 Validation Acc: 0.1141
Epoch: 39/100 Iteration: 380 Training loss: 6014.19678
Epoch: 39/100 Iteration: 381 Training loss: 2382.56201
Epoch: 39/100 Iteration: 382 Training loss: 313.62604
Epoch: 39/100 Iteration: 383 Training loss: 1749.38965
Epoch: 39/100 Iteration: 384 Training loss: 433.05817
Epoch: 38/100 Iteration: 385 Validation Acc: 0.1141
Epoch: 39/100 Iteration: 385 Training loss: 1252.02881
Epoch: 39/100 Iteration: 386 Training loss: 1989.66589
Epoch: 39/100 Iteration: 387 Training loss: 789.07922
Epoch: 39/100 Iteration: 388 Training loss: 978.88782
Epoch: 39/100 Iteration: 389 Training loss: 1111.53113
Epoch: 38/100 Iteration:

KeyboardInterrupt: 

## 测试准确率

In [None]:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    
    feed = {inputs_: test_x,
            labels_: test_y}
    test_acc = sess.run(accuracy, feed_dict=feed)
    print("Test accuracy: {:.4f}".format(test_acc))