In [None]:
#from __future__ import absolute_import
#from __future__ import division
#from __future__ import print_function
import os
import cv2
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

### 导入数据,并设置加载哪个属性的数据用于训练

In [None]:
# Read the training annotations. The CSV has no header row, so the three
# columns are named here: image path, attribute name, label string.
train_columns = ['image_id', 'class', 'label']
df_train = pd.read_csv('../train/Annotations/train.csv', header=None)
df_train.columns = train_columns
df_train.head()

In [None]:
# Attribute tasks that can be trained; each entry is a value of the 'class'
# column in the annotation table.
classes = [
    'collar_design_labels',
    'neckline_design_labels',
    'skirt_length_labels',
    'sleeve_length_labels',
    'neck_design_labels',
    'coat_length_labels',
    'lapel_design_labels',
    'pant_length_labels',
]

# ------------------- settings to edit for each run -------------------
# Attribute whose samples are loaded for training.
cur_class = classes[0]

# Side length (in pixels) of the square images fed to the model.
# Note: some pretrained models use 224*224 input, others require 299*299.
width = 299
# ---------------------------------------------------------------------

In [None]:
# Copy the rows of the selected attribute into their own table, renumbered
# from 0 so they can be addressed by position below.
df_load = df_train[df_train['class'] == cur_class].copy()
df_load = df_load.reset_index(drop=True)

# Report: selected attribute, number of classes (length of the first label
# string) and number of samples.
summary = "选择的属性为:{0}, 种类的为:{1},样本数: {2}"
print(summary.format(cur_class, len(df_load['label'][0]), len(df_load)))
df_load.head()

In [None]:
# Show the row whose index label is 2 (quick look at one sample).
df_load.loc[df_load.index == 2]

### 加载表格图像并resize到内存

In [None]:
# Load every training image into memory, resized to (width, width), and build
# the one-hot label matrix.
# NOTE: the resize target must match the input size of the model built later.
# Images stay in the channel order returned by cv2.imread.
n = len(df_load)
# A label is a string with one character per class; 'y' marks the true class.
n_class = len(df_load['label'][0])
X = np.zeros((n, width, width, 3), dtype=np.uint8)
y = np.zeros((n, n_class), dtype=np.uint8)

for i in tqdm(range(n)):
    image_id = df_load['image_id'][i]
    tmp_label = df_load['label'][i]
    if len(tmp_label) > n_class:
        # Report label strings that are longer than the first row's label.
        print(image_id)

    image_path = '../train/{0}'.format(image_id)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing/unreadable file; fail here
        # with the offending path instead of deep inside cv2.resize.
        raise IOError('Cannot read training image: {0}'.format(image_path))
    X[i] = cv2.resize(image, (width, width))

    true_class = tmp_label.find('y')
    if true_class < 0 or true_class >= n_class:
        # str.find returns -1 when 'y' is absent; used as an index that would
        # silently mark the LAST class as positive.
        raise ValueError(
            "Label {0!r} of image {1} has no 'y' within the first {2} "
            "positions".format(tmp_label, image_id, n_class))
    y[i][true_class] = 1

In [None]:
# Show 8 randomly chosen training images in a 2x4 grid, each titled with its
# one-hot label vector.
fig = plt.figure(figsize=(12, 7))
for slot in range(1, 9):
    random_index = random.randint(0, n - 1)
    ax = fig.add_subplot(2, 4, slot)
    # Reverse the channel axis so the cv2-loaded image displays with correct
    # colours in matplotlib.
    ax.imshow(X[random_index, :, :, ::-1])
    ax.set_title(y[random_index])

### 提取特征

In [None]:
#########################################################
#注意要from keras.applications import 可以导入所有可用模型,但要注意:
#image size为224*224的预处理函数不同于image size为299*299!!! 如下:

#预处理函数:
from keras.applications.inception_v3 import preprocess_input
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
#导入的image_utils包包含了一系列函数，使得对图片进行前处理以及对分类结果解码更加容易
#VGG16，VGG19以及ResNet接受224×224的输入图像， 而Inception V3和Xception要求为299×299
#使用合适的预处理函数来执行mean subtraction/scaling
#预处理 图像编码服从规定，譬如,RGB，GBR这一类的，preprocess_input(x)  
#preprocessing function is also different (same as Xception)
#return imagenet_utils.preprocess_input(x, mode='tf')
#而其他VGG16，VGG19以及ResNet接受224×224的输入图像,使用preprocess = imagenet_utils.preprocess_input预处理
#InceptionV3和Xception,还有   
#########################################################

from keras.layers import *
#from keras.layers import Input
from keras.models import *
#from keras.models import Model
#from keras.layers import Input, Dense
#a = Input(shape=(32,))
#b = Dense(32)(a)
#model = Model(inputs=a, outputs=b)
from keras.callbacks import *
from keras.optimizers import *
from keras.regularizers import *
# 
from keras.applications import *
#from keras.applications import ResNet50
#from keras.applications import VGG16
#from keras.applications import VGG19
#from keras.applications import Xception # TensorFlow ONLY
#from keras.applications import InceptionResNetV2
#from keras.applications import InceptionV3

#tf.keras.applications.inception_v3.InceptionV3
#tf.keras.applications.inception_resnet_v2.InceptionResNetV2

In [None]:
####################################################################
# GPU selection (os is imported in the first cell of the notebook).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Use e.g. "0,1" to expose several GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Available pretrained networks. Constructor parameters are described at:
# https://www.tensorflow.org/api_docs/python/tf/keras/applications
MODELS = {"ResNet50": ResNet50, "InceptionV3": InceptionV3, "DenseNet121": DenseNet121,
          "DenseNet169": DenseNet169, "DenseNet201": DenseNet201, "Xception": Xception,
          "InceptionResNetV2": InceptionResNetV2, "NASNetLarge": NASNetLarge}

# ImageNet preprocessing mode that each network's pretrained weights expect.
# The mode depends on the model family, not on the input image size.
PREPROCESS_MODES = {"ResNet50": "caffe", "InceptionV3": "tf", "DenseNet121": "torch",
                    "DenseNet169": "torch", "DenseNet201": "torch", "Xception": "tf",
                    "InceptionResNetV2": "tf", "NASNetLarge": "tf"}

# Setting: name of the network to train. It is also used in output paths, so
# the constructor is looked up from this single name to keep both in sync.
mymodel = "ResNet50"
Network = MODELS[mymodel]


def ppreprocess(x, mode=PREPROCESS_MODES[mymodel]):
    """Preprocess a batch of cv2-loaded images for the selected network.

    The images in this notebook are read with cv2.imread, i.e. in BGR channel
    order, while imagenet_utils.preprocess_input expects RGB input. The
    channel axis is therefore reversed first, then the model-specific
    ImageNet preprocessing is applied.

    x: 4D batch with channels last.
    mode: 'caffe', 'tf' or 'torch'; bound at definition time to the mode of
        the network named by `mymodel`.
    """
    x = x[:, :, :, ::-1]
    return imagenet_utils.preprocess_input(x, mode=mode)
##############################################################

In [None]:
# Pretrained convolutional base without its classification head.
# With include_top=False, pooling='avg' applies global average pooling to the
# output of the last convolutional layer, so the base returns a 2D tensor
# (pooling=None would leave the 4D feature map, 'max' would use global max
# pooling).
cnn_model = Network(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(width, width, 3),
)

inputs = Input(shape=(width, width, 3))
# Preprocessing is part of the graph, so raw image arrays can be fed directly.
x = Lambda(ppreprocess, name='preprocessing')(inputs)
x = cnn_model(x)
# Newly added layers on top of the pretrained base.
x = Dropout(0.5)(x)
# n_class is the number of classes of the selected attribute.
x = Dense(n_class, activation='softmax', name='softmax')(x)

model = Model(inputs=inputs, outputs=x)

### 划分训练、测试集

In [None]:
# Hold out 12% of the samples (fixed seed) for the evaluation done after
# training.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y,
    test_size=0.12,
    random_state=42,
)
(X_train.shape, y_train.shape)

In [None]:
adam = Adam(lr=0.001)
# Short attribute name (text before the first underscore), used in output
# file names.
prefix_cls = cur_class.split('_')[0]

model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Make sure the checkpoint folder exists before training starts; otherwise
# the first checkpoint write can fail after a full epoch has been spent.
checkpoint_path = '../models/{0}/{0}_{1}.best.h5'.format(prefix_cls, mymodel)
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Keep only the best model seen so far.
checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose=1,
                               save_best_only=True)

# validation_split=0.1 reserves a tenth of X_train for per-epoch validation;
# training stops early after 3 epochs without improvement.
h = model.fit(X_train, y_train, batch_size=32, epochs=80,
              callbacks=[EarlyStopping(patience=3), checkpointer],
              shuffle=True,
              validation_split=0.1)

In [None]:
# Plot the training curves side by side: loss on the left, accuracy on the
# right, each with its validation counterpart.
fig = plt.figure(figsize=(10, 4))
for position, metric in enumerate(['loss', 'acc'], start=1):
    val_metric = 'val_' + metric
    ax = fig.add_subplot(1, 2, position)
    ax.plot(h.history[metric])
    ax.plot(h.history[val_metric])
    ax.legend([metric, val_metric])
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')

# Save the figure next to the model checkpoint.
fig.savefig('../models/{0}/{0}_{1}.png'.format(prefix_cls, mymodel), bbox_inches='tight')

In [None]:
# Loss and accuracy on the training portion of the split.
model.evaluate(x=X_train, y=y_train, batch_size=256)

In [None]:
# Loss and accuracy on the held-out portion of the split.
model.evaluate(x=X_valid, y=y_valid, batch_size=256)

### 加载测试集

In [None]:
# Read the test questions. The CSV has no header row; its third column is a
# placeholder that is dropped right away.
test_columns = ['image_id', 'class', 'x']
df_test = pd.read_csv('../test/Tests/question.csv', header=None)
df_test.columns = test_columns
df_test = df_test.drop('x', axis=1)
df_test.head()

In [None]:
# Keep only the test rows of the selected attribute, renumbered from 0.
# Note: this reuses the name df_load, replacing the training table.
df_load = df_test[df_test['class'] == cur_class].copy()
df_load = df_load.reset_index(drop=True)

test_count = len(df_load)
print('{0}: {1}'.format(cur_class, test_count))
df_load.head()

In [None]:
# Load every test image into memory, resized to (width, width), in the same
# way as the training images.
n = len(df_load)
X_test = np.zeros((n, width, width, 3), dtype=np.uint8)

for i in tqdm(range(n)):
    image_path = '../test/{0}'.format(df_load['image_id'][i])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for a missing/unreadable file; fail here
        # with the offending path instead of deep inside cv2.resize.
        raise IOError('Cannot read test image: {0}'.format(image_path))
    X_test[i] = cv2.resize(image, (width, width))

In [None]:
# Class probabilities for every test image (one row per image).
test_np = model.predict(x=X_test, batch_size=256)

In [None]:
# Shape of the prediction array.
np.shape(test_np)

In [None]:
# Turn each prediction row into one string: probabilities with 4 decimals,
# separated by ';' (no trailing separator).
result = [
    ';'.join('{:.4f}'.format(prob) for prob in test_np[i])
    for i in df_load.index
]

df_load['result'] = result
df_load.head()

In [None]:
# Write the predictions without header or index column, one folder per model.
result_path = '../result/{1}/{0}_{1}.csv'.format(prefix_cls, mymodel)
# Create the output folder first; to_csv does not create missing folders.
result_dir = os.path.dirname(result_path)
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)
# header=False is the documented way to omit the header row.
df_load.to_csv(result_path, header=False, index=False)
prefix_cls