# 数据集目录结构
```
FashionAI-Attributes
├── models/
├── data/
│   ├── base/
│   └── rank/
└── solution.ipynb
```

python package依赖
```
matplotlib
numpy
pandas
tensorflow
keras == 2.1.4
pillow
h5py
```

In [None]:
import os
import re
import random
import shutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%reload_ext autoreload
%autoreload 2
%matplotlib inline

# 设置训练任务
本次比赛需要为八个不同的任务分别训练图片分类器；要处理其他类型的服饰，只需将 task 变量指定为 classes 列表中对应的任务名称即可。

In [None]:
# The eight attribute-classification tasks, in a fixed order.
classes = [
    'collar_design_labels',
    'neckline_design_labels',
    'skirt_length_labels',
    'sleeve_length_labels',
    'neck_design_labels',
    'coat_length_labels',
    'lapel_design_labels',
    'pant_length_labels',
]
# Task handled by the rest of the notebook; pick another entry of
# ``classes`` to work on a different attribute.
task = classes[0]

# 查看图片数据示例

In [None]:
# Folder holding the raw images of the selected task.
data_path = os.path.join("data/base/Images/", task)

# Preview the first entry returned by os.listdir for that folder.
image_names = os.listdir(data_path)
first_image_path = os.path.join(data_path, image_names[0])
img = plt.imread(first_image_path)
plt.imshow(img);

# 数据整理
Keras提供了一个读取图像分类任务数据的接口keras.preprocessing.image.ImageDataGenerator，它期望数据按照类似ImageNet的格式组织，即每一类图片都统一放在一个目录下。因此我们需要将数据进行整理，并且取5%的数据作为验证集（与下文代码中 `n * 0.95` 的切分一致）。

In [None]:
def mkdir_if_not_exist(path):
    """Create the directory named by the joined components of ``path``.

    Args:
        path: Sequence of path components. They are combined with
            ``os.path.join`` to form the target directory.

    Missing parent directories are created as well. Nothing happens when
    the target path already exists.
    """
    target = os.path.join(*path)
    if not os.path.exists(target):
        os.makedirs(target)


# Root of the train/valid split. It lives directly under ``data`` because the
# cells that build, fill and read the split all use ``data/train_valid``;
# the former ``data/base/train_valid`` was created but never used.
mkdir_if_not_exist(['data/train_valid'])

In [None]:
# Annotation file; every row is split on commas into exactly three fields:
# relative image path, task name, label string.
label_path = 'data/base/Annotations/label.csv'

# (image path, label) pairs belonging to the selected task.
image_path = []

with open(label_path, 'r') as f:
    for row in f:
        rel_path, row_task, label = row.rstrip().split(',')
        if row_task != task:
            continue
        # Paths in the CSV are relative to data/base/.
        image_path.append(('data/base/' + rel_path, label))

In [None]:
# The label string has one character per class, so the length of any label
# gives the number of classes for this task.
first_sample = image_path[0]
task_class = len(first_sample[1])
print(first_sample)
print(len(image_path))
print(task_class)

### 构建训练集和验证集数据目录，以及类别的子目录
假设当前分类任务为collar_design_labels，则运行后的目录结构如下:
```
data/train_valid
└── collar_design_labels
    ├── train
    │   ├── 0
    │   ├── 1
    │   ├── 2
    │   ├── 3
    │   └── 4
    └── valid
        ├── 0
        ├── 1
        ├── 2
        ├── 3
        └── 4
```

In [None]:
# Build data/train_valid/<task>/{train,valid}/<class index>/ so that each
# class of each subset has its own directory.
task_root = ['data/train_valid', task]
mkdir_if_not_exist(task_root)
for subset in ('train', 'valid'):
    mkdir_if_not_exist(task_root + [subset])
for i in range(task_class):
    for subset in ('train', 'valid'):
        mkdir_if_not_exist(task_root + [subset, str(i)])

### 数据复制
将图片数据复制到各自对应的目录。需要注意的是，这里我们刻意随机打乱了图片的顺序，从而防止训练集与验证集切分不均匀的情况出现。

In [None]:
# Total number of samples for the task; the train/valid threshold is derived
# from it when the files are copied.
n = len(image_path)
# Seed the RNG with a fixed value before shuffling in place, so the same
# input list is reordered the same way on every run.
random.seed(1024)
random.shuffle(image_path)

In [None]:
# Walk the shuffled samples in order. Samples whose position is below 95% of
# the total are copied to "train", the remaining ones to "valid".
split_threshold = n * 0.95
train_count = 0
for path, label in image_path:
    # The position of the 'y' character in the label string is the class index.
    label_index = label.index('y')
    subset = 'train' if train_count < split_threshold else 'valid'
    shutil.copy(path,
                os.path.join('data/train_valid', task, subset, str(label_index)))
    train_count += 1

# 算法设计——微调版迁移学习

In [None]:
# Training hyperparameters.
image_size = 299   # side length (pixels) of the square network input
batch_size = 32
epochs = 50

# Directories produced by the data-preparation cells.
task_split_dir = os.path.join('data/train_valid', task)
train_data_dir = os.path.join(task_split_dir, 'train')
valid_data_dir = os.path.join(task_split_dir, 'valid')

In [None]:
import tensorflow as tf
from keras import backend as K
from keras.layers import Dropout, Flatten, Dense
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Training images: InceptionV3 preprocessing plus random geometric
# augmentation (rotation, shifts, shear, zoom and flips).
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest')

# Validation images only get the preprocessing. Random flips were removed
# here: they made the validation metrics vary between passes for an unchanged
# model, which adds noise to the val_loss that the checkpoint and
# early-stopping callbacks monitor.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='categorical')

# shuffle=False keeps the validation batches in a fixed order.
validation_generator = test_datagen.flow_from_directory(
    valid_data_dir,
    shuffle=False,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='categorical')

In [None]:
# Sanity check: echo the input side length currently in effect.
print(image_size)

In [None]:
# ImageNet-pretrained InceptionV3 without its classification head.
pretrained_model = InceptionV3(input_shape=(image_size, image_size, 3),
                               weights='imagenet', include_top=False)
# Alternative backbone, kept for reference (disabled):
# pretrained_model = ResNet50(input_shape=(image_size, image_size, 3), weights='imagenet', include_top=False)
# for layer in pretrained_model.layers:
#     if re.search(r'^(res5c|bn5c)', layer.name) is not None:
#         layer.trainable = True
#     else:
#          layer.trainable = False

# Freeze every backbone layer before this index and fine-tune the rest.
first_trainable_layer = 140
for layer_index, layer in enumerate(pretrained_model.layers):
    layer.trainable = layer_index >= first_trainable_layer

# New classification head on top of the backbone features.
x = pretrained_model.output
x = GlobalAveragePooling2D()(x)
# The hidden layer needs a non-linearity: without an activation, Dense(1024)
# followed by the output Dense is just a composition of two affine maps and
# adds no capacity beyond a single linear layer.
x = Dense(1024, activation='relu')(x)
# One softmax output per class of the current task.
predictions = Dense(task_class, activation='softmax')(x)

model = Model(inputs=pretrained_model.input, outputs=predictions)

model.summary()

In [None]:
# List every backbone layer together with its trainable flag to check which
# layers are frozen.
for layer in pretrained_model.layers:
    print(": ".join([layer.name, str(layer.trainable)]))

In [None]:
from keras.optimizers import *
# Optimizer alternatives that were tried (disabled):
# opt=Adam(1e-5)
# opt = SGD(1e-3, momentum=0.9, nesterov=True, decay=1e-5)
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): this path does not include the task name, so training another
# task overwrites the previous best checkpoint -- confirm this is intended.
model_file_path = 'models/best_model.h5'

# Keep only the weights with the lowest validation loss, and stop when the
# validation loss has not improved for 20 epochs.
checkpoint = ModelCheckpoint(model_file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
early = EarlyStopping(monitor="val_loss", mode="min", patience=20)

callbacks_list = [checkpoint, early]

# Ceiling division: floor division would leave the final, smaller batch out of
# a validation pass and would yield 0 steps when the validation set holds
# fewer than batch_size images.
validation_steps = (validation_generator.n + batch_size - 1) // batch_size

his = model.fit_generator(train_generator,
                          steps_per_epoch=train_generator.n // batch_size,
                          epochs=epochs,
                          callbacks=callbacks_list,
                          validation_data=validation_generator,
                          validation_steps=validation_steps)

In [None]:
# Save the model as it is after training finished, under a task-specific
# file name in the current working directory.
model.save("model_"+task+".h5")

In [None]:
# Per-epoch metrics recorded during training.
acc = his.history['acc']
val_acc = his.history['val_acc']
loss = his.history['loss']
val_loss = his.history['val_loss']

# X axis: 1-based index of each completed epoch. A dedicated name is used on
# purpose: rebinding ``epochs`` here would replace the integer training
# hyperparameter with a range object and break a re-run of the training cell.
epoch_axis = range(1, len(acc) + 1)

# Accuracy curves: dots for training, line for validation.
plt.plot(epoch_axis, acc, 'bo', label='Training acc')
plt.plot(epoch_axis, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

# Loss curves in a second figure, same styling.
plt.plot(epoch_axis, loss, 'bo', label='Training loss')
plt.plot(epoch_axis, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()