In [4]:
import numpy as np
import pandas as pd
import os,io,sys,glob
from time import time
from tqdm import tqdm_notebook,trange,tqdm
import h5py
from collections import Counter
from PIL import Image

from keras.models import Model
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50, preprocess_input

Using TensorFlow backend.


# Load Dataset

In [3]:
# Load the partitioned dataset and report how long the load took.
load_started = time()
# NOTE(review): allow_pickle=True unpickles the file's contents, which can run
# arbitrary code — only load this .npy from a trusted source.
dataset = np.load(
    '../AmazonFashion6ImgPartitioned.npy',
    encoding='bytes',
    allow_pickle=True,
)
# The stored object unpacks into exactly six entries.
train, val, test, meta, usernum, itemnum = dataset
elapsed = time() - load_started
print('data load complete[%.2fs]' % elapsed)

data load complete[7.38s]


# Save Item Images

In [7]:
# Write every item's image bytes (meta[idx][b'imgs']) to ../meta_img/<idx>.jpg.
# Items whose image cannot be opened or saved are skipped.
os.makedirs('../meta_img', exist_ok=True)  # im.save() does not create the directory
for idx in trange(len(meta)):
    img_file = '../meta_img/%s.jpg'%idx
    try:
        c=io.BytesIO(meta[idx][b'imgs'])
        im=Image.open(c)
        im.save(img_file)

    except OSError: # png, but useless
        # A failed save may or may not leave a partial file behind, so only
        # remove it when it is actually there; an unconditional os.remove()
        # raises FileNotFoundError and aborts the whole loop.
        if os.path.exists(img_file):
            os.remove(img_file)

100%|█████████████████████████████████████████████████████████████████████████| 166270/166270 [10:16<00:00, 269.84it/s]


# Save 'image features'

#### - Extract 'image features' from the Amazon product images using the 'ResNet50' model

In [18]:
def feature_extractor():
    """Extract ResNet50 'avg_pool' features for every JPEG in ../meta_img.

    Loads ResNet50 with ImageNet weights, truncates it at the 'avg_pool'
    layer, and runs each image through it one at a time.

    Returns:
        dict: maps the image file name up to its first '.' (a string, e.g.
        '123' for '123.jpg') to the 'avg_pool' output for that image.
    """
    t1=time()
    avg_pool_features={}
    base_model = ResNet50(weights='imagenet')
    # Re-wire the network so that prediction stops at the 'avg_pool' layer.
    model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)
    print('model load complete[%0.2fs]'%(time()-t1))

    # Collect the image files to process.
    path = '../meta_img/*'
    file_list = [p for p in glob.glob(path) if p.endswith('jpg')]

    # Iterating the list directly (instead of enumerate) lets the progress bar
    # know the total number of images.
    for img_path in tqdm_notebook(file_list):
        # meta_index is the image file number; it is used as the dictionary key.
        # os.path.basename works with both '/' and '\\' separators, whereas
        # splitting on '\\' raises IndexError for POSIX-style paths.
        meta_index = os.path.basename(img_path).split('.')[0]
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add a leading batch axis of size 1
        x = preprocess_input(x)
        # [0] drops the batch axis again, keeping this image's feature only.
        avg_pool_features[meta_index] = model.predict(x)[0]

    return avg_pool_features

# Run the extraction over all saved images and keep the resulting
# {file-number string: feature} dict in memory for the cells below.
avg_pool_features = feature_extractor()

In [42]:
# Persist the extracted features in one HDF5 dataset, one row per meta index.
file = '../amazonfashion6_imgfeature.hdf5'

with h5py.File(file, 'w') as hdf:
    # One 2048-wide float32 row for every entry of `meta`; rows whose index
    # has no extracted feature are never written by this loop.
    img_set = hdf.create_dataset('imgs', (len(meta), 2048), dtype='float32')

    # Keys are the image-number strings, so int(key) is the row to fill.
    for key, feature in tqdm_notebook(avg_pool_features.items()):
        img_set[int(key)] = feature

HBox(children=(IntProgress(value=0, max=166132), HTML(value='')))

# Convert DeepFashion annotations to the keras-yolo3 format

In [22]:
# Read the whitespace-separated bounding-box annotation file into a DataFrame
# with explicit column names (the file is read without a header row).
# The separator is a raw string: '\s' in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
df=pd.read_csv('../In-shop_Clothes_Retrieval_Benchmark/Anno/list_bbox_inshop.txt',header=None,sep=r'\s+',
               names=['path','label','pose','x_1','y_1','x_2','y_2'])
df.head(3)

Unnamed: 0,path,label,pose,x_1,y_1,x_2,y_2
0,img/WOMEN/Blouses_Shirts/id_00000001/02_1_fron...,1,1,50,49,208,235
1,img/WOMEN/Blouses_Shirts/id_00000001/02_2_side...,1,2,119,48,136,234
2,img/WOMEN/Blouses_Shirts/id_00000001/02_3_back...,1,3,50,42,213,240


In [39]:
# Build one annotation line per row: "<image path> x_1,y_1,x_2,y_2,<class id>".
# NOTE(review): this rebinds `train`, which the dataset-loading cell also
# assigns — confirm the earlier value is no longer needed.
# The class id is the row's label shifted by 79 (presumably to place it after
# an existing set of class ids — TODO confirm).
train = [
    '../In-shop_Clothes_Retrieval_Benchmark/%s %s,%s,%s,%s,%s'
    % (rel_path, x1, y1, x2, y2, int(label) + 79)
    for _row_index, rel_path, label, _pose, x1, y1, x2, y2 in df.itertuples()
]

# Save train.txt: one annotation line per entry of `train`.
# The `with` block closes the file even if a write fails part-way,
# which the manual open()/close() pair did not guarantee.
with open("../train.txt", 'w') as f:
    f.writelines(line + '\n' for line in train)