In [13]:
import os
import sys
import json
import pickle
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm
from PIL import Image
from collections import defaultdict, Counter

In [14]:
!pwd

/media/jaeho/SSD/paper/notebooks


In [15]:
saved_dir = '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data'

In [16]:
data_dir = '/media/jaeho/SSD/datasets/deepfashion/category_and_attribute_prediction/category_and_attribute_prediction_benchmark/Anno_coarse'

In [17]:
os.listdir(data_dir)

['list_attr_cloth.txt',
 'list_attr_img.txt',
 'list_bbox.txt',
 'list_category_cloth.txt',
 'list_category_img.txt',
 'list_landmarks.txt']

In [18]:
target_img_dir = '/media/jaeho/SSD/datasets/deepfashion/img-001/'

In [19]:
attr_cloth_path = os.path.join(data_dir, 'list_attr_cloth.txt')
attr_img_path = os.path.join(data_dir, 'list_attr_img.txt')
bbox_path = os.path.join(data_dir, 'list_bbox.txt')
category_cloth_path = os.path.join(data_dir, 'list_category_cloth.txt')
category_img_path = os.path.join(data_dir, 'list_category_img.txt')
landmark_path = os.path.join(data_dir, 'list_landmarks.txt')

# Attribute

## attr_cloth
- attribute_type
    - 1 : texture_related attributes
    - 2 : fabric_related attributes
    - 3 : shape_related attributes
    - 4 : part_related attributes
    - 5 : style_related attributes
- attribute_name으로 사전식 정렬

In [20]:
attr_cloth = []
with open(attr_cloth_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        line = line.replace("\n", "")
        if not line : break
        if cnt < 2 :
            if cnt == 0:
                print(f"number of attrs : {line}")
            cnt += 1
            continue
        splited_line = [x for x in [l.replace(" ", "") for l in line.split("  ")] if x != ""]
        attribute_name, attribute_type = splited_line

        attr_cloth.append({'attr_name': attribute_name,
                           'attr_type': int(attribute_type)})
        cnt += 1

number of attrs : 1000


In [21]:
with open(os.path.join(saved_dir, 'attr_cloth.pickle'), 'wb') as f:
    pickle.dump(attr_cloth, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'attr_cloth.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/attr_cloth.pickle


In [22]:
attr_cloth[:2]

[{'attr_name': 'a-line', 'attr_type': 3},
 {'attr_name': 'abstract', 'attr_type': 1}]

## attr_img

In [23]:
attr_img = []
with open(attr_img_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        if not line : break
        if cnt < 2 :
            if cnt==0:
                print(f"number of images : {line}")
            cnt += 1
            continue
        line = line.replace("\n", "")
        splited_line = [x for x in line.split(".jpg") if x != '']
        img_path, attr_labels = splited_line
        img_path += '.jpg'
        attr_labels = list(map(int, [x for x in attr_labels.split(' ') if x != '']))
        attr_img.append({
            'img_path' : img_path,
            'attr_labels' : attr_labels
        })

        cnt += 1

number of images : 289222



In [24]:
with open(os.path.join(saved_dir, 'attr_img.pickle'), 'wb') as f:
    pickle.dump(attr_img, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'attr_img.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/attr_img.pickle


In [25]:
len(attr_img)

289222

# Categories

## category_cloth
- category type
    - 1 : upper_body
    - 2 : lower_body
    - 3 : full_body
- category 이름, 사전순 정렬

In [31]:
category_cloth = []
with open(category_cloth_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        line = line.replace("\n", "")
        if not line : break
        if cnt < 2 :
            if cnt == 0:
                print(f"number of categories : {line}")
            cnt += 1
            continue
        splited_line = [x for x in [l.replace(" ", "") for l in line.split("  ")] if x != ""]
        category_name, category_type = splited_line

        category_cloth.append({'category_name': category_name,
                           'category_type': int(category_type)})
        cnt += 1

number of categories : 50


In [32]:
with open(os.path.join(saved_dir, 'category_cloth.pickle'), 'wb') as f:
    pickle.dump(category_cloth, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'category_cloth.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/category_cloth.pickle


In [33]:
category_cloth[:2]

[{'category_name': 'Anorak', 'category_type': 1},
 {'category_name': 'Blazer', 'category_type': 1}]

## category_img

In [35]:
category_img = []
with open(category_img_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        if not line : break
        if cnt < 2 :
            if cnt==0:
                print(f"number of images : {line}")
            cnt += 1
            continue
        line = line.replace("\n", "")
        splited_line = [x for x in line.split(".jpg") if x != '']
        img_path, category_label = splited_line
        img_path += '.jpg'
        category_label = list(map(int, [x for x in category_label.split(' ') if x != '']))
        category_label = [x-1 for x in category_label]
        category_img.append({
            'img_path' : img_path,
            'category_label' : category_label
        })

        cnt += 1

number of images : 289222



In [36]:
category_img[0]['img_path']

'img/Sheer_Pleated-Front_Blouse/img_00000001.jpg'

In [37]:
category_img[0]['category_label']

[2]

In [38]:
with open(os.path.join(saved_dir, 'category_img.pickle'), 'wb') as f:
    pickle.dump(category_img, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'category_img.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/category_img.pickle


# Bbox
- format : [x1, y1, x2, y2]
- x1, y1 : upper left point
- x2, y2 : lower right point

In [39]:
bboxes = []
with open(bbox_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        line = line.replace("\n", "")
        if not line : break
        if cnt < 2 :
            if cnt == 0:
                print(f"number of images : {line}")
            else :
                print(line)
            cnt += 1
            continue
        img_path, bbox = line.split('.jpg')
        img_path += '.jpg'
        bbox = list(map(int, [x for x in bbox.split(' ') if x != '']))

        bboxes.append({'img_path': img_path,
                      'bbox': bbox})
        cnt += 1

number of images : 289222
image_name  x_1  y_1  x_2  y_2


In [40]:
bboxes[0]

{'img_path': 'img/Sheer_Pleated-Front_Blouse/img_00000001.jpg',
 'bbox': [72, 79, 232, 273]}

In [41]:
with open(os.path.join(saved_dir, 'bboxes.pickle'), 'wb') as f:
    pickle.dump(bboxes, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'bboxes.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/bboxes.pickle


# Landmark
- column info
    - image name
    - clothes type
        - 1 : upper_body
            - 6개의 landmark
        - 2 : lower_body
            - 4개의 landmark
        - 3 : fully_body
            - 8개의 landmark
    - variation type
        - 1 : normal pose
        - 2 : medium pose
        - 3 : large pose
        - 4 : medium zoom-in
        - 5 : large zoom-in
    - list of landmark info (1 to 8)
        - landmark visibility
            - 0 : visible
            - 1 : invisible/occluded
            - 2 : truncated/cut-off
        - landmark location x
        - landmark location y
        
---

- upper-body clothes : [left_collar, right_collar, left_sleeve, right_sleeve, left_hem, right_hem]
- lower-body clothes : [left_waistline, right_waistline, left_hem, right_hem]
- fully-body clothes : [left_collar, right_collar, left_sleeve, right_sleeve, left_waistline, right_waistline, left_hem, right_hem]

In [42]:
landmark_rule_dict = {
    1 : ['left_collar', 'right_collar', 'left_sleeve', 'right_sleeve', 'left_hem', 'right_hem'],
    2 : ['left_waistline', 'right_waistline', 'left_hem', 'right_hem '],
    3 : ['left_collar', 'right_collar', 'left_sleeve', 'right_sleeve', 'left_waistline', 'right_waistline', 'left_hem', 'right_hem']
}

In [43]:
landmarks = []
with open(landmark_path, 'r') as f:
    cnt = 0
    while True:
        line = f.readline()
        line = line.replace("\n", "")
        if not line : break
        if cnt < 2 :
            if cnt == 0:
                print(f"number of images : {line}")
            # else :
            #     print(line)
            cnt += 1
            continue

        img_path, landmark = line.split('.jpg')
        img_path += '.jpg'
        # sys.exit(1)
        
        landmark = list(map(int, [x for x in landmark.split(' ') if x != '']))
        clothes_type, landmark = landmark[0], landmark[1:]
        landmark_dict = defaultdict(list)
        for idx, value in enumerate(landmark):
            landmark_dict[landmark_rule_dict[clothes_type][idx//3]].append(value)

        landmarks.append({
            'img_path' : img_path,
            'clothes_type' : clothes_type,
            'landmark' : landmark_dict
        })

        cnt += 1

number of images : 289222


In [44]:
landmark

[1,
 99,
 84,
 1,
 141,
 82,
 0,
 59,
 209,
 0,
 174,
 198,
 0,
 82,
 173,
 0,
 156,
 168,
 1,
 75,
 275,
 0,
 160,
 271]

In [None]:
landmarks[0]

{'img_path': 'img/Sheer_Pleated-Front_Blouse/img_00000001.jpg',
 'clothes_type': 1,
 'landmark': defaultdict(list,
             {'left_collar': [0, 146, 102],
              'right_collar': [0, 173, 95],
              'left_sleeve': [0, 94, 242],
              'right_sleeve': [0, 205, 255],
              'left_hem': [0, 136, 229],
              'right_hem': [0, 177, 232]})}

In [46]:
for lm in tqdm(landmarks):
    for landmark_name, landmark_value in lm['landmark'].items():
        # print(landmark_name)
        # print(landmark_value)
        if landmark_value[0] != 0:
            print(lm)
            print()
            print(landmark_name, landmark_value)
            sys.exit(1)

  0%|          | 4/289222 [00:00<00:10, 28630.06it/s]

{'img_path': 'img/Sheer_Pleated-Front_Blouse/img_00000005.jpg', 'clothes_type': 1, 'landmark': defaultdict(<class 'list'>, {'left_collar': [1, 102, 106], 'right_collar': [0, 116, 102], 'left_sleeve': [0, 63, 194], 'right_sleeve': [1, 146, 216], 'left_hem': [0, 105, 245], 'right_hem': [0, 137, 246]})}

left_collar [1, 102, 106]





SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
landmark_map = ["left collar", "right collar", "left sleeve", "right sleeve", "left waistline", "right waistline", "left hem", "right hem"]
len(landmark_map)

8

In [None]:
with open(os.path.join(saved_dir, 'landmarks.pickle'), 'wb') as f:
    pickle.dump(landmarks, f, pickle.HIGHEST_PROTOCOL)
print(os.path.join(saved_dir, 'landmarks.pickle'))

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/landmarks.pickle


# 이미지 확인
- 이미지 정보를 dictionary형태로 만든다
    ```python
    {
        'img_path' : {
            
        }
    }
    ```

In [None]:
os.listdir('/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/')

['attr_cloth.pickle',
 'attr_img.pickle',
 'bboxes.pickle',
 'category_cloth.pickle',
 'category_img.pickle',
 'landmarks.pickle',
 'preprocessed_data.pickle']

In [None]:
pickle_paths = glob('/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/*')

In [None]:
map_paths = [x for x in pickle_paths if 'cloth' in x]
img_data_paths = [x for x in pickle_paths if 'cloth' not in x]

In [None]:
map_paths

['/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/attr_cloth.pickle',
 '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/category_cloth.pickle']

In [None]:
img_data_paths

['/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/attr_img.pickle',
 '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/bboxes.pickle',
 '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/category_img.pickle',
 '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/landmarks.pickle',
 '/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/preprocessed_data.pickle']

In [65]:
total_img_dict = defaultdict(dict)

for img_data_path in img_data_paths:
    # print(os.path.basename(img_data_path))
    print(os.path.basename(img_data_path).replace('.pickle','').split('_')[0])
    task_name = os.path.basename(img_data_path).replace('.pickle','').split('_')[0]
    if task_name == 'bboxes':
        task_name = 'bbox'
    elif task_name == 'landmarks':
        task_name = 'landmark'
    elif task_name == 'preprocessed':
        continue
    
    print('file opening...')
    with open(img_data_path, 'rb') as f:
        datas = pickle.load(f)
    
    # collect datas
    for data in tqdm(datas):
        img_path = data['img_path']
        for key, value in data.items():
            if key == 'img_path':
                continue
            if task_name == 'landmark':
                total_img_dict[img_path][task_name] = value
            elif task_name in key:
                total_img_dict[img_path][task_name] = value
    print()

attr
file opening...


100%|██████████| 289222/289222 [00:00<00:00, 550464.98it/s]



bboxes
file opening...


100%|██████████| 289222/289222 [00:00<00:00, 1589457.37it/s]



category
file opening...


100%|██████████| 289222/289222 [00:00<00:00, 1610213.58it/s]



landmarks
file opening...


100%|██████████| 289222/289222 [00:00<00:00, 1335239.36it/s]


preprocessed





In [None]:
for img_path, img_info in total_img_dict.items():
    print(img_path)
    print(json.dumps(img_info, indent=2))
    break

img/Sheer_Pleated-Front_Blouse/img_00000001.jpg
{
  "attr": [
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
    -1,
  

In [66]:
save_path = os.path.join(saved_dir, 'preprocessed_data.pickle')
with open(save_path, 'wb') as f:
    pickle.dump(total_img_dict, f, pickle.HIGHEST_PROTOCOL)
print(save_path)

/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/preprocessed_data.pickle


In [67]:
import pickle
from tqdm import tqdm
from collections import defaultdict

In [68]:
with open('/media/jaeho/SSD/datasets/deepfashion/preprocessed_data/preprocessed_data.pickle', 'rb') as f:
    data = pickle.load(f)

In [70]:
cnt_dict = defaultdict(int)
for d, info in tqdm(data.items()):
    cnt_dict[info['category'][0]] += 1

100%|██████████| 289222/289222 [00:00<00:00, 2827677.58it/s]


In [72]:
info.keys()

dict_keys(['attr', 'bbox', 'category', 'landmark'])

In [71]:
{key:value for key,value in sorted(cnt_dict.items(), key=lambda x : x[0])}

{0: 160,
 1: 7495,
 2: 24557,
 3: 309,
 4: 330,
 5: 13311,
 6: 324,
 7: 17,
 8: 716,
 9: 4048,
 10: 10467,
 11: 748,
 12: 676,
 13: 97,
 14: 791,
 15: 13123,
 16: 15429,
 17: 36887,
 18: 10078,
 19: 146,
 20: 77,
 21: 527,
 22: 486,
 23: 1669,
 24: 49,
 25: 7076,
 26: 594,
 27: 45,
 28: 4416,
 29: 5013,
 30: 32,
 31: 19666,
 32: 14773,
 33: 3048,
 34: 1106,
 35: 386,
 36: 54,
 38: 2120,
 39: 17,
 40: 72158,
 41: 6153,
 42: 126,
 43: 2294,
 45: 70,
 46: 150,
 47: 7408}