In [None]:
import os
import os.path as osp
import numpy as np
import wandb
from pathlib import Path

from PIL import Image, ImageDraw
import json

In [None]:
# Weights & Biases destination for every run started in this notebook.
WANDB_PROJECT = 'fashion-retrieval'
ENTITY = None

# Root of the local DeepFashion copy. Set the DEEP_FASHION_DIR environment
# variable to point somewhere else; when it is unset the original location
# (~/Documents/dev/DeepFashion) is used.
DEEP_FASHION_DIR = os.environ.get(
    'DEEP_FASHION_DIR',
    osp.join(osp.expanduser('~'), 'Documents', 'dev', 'DeepFashion'),
)
DEEP_FASHION_CLOTHING_ANNOS_DIR = osp.join(DEEP_FASHION_DIR, 'Anno_coarse')
DEEP_FASHION_CLOTHING_IMAGES_DIR = osp.join(DEEP_FASHION_DIR, 'img')
DEEP_FASHION_CLOTHING_CATEGORIES_PATH = osp.join(DEEP_FASHION_CLOTHING_ANNOS_DIR, 'list_category_cloth.txt')
DEEP_FASHION_CLOTHING_ATTRIBUTES_PATH = osp.join(DEEP_FASHION_CLOTHING_ANNOS_DIR, 'list_attr_cloth.txt')
DEEP_FASHION_CLOTHING_LIST_CAT_IMG_PATH = osp.join(DEEP_FASHION_CLOTHING_ANNOS_DIR, 'list_category_img.txt')
DEEP_FASHION_CLOTHING_LIST_ATT_IMG_PATH = osp.join(DEEP_FASHION_CLOTHING_ANNOS_DIR, 'list_attr_img.txt')

# Label vocabularies; filled from the annotation files further down in this cell.
CLASS_LABELS = []
ATTR_LABELS = []

# Notes on the category annotations:
# 1. In category type, "1" represents upper-body clothes, "2" represents lower-body clothes, "3" represents full-body clothes;
# 2. The order of category labels accords with the order of category names;
# 3. In category labels, the number represents the category id in category names;
# 4. For the clothing categories, "Cape", "Nightdress", "Shirtdress" and "Sundress" have been merged into "Dress";
# 5. Category prediction is treated as a 1-of-K classification problem.

CATEGORY_TYPES = {
    1: 'upper-body',
    2: 'lower-body',
    3: 'full-body'
}

# Notes on the attribute annotations:
# 1. In attribute type, "1" represents texture-related attributes, "2" represents fabric-related attributes, "3" represents shape-related attributes, "4" represents part-related attributes, "5" represents style-related attributes;
# 2. The order of attribute labels accords with the order of attribute names;
# 3. In attribute labels, "1" represents positive while "-1" represents negative, "0" represents unknown;
# 4. Attribute prediction is treated as a multi-label tagging problem.

ATTRIBUTE_TYPES = {
    1: 'texture',
    2: 'fabric',
    3: 'shape',
    4: 'part',
    5: 'style'
}

# Category vocabulary: the first whitespace-separated token of every line after
# the first two lines (idx 0 and 1 are skipped as non-data lines).
with open(DEEP_FASHION_CLOTHING_CATEGORIES_PATH, 'r') as f:
    for idx, line in enumerate(f):
        fields = line.split()
        # Blank lines (e.g. a trailing newline) are skipped instead of raising IndexError.
        if idx > 1 and fields:
            CLASS_LABELS.append(fields[0])

# Attribute vocabulary: the attribute name of every line after the first two.
with open(DEEP_FASHION_CLOTHING_ATTRIBUTES_PATH, 'r') as f:
    for idx, line in enumerate(f):
        if idx > 1:
            fields = line.split()
            # Drop only the trailing numeric column so that name tokens containing
            # digits or punctuation are kept rather than silently discarded.
            # NOTE(review): assumes the last column is the numeric attribute
            # type id - confirm against list_attr_cloth.txt.
            if len(fields) > 1 and fields[-1].isdigit():
                fields = fields[:-1]
            ATTR_LABELS.append(' '.join(fields))

# Category ids are 1-based; build the id -> name map and its inverse.
IDX_TO_CLASS = dict(enumerate(CLASS_LABELS, start=1))
CLS_TO_IDX = {label: cat_id for cat_id, label in IDX_TO_CLASS.items()}

# Per-image records keyed by 0-based data-row number (file line index minus the
# two skipped leading lines).
DATA_DICT = {}

with open(DEEP_FASHION_CLOTHING_LIST_CAT_IMG_PATH, 'r') as f:
    for idx, line in enumerate(f):
        if idx <= 1:
            continue
        fields = line.split()
        DATA_DICT[idx - 2] = {
            "path": fields[0],
            "cat_index": int(fields[1]),
            "category": IDX_TO_CLASS[int(fields[1])],
        }

with open(DEEP_FASHION_CLOTHING_LIST_ATT_IMG_PATH, 'r') as f:
    for idx, line in enumerate(f):
        if idx <= 1:
            continue
        # Everything after the first token is one flag per attribute; keep the
        # 0-based positions whose flag is positive.
        flags = line.split()[1:]
        DATA_DICT[idx - 2]['attributes'] = [
            pos for pos, flag in enumerate(flags) if int(flag) > 0
        ]
            
            
            

In [None]:
# 0-based attribute position -> attribute name.
IDX_TO_ATTR = dict(enumerate(ATTR_LABELS))

In [None]:
# Start the W&B run used for uploading.
run = wandb.init(
    project=WANDB_PROJECT,
    entity=ENTITY,
    job_type='upload',
)

In [None]:
# Artifact that will carry the raw dataset files.
artifact = wandb.Artifact('deep-fashion', type='raw-data')

In [None]:
# Display the dataset root directory.
DEEP_FASHION_DIR

In [None]:
# Attach the dataset README to the artifact.
artifact.add_file(osp.join(DEEP_FASHION_DIR, 'README.txt'))
# Directory uploads are currently disabled (left commented out):
# artifact.add_dir(DEEP_FASHION_CLOTHING_IMAGES_DIR, name='Images')
# artifact.add_dir(DEEP_FASHION_CLOTHING_ANNOS_DIR, name='Annotations')

In [None]:
# Log the artifact to the active run.
run.log_artifact(artifact)

In [None]:
# Close the run.
run.finish()

In [None]:
# Start a new run, declare the latest 'deep-fashion' artifact as its input and
# download it locally.
run = wandb.init(
    project=WANDB_PROJECT,
    entity=ENTITY,
    job_type='upload',
)
artifact = run.use_artifact('deep-fashion:latest', type='raw-data')
artifact_dir = artifact.download()

In [None]:
import random

# Pick up to 10000 distinct record keys for the EDA table. The sample size is
# capped at the number of records because random.sample raises ValueError when
# asked for more items than the population contains.
idx = random.sample(range(len(DATA_DICT)), min(10000, len(DATA_DICT)))

In [None]:
# Empty W&B table with one column per field shown in the EDA view.
table = wandb.Table(columns=['Image Name', 'Image', 'Category', 'Attributes'])

In [None]:
# Add one table row per sampled record.
for _i in idx:
    data = DATA_DICT.get(_i)
    # Second '/'-separated component of the stored path.
    # NOTE(review): for paths shaped like 'img/<item>/<file>.jpg' this is the
    # item folder, not the file name - confirm that is what 'Image Name' should hold.
    _name = data['path'].split('/')[1]
    _img = Image.open(osp.join(DEEP_FASHION_DIR, data['path']))
    _cat = data['category']
    # Translate attribute positions into their names.
    _attrs = list(map(IDX_TO_ATTR.__getitem__, data['attributes']))
    table.add_data(_name, wandb.Image(_img), _cat, _attrs)

In [None]:
# Wrap the EDA table in an artifact under the entry name 'EDA Table'.
# NOTE(review): this uses the artifact name 'deep-fashion' and type 'raw-data';
# if another artifact with that name is logged to the same project, this will
# presumably be recorded as a new version of it - confirm that is intended.
eda_artifact = wandb.Artifact('deep-fashion', type='raw-data')
eda_artifact.add(table, name='EDA Table')

In [None]:
# Log the EDA artifact, then close the run.
run.log_artifact(eda_artifact)
run.finish()

In [None]:
# table = wandb.Table(columns=['Image Name', 'Image', 'Category', 'Attributes'])

In [None]:
# for key, val in DATA_DICT:
#     img = Image.open(osp.join())
#     table.add_data(val['path'].split('/')[1],
#                   )

In [None]:
def create_mask(img_shape, segmentation_map_list, category):
    """Return an all-zero, flat integer mask with one entry per pixel.

    Args:
        img_shape: Indexable whose first two elements are multiplied to get
            the mask length.
        segmentation_map_list: Accepted but not used.
        category: Accepted but not used.

    Returns:
        A 1-D numpy array of zeros with dtype ``int`` and length
        ``img_shape[0] * img_shape[1]``.
    """
    n_pixels = img_shape[0] * img_shape[1]
    return np.zeros(n_pixels, dtype=int)

def get_polygon_regions(segm_mask):
    """Turn flat coordinate lists into lists of 2-tuples.

    Each region ``[a0, b0, a1, b1, ...]`` becomes ``[(a0, b0), (a1, b1), ...]``.
    A trailing unpaired value in an odd-length region is dropped.

    Args:
        segm_mask: Iterable of sliceable sequences, one per region.

    Returns:
        A list with one list of pairs per input region.
    """
    regions = []
    for coords in segm_mask:
        regions.append(list(zip(coords[::2], coords[1::2])))
    return regions

In [None]:
# Split directories located directly under the dataset root.
DEEP_FASHION_TRAIN_DIR = osp.join(DEEP_FASHION_DIR, 'train')
DEEP_FASHION_VALIDATION_DIR = osp.join(DEEP_FASHION_DIR, 'validation')
DEEP_FASHION_TEST_DIR = osp.join(DEEP_FASHION_DIR, 'test')

In [None]:
# Sorted file names found in the training 'annos' and 'image' folders.
train_annos = sorted(os.listdir(osp.join(DEEP_FASHION_TRAIN_DIR, 'annos')))
train_images = sorted(os.listdir(osp.join(DEEP_FASHION_TRAIN_DIR, 'image')))

In [None]:
# Full paths for every listed annotation and image file.
train_annos_paths = [
    os.path.join(DEEP_FASHION_TRAIN_DIR, 'annos', name) for name in train_annos
]
train_images_paths = [
    os.path.join(DEEP_FASHION_TRAIN_DIR, 'image', name) for name in train_images
]

In [None]:
# Parse the first training annotation file as JSON.
with open(train_annos_paths[0], 'r') as f:
    data = json.loads(f.read())

In [None]:
# These points define a polygon: the 'segmentation' entry of 'item1' in the
# loaded annotation.
segm = data['item1']['segmentation']

In [None]:
# Look up element y*w + x of `nn`.
# NOTE(review): `nn` and `w` are not defined in any visible cell, so this
# raises NameError on a fresh kernel - presumably left over from a removed
# cell; confirm and define them before this point.
x, y = 257, 35
nn[y*w + x]

In [None]:
# Look up position [35, 257] of `nn` after reshaping it to (h, w).
# NOTE(review): `nn`, `h` and `w` are not defined in any visible cell - TODO
# confirm where they are supposed to come from.
nn.reshape(h, w)[35, 257]

In [None]:
# Open the first training image and convert it to RGB.
img = Image.open(train_images_paths[0]).convert('RGB')

In [None]:
# Regroup each flat coordinate list in `segm` into a list of value pairs.
polys = get_polygon_regions(segm)

In [None]:
# Draw every polygon onto `img` in place. The drawing context is created once
# and reused, instead of building a new ImageDraw object for each polygon.
draw = ImageDraw.Draw(img)
for poly in polys:
    draw.polygon(poly, outline=1, fill=1)

In [None]:
# Display the image.
img

In [1]:
import os
import os.path as osp
import config

In [2]:
# Annotation files live in the 'Anno_coarse' folder under the configured
# dataset root.
deep_fashion_dir = config.DEEP_FASHION_DIR
annos_dir = osp.join(deep_fashion_dir, 'Anno_coarse')
bboxes_path, attributes_path = (
    osp.join(annos_dir, file_name)
    for file_name in ('list_bbox.txt', 'list_attr_img.txt')
)

In [15]:
# Read the attribute file into memory, dropping its first two lines.
with open(attributes_path, 'r') as f:
    lines = list(f)[2:]

## Time for executing with only stripping and splitting:

`CPU times: user 38.3 s, sys: 47.1 s, total: 1min 25s
Wall time: 1min 30s`

## Time by only storing the line content
`CPU times: user 9.13 s, sys: 15.6 s, total: 24.8 s
Wall time: 25 s`

## Time w/o using `for` loop
`atts.append(list(filter(lambda x: x != '', list(map(lambda line: line.strip().split(' ')[1:], lines[2:])) )))`

`CPU times: user 43.1 s, sys: 1min 2s, total: 1min 45s
Wall time: 1min 56s`

In [12]:
%%time
atts = []

atts.append(list(filter(lambda x: x != '', list(map(lambda line: line.strip().split(' ')[1:], lines[2:])) )))
# for line in lines[2:]:
#     atts.append(line)
#     atts.append(list(filter(lambda x: x != '', line.strip().split(' ')[1:])))
#     atts.append(list(map(lambda x: int(x), list(filter(lambda x: x != '', line.strip().split(' ')[1:])))))

CPU times: user 43.1 s, sys: 1min 2s, total: 1min 45s
Wall time: 1min 56s


In [52]:
# Strip surrounding whitespace (including the trailing newline) from every row.
# The leading non-data lines were already removed when `lines` was read, so the
# list is not sliced here: slicing again would discard two data rows on every
# execution of this cell.
lines = [line.strip() for line in lines]

In [72]:
# Length of a literal run of spaces (presumably copied from a data row to
# measure the column padding).
len('                        ')

24

In [76]:
%%time
bbox = []
with open(bboxes_path, 'r') as f:
    for idx, line in enumerate(f):
        if idx > 1:
            line_split = list(map(int, line.strip().split(' ')[24:]))
            bbox.append(line)

ValueError: invalid literal for int() with base 10: ''

In [64]:
# Inspect the first bounding-box entry.
bbox[0]

['072', '079', '232', '273']

In [40]:
# Read the bounding-box file into memory, dropping its first two lines.
with open(bboxes_path, 'r') as f:
    lines = [row for row in f][2:]

In [33]:
%%time
bbox = list( list(filter(lambda x: x != '' , list(map(lambda line: line.strip().split(' ')[1:], lines))) ) )

CPU times: user 15.3 s, sys: 48.3 s, total: 1min 3s
Wall time: 1min 8s


In [44]:
# Count tokens equal to a single space after splitting the first row on ' '.
# Splitting on ' ' can never yield ' ' itself, so this evaluates to 0.
sum(1 for token in lines[0].split(' ') if token == ' ')

0

In [46]:
# First row with every space character removed.
''.join(lines[0].split(' '))

'img/Sheer_Pleated-Front_Blouse/img_00000001.jpg072079232273\n'

In [103]:
%%time
atts = []

with open(attributes_path, 'r') as f:
    for idx, line in enumerate(f):
        if idx > 1:
            atts.append(line.strip().split(' ')[-1000:])

CPU times: user 29.8 s, sys: 19.3 s, total: 49.1 s
Wall time: 50.8 s


In [102]:
# Number of rows collected in `atts`.
len(atts)

289222