In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm.notebook import tqdm
from PIL import Image
import os
import shutil
import json
import io

# Inspect full image archive without extracting all files

In [2]:
root_path = '../../../validation/image/'
filenames = []
paths = []
for file in os.listdir(root_path)[:]:
    if 'jpg' in file:
        filenames.append(file)
        paths.append(root_path + file)
df = pd.DataFrame({'filename': filenames, 'path': paths})
print(len(df))
df.sample(n=5)

32153


Unnamed: 0,filename,path
4001,000494.jpg,../../../validation/image/000494.jpg
9488,020374.jpg,../../../validation/image/020374.jpg
21542,019819.jpg,../../../validation/image/019819.jpg
18293,022108.jpg,../../../validation/image/022108.jpg
27071,011115.jpg,../../../validation/image/011115.jpg


# Basic EDA


# Examine classes

In [3]:
%%time

ids = []
sources = []
scales = []
viewpoints = []
zooms = []
occlusions = []
landmarks = []
bboxes = []
styles = []
category_ids = []
category_names = []
annos = [f.replace('image', 'annos').replace('jpg', 'json') for f in df['path']]
print('# rows', len(annos))
for file in tqdm(annos): 
    anno = open(file).read()
    anno = json.loads(anno)
    
    for key in anno.keys():
        if 'item' in key:
            ids.append(file.split('/')[-1].split('.')[0])
            sources.append(anno['source'])
            #anno.keys() => dict_keys(['segmentation', 'scale', 'viewpoint', 'zoom_in', 'landmarks', 'style', 'bounding_box', 'category_id', 'occlusion', 'category_name'])
            scales.append(anno[key]['scale'])
            viewpoints.append(anno[key]['viewpoint'])
            zooms.append(anno[key]['zoom_in'])
            occlusions.append(anno[key]['occlusion'])
            landmarks.append(anno[key]['landmarks'])
            bboxes.append(anno[key]['bounding_box'])
            styles.append(anno[key]['style'])
            category_ids.append(anno[key]['category_id'])
            category_names.append(anno[key]['category_name'])    

# rows 32153


  0%|          | 0/32153 [00:00<?, ?it/s]

CPU times: user 9.43 s, sys: 1.63 s, total: 11.1 s
Wall time: 1min 22s


In [4]:
df_meta_all = pd.DataFrame(data=zip(ids, sources, scales, viewpoints, zooms, occlusions, landmarks, bboxes, styles, category_ids, category_names), columns=['id', 'source', 'scale', 'viewpoint', 'zoom', 'occlusion', 'landmarks', 'bbox', 'style', 'category_id', 'category_name'])
print(len(df_meta_all))
df_meta_all[:5]

52779


Unnamed: 0,id,source,scale,viewpoint,zoom,occlusion,landmarks,bbox,style,category_id,category_name
0,29122,shop,1,2,1,1,"[447, 482, 1, 398, 482, 2, 421, 530, 2, 453, 5...","[333, 449, 578, 863]",1,10,short sleeve dress
1,25833,shop,2,2,1,3,"[473, 205, 1, 426, 164, 1, 439, 193, 1, 468, 2...","[165, 140, 569, 1024]",1,12,vest dress
2,25833,shop,2,2,1,2,"[476, 201, 1, 426, 159, 1, 442, 185, 2, 473, 2...","[219, 76, 593, 557]",0,1,short sleeve top
3,995,shop,2,2,1,1,"[313, 189, 1, 283, 190, 2, 300, 201, 2, 316, 2...","[207, 177, 439, 543]",1,10,short sleeve dress
4,24459,shop,1,2,2,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[230, 0, 401, 106]",0,1,short sleeve top


In [5]:
df_meta = df_meta_all.copy()

## Reference:  https://github.com/switchablenorms/DeepFashion2

Key fields:

- source: a string, where 'shop' indicates that the image is from commercial store while 'user' indicates that the image is taken by users.

- scale: a number, where 1 represents small scale, 2 represents modest scale and 3 represents large scale.

- occlusion: a number, where 1 represents slight occlusion(including no occlusion), 2 represents medium occlusion and 3 represents heavy occlusion.

- zoom_in: a number, where 1 represents no zoom-in, 2 represents medium zoom-in and 3 represents lagre zoom-in.

- viewpoint: a number, where 1 represents no wear, 2 represents frontal viewpoint and 3 represents side or back viewpoint.

First pass of training YOLOv5 on DF1 indicates that results including 'user' photos is very poor due to extreme viewing angles (down in mirror at jeans), heavy occlusion, etc.

Therefore, now filtering out 'poor quality' images.

In [6]:
filter1 = (df_meta.occlusion<3) & (df_meta.zoom<3)
filter2 = (df_meta.scale>1) & (df_meta.occlusion==2) & (df_meta.zoom==2) & (df_meta.viewpoint>1)
df_meta = df_meta[filter1 & ~filter2]

In [7]:
cnts = df_meta.groupby('category_name').count().sort_values(by='id', ascending=False)
cnts

Unnamed: 0_level_0,id,source,scale,viewpoint,zoom,occlusion,landmarks,bbox,style,category_id
category_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
short sleeve top,9944,9944,9944,9944,9944,9944,9944,9944,9944,9944
trousers,5060,5060,5060,5060,5060,5060,5060,5060,5060,5060
skirt,5003,5003,5003,5003,5003,5003,5003,5003,5003,5003
long sleeve top,4671,4671,4671,4671,4671,4671,4671,4671,4671,4671
shorts,3242,3242,3242,3242,3242,3242,3242,3242,3242,3242
short sleeve dress,2627,2627,2627,2627,2627,2627,2627,2627,2627,2627
vest dress,2508,2508,2508,2508,2508,2508,2508,2508,2508,2508
long sleeve outwear,1821,1821,1821,1821,1821,1821,1821,1821,1821,1821
vest,1673,1673,1673,1673,1673,1673,1673,1673,1673,1673
long sleeve dress,1240,1240,1240,1240,1240,1240,1240,1240,1240,1240


In [8]:
# combining 'sling' and 'sling dress' categories
df_meta.loc[df_meta.category_name=='sling','category_name'] = 'sling dress'

In [9]:
cnts = df_meta.groupby('category_name').count().sort_values(by='id', ascending=False)
cnts

Unnamed: 0_level_0,id,source,scale,viewpoint,zoom,occlusion,landmarks,bbox,style,category_id
category_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
short sleeve top,9944,9944,9944,9944,9944,9944,9944,9944,9944,9944
trousers,5060,5060,5060,5060,5060,5060,5060,5060,5060,5060
skirt,5003,5003,5003,5003,5003,5003,5003,5003,5003,5003
long sleeve top,4671,4671,4671,4671,4671,4671,4671,4671,4671,4671
shorts,3242,3242,3242,3242,3242,3242,3242,3242,3242,3242
short sleeve dress,2627,2627,2627,2627,2627,2627,2627,2627,2627,2627
vest dress,2508,2508,2508,2508,2508,2508,2508,2508,2508,2508
long sleeve outwear,1821,1821,1821,1821,1821,1821,1821,1821,1821,1821
vest,1673,1673,1673,1673,1673,1673,1673,1673,1673,1673
long sleeve dress,1240,1240,1240,1240,1240,1240,1240,1240,1240,1240


In [10]:
# use the same categories and ordering used for the Training dataset
# copied from the training dataset notebook
cats = ['short sleeve top', 'trousers', 'long sleeve top', 'shorts',
       'skirt', 'short sleeve dress', 'vest dress', 'vest',
       'long sleeve outwear', 'sling dress', 'long sleeve dress']

In [11]:
subset = df_meta[df_meta.category_name.isin(cats)]

In [12]:
min_cnt = subset.groupby('category_name').count().min()[0]
min_cnt

1187

In [13]:
# we keep all the data in the validation set for the full dataset
# for the smaller dataset, we target 10% of the size of the training dataset (100k) = 10,000, or 900 / category
samples = subset.groupby('category_name').sample(n=900, random_state=314)
len(samples)

9900

In [14]:
df['id'] = df['filename'].apply(lambda x: x.split('.')[0])

In [15]:
#from sklearn.model_selection import train_test_split
#train, val = train_test_split(samples, test_size=0.2)
#print(len(train),len(val))

#For this case, all the data is in the validation set
val = samples

In [16]:
val = pd.merge(df, val, on='id')
len(val)

9900

In [17]:
#train = pd.merge(df, train, on='id')
#train = train[train.folder=='image']
#len(train)

In [18]:
val.iloc[0]

filename                                                002304.jpg
path                          ../../../validation/image/002304.jpg
id                                                          002304
source                                                        user
scale                                                            3
viewpoint                                                        2
zoom                                                             2
occlusion                                                        1
landmarks        [0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 27, 2, 0, 0, ...
bbox                                              [0, 1, 258, 378]
style                                                            3
category_id                                                     12
category_name                                           vest dress
Name: 0, dtype: object

In [19]:
root = '../../../'

if not os.path.exists(root+'df2_mix'):
    os.mkdir(root+'df2_mix')

if not os.path.exists(root+'df2_mix/images'):
    os.mkdir(root+'df2_mix/images')
if not os.path.exists(root+'df2_mix/images/train'):
    os.mkdir(root+'df2_mix/images/train')
if not os.path.exists(root+'df2_mix/images/val'):
    os.mkdir(root+'df2_mix/images/val')
if not os.path.exists(root+'df2_mix/images/test'):
    os.mkdir(root+'df2_mix/images/test')

if not os.path.exists(root+'df2_mix/labels'):
    os.mkdir(root+'df2_mix/labels')
if not os.path.exists(root+'df2_mix/labels/train'):
    os.mkdir(root+'df2_mix/labels/train')
if not os.path.exists(root+'df2_mix/labels/val'):
    os.mkdir(root+'df2_mix/labels/val')
if not os.path.exists(root+'df2_mix/labels/test'):
    os.mkdir(root+'df2_mix/labels/test')

In [21]:
target_dim = 640.0
 
def createFiles(data=val, type_='val'):
    for i, row in data.iterrows():
        if i%int(len(data)/100)==0:
            print(i)
        try:
            filepath = row.path
            img = Image.open(filepath)
            w_orig = float(img.size[0])
            h_orig = float(img.size[1])
            img.thumbnail((target_dim, target_dim)) 
            if 'png' in str(type(img)).lower():
                img = img.convert('RGB')
            img.save(root+'df2_mix/images/'+type_+'/'+row.filename, 'JPEG', optimize=True)

            (x1, y1, x2, y2) = [float(b) for b in row.bbox]
            x = round((((x2 - x1)/2) + x1) / w_orig, 4)
            y = round((((y2 - y1)/2) + y1) / h_orig, 4)
            w = round((x2 - x1) / w_orig, 4)
            h = round((y2 - y1) / h_orig, 4)

            idx = list(cats).index(row.category_name)
            metadata = str(idx) + f' {x} {y} {w} {h}'
            fileroot = root+'df2_mix/labels/'+type_+'/'+row.filename.split('.')[0]+'.txt'

            f = open(fileroot, 'w')
            f.write(metadata)
            f.close()
            img = None
        except:
            print('error:', row.path)

In [22]:
#createFiles(train, type_='train')
createFiles(val, type_='val')

0
99
198
297
396
495
594
693
792
891
990
1089
1188
1287
1386
1485
1584
1683
1782
1881
1980
2079
2178
2277
2376
2475
2574
2673
2772
2871
2970
3069
3168
3267
3366
3465
3564
3663
3762
3861
3960
4059
4158
4257
4356
4455
4554
4653
4752
4851
4950
5049
5148
5247
5346
5445
5544
5643
5742
5841
5940
6039
6138
6237
6336
6435
6534
6633
6732
6831
6930
7029
7128
7227
7326
7425
7524
7623
7722
7821
7920
8019
8118
8217
8316
8415
8514
8613
8712
8811
8910
9009
9108
9207
9306
9405
9504
9603
9702
9801


In [6]:
lines = []
lines.append('path: ../df2_mix # root dir\n')
lines.append('train: images/train\n')
lines.append('val: images/val\n')
lines.append('test:  images/test\n')

lines.append('nc: '+str(len(cats))+' # number of classes\n')

str_classes = "[ '" + "',\n'".join(cats) + "' ]"

lines.append('names: '+str_classes + '\n')

f = open(root+'df2_mix/df2_mix.yaml', 'w')
f.writelines(lines)
f.close()