In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the local patch_generate_util helpers) are reloaded before each cell
# is executed and edits to them are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os

from tqdm import tqdm
import pandas as pd

from mxnet import gluon, nd, image

import matplotlib.pyplot as plt
from PIL import Image

from patch_generate_util import check_bbox_in_patch, get_patch, draw_with_bbox

In [3]:
# num_patches = 4

# root = '/home/ec2-user/SageMaker'

# data_basedir = f'{root}/datasets/Data'
# img_basedir = f'{root}/datasets/Images_DefectNames'

# patch_basedir = f'datasets-patches-{num_patches}'

In [4]:
def get_meta_data(camera_type='GA', data_type='train', version='2', data_dir=None):
    """Load the metadata CSV for one camera type, dataset version and split.

    The file read is
    ``{data_dir}/combined_{camera_type}_v{version}_{data_type}.csv``.

    Args:
        camera_type: Camera identifier used in the file name (e.g. 'GA').
        data_type: Split name used in the file name (e.g. 'train').
        version: Dataset version used in the file name (e.g. '2').
        data_dir: Directory that holds the CSV files. When ``None`` the
            notebook-level ``data_basedir`` global is used, which must have
            been defined before this function is called.

    Returns:
        pandas.DataFrame with the contents of the CSV file.

    Raises:
        NameError: if ``data_dir`` is ``None`` and ``data_basedir`` has not
            been defined yet.
        FileNotFoundError: if the CSV file does not exist.
    """
    # Resolve the directory at call time so the global fallback keeps working
    # for existing callers, while explicit callers avoid hidden notebook state.
    base_dir = data_basedir if data_dir is None else data_dir
    csv_fname = f'{base_dir}/combined_{camera_type}_v{version}_{data_type}.csv'
    return pd.read_csv(csv_fname)

def fillZero(id, num_digits):
    """Return ``id`` rendered as text and left-padded with zeros.

    Args:
        id: Value to render; it is converted with ``str()``.
        num_digits: Minimum width of the returned string.

    Returns:
        The zero-padded string. Values already at least ``num_digits``
        characters wide are returned unpadded.
    """
    return str(id).zfill(num_digits)

In [7]:
def generate_patches(camera_type='GA', data_type='train', num_pathches=1, version='2', patch_height=128):
    """Generate image patches for every row of one metadata CSV and index them.

    For each row returned by ``get_meta_data`` the source image is loaded,
    its bounding box is validated, and up to ``num_pathches`` patches are
    requested from ``get_patch`` (with ``bbox_included=True``). Each patch is
    saved as a JPEG under
    ``{root}/dataset_v{version}/{patch_basedir}/{camera_type}/{data_type}/{defect_name}/``
    and one row per saved patch is written to
    ``{root}/dataset_v{version}/{patch_basedir}/patch_list_{camera_type}_{data_type}.csv``.

    Relies on the notebook-level globals ``root``, ``patch_basedir`` and
    ``img_basedir`` (and ``data_basedir`` through ``get_meta_data``).

    Args:
        camera_type: Camera identifier used in input and output paths.
        data_type: Split name used in input and output paths.
        num_pathches: Number of patch attempts per image. (The spelling is
            kept as-is because callers pass it by keyword.)
        version: Dataset version used in input and output paths.
        patch_height: Value forwarded to ``get_patch`` as ``patch_height``.

    Returns:
        Tuple ``(new_df, patch_exception_list, invalid_bbox_list)``:
        ``new_df`` has one row per saved patch (patch columns followed by the
        metadata columns, index starting at 1); the two lists hold
        ``'<img_fname>,<img.shape>'`` strings for failed patch attempts and
        for rows whose bounding box is empty or inverted.
    """
    out_root = f'{root}/dataset_v{version}/{patch_basedir}'
    patch_dir = f'{out_root}/{camera_type}/{data_type}'
    df = get_meta_data(camera_type, data_type, version)

    patch_col = ['patch_fname', 'is_defective', 'patch_x0', 'patch_y0', 'patch_bbox_x0', 'patch_bbox_y0', 'patch_bbox_x1', 'patch_bbox_y1']
    new_col = patch_col + list(df.columns)

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; enlarging a DataFrame with .loc inside the loop is quadratic.
    patch_rows = []
    patch_cnt = 1
    invalid_bbox_list = []
    patch_exception_list = []

    # Constant in this pipeline: every generated patch is flagged defective.
    is_defective = True

    for _, example in tqdm(df.iterrows(), total=df.shape[0]):
        # Bounding box of the defect in the full image.
        bbox_x0 = example.RoiX0
        bbox_x1 = example.RoiX1
        bbox_y0 = example.RoiY0
        bbox_y1 = example.RoiY1
        bbox = (bbox_x0, bbox_y0, bbox_x1, bbox_y1)

        img_fname = example.img_fname
        img_full_fname = f'{img_basedir}_v{version}/{camera_type}/{example.defect_name}/{img_fname}'

        # Load the image before validating the box: the invalid-bbox log
        # records img.shape, which previously was read before `img` existed
        # (NameError on the first row, stale shape afterwards).
        img = image.imread(img_full_fname)

        # Skip empty (x0 == x1 / y0 == y1) and inverted boxes.
        if bbox_x0 >= bbox_x1 or bbox_y0 >= bbox_y1:
            invalid_bbox_list.append(f'{img_fname},{img.shape}')
            continue

        defect_class_dir = f'{patch_dir}/{example.defect_name}'
        os.makedirs(defect_class_dir, exist_ok=True)

        # Last patch successfully produced for THIS image, used for the
        # optional preview below; stays None when every attempt failed.
        last_patch = None

        for _ in range(num_pathches):
            try:
                patch_img, new_bbox, patch_x0, patch_y0, patch_bbox = get_patch(
                    img, patch_height=patch_height,
                    bbox=bbox, bbox_included=True,
                    debug=False)
                patch_bbox_x0, patch_bbox_y0, patch_bbox_x1, patch_bbox_y1 = patch_bbox
            except Exception:
                # Best effort: record the failure and try the next patch.
                # `Exception` (not a bare except) lets Ctrl-C interrupt the run.
                patch_exception_list.append(f'{img_fname},{img.shape}')
                continue

            patch_fname = img_fname.replace('.jpg','')
            patch_fname = f'{patch_fname}-{patch_x0}.{patch_y0}.jpg'
            patch_full_fname = os.path.join(defect_class_dir, patch_fname)
            pil_img = Image.fromarray(patch_img.asnumpy())
            pil_img.save(patch_full_fname, 'JPEG', quality=100)

            patch_values = [patch_fname, is_defective, patch_x0, patch_y0, patch_bbox_x0, patch_bbox_y0, patch_bbox_x1, patch_bbox_y1]
            row = dict(zip(patch_col, patch_values))
            # Series.append was removed in pandas 2.0; merge via plain dicts.
            row.update(example.to_dict())
            patch_rows.append(row)

            patch_cnt += 1
            last_patch = (patch_img, new_bbox)

        # Occasional visual sanity check; only when this image actually
        # produced a patch, so no undefined or stale patch is drawn.
        if last_patch is not None and patch_cnt % 100 == 0:
            draw_with_bbox(*last_patch)

    new_df = pd.DataFrame(patch_rows, columns=new_col)
    # Keep the 1-based row labels the frame had when it was filled via .loc.
    new_df.index = pd.RangeIndex(1, len(patch_rows) + 1)

    # Make sure the target directory exists even if no patch was written.
    os.makedirs(out_root, exist_ok=True)
    csv_full_fname = f'{out_root}/patch_list_{camera_type}_{data_type}.csv'
    new_df.to_csv(csv_full_fname, index=False)

    print(f'patch list saved to {csv_full_fname}')
    print(f'patch_exception_cnt - {len(patch_exception_list)}')
    print(f'invalid_bbox_cnt - {len(invalid_bbox_list)}')

    return new_df, patch_exception_list, invalid_bbox_list

In [12]:
# --- Configuration -----------------------------------------------------------
# Number of patch attempts per source image; also part of the output dir name.
num_patches = 4

# Directory layout read by get_meta_data / generate_patches through globals.
root = '/home/ec2-user/SageMaker'
data_basedir = f'{root}/datasets/Data'
img_basedir = f'{root}/datasets/Images_DefectNames'
patch_basedir = f'datasets-patches-{num_patches}'

camera_types = ['GA', 'GI']
versions = ['2', '3']
dataset_types = ['train', 'val', 'test']

# --- Run ---------------------------------------------------------------------
# Every (version, camera, split) combination, in the same order as nesting
# the loops version -> camera -> split.
split_grid = [
    (ver, cam, split)
    for ver in versions
    for cam in camera_types
    for split in dataset_types
]

for version, camera_type, dataset_type in split_grid:
    print(f'Creating patches of {camera_type} v{version} - {dataset_type} dataset')

    # Only the patch index frame of the latest combination is kept.
    train_df, _, _ = generate_patches(
        camera_type,
        data_type=dataset_type,
        num_pathches=num_patches,
        version=version,
    )


  0%|          | 2/7040 [00:00<08:42, 13.48it/s]

Creating patches of GA v2 - train dataset


100%|██████████| 7040/7040 [25:32<00:00,  4.59it/s]
  0%|          | 1/880 [00:00<01:56,  7.58it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GA_train.csv
patch_exception_cnt - 80
invalid_bbox_cnt - 0
Creating patches of GA v2 - val dataset


100%|██████████| 880/880 [01:49<00:00,  8.04it/s]
  0%|          | 1/881 [00:00<02:02,  7.17it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GA_val.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GA v2 - test dataset


100%|██████████| 881/881 [01:49<00:00,  8.01it/s]
  0%|          | 1/3602 [00:00<07:40,  7.82it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GA_test.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GI v2 - train dataset


100%|██████████| 3602/3602 [07:08<00:00,  8.41it/s]
  0%|          | 2/450 [00:00<00:30, 14.91it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GI_train.csv
patch_exception_cnt - 4
invalid_bbox_cnt - 0
Creating patches of GI v2 - val dataset


100%|██████████| 450/450 [00:33<00:00, 13.35it/s]
  0%|          | 2/451 [00:00<00:29, 15.09it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GI_val.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GI v2 - test dataset


100%|██████████| 451/451 [00:33<00:00, 13.35it/s]
  0%|          | 2/7045 [00:00<08:11, 14.32it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v2/datasets-patches-4/patch_list_GI_test.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GA v3 - train dataset


100%|██████████| 7045/7045 [22:17<00:00,  5.27it/s]
  0%|          | 1/881 [00:00<01:41,  8.71it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GA_train.csv
patch_exception_cnt - 72
invalid_bbox_cnt - 0
Creating patches of GA v3 - val dataset


100%|██████████| 881/881 [02:06<00:00,  6.97it/s]
  0%|          | 0/881 [00:00<?, ?it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GA_val.csv
patch_exception_cnt - 8
invalid_bbox_cnt - 0
Creating patches of GA v3 - test dataset


100%|██████████| 881/881 [02:16<00:00,  6.43it/s]
  0%|          | 1/3602 [00:00<07:12,  8.33it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GA_test.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GI v3 - train dataset


100%|██████████| 3602/3602 [11:08<00:00,  5.39it/s]
  0%|          | 1/450 [00:00<01:01,  7.27it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GI_train.csv
patch_exception_cnt - 4
invalid_bbox_cnt - 0
Creating patches of GI v3 - val dataset


100%|██████████| 450/450 [01:03<00:00,  7.11it/s]
  0%|          | 1/451 [00:00<00:58,  7.71it/s]

patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GI_val.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
Creating patches of GI v3 - test dataset


100%|██████████| 451/451 [01:06<00:00,  6.80it/s]


patch list saved to /home/ec2-user/SageMaker/dataset_v3/datasets-patches-4/patch_list_GI_test.csv
patch_exception_cnt - 0
invalid_bbox_cnt - 0
