In [1]:
!pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio Shapely



In [2]:
!pip install imgaug



In [3]:
!pip install files



In [4]:
!pip install pascal-voc-writer



In [5]:
# from google.colab import drive
# drive.mount('/content/drive')

In [6]:
def f_unzip(source_file, dest_path='./', enc_type='cp437', dec_type='cp949'):
    '''
    Extract every member of a zip archive into a directory, showing a progress bar.

    ``zipfile`` decodes member names as CP437 when the archive does not set the
    UTF-8 flag. Such names are re-encoded with ``enc_type`` and decoded with
    ``dec_type`` to recover names stored in a legacy code page (CP949 by
    default). Names flagged as UTF-8 are already decoded correctly and are
    left untouched.

    Args:
        source_file: path of the zip archive.
        dest_path: directory to extract into.
        enc_type: codec used to turn the decoded name back into raw bytes.
        dec_type: codec used to decode those raw bytes.

    Raises:
        Exception: if a member name cannot be converted or a member cannot be
            extracted. The offending name is printed first and the original
            error is chained as ``__cause__``.

    Example:
        src_file = '/content/drive/MyDrive/test.zip'
        des_path = '/content/img/'
        f_unzip(src_file, des_path)
    '''
    import zipfile
    import progressbar

    utf8_flag = 0x800  # general purpose bit 11: the name is stored as UTF-8

    with zipfile.ZipFile(source_file, 'r') as zf:
        members = zf.infolist()
        bar = progressbar.ProgressBar(maxval=len(members)).start()

        for done, member in enumerate(members, start=1):
            try:
                # Only legacy (non UTF-8) names need the code page round trip;
                # encoding a UTF-8 name with enc_type would raise.
                if not member.flag_bits & utf8_flag:
                    member.filename = member.filename.encode(enc_type).decode(dec_type)
                zf.extract(member, dest_path)
            except Exception as exc:
                print(member.filename)
                raise Exception(
                    f'Failed to extract {member.filename!r} from {source_file!r}'
                ) from exc
            bar.update(done)

    bar.finish()


In [7]:
def f_create_dir(dir_name):
    '''
    Create a directory (with any missing parents) unless the path already exists.

    Args:
        dir_name: path of the directory to create.

    Returns:
        None. An OSError raised while creating the directory is reported with
        print and is not re-raised.

    Example:
        f_create_dir('./test')
    '''
    import os

    # Nothing to do when something already lives at this path.
    if os.path.exists(dir_name):
        return

    try:
        os.makedirs(dir_name)
    except OSError:
        print("Error: Failed to create the directory.")
        

In [8]:
def f_hangule_yn(str):
    '''
    Report whether a string contains at least one Hangul character.

    Args:
        str: text to inspect (the name shadows the builtin; it is kept so
            existing keyword callers keep working).

    Returns:
        True if any character falls in the ranges ㄱ-ㅣ or 가-힣, else False.
    '''
    # Imported locally so the function does not depend on a later cell
    # having imported re into the notebook namespace.
    import re

    return re.search('[ㄱ-ㅣ가-힣]', str) is not None

In [9]:
def f_dir_list(des_dir):
    '''
    Return the names of the directories directly inside des_dir.

    Args:
        des_dir: directory to scan (not recursive).

    Returns:
        A list of directory names (not full paths), sorted by name.
        os.scandir yields entries in arbitrary order, so the names are sorted
        to make the processing order the same on every run.
    '''
    # Imported locally so the function does not depend on a later cell
    # having imported os into the notebook namespace.
    import os

    with os.scandir(des_dir) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())

In [10]:
def f_save_img(img_path, img_data):
    '''
    Save an image, choosing the write method by whether the path contains Hangul.

    A path with Hangul is written by encoding the image in memory and dumping
    the bytes to a file opened by Python; any other path goes through
    cv2.imwrite. (Presumably cv2.imwrite cannot handle such paths on some
    platforms - confirm before simplifying.)

    Args:
        img_path: destination file path; its extension selects the image format.
        img_data: image array to save.

    Returns:
        None. An encoding failure is reported with print and nothing is written.
    '''
    import os
    import cv2

    if f_hangule_yn(img_path):
        # Take the extension from the destination path instead of relying on a
        # global img_ext left behind by whichever loop called this function.
        ext = os.path.splitext(img_path)[1]
        ok, encoded_img = cv2.imencode(ext, img_data)
        if ok:
            with open(img_path, mode='w+b') as f:
                encoded_img.tofile(f)
        else:
            print('Error: Failed to encode the image:', img_path)
    else:
        cv2.imwrite(img_path, img_data)

In [11]:
import xml.etree.ElementTree as ET

def read_anntation(xml_file: str):
    '''
    Read an annotation XML file and return its bounding boxes.

    The file is expected to use the <object>/<name>/<bndbox> layout with
    <xmin>, <ymin>, <xmax>, <ymax> children and a top-level <filename>.

    Args:
        xml_file: path of the annotation XML file.

    Returns:
        (bounding_box_list, file_name) where bounding_box_list holds one
        [label, x_min, y_min, x_max, y_max] entry per <bndbox>, and file_name
        is the text of <filename> (None when the tag is missing).
        Objects without a <bndbox> contribute no entry.
    '''
    root = ET.parse(xml_file).getroot()
    file_name = root.findtext('filename')

    bounding_box_list = []
    for obj in root.iter('object'):
        object_label = obj.find("name").text

        # Append inside the loop: an object without a box must not reuse the
        # coordinates parsed for the previous object.
        for box in obj.findall("bndbox"):
            x_min = int(box.find("xmin").text)
            y_min = int(box.find("ymin").text)
            x_max = int(box.find("xmax").text)
            y_max = int(box.find("ymax").text)
            bounding_box_list.append([object_label, x_min, y_min, x_max, y_max])

    return bounding_box_list, file_name

In [12]:
import os
import cv2
import numpy as np
import re

def read_train_dataset_2(dir):
    '''
    Load every jpg/png image in a directory that has a matching annotation XML.

    An image named <stem>.<ext> is paired with <stem>.xml in the same
    directory. Images without an XML file, or that cannot be decoded, are
    reported with print and skipped.

    Args:
        dir: directory to read (not recursive).

    Returns:
        (images, annotations, images_ext), all index-aligned:
        - images: numpy array of the decoded images, always 3-channel
          (an alpha channel is dropped, grayscale is expanded). It is a
          stacked array when all images share one shape, otherwise a 1-D
          object array of per-image arrays.
        - annotations: list of (bounding_box_list, xml_file, file_name) tuples,
          where bounding_box_list and file_name come from read_anntation.
        - images_ext: list of lower-case image extensions without the dot.
    '''
    images = []
    annotations = []
    images_ext = []

    for file in os.listdir(dir):
        # Decide by the real extension; a substring test also matches names
        # that merely contain "jpg"/"png" somewhere.
        stem, dot_ext = os.path.splitext(file)
        img_ext = dot_ext[1:].lower()
        if img_ext not in ('jpg', 'png'):
            continue

        # Build the XML name from the stem so upper-case extensions and names
        # containing the extension text are handled correctly.
        xml_file = stem + '.xml'
        xml_path = os.path.join(dir, xml_file)
        if not os.path.isfile(xml_path):
            print('xml 파일 없음:', xml_path)
            continue

        # Paths containing Hangul are read as raw bytes and decoded in memory.
        img_path = os.path.join(dir, file)
        if f_hangule_yn(img_path):
            img = cv2.imdecode(np.fromfile(img_path, np.uint8), cv2.IMREAD_UNCHANGED)
        else:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)

        if img is None:
            print('Error: Failed to read the image:', img_path)
            continue

        # Normalize to three channels so callers can rely on img.shape[2].
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

        bounding_box_list, file_name = read_anntation(xml_path)

        # Append to all three lists together so their indices stay aligned.
        images.append(img)
        annotations.append((bounding_box_list, xml_file, file_name))
        images_ext.append(img_ext)

    try:
        images_arr = np.array(images)
    except ValueError:
        # Images of different sizes cannot be stacked; keep them as separate
        # arrays inside an object array so len() and indexing still work.
        images_arr = np.empty(len(images), dtype=object)
        for k, img in enumerate(images):
            images_arr[k] = img

    return images_arr, annotations, images_ext


In [13]:
def f_img_bgd_cut(img_data, bbox, img_ext):
    '''
    Black out the background of an image with GrabCut.

    Args:
        img_data: image array; 2-D (grayscale), 3-channel or 4-channel.
        bbox: bounding box as (x_min, y_min, x_max, y_max) in pixels.
        img_ext: image extension. Kept for backward compatibility; the alpha
            channel is detected from the array itself, not from this value.

    Returns:
        A 3-channel image of the same height and width in which pixels
        GrabCut marked as (probable) background are set to 0.
    '''
    import cv2
    import numpy as np

    # GrabCut needs a 3-channel image.
    if img_data.ndim == 2:
        img_data = cv2.cvtColor(img_data, cv2.COLOR_GRAY2BGR)
    elif img_data.shape[2] == 4:
        img_data = cv2.cvtColor(img_data, cv2.COLOR_RGBA2RGB)

    # cv2.grabCut takes the rectangle as (x, y, width, height), not as two
    # corner points, so convert the corner-style bounding box.
    x_min, y_min, x_max, y_max = (int(v) for v in bbox)
    rectangle = (x_min, y_min, x_max - x_min, y_max - y_min)

    # Initial mask and the scratch arrays GrabCut uses for its models.
    mask = np.zeros(img_data.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)

    cv2.grabCut(img_data,   # source image
                mask,       # mask, filled in by GrabCut
                rectangle,  # region that contains the object
                bgdModel,   # scratch array for the background model
                fgdModel,   # scratch array for the foreground model
                5,          # number of iterations
                cv2.GC_INIT_WITH_RECT)  # initialize from the rectangle

    # 0 for background / probable background, 1 for everything else.
    mask_2 = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD), 0, 1).astype('uint8')

    # Multiply by the mask to zero out the background pixels.
    return img_data * mask_2[:, :, np.newaxis]
    

In [14]:
from imgaug import augmenters as iaa

# Augmentation pipelines: each seq_NN is an independent imgaug Sequential.
seq_01 = iaa.Sequential([
    iaa.Multiply((1.2, 1.5)),  # the larger the factor, the whiter the image
    iaa.Affine(
        scale = (1, 1)  # zoom in / out
    )
])
seq_02 = iaa.Sequential([
    iaa.Affine(
        scale=(0.5, 0.7)  # shrink
    )
])
seq_03 = iaa.Sequential([
    iaa.Affine(
        scale=(1.5, 1.7)  # enlarge
    )
])
seq_04 = iaa.Sequential([
    iaa.Affine(
        rotate = 45       # rotation
    )
])
seq_05 = iaa.Sequential([
    iaa.Affine(
        rotate = -45       # rotation
    )
])
seq_06 = iaa.Sequential([
    iaa.Affine(
        translate_px = {"x": -80, "y": 0}  # shift along the horizontal axis
    )
])
seq_07 = iaa.Sequential([
    iaa.Affine(
        translate_px = {"x": 0, "y": -80}  # shift along the vertical axis
    )
])
seq_08 = iaa.Sequential([
        iaa.GaussianBlur(sigma=(4.0, 4.0)) # blur
])
seq_09 = iaa.Sequential([
        iaa.MotionBlur(k=20)  # motion blur
])
seq_10 = iaa.Sequential([
        iaa.AdditiveGaussianNoise(scale=0.1*255)  # gaussian noise
])
seq_11 = iaa.Sequential([
        iaa.Grayscale(alpha=(0.8, 0.8)),
        iaa.ShearX((-20, 20))
])
seq_12 = iaa.Sequential([
        iaa.BlendAlpha((0.0, 1.0),
                       foreground=iaa.Add(100),
                       background=iaa.Multiply(0.3))
])
seq_13 = iaa.Sequential([
        iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.2),
        iaa.Affine(shear=(16, -16)),
        iaa.Affine(rotate=(35, 45))
])
seq_14 = iaa.Sequential([
        iaa.Add((-40, 40), per_channel=0.5)
])
seq_15 = iaa.Sequential([
        iaa.AdditiveLaplaceNoise(scale=0.2*255, per_channel=True)
])
seq_16 = iaa.Sequential([
        iaa.MeanShiftBlur()
])
seq_17 = iaa.Sequential([
        iaa.Canny(alpha=(0.0, 0.5))
])
seq_18 = iaa.Sequential([
        iaa.pillike.Autocontrast((10, 20), per_channel=True)
])
seq_19 = iaa.Sequential([
        iaa.pillike.FilterEmboss()
])
seq_20 = iaa.Sequential([
        iaa.pillike.FilterEdgeEnhance()
])

# Collect the pipelines to run into aug_list; only the listed ones are applied.
# aug_list = [seq_01, seq_02, seq_03, seq_04, seq_05, seq_06, seq_07, seq_08, seq_09, seq_10,
#             seq_11, seq_12, seq_13, seq_14, seq_15, seq_16, seq_17, seq_18, seq_19, seq_20
#             ]
aug_list = [seq_01, seq_02, seq_08, seq_09, seq_10
#             , seq_11, seq_12, seq_14, seq_15
           ]
# seq_03, 04, 05, 06, 07, 13  >>  excluded because they produced negative bounding box coordinates


In [17]:
# Directory that holds the zip archives.
# Colab alternative: '/content/drive/MyDrive/그게뭐약/alyac_crop'
src_root_dir = './alyac_img_label/google_drive'

# Base names of the archives; each one is "<id>.<pill name>".
src_name = [
    '2070.탁센연질캡슐(나프록센)',
    '3324.로프민캡슐',
    '3982.애드빌정(이부프로펜)',
    '8998.이소티논연질캡슐10밀리그램',
    '12063.한미오메가연질캡슐1000밀리그램',
    '17159.제스부틴정',
    '17458.캐롤비콜드연질캡슐',
    '22074.모드콜에스연질캡슐',
    '22408.이지엔6프로연질캡슐',
    '22605.타이레놀정500밀리그람',
]

# Directory the following cells work in.
# Colab alternative: '/content/drive/MyDrive/그게뭐약/alyac_tmp'
des_root_dir = './alyac_img_label/google_drive'

In [18]:
# Unzip the archives.
#
# Only archives named "<entry of src_name>_crop.zip" are extracted, each into
# a "<entry of src_name>_crop" directory under des_root_dir.
import os

for pill_name in src_name:
    archive_path = os.path.join(src_root_dir, pill_name + '_crop.zip')

    # Report a missing archive and move on to the next one.
    if not os.path.isfile(archive_path):
        print('압축 파일 없음:', archive_path)
        continue

    # Create the target directory, then extract into it.
    extract_dir = os.path.join(des_root_dir, pill_name + '_crop')
    f_create_dir(extract_dir)
    f_unzip(archive_path, extract_dir)

100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|
100% |########################################################################|


In [19]:
# Grow the dataset by copy & paste.
#
# Every file in a "<entry of src_name>_crop" directory is copied into the same
# directory under a new name whose trailing 3-digit number is increased by 100
# (for example _001 -> _101).
# NOTE: running this cell again copies the copies as well (_101 -> _201).
import os
import shutil
import progressbar
import time

for pill_name in src_name:
    print(pill_name)
    time.sleep(0.5)

    # Path of the _crop directory.
    crop_dir = os.path.join(des_root_dir, pill_name + '_crop')

    # Stop the whole run when a _crop directory is missing.
    if not os.path.isdir(crop_dir):
        print('_crop 디렉토리 없음:', crop_dir)
        break

    items = os.listdir(crop_dir)

    bar = progressbar.ProgressBar(maxval=len(items)).start()
    # Use a dedicated index name so the outer loop variable is not shadowed.
    for item_idx, item in enumerate(items):
        src_path = os.path.join(crop_dir, item)

        # Skip sub-directories and anything else that is not a regular file.
        if not os.path.isfile(src_path):
            continue

        file_name, file_ext = os.path.splitext(item)

        # Skip files whose name does not end in a number instead of crashing.
        try:
            new_number = int(file_name[-3:]) + 100
        except ValueError:
            print('Skipped (no numeric suffix):', item)
            continue

        new_file_name = file_name[:-3] + str(new_number) + file_ext
        des_path = os.path.join(crop_dir, new_file_name)
        shutil.copy2(src_path, des_path)

        bar.update(item_idx)

    bar.finish()

2070.탁센연질캡슐(나프록센)


100% |########################################################################|


3324.로프민캡슐


100% |########################################################################|


3982.애드빌정(이부프로펜)


100% |########################################################################|


8998.이소티논연질캡슐10밀리그램


100% |########################################################################|


12063.한미오메가연질캡슐1000밀리그램


100% |########################################################################|


17159.제스부틴정


100% |########################################################################|


17458.캐롤비콜드연질캡슐


100% |########################################################################|


22074.모드콜에스연질캡슐


100% |########################################################################|


22408.이지엔6프로연질캡슐


100% |########################################################################|


22605.타이레놀정500밀리그람


100% |########################################################################|


In [20]:
# Background removal.
#
# For every image in a "<entry of src_name>_crop" directory, remove the
# background and save the result in the matching "<entry of src_name>_black"
# directory, together with a copy of its annotation XML.
import os
import progressbar
import shutil
import time

for pill_idx, pill_name in enumerate(src_name):
    print(f'{pill_idx}: {pill_name}')
    time.sleep(0.5)

    # Path of the _crop directory.
    crop_dir = os.path.join(des_root_dir, pill_name + '_crop')

    # Stop the whole run when a _crop directory is missing.
    if not os.path.isdir(crop_dir):
        print('_crop 디렉토리 없음:', crop_dir)
        break

    # Build the _black path from the name itself: replacing '_crop' in the
    # full path would also rewrite a root directory that contains '_crop'.
    black_dir = os.path.join(des_root_dir, pill_name + '_black')
    f_create_dir(black_dir)

    # Read the images and their annotations from the _crop directory.
    images, annotations, images_ext = read_train_dataset_2(crop_dir)

    bar = progressbar.ProgressBar(maxval=len(images)).start()
    for idx in range(len(images)):
        boxes, xml_file, _ = annotations[idx]
        # Keep the name img_ext: f_save_img may read it from the notebook
        # namespace rather than receive it as an argument.
        img_ext = images_ext[idx]

        # The first bounding box drives the background removal.
        if not boxes:
            print('Skipped (no bounding box):', xml_file)
            continue
        bbox = boxes[0][1:]

        img_data = f_img_bgd_cut(images[idx], bbox, img_ext)

        # Derive one shared stem so the image and its XML stay paired.
        stem = os.path.splitext(xml_file)[0]
        black_stem = stem.replace('_crop', '_black')

        # Save the _black image.
        img_path = os.path.join(black_dir, black_stem + '.' + img_ext)
        f_save_img(img_path, img_data)

        # Copy the XML from the _crop directory to the _black directory.
        xml_path = os.path.join(crop_dir, xml_file)
        black_xml_path = os.path.join(black_dir, black_stem + '.xml')
        shutil.copyfile(xml_path, black_xml_path)

        bar.update(idx)

    bar.finish()


0: 2070.탁센연질캡슐(나프록센)


100% |########################################################################|


1: 3324.로프민캡슐


100% |########################################################################|


2: 3982.애드빌정(이부프로펜)


100% |########################################################################|


3: 8998.이소티논연질캡슐10밀리그램


100% |########################################################################|


4: 12063.한미오메가연질캡슐1000밀리그램


100% |########################################################################|


5: 17159.제스부틴정


100% |########################################################################|


6: 17458.캐롤비콜드연질캡슐


100% |########################################################################|


7: 22074.모드콜에스연질캡슐


100% |########################################################################|


8: 22408.이지엔6프로연질캡슐


100% |########################################################################|


9: 22605.타이레놀정500밀리그람


100% |########################################################################|


In [21]:
# Image augmentation.
#
# Every image (with its annotation) found directly inside each directory under
# des_root_dir is run through each pipeline in aug_list. The augmented image
# and a matching annotation XML are written to an "aug" sub-directory as
# "after<N>_<original stem>.<ext>" and "after<N>_<original stem>.xml".
import os
import cv2
import imgaug as ia
from pascal_voc_writer import Writer
import progressbar
import time

ia.seed(1)

# Directories to augment.
src_dir_names = f_dir_list(des_root_dir)

for dir_idx, dir_name in enumerate(src_dir_names):

    # Create the aug directory inside each source directory.
    des_dir = os.path.join(des_root_dir, dir_name)
    des_aug_dir = os.path.join(des_dir, 'aug')
    f_create_dir(des_aug_dir)

    # Read the images and their annotations.
    images, annotations, images_ext = read_train_dataset_2(des_dir)

    print(f'{dir_idx}: {dir_name}')
    time.sleep(0.3)

    bar = progressbar.ProgressBar(maxval=len(images)).start()
    for idx in range(len(images)):

        image = images[idx]
        boxes, xml_file, _ = annotations[idx]
        # Keep the name img_ext: f_save_img may read it from the notebook
        # namespace rather than receive it as an argument.
        img_ext = images_ext[idx]
        stem = os.path.splitext(xml_file)[0]

        # Drop the alpha channel if there is one.
        if image.ndim == 3 and image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

        # Carry the label on each box so it stays attached to the right box
        # even when boxes are dropped after augmentation.
        bbs = ia.BoundingBoxesOnImage(
            [ia.BoundingBox(x1=box[1], y1=box[2], x2=box[3], y2=box[4], label=box[0])
             for box in boxes],
            shape=image.shape,
        )

        for num, seq in enumerate(aug_list, start=1):

            # Freeze the random parameters so the image and its boxes receive
            # the same transformation.
            seq_det = seq.to_deterministic()
            image_aug = seq_det.augment_images([image])[0]
            bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]

            # Drop boxes that left the image and clip the rest to its borders,
            # so no negative coordinates are written.
            bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()

            # Output name, for example after1_<original stem>.
            aug_stem = 'after' + str(num) + '_' + stem

            # Save the image.
            img_save_path = os.path.join(des_aug_dir, aug_stem + '.' + img_ext)
            f_save_img(img_save_path, image_aug)

            # Save the XML; the writer is given the saved image path so the
            # annotation records the real image file name.
            h, w = image_aug.shape[0:2]
            voc_writer = Writer(img_save_path, w, h)

            for bb_box in bbs_aug.bounding_boxes:
                voc_writer.addObject(bb_box.label,
                                     int(bb_box.x1), int(bb_box.y1),
                                     int(bb_box.x2), int(bb_box.y2))

            xml_save_path = os.path.join(des_aug_dir, aug_stem + '.xml')
            voc_writer.save(xml_save_path)

        bar.update(idx)

    bar.finish()

9: 12063.한미오메가연질캡슐1000밀리그램_black


100% |########################################################################|


0: 12063.한미오메가연질캡슐1000밀리그램_crop


100% |########################################################################|


0: 17159.제스부틴정_black


100% |########################################################################|


0: 17159.제스부틴정_crop


100% |########################################################################|


0: 17458.캐롤비콜드연질캡슐_black


100% |########################################################################|


0: 17458.캐롤비콜드연질캡슐_crop


100% |########################################################################|


0: 2070.탁센연질캡슐(나프록센)_black


100% |########################################################################|


0: 2070.탁센연질캡슐(나프록센)_crop


100% |########################################################################|


0: 22074.모드콜에스연질캡슐_black


100% |########################################################################|


0: 22074.모드콜에스연질캡슐_crop


100% |########################################################################|


0: 22408.이지엔6프로연질캡슐_black


100% |########################################################################|


0: 22408.이지엔6프로연질캡슐_crop


100% |########################################################################|


0: 22605.타이레놀정500밀리그람_black


100% |########################################################################|


0: 22605.타이레놀정500밀리그람_crop


100% |########################################################################|


0: 3324.로프민캡슐_black


100% |########################################################################|


0: 3324.로프민캡슐_crop


100% |########################################################################|


0: 3982.애드빌정(이부프로펜)_black


100% |########################################################################|


0: 3982.애드빌정(이부프로펜)_crop


100% |########################################################################|


0: 8998.이소티논연질캡슐10밀리그램_black


100% |########################################################################|


0: 8998.이소티논연질캡슐10밀리그램_crop


100% |########################################################################|
