In [None]:
!pip install opencv-python



In [None]:
import os
import cv2
import random
import shutil
import numpy as np
from PIL import Image

In [None]:
from glob import glob
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

In [None]:
# Root folder that is scanned recursively for the input images.
# NOTE(review): "SORUCE" is a misspelling of "SOURCE"; the name is kept as-is
# because other cells in this notebook reference it.
SORUCE_PATH="cropped"
# Root folder under which every output variant of the dataset is written.
RESULT_PATH="pre-dataset-color"

### Extract Green Channel

In [None]:
def only_green_channel(img):
    """Return the green channel of a BGR or BGRA image as a 2-D array.

    The green plane is channel index 1 in OpenCV's BGR(A) channel order.
    Indexing it directly avoids splitting every channel and also accepts
    images that carry a fourth (alpha) channel.

    Args:
        img: Image array of shape (height, width, channels) with at least
            three channels.

    Returns:
        A contiguous copy of the green plane with shape (height, width).

    Raises:
        ValueError: If ``img`` is None or is not a 3-D array with at least
            three channels.
    """
    if img is None or getattr(img, "ndim", 0) != 3 or img.shape[2] < 3:
        raise ValueError(
            "only_green_channel expects an image array with at least 3 channels (BGR or BGRA)"
        )
    # Copy so the result is contiguous and independent of the input buffer.
    return img[:, :, 1].copy()

### List the image files and the folders found under a path

In [None]:
def read_path(folder):
    """Recursively list the image files under ``folder`` and the folders holding files.

    Both lists are returned sorted. Other cells address the folder list by
    position, so its order must not depend on set iteration order, which can
    differ from one interpreter session to the next.

    Args:
        folder: Root directory to scan.

    Returns:
        A tuple ``(images, folders)``:
        images  -- sorted paths whose extension is .jpeg, .jpg or .png,
                   compared case-insensitively.
        folders -- sorted, de-duplicated parent directories of every path
                   matched by the ``*.*`` pattern (image or not).
    """
    image_extensions = {".jpeg", ".jpg", ".png"}
    matches = sorted(glob(os.path.join(folder, '**', '*.*'), recursive=True))
    images = [
        path for path in matches
        if os.path.splitext(path)[1].lower() in image_extensions
    ]
    folders = sorted({os.path.dirname(path) for path in matches})
    return images, folders

### Prepare output directory structure (same as the source folder)

In [None]:
def prepare_path(folders):
    """Create the output directory tree under RESULT_PATH.

    One sub-tree is created per dataset variant, and each of ``folders`` is
    recreated inside every variant. Directories that already exist are left
    untouched.

    Args:
        folders: Folder paths to recreate inside each variant directory.
    """
    variants = (
        'without_augmentation',
        'only_augmentation',
        'with_augmentation',
        'only_random_augmentation',
    )
    os.makedirs(RESULT_PATH, exist_ok=True)
    for variant in variants:
        variant_root = os.path.join(RESULT_PATH, variant)
        os.makedirs(variant_root, exist_ok=True)
        for folder in folders:
            os.makedirs(os.path.join(variant_root, folder), exist_ok=True)

### CLAHE (Contrast Limited Adaptive Histogram Equalization)

- https://docs.opencv.org/4.5.5/d5/daf/tutorial_py_histogram_equalization.html

In [None]:
# Shared CLAHE operator: clip limit 2.0 on an 8x8 grid of tiles.
CLAHE = cv2.createCLAHE(tileGridSize=(8, 8), clipLimit=2.0)


def contrast_limited_adaptive_histogram_equalization(img):
    """Run the shared CLAHE operator on ``img`` and return the equalized image."""
    equalized = CLAHE.apply(img)
    return equalized

### Non-local Means Denoising

- https://docs.opencv.org/4.5.5/d5/d69/tutorial_py_non_local_means.html

In [None]:
def non_local_means_denoising(grey):
    """Denoise ``grey`` with OpenCV's fast non-local means filter.

    Uses h=1 with a 7-pixel template window and a 21-pixel search window.

    Args:
        grey: Input image passed to ``cv2.fastNlMeansDenoising``.

    Returns:
        The denoised image.
    """
    denoised = cv2.fastNlMeansDenoising(
        grey,
        None,
        h=1,
        templateWindowSize=7,
        searchWindowSize=21,
    )
    return denoised

In [None]:
def flip_vertical(img):
    """Return ``img`` flipped with OpenCV flip code 0 (top and bottom swapped)."""
    flip_code = 0
    return cv2.flip(img, flip_code)

In [None]:
def flip_horizontal(img):
    """Return ``img`` flipped with OpenCV flip code 1 (left and right swapped)."""
    flip_code = 1
    return cv2.flip(img, flip_code)

In [None]:
def save_image(file_path, img):
    """Write ``img`` to ``file_path`` with OpenCV and fail loudly if it cannot.

    ``cv2.imwrite`` reports an unsuccessful write through its boolean return
    value rather than by raising, so the result is checked here to keep a
    failed write from going unnoticed.

    Args:
        file_path: Destination path; the extension selects the image format.
        img: Image array to write.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    if not cv2.imwrite(file_path, img):
        raise OSError("cv2.imwrite failed to write image to {!r}".format(file_path))

In [None]:
def read_without_augmentation_path():
    """Return every ``*.*`` path found recursively under RESULT_PATH/without_augmentation."""
    pattern = os.path.join(RESULT_PATH, 'without_augmentation', '**', '*.*')
    return glob(pattern, recursive=True)

## Run Project Setup

In [None]:
# Scan the source tree once: FILES holds every input image path, FOLDERS the
# folders that contain files. Later cells index ONLY_RANDOM_CONFIG by the
# position of each folder in FOLDERS.
FILES, FOLDERS = read_path(SORUCE_PATH)
# Mirror the source folder layout under each output variant in RESULT_PATH.
prepare_path(FOLDERS)

## Run 01 "without_augmentation"
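Copy each source image into `without_augmentation` unchanged (the pre-processing steps in the pipeline below are currently disabled).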

In [None]:
def run_without_augmentation_pipeline(file_path):
    """Load the image at ``file_path`` with OpenCV and return it unmodified.

    The pre-processing steps are currently disabled. When enabled they run in
    this order: only_green_channel, non_local_means_denoising,
    contrast_limited_adaptive_histogram_equalization.

    Args:
        file_path: Path of the image to read.

    Returns:
        The value returned by ``cv2.imread`` for ``file_path``.
    """
    return cv2.imread(file_path)

In [None]:
def run_without_augmentation():
    """Copy every file in FILES into RESULT_PATH/without_augmentation.

    Each file keeps its source-relative path below the target folder. The
    files are copied byte-for-byte; run_without_augmentation_pipeline is not
    applied.
    """
    target_root = os.path.join(RESULT_PATH, "without_augmentation")
    for source in tqdm(FILES):
        destination = os.path.join(target_root, source)
        shutil.copyfile(source, destination)

In [None]:
# Populate RESULT_PATH/without_augmentation with copies of the source images.
run_without_augmentation()

  0%|          | 0/8428 [00:00<?, ?it/s]

## Run 02 "only_augmentation"

In [None]:
def run_only_augmentation_pipeline(file_path):
    """Write the three flipped variants of one image into ``only_augmentation``.

    The image is read from RESULT_PATH/without_augmentation/<file_path>. Its
    vertical, horizontal and combined flips are saved under
    RESULT_PATH/only_augmentation with the suffixes ``_flip_vertical``,
    ``_flip_horizontal`` and ``_flip_both`` inserted before the extension.

    Args:
        file_path: Image path relative to the ``without_augmentation`` folder.

    Raises:
        OSError: If the image cannot be read (``cv2.imread`` returned None).
    """
    source = os.path.join(RESULT_PATH, 'without_augmentation', file_path)
    img = cv2.imread(source)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise OSError("cv2.imread could not read image: {!r}".format(source))

    # Split the path so each suffix can be inserted before the extension.
    filename, extension = os.path.splitext(file_path)
    save_base_path = os.path.join(RESULT_PATH, 'only_augmentation')

    # The horizontal flip is computed once and reused for the combined flip.
    horizontal = flip_horizontal(img)
    variants = (
        ('_flip_vertical', flip_vertical(img)),
        ('_flip_horizontal', horizontal),
        ('_flip_both', flip_vertical(horizontal)),
    )
    for suffix, flipped in variants:
        save_image(os.path.join(save_base_path, filename + suffix + extension), flipped)

In [None]:
def run_only_augmentation():
    """Generate the flipped variants for every path in FILES, with a progress bar."""
    progress = tqdm(FILES)
    for source in progress:
        run_only_augmentation_pipeline(source)

In [None]:
# Write the vertical, horizontal and combined flips of every source image.
run_only_augmentation()

  0%|          | 0/8428 [00:00<?, ?it/s]

## Run 03 "only_random_augmentation"

#### การตั้งค่าการกำหนดจำนวนที่ต้องการสุ่มที่นับรวมภาพต้นฉบับเช่น


    ต้องการให้ validate-set-normal มีไฟล์จำนวน 110
    ต้องการให้ validate-set-abnormal มีไฟล์จำนวน 120
    ต้องการให้ test-set-normal มีไฟล์จำนวน 130
    ต้องการให้ test-set-abnormal มีไฟล์จำนวน 140

```python
ONLY_RANDOM_CONFIG = [110, 140, 130, 120]
```

    ตั้งค่าตามลำดับ index ของโฟลเดอร์ที่แสดงในผลลัพธ์ของเซลล์ถัดไป (ค่าลำดับที่ i ใช้กับโฟลเดอร์ index i)

In [None]:
# For every source folder, print its index, the number of images it currently
# has in without_augmentation ('current'), and the number available if every
# flipped variant from only_augmentation were added ('max'). The index shown
# is the position used to look up the folder's target in ONLY_RANDOM_CONFIG.
for index, folder in enumerate(FOLDERS):
    # a: flipped images for this folder; b: their folders (unused)
    a, b = read_path(os.path.join(RESULT_PATH, 'only_augmentation', folder))
    # c: original images for this folder; d: their folders (unused)
    c, d = read_path(os.path.join(RESULT_PATH, 'without_augmentation', folder))
    print(index, ':', folder, "\t", 'current:' , len(c), "\t", 'max:' , len(c) + len(a))

0 : cropped/raw/test/abnormal 	 current: 340 	 max: 1360
1 : cropped/raw/test/normal 	 current: 924 	 max: 3696
2 : cropped/raw/valid/abnormal 	 current: 341 	 max: 1364
3 : cropped/raw/train/abnormal 	 current: 1588 	 max: 6352
4 : cropped/raw/valid/normal 	 current: 923 	 max: 3692
5 : cropped/raw/train/normal 	 current: 4312 	 max: 17248


In [None]:
# Target image count (originals included) for each folder, matched by position
# to FOLDERS. Re-check the values against the index listing printed by the
# previous cell whenever the order or content of FOLDERS changes.
ONLY_RANDOM_CONFIG = [1000, 1000, 1000, 5000, 1000, 5000]

In [None]:
def run_only_random_augmentation_pipeline(file_path):
    """Copy the selected flipped variants of one image into ``only_random_augmentation``.

    For each enabled variant, the file is copied from
    RESULT_PATH/only_augmentation to the same relative path under
    RESULT_PATH/only_random_augmentation.

    Args:
        file_path: Path of the original image; the variant suffix is inserted
            before its extension to locate each flipped file.

    Returns:
        The number of variant files copied.
    """
    # All three variants are currently always enabled. Each flag could instead
    # be drawn with random.randint(0, 1) to select variants at random.
    selection = (
        ('_flip_both', 1),
        ('_flip_vertical', 1),
        ('_flip_horizontal', 1),
    )

    source_root = os.path.join(RESULT_PATH, 'only_augmentation')
    target_root = os.path.join(RESULT_PATH, 'only_random_augmentation')
    stem, suffix = os.path.splitext(file_path)

    copied = 0
    for tag, wanted in selection:
        if not wanted:
            continue
        variant_name = stem + tag + suffix
        shutil.copyfile(
            os.path.join(source_root, variant_name),
            os.path.join(target_root, variant_name),
        )
        copied += 1
    return copied

In [None]:
def run_only_random_augmentation():
    """Fill ``only_random_augmentation`` until each folder reaches its target count.

    Steps:
      1. Remove the image files already present under
         RESULT_PATH/only_random_augmentation.
      2. For each folder in FOLDERS, list and shuffle its images and look up
         its target in ONLY_RANDOM_CONFIG by position.
      3. For folders below their target, copy flipped variants image by image
         until the running count reaches the target.

    The running count grows by however many variants one image contributes,
    so a folder can end up slightly above its target.
    """
    # Step 1: clear the previous selection.
    stale_files, _ = read_path(os.path.join(RESULT_PATH, 'only_random_augmentation'))
    for stale in tqdm(stale_files, leave=False):
        os.remove(stale)

    # Step 2: build one plan per folder.
    plans = []
    for position, folder in enumerate(FOLDERS):
        candidates, _ = read_path(folder)
        random.shuffle(candidates)
        plans.append({
            'have': len(candidates),
            'target': ONLY_RANDOM_CONFIG[position],
            'files': candidates,
        })

    # Step 3: top up every folder that is still short of its target.
    for plan in tqdm(plans):
        have = plan['have']
        target = plan['target']
        if have < target:
            with tqdm(total=target - have, leave=False) as pbar:
                for file in plan['files']:
                    if have >= target:
                        break
                    added = run_only_random_augmentation_pipeline(file)
                    have += added
                    pbar.update(added)

In [None]:
# Rebuild RESULT_PATH/only_random_augmentation from a fresh random selection.
run_only_random_augmentation()

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/76 [00:00<?, ?it/s]

  0%|          | 0/659 [00:00<?, ?it/s]

  0%|          | 0/3412 [00:00<?, ?it/s]

  0%|          | 0/77 [00:00<?, ?it/s]

  0%|          | 0/688 [00:00<?, ?it/s]

## Run 04 "with_augmentation"

In [None]:
def run_with_augmentation():
    """Rebuild ``with_augmentation`` as originals plus the selected augmentations.

    The image files already present under RESULT_PATH/with_augmentation are
    removed first, matching how ``only_random_augmentation`` is rebuilt;
    otherwise variants left over from an earlier random selection would pile
    up across runs. Every image from ``without_augmentation`` and
    ``only_random_augmentation`` is then copied to the same relative path
    under ``with_augmentation``.
    """
    path_x = os.path.join(RESULT_PATH, 'without_augmentation')
    path_y = os.path.join(RESULT_PATH, 'only_random_augmentation')
    path_z = os.path.join(RESULT_PATH, 'with_augmentation')

    # clear old files
    stale_files, _ = read_path(path_z)
    for file in tqdm(stale_files, leave=False):
        os.remove(file)

    for source_root in (path_x, path_y):
        files, _ = read_path(source_root)
        for file in tqdm(files):
            # Re-root via the relative path instead of str.replace, which
            # would rewrite every occurrence of the prefix text in the path.
            relative = os.path.relpath(file, source_root)
            shutil.copyfile(file, os.path.join(path_z, relative))

In [None]:
# Merge the originals and the selected augmentations into with_augmentation.
run_with_augmentation()

  0%|          | 0/8428 [00:00<?, ?it/s]

  0%|          | 0/5580 [00:00<?, ?it/s]