catatan awal:

> untuk file .h5
*     header untuk data 'X' : "x_images"
*     header untuk data 'y' : "y_labels"

> kode label
0. ceplok
1. kawung
2. lereng
3. nitik
4. parang
5. semen
6. lung-lungan

In [30]:
import h5py
import numpy as np
from PIL import Image
import os
from sklearn.utils import shuffle
import imageio
import cv2 # for zooming in image
import math
import random
from random import randint # will be used in random_erasing_image function
from scipy.ndimage import zoom, rotate

In [31]:
def slice_image(image):
    """Split an image into its four quadrants.

    The split point on each axis is the integer midpoint (``size // 2``).
    For odd sizes the extra row/column goes to the bottom/right tiles, so
    the four tiles always cover the whole image exactly once.

    Parameters:
    ---------------
    image            numpy array of shape (height, width[, channels])

    Returns:
    ----------
    images_array     list of 4 arrays ordered top-left, top-right,
                     bottom-left, bottom-right (views into ``image``)
    """
    height, width = image.shape[:2]
    mid_h = height // 2
    mid_w = width // 2

    # Explicit quadrant slices avoid the previous position-tracking loop,
    # which produced a sliver tile and dropped a quadrant whenever the
    # rounded midpoint was smaller than half of an odd dimension.
    return [
        image[:mid_h, :mid_w],
        image[:mid_h, mid_w:],
        image[mid_h:, :mid_w],
        image[mid_h:, mid_w:],
    ]

In [32]:
def zoom_in_image(img, min_zoom=0.2, max_zoom=1.5):
    """
    Center zoom in/out of the given image by a random factor, returning an
    enlarged/shrunk view of the image without changing its dimensions.

    The zoom factor is drawn uniformly from [min_zoom, max_zoom] and rounded
    to one decimal place. Factors above 1 crop the centre and enlarge it;
    factors below 1 shrink the image and pad the border with zeros.

    Args:
        img : Image array of shape (height, width[, channels])
        min_zoom : lower bound of the random zoom ratio
        max_zoom : upper bound of the random zoom ratio

    Returns:
        Array with the same height and width as ``img``.

    Raises:
        ValueError: if the rounded zoom factor is not positive.
    """
    zoom_factor = round(random.uniform(min_zoom, max_zoom), 1)
    if zoom_factor <= 0:
        # A zero factor would divide by zero when mapping the crop box back.
        raise ValueError(f"zoom factor must be positive, got {zoom_factor}")

    height, width = img.shape[:2] # It's also the final desired shape
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)

    ### Crop only the part that will remain in the result (more efficient)
    # Centered bbox of the final desired size in resized (larger/smaller) image coordinates
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1,x1,y2,x2])
    # Map back to original image coordinates.
    # The builtin int replaces the np.int alias, which newer NumPy no longer provides.
    bbox = (bbox / zoom_factor).astype(int)
    y1, x1, y2, x2 = bbox
    cropped_img = img[y1:y2, x1:x2]

    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) //2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    # Only the two spatial axes are padded; any trailing axes (channels) get (0, 0).
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0,0)] * (img.ndim - 2)

    # cv2.resize takes the target size as (width, height)
    result = cv2.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='constant')
    assert result.shape[0] == height and result.shape[1] == width
    return result

def rotate_image(img):
    """
    Return a copy of the image rotated by a random angle.

    The angle is a random integer number of degrees between -225 and 225
    (both inclusive). Areas outside the source image are filled using the
    'mirror' boundary mode. No ``reshape`` argument is passed, so SciPy's
    default applies (presumably the output is enlarged to hold the whole
    rotated image -- confirm against the SciPy documentation).

    Arguments:
    img --> Array, pixels array of image
    """
    angle = randint(-225, 225)
    return rotate(img, angle, mode='mirror')

def flip_ud_image(img):
    """
    Return the image flipped upside down (row order reversed along axis 0).
    """
    return np.flip(img, axis=0)

def random_erasing_image(img, min_percent=20, max_percent=30):
    """
    Return a copy of the image with one random rectangle blacked out.

    A single integer percentage is drawn from [min_percent, max_percent];
    the erased rectangle's height and width are that percentage of the
    image's height and width respectively (so with the defaults each side
    is 20%-30% of the corresponding image side). The rectangle is placed
    at a random position where it fits entirely inside the image.

    The input array is not modified.

    Arguments:
    img --> Array of shape (height, width[, channels])
    min_percent --> int, smallest side percentage of the erased rectangle
    max_percent --> int, largest side percentage of the erased rectangle
    """
    # Side length of the erased area as a fraction of each image side.
    fraction = randint(min_percent, max_percent) / 100

    h, w = img.shape[:2]
    # Clamp to the image size so percentages above 100 still yield a valid box.
    erase_h = min(h, math.floor(h * fraction))
    erase_w = min(w, math.floor(w * fraction))

    # Choose the top-left corner so the whole rectangle stays inside the image;
    # drawing from the full [0, h] / [0, w] range could clip or empty the patch.
    top = randint(0, h - erase_h)
    left = randint(0, w - erase_w)

    # Work on a copy: assigning into the input would alter the caller's image.
    out = img.copy()
    out[top:top + erase_h, left:left + erase_w] = 0
    return out

def mix_augmentation(img, rand_value):
    """
    Apply one of six fixed two-step augmentation combinations to an image.

    Combinations by rand_value (each step uses that function's defaults):
        1: zoom, then random erasing
        2: rotate, then vertical flip
        3: vertical flip, then random erasing
        4: rotate, then zoom
        5: rotate, then random erasing
        6: vertical flip, then zoom

    Arguments:
    img --> Array, pixels array of image
    rand_value --> int in 1..6 selecting the combination

    Returns:
    (out, height, width) --> augmented image and its first two dimensions

    Raises:
    ValueError when rand_value is not one of 1..6.
    """
    # Validate first so an unsupported selector fails with a clear message
    # instead of an unbound-variable error at the return statement.
    if rand_value not in (1, 2, 3, 4, 5, 6):
        raise ValueError(f"rand_value must be an integer from 1 to 6, got {rand_value!r}")

    # Each entry lists the augmentation steps in the order they are applied.
    pipelines = {
        1: (zoom_in_image, random_erasing_image),
        2: (rotate_image, flip_ud_image),
        3: (flip_ud_image, random_erasing_image),
        4: (rotate_image, zoom_in_image),
        5: (rotate_image, random_erasing_image),
        6: (flip_ud_image, zoom_in_image),
    }

    out = img
    for step in pipelines[rand_value]:
        out = step(out)

    return out, out.shape[0], out.shape[1]

def do_mix(img, rand_value):
    """
    Run mix_augmentation and return only the augmented image.

    The augmentation is repeated with the same arguments for as long as the
    result comes back with both height and width equal to 1.
    """
    while True:
        augmented, rows, cols = mix_augmentation(img, rand_value)
        if rows != 1 or cols != 1:
            return augmented

In [33]:
def create_augmented_dataset_allfeatures(folder_dir, dest_dir):
    """
    Build an augmented dataset that holds 10 images per source image.

    For every image found in each sub-folder of ``folder_dir`` the following
    files are written to the sub-folder of the same name under ``dest_dir``:
        <name>        the original image
        RA==<name>    random erased
        ZO==<name>    zoomed
        FL==<name>    flipped upside down
        RO==<name>    rotated
        MI==<name>    one random mixed augmentation (see do_mix)
        SL%i%==<name> the 4 quadrant slices, i = 0..3

    Files whose extension is not .png/.jpg/.jpeg (case-insensitive) are
    reported and skipped, as are images OpenCV cannot decode.

    Arguments:
    folder_dir --> str, directory containing one sub-folder per class
    dest_dir --> str, directory that receives the generated sub-folders
    """
    image_extensions = (".png", ".jpg", ".jpeg")
    # Images with any side shorter than this are resized to 450x450 before slicing.
    min_side = 448

    for folder_image in os.listdir(folder_dir):
        source_folder = os.path.join(folder_dir, folder_image)
        if not os.path.isdir(source_folder):
            # Only sub-folders are processed; stray files would break listdir.
            continue
        target_folder = os.path.join(dest_dir, folder_image)
        os.makedirs(target_folder, exist_ok=True)

        for filename in os.listdir(source_folder):
            if not filename.lower().endswith(image_extensions):
                print(f"FOUND OTHER TYPE: {filename}")
                continue
            print(f"working on {folder_image}\\{filename}")

            original = cv2.imread(os.path.join(source_folder, filename))
            if original is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"COULD NOT READ: {filename}")
                continue

            # FIRST
            # add original image to new directory
            cv2.imwrite(os.path.join(target_folder, filename), original)

            # SECOND
            # random erasing works on its own copy so the original stays intact
            random_erased_image = random_erasing_image(original.copy())
            cv2.imwrite(os.path.join(target_folder, f"RA=={filename}"), random_erased_image)

            # THIRD
            # zoomed, flipped and rotated variants of the original
            zoomed_image = zoom_in_image(original)
            flipped_image = flip_ud_image(original)
            rotated_image = rotate_image(original)
            cv2.imwrite(os.path.join(target_folder, f"ZO=={filename}"), zoomed_image)
            cv2.imwrite(os.path.join(target_folder, f"FL=={filename}"), flipped_image)
            cv2.imwrite(os.path.join(target_folder, f"RO=={filename}"), rotated_image)

            # FOURTH
            # one randomly chosen mixed augmentation, again on a private copy
            val = randint(1, 6)
            mixed_augment_image = do_mix(original.copy(), val)
            cv2.imwrite(os.path.join(target_folder, f"MI=={filename}"), mixed_augment_image)

            # FIFTH
            # slice the image into 4; compare each side explicitly because a
            # tuple comparison of shapes is lexicographic and would miss
            # images that are tall but narrow.
            if original.shape[0] < min_side or original.shape[1] < min_side:
                img_for_slice = cv2.resize(original, (450, 450))
            else:
                img_for_slice = original.copy()

            for i, tile in enumerate(slice_image(img_for_slice)):
                # save 4 sliced images part
                cv2.imwrite(os.path.join(target_folder, f"SL%{i}%=={filename}"), tile)

In [35]:
def create_dataset_noaugm(folder_dir, dest_dir):
    """
    Build a dataset that holds 4 images per source image, without augmentation.

    For every image found in each sub-folder of ``folder_dir`` the 4 quadrant
    slices are written as ``SL%i%==<name>`` (i = 0..3) to the sub-folder of
    the same name under ``dest_dir``.

    Files whose extension is not .png/.jpg/.jpeg (case-insensitive) are
    reported and skipped, as are images OpenCV cannot decode.

    Arguments:
    folder_dir --> str, directory containing one sub-folder per class
    dest_dir --> str, directory that receives the generated sub-folders
    """
    image_extensions = (".png", ".jpg", ".jpeg")
    # Images with any side shorter than this are resized to 450x450 before slicing.
    min_side = 448

    for folder_image in os.listdir(folder_dir):
        source_folder = os.path.join(folder_dir, folder_image)
        if not os.path.isdir(source_folder):
            # Only sub-folders are processed; stray files would break listdir.
            continue
        target_folder = os.path.join(dest_dir, folder_image)
        os.makedirs(target_folder, exist_ok=True)

        for filename in os.listdir(source_folder):
            if not filename.lower().endswith(image_extensions):
                print(f"FOUND OTHER TYPE: {filename}")
                continue
            print(f"working on {folder_image}\\{filename}")

            img = cv2.imread(os.path.join(source_folder, filename))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"COULD NOT READ: {filename}")
                continue

            # Compare each side explicitly: a tuple comparison of shapes is
            # lexicographic and would miss images that are tall but narrow.
            if img.shape[0] < min_side or img.shape[1] < min_side:
                img_for_slice = cv2.resize(img, (450, 450))
            else:
                img_for_slice = img.copy()

            for i, tile in enumerate(slice_image(img_for_slice)):
                # save 4 sliced images part
                cv2.imwrite(os.path.join(target_folder, f"SL%{i}%=={filename}"), tile)

### MAIN CELL BELOW

In [None]:
# Dataset 1: sliced-only (no augmentation) training and testing sets.
# The directories are absolute Windows paths, each ending with a path separator.
source_dir = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\dataset_batik\\Training1\\"
dest_dir = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\new\\Training1\\"

source_dir2 = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\dataset_batik\\Testing1\\"
dest_dir2 = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\new\\Testing1\\"

# Training split first, then the testing split.
create_dataset_noaugm(source_dir, dest_dir)
create_dataset_noaugm(source_dir2, dest_dir2)

In [None]:
# Dataset 2: fully augmented training and testing sets.
# The directories are absolute Windows paths, each ending with a path separator.
# NOTE: this cell rebinds source_dir/dest_dir/source_dir2/dest_dir2.
source_dir = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\dataset_batik\\Training2\\"
dest_dir = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\new\\Training2\\"

source_dir2 = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\dataset_batik\\Testing2\\"
dest_dir2 = "C:\\SAMUEL\\KULIAH\\Skripsi Gasal 1920\\jupyter-notebook\\new\\Testing2\\"

# Training split first, then the testing split.
create_augmented_dataset_allfeatures(source_dir, dest_dir)
create_augmented_dataset_allfeatures(source_dir2, dest_dir2)