## Data preparation - sketch to pixel art (Models A1, A2)

### Create pairs of sketches and pixel art images from 64x64 images taken from Tiny Hero dataset

- Tiny Hero dataset source: https://www.kaggle.com/datasets/calmness/retro-pixel-characters-generator
- Data preparation for models A1, A2 uses data from the `datasets/unprepared_data/input_a1_a2` folder.
- Data preparation involves using gaussian-blur and canny edge detection to generate a viable sketch for a given image
- The code for `create_sketch` function is a result of experimenting upon code from:
    - https://stackabuse.com/opencv-edge-detection-in-python-with-cv2canny/
    - https://codewithcurious.com/python-projects/convert-image-into-sketch-python/
- 3 sketches of decreasing detail are created for each input image (creates more data for the models)
- Prepared data is saved in `datasets/model_a_data`
- Pix2Pix models requires input images and ground truth images to be combined. These prepared images are stored in the `combined` folder of `datasets/model_a_data`

In [1]:
import cv2 as cv
import os
import common_functions as core

def remove_extension(i: str) -> str:
    """
    Function to remove the extension of a filename
    :param i: input name of file
    :return: filename without extension
    """
    a, _ = os.path.splitext(i)
    return a

def get_filename(i: str) -> str:
    """
    Function to get the filename of a given path
    :param i: file path
    :return: filename
    """
    return os.path.basename(i)

def create_sketch(i: str, levels: int = 3, directory="."):
    """
    Function to create a sketch of an image
    :param i: input image path
    :param levels: number of levels to apply sketching - higher the number, smoother the sketching (less detail)
    :param directory: location to save the sketch
    :return: 
    Code references:
    - https://stackabuse.com/opencv-edge-detection-in-python-with-cv2canny/
    - https://codewithcurious.com/python-projects/convert-image-into-sketch-python/
    """
    # Load image
    image = cv.imread(i)

    # BASIC SKETCH
    # Convert image to grayscale
    gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    # Invert grayscale image
    invert_image = cv.bitwise_not(gray_image)
    # Apply Gaussian blur to inverted image
    blur_image = cv.GaussianBlur(invert_image, (5,5), 0)
    # Invert the blurred image
    invert_blur = cv.bitwise_not(blur_image)
    # Divide grayscale image by inverted blurred image to create a sketch
    sketch = cv.divide(gray_image, invert_blur, scale=256.0)    
    
    # EDGE DETECTION
    # Apply Gaussian blur to sketch
    b1 = cv.GaussianBlur(sketch, (5, 5), 0)
    b2 = b1
    # for loop to apply additional Gaussian blur to the image
    # When `levels` argument is increased -> more Gaussian blur is applied -> sketch becomes simpler
    for _ in range(0, levels - 1):
        b2 = cv.GaussianBlur(b2, (5, 5), 0)
    
    # Using canny edge detection on final blurred image
    edge = cv.Canny(b2, 50, 100)
    # Inverting the edge-detected image
    edge_inv = cv.bitwise_not(edge)
    # Saving the image
    file_path = os.path.join(directory, f"{get_filename(remove_extension(i))}_s{levels}.png")
    cv.imwrite(file_path, edge_inv)

## Folder structure for model A1, A2 data folders

```
model_a_data
    -- input (every image from 'input_a' folder is added 3 times because 3 sketches are created for every input image)
        -- train
        -- test
        -- val
    -- output (contains 3 sketches of decreasing detail for each 'input' image)
        -- train
        -- test
        -- val
    -- combined (combined input, output images to use for Pix2Pix training)
        -- train
        -- test
        -- val
```

## Generate data for training a Pix2Pix model

In [2]:
import glob
import shutil

# Make new directory for temp data
TEMP = core.get_path(core.A1.temp_directory)
os.makedirs(TEMP, exist_ok=True)

# 3 sketches of decreasing detail will be created from a single input image
# This gives us more image pairs to train the model with
total_levels = 3

# Looping through `input_a1_a2` folder
for i, f in enumerate(glob.glob(core.get_path(core.A1.raw_data_dir, "*.png"))):
    # Zero-padded numbers as file names
    target = f"{i:03}"
    # Adding 'target' image inside TEMP folder
    target = os.path.join(TEMP, target) + ".png"
    shutil.copyfile(f, target)
    # Calling 'create_sketch' function that creates 3 different sketches from the 'target' image
    for j in range(1, total_levels + 1):
        create_sketch(target, levels = j, directory = TEMP)

# At this point TEMP folder contains input images + 3 sketches per original image

### Create folder structure for pix2pix

In [3]:
# Splitting data for training, testing, validation
total_files = i     # i variable from previous for loop

test = int(total_files * 0.79)
val = int(total_files * 0.90)

splits = ["train", "test", "val"]

MODEL_INPUT_DIR = core.get_path(core.A1.prepared_data_dir, "input")
MODEL_OUTPUT_DIR = core.get_path(core.A1.prepared_data_dir, "output")
MODEL_COMBINED_DIR = core.get_path(core.A1.prepared_data_dir, "combined")

# Creating 'input', 'output' folders for model A1, A2 data folder
for s in splits:
    os.makedirs(os.path.join(MODEL_INPUT_DIR, s), exist_ok=True)
    os.makedirs(os.path.join(MODEL_OUTPUT_DIR, s), exist_ok=True)

# Counter variable for created image-sketch pairs
data_number = 0

# For loop to split files into train, test, validation
for i in range(0, total_files):
    current_split = "train"
    if test < i < val:
        current_split = "test"
    elif val <= i:
        current_split = "val"
    # Zero-padded numbers as file names
    pair_name = f"{i:03}"
    
    ## TODO: REWRITE THIS BIT
    
    pair_A_path = os.path.join(TEMP, pair_name) + ".png"
    
    # For loop to copy original image and 3 sketches to A and B folders respectively
        ## with new numbering using the data_number variable
    ### TEMP  --> 000.png, 000_s1.png, 000_s2.png, 000_s3.png
    ### A         000.png, 001.png, 002.png (copies of TEMP/000.png)
    ### B         000.png (TEMP/000_s1.png), 001.png (TEMP/000_s2.png), 002.png (TEMP/000_s3.png)
    for j in range(1, total_levels + 1):
        # Sketch files 1, 2, 3
        pair_B_path = os.path.join(TEMP, f"{pair_name}_s{j}.png")
        # File naming
        data_name = f"{data_number:03}"
        data_A_path = os.path.join(MODEL_INPUT_DIR, current_split, data_name) + ".png"
        data_B_path = os.path.join(MODEL_OUTPUT_DIR, current_split, data_name) + ".png"
        shutil.copyfile(pair_A_path, data_A_path)
        shutil.copyfile(pair_B_path, data_B_path)
        data_number += 1

# Cleanup TEMP folder as it is no longer needed
shutil.rmtree(TEMP)

## Convert to combined AB images

In [4]:
os.makedirs(MODEL_COMBINED_DIR, exist_ok=True)
core.create_combined_images(f"--fold_A {MODEL_INPUT_DIR} --fold_B {MODEL_OUTPUT_DIR} --fold_AB {MODEL_COMBINED_DIR} --no_multiprocessing")

----------------------------------------------------------
python /Users/tashvit/Documents/GitHub/mmpixagen/thirdparty/pix2pix/datasets/combine_A_and_B.py --fold_A /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/input --fold_B /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/output --fold_AB /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/combined --no_multiprocessing
[fold_A] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/input
[fold_B] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/output
[fold_AB] =  /Users/tashvit/Documents/GitHub/mmpixagen/datasets/model_a_data/combined
[num_imgs] =  1000000
[use_AB] =  False
[no_multiprocessing] =  True
split = test, use 108/108 images
split = test, number of images = 108
split = train, use 795/795 images
split = train, number of images = 795
split = val, use 102/102 images
split = val, number of images = 102
