In [None]:
from pathlib import Path
from shutil import copyfile
import shutil
import os
#
import numpy as np
from tqdm import tqdm
#
# svg2png is imported from the bin.svg_convert extension module. If that
# import fails, the .pyx source is built in place with cythonize and the
# import is retried once.
try:
    from bin.svg_convert import svg2png
except ImportError:
    # NOTE(review): the exit status of the build command is not checked; a
    # failed build shows up as an ImportError on the retry below.
    os.system("cythonize -i ./bin/svg_convert.pyx")
    from bin.svg_convert import svg2png

In [None]:
# MUST BE GIVEN: path handed to svg2png as its first argument
# (presumably the directory holding the source SVG files — confirm)
p_svg = Path("svgs")

# temporary directory where the images are created; it is deleted again in
# the last cell of the notebook
p_raw = Path("data/raw")
# parents=True so that a fresh checkout without a "data" directory works too
p_raw.mkdir(exist_ok=True, parents=True)
#

# options forwarded to svg2png
img_dim = 128
is_gray = True
is_background = True
# sub-directory name shared by the raw images and the final split
dir_name = "128_gray"
#
# folders the notebook expects to exist after rendering (asserted right
# after the svg2png call)
p_jaggy = p_raw / dir_name / "jaggy"
p_smooth = p_raw / dir_name / "smooth"

In [None]:
# create the raw data: svg2png receives the SVG path, the raw output path
# and the rendering options defined in the configuration cell
svg2png(
    str(p_svg),
    str(p_raw),
    img_dim,
    is_gray,
    is_background,
)

In [None]:
# both raw image folders must be present before the split is built
assert all(p_dir.exists() for p_dir in (p_jaggy, p_smooth))

In [None]:
# destination for the train / validation split
p_data = Path("./data") / dir_name
#
p_train = p_data / "train"
p_valid = p_data / "valid"
#
# parents=True: do not rely on "./data" having been created by an earlier
# cell (copy_imgs creates its folders the same way)
p_data.mkdir(exist_ok=True, parents=True)

In [None]:
# fraction of all samples that goes into the training split
train_ratio = 0.8
#
# find all images; the lists are sorted because glob yields entries in
# arbitrary order and the two lists are paired up by position later on
# NOTE(review): assumes matching jaggy/smooth files sort to the same
# position (e.g. identical file names) — confirm against svg2png's output
p_jaggy_imgs = sorted(p_jaggy.glob("*.png"))
p_smooth_imgs = sorted(p_smooth.glob("*.png"))
#
n_samples = len(p_jaggy_imgs)
assert n_samples == len(p_smooth_imgs), "jaggy / smooth image counts differ"
#
# int() truncates, so any remainder ends up in the validation split
n_train_samples = int(n_samples * train_ratio)
n_valid_samples = n_samples - n_train_samples
#
print("Train samples: ", n_train_samples)
print("Valid samples: ", n_valid_samples)

In [None]:
# randomly select train samples by indices; the generator is seeded so the
# split is identical on every Restart & Run All
split_seed = 0
rng = np.random.default_rng(split_seed)
idcs_train = rng.choice(n_samples, size=n_train_samples, replace=False)
# build the lookup set once instead of once per candidate index
train_idx_set = set(idcs_train.tolist())
# everything that was not drawn for training becomes validation data
idcs_valid = [idx for idx in range(n_samples) if idx not in train_idx_set]
#
assert len(train_idx_set) == n_train_samples
assert len(idcs_valid) == n_valid_samples

In [None]:
def copy_imgs(p_jaggy_imgs, p_smooth_imgs, p_dest, indices):
    """Copy the selected jaggy/smooth image pairs into a split directory.

    For every index the jaggy image is copied to ``p_dest / "inputs"`` and
    the smooth image to ``p_dest / "labels"``, each under its own file name.
    Both target folders are created if they do not exist yet.

    Parameters
    ----------
    p_jaggy_imgs : sequence of pathlib.Path
        Paths of the jaggy images.
    p_smooth_imgs : sequence of pathlib.Path
        Paths of the smooth images; position ``i`` must belong to the same
        sample as ``p_jaggy_imgs[i]``.
    p_dest : pathlib.Path
        Root directory of the split (e.g. the train or valid folder).
    indices : iterable of int
        Positions of the pairs to copy.

    Raises
    ------
    IndexError
        If an index is out of range for one of the image lists.
    OSError
        If a file cannot be copied.
    """
    p_inputs = p_dest / "inputs"
    p_labels = p_dest / "labels"
    #
    p_inputs.mkdir(exist_ok=True, parents=True)
    p_labels.mkdir(exist_ok=True, parents=True)
    #
    # copy imgs
    for idx in tqdm(indices):
        p_jaggy_img = p_jaggy_imgs[idx]
        p_smooth_img = p_smooth_imgs[idx]

        # jaggy renderings are the inputs, smooth renderings the labels;
        # source and destination were previously crossed
        copyfile(p_jaggy_img, p_inputs / p_jaggy_img.name)
        copyfile(p_smooth_img, p_labels / p_smooth_img.name)

In [None]:
# write the training pairs first, then the validation pairs
copy_imgs(
    p_jaggy_imgs=p_jaggy_imgs,
    p_smooth_imgs=p_smooth_imgs,
    p_dest=p_train,
    indices=idcs_train,
)
copy_imgs(
    p_jaggy_imgs=p_jaggy_imgs,
    p_smooth_imgs=p_smooth_imgs,
    p_dest=p_valid,
    indices=idcs_valid,
)

In [None]:
# remove the temporary raw images; the guard keeps the cell re-runnable once
# the directory is already gone
if p_raw.exists():
    shutil.rmtree(p_raw)