In [22]:
import cv2
import matplotlib.pyplot as plt
import os
import random
import time
import shutil

In [23]:
def downscale_image(image, factor):
    """Degrade an image by shrinking it and stretching it back to its original size.

    The image is resized to ``factor`` times its height and width, then
    resized again to the original height and width, so the returned image has
    the same dimensions as the input but less detail.

    Args:
        image: array-like image whose ``shape`` starts with (height, width),
            as accepted by ``cv2.resize``.
        factor: scale applied to both height and width; must be > 0.

    Returns:
        The re-stretched image, with the same height and width as ``image``.

    Raises:
        ValueError: if ``factor`` is not positive.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")

    # Only the first two entries are spatial; a trailing channel count is not scaled.
    height, width = image.shape[:2]

    # int() truncates, so a small image or small factor could yield 0 pixels,
    # which cv2.resize rejects. Keep at least one pixel per dimension.
    small_height = max(1, int(height * factor))
    small_width = max(1, int(width * factor))

    # cv2.resize takes the target size as (width, height).
    small_image = cv2.resize(image, (small_width, small_height))
    return cv2.resize(small_image, (width, height))

In [24]:
def show_images_side_by_side(lr, hr, bgr=True):
    """Display a low-resolution / high-resolution image pair in one figure.

    Args:
        lr: image array drawn in the left panel (titled 'lr').
        hr: image array drawn in the right panel (titled 'hr').
        bgr: when True (default), 3-channel inputs are treated as OpenCV-style
            BGR arrays and their channel order is reversed before display.
            Pass False for images that are already RGB.
    """

    def to_displayable(image):
        # Images in this notebook are loaded with cv2.imread, which stores
        # channels as BGR, while matplotlib's imshow interprets a 3-channel
        # array as RGB. Reverse the channel axis so colours are not swapped.
        # Single-channel (2-D) and 4-channel arrays are passed through as-is.
        if bgr and getattr(image, "ndim", 0) == 3 and image.shape[2] == 3:
            return image[:, :, ::-1]
        return image

    _, axes = plt.subplots(1, 2, figsize=(10, 5))
    for ax, image, label in zip(axes, (lr, hr), ("lr", "hr")):
        ax.imshow(to_displayable(image))
        ax.set_title(label)

    plt.show()

In [25]:
def make_datum(image_path, export_image_dims):
    """Build one (lr, hr) training pair from a single image file.

    Two random downscale factors are drawn; the image is degraded once with
    the smaller factor (lr) and once with the larger factor (hr) via
    ``downscale_image``, and both results are resized to ``export_image_dims``.

    Args:
        image_path: path of the image to load with ``cv2.imread``.
        export_image_dims: (height, width) of both returned images.

    Returns:
        Tuple ``(lr_image, hr_image)``.

    Raises:
        ValueError: if the file cannot be read as an image.
    """
    min_factor = 0.1  # neither factor may go below this
    max_gap = 0.5  # largest allowed difference between the two factors

    def calc_random_downscale_factors():
        def valid_factors(factors):
            low, high = factors

            if low < min_factor or high < min_factor:
                return False
            if abs(low - high) > max_gap:
                return False
            # The first factor must be strictly smaller: equal factors would
            # make the lr and hr images identical, a useless training pair.
            return low < high

        # Rejection sampling over factors on a 0.01 grid in [0, 1].
        while True:
            factors = [random.randint(0, 100) / 100 for _ in range(2)]
            if valid_factors(factors):
                return factors

    lr_factor, hr_factor = calc_random_downscale_factors()

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"could not read image: {image_path}")

    lr_image = downscale_image(image, lr_factor)
    hr_image = downscale_image(image, hr_factor)

    # cv2.resize takes the target size as (width, height).
    export_size = (export_image_dims[1], export_image_dims[0])
    lr_image = cv2.resize(lr_image, export_size)
    hr_image = cv2.resize(hr_image, export_size)

    return lr_image, hr_image


def make_dataset(
    export_folder,
    export_image_dims,
    image_count=-1,
    raw_images_folder=r"H:\my_files\my_programs\cat_upscaler\datasets\raw_cat_images",
):
    """Generate an lr/hr image-pair dataset from a folder of raw images.

    Every selected raw image is turned into a pair by ``make_datum`` and saved
    as ``<export_folder>/lr/<i>.png`` and ``<export_folder>/hr/<i>.png``.

    WARNING: an existing ``export_folder`` is deleted before writing.

    Args:
        export_folder: destination folder; recreated from scratch.
        export_image_dims: (height, width) passed through to ``make_datum``.
        image_count: maximum number of images to process. A negative value
            (default -1) or None processes every file found.
        raw_images_folder: folder whose files are used as source images.

    Raises:
        ValueError: if ``raw_images_folder`` is ``export_folder`` or lies
            inside it (it would be deleted along with the old export).
        OSError: if an output image cannot be written.
    """
    # The export folder is wiped below; refuse to run if that would also
    # destroy the source images.
    export_abs = os.path.normcase(os.path.abspath(export_folder))
    raw_abs = os.path.normcase(os.path.abspath(raw_images_folder))
    if raw_abs == export_abs or raw_abs.startswith(export_abs + os.sep):
        raise ValueError(
            f"raw_images_folder {raw_images_folder!r} is inside export_folder "
            f"{export_folder!r}, which is deleted before export"
        )

    # grab up all the raw file paths
    image_paths = [
        os.path.join(raw_images_folder, f)
        for f in os.listdir(raw_images_folder)
        if os.path.isfile(os.path.join(raw_images_folder, f))
    ]
    print(f"there are {len(image_paths)} images in the folder")

    # Shuffle once; taking a prefix of a shuffled list is already a uniform
    # random subset, so no second sampling pass is needed.
    random.shuffle(image_paths)
    if image_count is not None and image_count >= 0:
        image_paths = image_paths[:image_count]

    hr_folder = os.path.join(export_folder, "hr")
    lr_folder = os.path.join(export_folder, "lr")

    # remove any existing export folder, then recreate the layout
    if os.path.exists(export_folder):
        shutil.rmtree(export_folder)
    for folder in (hr_folder, lr_folder):
        os.makedirs(folder, exist_ok=True)

    # process each image into the lr and hr images, then save them
    for i, image_path in enumerate(image_paths):
        operation_start_time = time.time()
        lr_image, hr_image = make_datum(image_path, export_image_dims)

        fn = f"{i}.png"
        for folder, image in ((hr_folder, hr_image), (lr_folder, lr_image)):
            out_path = os.path.join(folder, fn)
            # cv2.imwrite reports failure via its return value, not an
            # exception; a silently missing file would leave lr/hr unpaired.
            if not cv2.imwrite(out_path, image):
                raise OSError(f"failed to write image: {out_path}")

        operation_time_taken = round((time.time() - operation_start_time), 3)
        if i % 10 == 0:
            print(f"({i} / {len(image_paths)}) {operation_time_taken}s per operation")
    print('done')

# Export 500 randomly chosen lr/hr pairs, each 2560 x 2560 (height, width).
# Note: make_dataset deletes the export folder first if it already exists.
make_dataset(
    export_folder=r"H:\my_files\my_programs\cat_upscaler\datasets\cat_downscale_4_500_count",
    export_image_dims=(2560, 2560),
    image_count=500,
)

there are 5011 images in the folder
(0 / 500) 0.415s per operation
(10 / 500) 0.694s per operation
(20 / 500) 0.382s per operation
(30 / 500) 0.412s per operation
(40 / 500) 0.329s per operation
(50 / 500) 0.427s per operation
(60 / 500) 0.539s per operation
(70 / 500) 0.326s per operation
(80 / 500) 0.314s per operation
(90 / 500) 0.338s per operation
(100 / 500) 0.388s per operation
(110 / 500) 0.332s per operation
(120 / 500) 0.419s per operation
(130 / 500) 0.337s per operation
(140 / 500) 0.417s per operation
(150 / 500) 0.303s per operation
(160 / 500) 0.973s per operation
(170 / 500) 0.285s per operation
(180 / 500) 0.318s per operation
(190 / 500) 0.276s per operation
(200 / 500) 0.039s per operation
(210 / 500) 0.03s per operation
(220 / 500) 0.027s per operation
(230 / 500) 0.026s per operation
(240 / 500) 0.023s per operation
(250 / 500) 0.015s per operation
(260 / 500) 0.033s per operation
(270 / 500) 0.006s per operation
(280 / 500) 0.019s per operation
(290 / 500) 0.024s 