# Resize Training Set Preserving Aspect Ratio

In [None]:
import functools
import math
import multiprocessing
import os
from dataclasses import dataclass
from glob import glob

import cv2
import numpy as np
from tqdm.auto import tqdm


@dataclass
class PreprocessConfig:
    """Settings for padding an image to a square and resizing it."""

    # Side length, in pixels, of the square output image (used as both the
    # width and the height passed to cv2.resize).
    resize: int = 512
    # Constant value used to fill the padded border. NOTE(review): the name
    # looks like a misspelling of "pad_value"; it is kept because callers
    # may pass it by keyword.
    pas_value: int = 0
    # OpenCV interpolation flag forwarded to cv2.resize.
    interp: int = cv2.INTER_CUBIC


def pad_resize_image_dir(
        input_path: str, output_path: str, config: PreprocessConfig
):
    """Pad and resize every file in a directory using a process pool.

    Args:
        input_path: Directory whose entries matching ``*.*`` are processed.
        output_path: Directory that receives the processed images; it is
            created if it does not exist yet.
        config: Padding and resizing settings applied to every image.
    """
    os.makedirs(output_path, exist_ok=True)
    image_paths = glob(os.path.join(input_path, "*.*"))

    # functools.partial (rather than a lambda) keeps the worker picklable so
    # it can be shipped to the pool's child processes.
    worker = functools.partial(
        process_image, output_path=output_path, config=config
    )

    with multiprocessing.Pool(os.cpu_count()) as pool:
        completed = pool.imap_unordered(worker, image_paths)
        # The results carry no information; iterating only drives the
        # progress bar and waits for all tasks to finish.
        for _ in tqdm(completed, total=len(image_paths)):
            pass


def process_image(
        fname: str,
        output_path: str,
        config: PreprocessConfig,
):
    """Read one image, pad/resize it to a square, and save the result.

    The output file is written into ``output_path`` under the same base name
    as the input file.

    Args:
        fname: Path of the image file to read.
        output_path: Directory the processed image is written to.
        config: Padding and resizing settings forwarded to
            ``pad_resize_square``.

    Raises:
        ValueError: If OpenCV cannot decode ``fname`` as an image.
        OSError: If OpenCV reports that the output image was not written.
    """
    image = cv2.imread(fname)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error would surface later as an opaque
        # AttributeError on ``image.shape``.
        raise ValueError(f"Could not read image: {fname!r}")

    preprocessed_image = pad_resize_square(image, config)

    output_image_fname = os.path.join(output_path, os.path.basename(fname))
    # cv2.imwrite returns False when the image could not be saved; do not let
    # that pass silently and leave a hole in the output directory.
    if not cv2.imwrite(output_image_fname, preprocessed_image):
        raise OSError(f"Could not write image: {output_image_fname!r}")


def pad_resize_square(image: np.ndarray, config: PreprocessConfig):
    """Pad an image to a square, then resize it to the configured size.

    Args:
        image: Image array to process.
        config: Supplies the padding value (``pas_value``), the output side
            length (``resize``) and the interpolation flag (``interp``).

    Returns:
        The padded and resized image.
    """
    # Padding happens first so the later resize scales both axes equally.
    return resize_image(
        image=pad_image_square(image=image, pad_value=config.pas_value),
        size=config.resize,
        interp=config.interp,
    )


def pad_image_square(image: np.ndarray, pad_value: int):
    """Pad an image with a constant border so height and width are equal.

    The shorter of the first two axes is padded up to the length of the
    longer one, with the original content centred. When the amount of padding
    is odd, the extra row/column goes at the end (bottom/right).

    Args:
        image: Array whose first two axes are height and width. Both 2-D
            (grayscale) and higher-dimensional (e.g. H x W x C) arrays are
            accepted; axes after the first two are never padded.
        pad_value: Constant used to fill the padded border.

    Returns:
        A new array whose first two axes both have length ``max(h, w)``.
    """
    h, w = image.shape[:2]
    output_size = max(h, w)
    pad_h = output_size - h
    pad_w = output_size - w

    # Integer split: floor half before, the remainder (ceil half) after.
    pad_widths = [
        (pad_h // 2, pad_h - pad_h // 2),
        (pad_w // 2, pad_w - pad_w // 2),
    ]
    # Leave any trailing axes (such as colour channels) untouched.
    pad_widths.extend([(0, 0)] * (image.ndim - 2))

    return np.pad(
        image,
        pad_widths,
        mode="constant",
        constant_values=pad_value,
    )


def resize_image(image: np.ndarray, size: int, interp: int):
    """Resize an image to ``size`` x ``size`` pixels with OpenCV.

    Args:
        image: Image array to resize.
        size: Target width and height in pixels.
        interp: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image.
    """
    target_shape = (size, size)
    return cv2.resize(image, dsize=target_shape, interpolation=interp)


In [None]:
# Pad and resize every training image with the default PreprocessConfig,
# writing the results into the notebook's working directory.
pad_resize_image_dir(
    input_path='../input/happy-whale-and-dolphin/train_images', 
    output_path='/kaggle/working/training_set/resized_train_images', 
    config=PreprocessConfig()
)

In [None]:
! cp ../input/happy-whale-and-dolphin/train.csv /kaggle/working/training_set/