# Image preprocessing

In [1]:
import json
import os
import random

from lib.file_utils import create_directory
from lib.images import list_images, read_image_file, get_image_properties
from lib.images import rotate_image_portrait, scale_image, convert_to_rgb, crop_center

## Centering the image and standardizing

In [2]:
def scale_and_center(img, image_size):
    """Scale, center-crop and RGB-convert an image to ``image_size``.

    Steps, in order: the image is passed through ``rotate_image_portrait``,
    scaled with ``target_width=image_size``, center-cropped with
    ``target_height=image_size`` and converted with ``convert_to_rgb``. If
    the input was wider than tall, the result is rotated by -90 degrees
    before being returned.

    Args:
        img: Image object exposing ``width``, ``height`` and ``rotate``
            (presumably a PIL image -- confirm against ``lib.images``).
        image_size: Target size in pixels, used both as the scaled width and
            as the cropped height.

    Returns:
        The processed image.

    Raises:
        Exception: If the width after the portrait rotation, or the height
            after scaling, is smaller than ``image_size``.
    """
    # Remember the original orientation before the image is rotated.
    was_landscape = img.width > img.height
    portrait = rotate_image_portrait(img)

    # Guard: reject images that are too narrow for the target width.
    width = get_image_properties(portrait)['width']
    if width < image_size:
        raise Exception(f"Image is {width} px width.")

    scaled = scale_image(portrait, target_width=image_size)

    # Guard: after scaling, the image must still be tall enough to crop.
    height = get_image_properties(scaled)['height']
    if height < image_size:
        raise Exception(f"Image is {height} px height.")

    cropped = crop_center(scaled, target_height=image_size)
    result = convert_to_rgb(cropped)

    if was_landscape:
        return result.rotate(-90)
    return result

In [3]:
def split_dataset(list, train_ratio=0.8, random_seed=42):
    """Shuffle a sequence reproducibly and split it into train and test parts.

    The input is not modified and the module-level ``random`` generator is
    left untouched: shuffling is done on a copy with a private generator
    seeded from ``random_seed``. For a given seed the resulting order is the
    same one ``random.seed(random_seed); random.shuffle(...)`` would produce.

    Args:
        list: Sequence of items to split (name kept for backward
            compatibility, although it shadows the builtin inside this
            function).
        train_ratio: Fraction of the items assigned to the train part; the
            train size is ``int(len(list) * train_ratio)``.
        random_seed: Seed for the private random generator.

    Returns:
        A ``(train, test)`` tuple of lists. Together they contain every input
        item exactly once.
    """
    # Private RNG so seeding here does not reset the global random state
    # that other code in the notebook may rely on.
    rng = random.Random(random_seed)

    # Copy via unpacking: the builtin list() is shadowed by the parameter.
    shuffled = [*list]
    rng.shuffle(shuffled)

    train_size = int(len(shuffled) * train_ratio)
    return shuffled[:train_size], shuffled[train_size:]

## Read labeled dataset

In [4]:
# Input directory holding the labeled images and output directory for the
# processed train/test images.
DATASET_DIR = "./dataset"
TARGET_DIR = "./images"

# Load the class catalog. JSON is read as UTF-8 explicitly so the result does
# not depend on the platform's default text encoding.
with open(f'{DATASET_DIR}/catalog.json', encoding='utf-8') as json_file:
    catalog = json.load(json_file)
print(catalog)

{'MM': 'medical-mask', 'PM': 'person-mask', 'PMM': 'person-masked-medical', 'PMN': 'person-masked-non-medical'}


## Write labels file

In [5]:
# Make sure the output root exists before writing into it.
create_directory(TARGET_DIR)

# One catalog value per line, in catalog order, with no trailing newline.
labels_path = f'{TARGET_DIR}/class-names.txt'
with open(labels_path, 'w') as labels_file:
    labels_file.write('\n'.join(catalog.values()))

## Create train and test datasets

In [6]:
# Size in pixels passed to scale_and_center for every output image.
IMAGE_SIZE = 720

for class_dir in catalog.keys():
    create_directory(f'{TARGET_DIR}/train/{class_dir}')
    create_directory(f'{TARGET_DIR}/test/{class_dir}')
    print(f'Processing {class_dir}...')

    # Sort before the seeded shuffle: the split is only reproducible if the
    # input order is, and a directory listing order is not guaranteed to be
    # stable across machines (assumes list_images returns path strings).
    class_images = sorted(list_images(f'{DATASET_DIR}/{class_dir}'))
    train, test = split_dataset(class_images)

    for dataset_type, image_paths in zip(['train', 'test'], [train, test]):
        for path in image_paths:
            # basename handles the platform's path separator, unlike
            # splitting on '/'.
            filename = os.path.basename(path)
            output_path = f'{TARGET_DIR}/{dataset_type}/{class_dir}/{filename}'

            # Best effort: an image that cannot be read or is too small is
            # reported and skipped so one bad file does not abort the run.
            try:
                img = read_image_file(path)
                img = scale_and_center(img, IMAGE_SIZE)
                img.save(output_path)
            except Exception as e:
                print(f'Error processing {path}: {e}')

Processing MM...


Processing PM...
Processing PMM...
Processing PMN...
