In [8]:
import numpy as np
import os
import cv2

def process_and_resize_masks_to_npy(input_path, output_path, target_size=(512, 512), chunk_size=1000):
    """
    Process masks to binary format, resize them, and save in standard .npy format.
    
    Parameters:
    - input_path: Path to the input mask file
    - output_path: Path to save the processed mask file
    - target_size: Desired mask dimensions (height, width).
    - chunk_size: Number of slices to process at a time.
    """
    try:
        # Load input masks in memory-mapped mode
        masks = np.load(input_path, mmap_mode="r")
        num_slices = masks.shape[0]
        print(f"Loaded masks: {masks.shape}")

        # Prepare an output array for resized masks
        resized_masks = np.zeros((num_slices, *target_size), dtype="int8")
        
        # Process in chunks
        for start_idx in range(0, num_slices, chunk_size):
            end_idx = min(start_idx + chunk_size, num_slices)
            chunk = masks[start_idx:end_idx]  # Load a chunk
            
            for i, mask in enumerate(chunk):
                binary_mask = np.where(mask > 0, 1, 0).astype("int8")  # Convert to binary
                resized_masks[start_idx + i] = cv2.resize(
                    binary_mask, target_size[::-1], interpolation=cv2.INTER_NEAREST
                )
            print(f"Processed and resized slices {start_idx} to {end_idx - 1}")

        # Save the processed binary masks as a standard .npy file
        np.save(output_path, resized_masks)
        print(f"Resized binary masks saved successfully to {output_path}")

        # Validate the saved file
        validation_masks = np.load(output_path)
        print(f"Validation - Shape of resized masks: {validation_masks.shape}")
        print(f"Validation - Unique values in resized masks: {np.unique(validation_masks)}")
    
    except Exception as e:
        print(f"Error during processing: {e}")

# Define paths
input_mask_path = "../data/training_data/val/Residential_val_masks_combined.npy"
output_resized_path = "../data/training_data/val/Residential_val_masks_cleaned_resized.npy"

# Process and resize the masks
process_and_resize_masks_to_npy(input_mask_path, output_resized_path)


Loaded masks: (123360, 257, 21)
Error during processing: Unable to allocate 30.1 GiB for an array with shape (123360, 512, 512) and data type int8
