In [1]:
import os
import json
import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
import numpy as np
from time import time
from torch.cuda.amp import autocast
from torch.utils.data import DataLoader, Dataset

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the pretrained Mask R-CNN, place it on the chosen device and switch it
# to evaluation mode (both .to() and .eval() return the module itself)
model = maskrcnn_resnet50_fpn(pretrained=True).to(device).eval()


def save_image_paths_to_json(input_dir, json_path):
    """
    Traverse the input directory and save all image paths to a JSON file.

    Files ending in .jpg, .jpeg or .png (case-insensitive) are collected.
    Both sub-directories and file names are visited in sorted order, so the
    resulting list is identical across runs and filesystems.

    Args:
        input_dir: Root directory to walk recursively.
        json_path: Destination file; it is overwritten with a JSON list of paths.
    """
    image_extensions = (".jpg", ".jpeg", ".png")
    all_images = []
    for root, dirs, files in os.walk(input_dir):
        # os.walk lists entries in arbitrary filesystem order; sorting `dirs`
        # in place controls the order in which sub-directories are descended.
        dirs.sort()
        all_images.extend(
            os.path.join(root, file)
            for file in sorted(files)
            if file.lower().endswith(image_extensions)
        )
    with open(json_path, 'w') as json_file:
        json.dump(all_images, json_file)
    print(f"Saved {len(all_images)} image paths to {json_path}")


def load_image_paths_from_json(json_path):
    """
    Read back the list of image paths stored in a JSON file.

    Args:
        json_path: Location of the JSON file to read.

    Returns:
        The decoded JSON content (the list of image paths).

    Raises:
        FileNotFoundError: If nothing exists at json_path.
    """
    if os.path.exists(json_path):
        with open(json_path, 'r') as handle:
            image_paths = json.load(handle)
        print(f"Loaded {len(image_paths)} image paths from {json_path}")
        return image_paths

    # Nothing on disk at that location: report it with the offending path
    raise FileNotFoundError(f"JSON file not found: {json_path}")


def filter_images_by_directory(image_paths, max_per_dir=16):
    """
    Filter the image paths to consider only up to `max_per_dir` images per directory.

    Paths are grouped by their parent directory; within each directory they
    are sorted lexicographically (so zero-padded names such as frame_0000,
    frame_0001, ... stay in frame order) and only the first `max_per_dir`
    are kept. Directories appear in the output in the order in which they
    are first seen in `image_paths`.

    Args:
        image_paths: Iterable of image file paths.
        max_per_dir: Maximum number of images kept per directory
            (default 16). Pass None to keep every image.

    Returns:
        A flat list of the selected paths.

    Raises:
        ValueError: If max_per_dir is negative.
    """
    if max_per_dir is not None and max_per_dir < 0:
        # A negative slice bound would silently drop trailing frames instead
        raise ValueError(f"max_per_dir must be non-negative or None, got {max_per_dir}")

    # Group images by their parent directory (dicts preserve insertion order)
    dir_to_images = {}
    for image_path in image_paths:
        dir_to_images.setdefault(os.path.dirname(image_path), []).append(image_path)

    # Keep the first `max_per_dir` images of each directory, in sorted order
    filtered_images = []
    for images in dir_to_images.values():
        filtered_images.extend(sorted(images)[:max_per_dir])

    return filtered_images


def mask_humans(image, predictions, threshold=0.5, original_size=None):
    """
    Mask human figures in the input image and restore to original size if specified.

    Every detection labelled 1 ("person" in COCO) whose score exceeds
    `threshold` has its mask binarised at 0.5; the covered pixels of the
    image are painted white.

    Args:
        image: PIL image to mask (converted to an array; assumed to be RGB
            because a 3-value colour is assigned).
        predictions: Dict with tensor entries 'masks', 'labels' and 'scores'
            for a single image; masks are indexed as masks[i, 0].
        threshold: Minimum detection score for a person to be masked.
        original_size: Optional (width, height) passed to PIL's resize; when
            truthy the masked image is resized to it before being returned.

    Returns:
        The masked PIL image.
    """
    image_array = np.array(image)
    height, width = image_array.shape[:2]

    masks = predictions['masks']
    labels = predictions['labels']
    scores = predictions['scores']

    # Label 1 in COCO is "person"
    keep = (labels == 1) & (scores > threshold)
    if bool(keep.any()):
        # Union all selected masks on the tensor's device so only one array is
        # copied to the host. Nearest-neighbour resizing samples single pixels,
        # so resizing the union equals the union of the resized masks.
        person_mask = (masks[keep, 0] > 0.5).any(dim=0).cpu().numpy()
        # The mask lives at the model's input resolution; stretch it onto the
        # image that is actually being painted.
        resized_mask = Image.fromarray(person_mask).resize((width, height), Image.NEAREST)
        image_array[np.array(resized_mask)] = [255, 255, 255]

    masked_image = Image.fromarray(image_array)
    if original_size:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        masked_image = masked_image.resize(original_size, Image.LANCZOS)
    return masked_image


class ImageDataset(Dataset):
    """
    Custom Dataset for loading images and resizing.

    Each item is a tuple (tensor, original_size, path): the image resized to
    a resize_to x resize_to square and converted with to_tensor, the
    (width, height) of the image before resizing, and the source path.
    Images that cannot be loaded yield None instead, so the DataLoader's
    collate function must filter those out.
    """
    def __init__(self, image_paths, resize_to):
        # image_paths: sequence of image file paths
        # resize_to: edge length of the square the images are resized to
        self.image_paths = image_paths
        self.resize_to = resize_to

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        try:
            # Close the file handle as soon as the pixel data has been decoded
            with Image.open(path) as source:
                image = source.convert("RGB")
            original_size = image.size
            # Image.ANTIALIAS was removed in Pillow 10; because of the broad
            # except below, the resulting AttributeError would silently skip
            # EVERY image. LANCZOS is the same filter under its current name.
            image_resized = image.resize((self.resize_to, self.resize_to), Image.LANCZOS)
            return F.to_tensor(image_resized), original_size, path
        except Exception as e:
            # Deliberately best-effort: one unreadable frame must not abort the run
            print(f"Error loading image: {path}, skipping. Error: {e}")
            return None


def custom_collate_fn(batch):
    """
    Collate a batch of dataset items, dropping the ones that are None.

    Args:
        batch: List of (image_tensor, original_size, path) tuples, where an
            entry may be None.

    Returns:
        A tuple (stacked image tensor, tuple of original sizes, tuple of paths).

    Raises:
        ValueError: If every entry of the batch is None, because there is
            then nothing to unpack.
    """
    # Discard the None placeholders before transposing the batch
    usable = list(filter(lambda entry: entry is not None, batch))
    images, original_sizes, paths = zip(*usable)
    stacked = torch.stack(images)
    return stacked, original_sizes, paths


def process_and_mask_images(input_dir, json_path, output_dir, batch_size=4, threshold=0.5, resize_to=800, num_workers=4):
    """
    Process images using CPU for preprocessing and GPU for inference.

    The list of image paths is read from `json_path` (and created from
    `input_dir` first if that file does not exist), reduced with
    filter_images_by_directory, run through the module-level `model` in
    batches, and every masked image is written below `output_dir` at the
    same path it has relative to `input_dir`. Progress, throughput and an
    ETA are printed after each batch.

    Args:
        input_dir: Root directory of the source images.
        json_path: Cache file holding the list of image paths.
        output_dir: Root directory for the masked images.
        batch_size: Number of images per inference batch.
        threshold: Minimum detection score for a person to be masked.
        resize_to: Edge length of the square the images are resized to
            before inference.
        num_workers: Number of DataLoader worker processes.
    """
    if not os.path.exists(json_path):
        save_image_paths_to_json(input_dir, json_path)

    all_images = load_image_paths_from_json(json_path)
    filtered_images = filter_images_by_directory(all_images)
    dataset = ImageDataset(filtered_images, resize_to)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,  # Use multiple CPU workers for preprocessing
        pin_memory=True,
        collate_fn=custom_collate_fn
    )

    # CUDA autocast is only meaningful on a GPU; requesting it on a CPU-only
    # machine just triggers a warning and is then disabled anyway.
    use_amp = device.type == "cuda"

    start_time = time()
    processed_images = 0
    total_images = len(filtered_images)

    for images, original_sizes, paths in dataloader:
        # The batch comes from pinned memory, so the copy can be asynchronous
        images = images.to(device, non_blocking=True)
        with torch.no_grad(), autocast(enabled=use_amp):  # Mixed precision inference
            predictions = model(images)

        for path, preds, original_size in zip(paths, predictions, original_sizes):
            # Masking is applied to the full-resolution image, not the resized
            # tensor; close the file as soon as the pixels are decoded.
            with Image.open(path) as source:
                original_image = source.convert("RGB")
            masked_image = mask_humans(original_image, preds, threshold, original_size=original_size)

            # Mirror the input directory layout below output_dir
            relative_path = os.path.relpath(path, input_dir)
            output_path = os.path.join(output_dir, relative_path)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            masked_image.save(output_path)

        processed_images += len(paths)
        # Guard against a zero reading from a coarse clock
        elapsed_time = max(time() - start_time, 1e-9)
        images_per_minute = (processed_images / elapsed_time) * 60
        remaining_images = total_images - processed_images
        eta = remaining_images / images_per_minute

        print(
            f"Processed {processed_images}/{total_images} images "
            f"({(processed_images / total_images) * 100:.2f}%). "
            f"Speed: {images_per_minute:.2f} images/min. ETA: {eta:.2f} minutes."
        )

    torch.cuda.empty_cache()


# Locations of the source frames, the cached path list and the masked output
input_directory = "/scratch/supalami/ProjectDataSplitwise/extracted_frames_mk_val_videos"
json_file_path = "/scratch/supalami/ProjectDataSplitwise/image_paths_validation.json"
output_directory = "/scratch/supalami/ProjectDataSplitwise/masked_extracted_frames_validation"

# Run the masking pipeline: preprocessing in CPU workers, inference on `device`
process_and_mask_images(
    input_dir=input_directory,
    json_path=json_file_path,
    output_dir=output_directory,
    batch_size=120,
    threshold=0.5,
    resize_to=256,
    num_workers=os.cpu_count(),  # One DataLoader worker per available CPU core
)


Using device: cuda




Loaded 752031 image paths from /scratch/supalami/ProjectDataSplitwise/image_paths_validation.json




Processed 120/64728 images (0.19%). Speed: 256.36 images/min. ETA: 252.02 minutes.
Processed 240/64728 images (0.37%). Speed: 441.78 images/min. ETA: 145.97 minutes.
Processed 360/64728 images (0.56%). Speed: 586.33 images/min. ETA: 109.78 minutes.
Processed 480/64728 images (0.74%). Speed: 703.77 images/min. ETA: 91.29 minutes.
Processed 600/64728 images (0.93%). Speed: 804.66 images/min. ETA: 79.70 minutes.
Processed 720/64728 images (1.11%). Speed: 886.36 images/min. ETA: 72.21 minutes.
Processed 840/64728 images (1.30%). Speed: 942.49 images/min. ETA: 67.79 minutes.
Processed 960/64728 images (1.48%). Speed: 988.12 images/min. ETA: 64.53 minutes.
Processed 1080/64728 images (1.67%). Speed: 1033.10 images/min. ETA: 61.61 minutes.
Processed 1200/64728 images (1.85%). Speed: 1078.74 images/min. ETA: 58.89 minutes.
Processed 1320/64728 images (2.04%). Speed: 1122.04 images/min. ETA: 56.51 minutes.
Processed 1440/64728 images (2.22%). Speed: 1155.06 images/min. ETA: 54.79 minutes.
Proce

In [None]:
# import os
# import json
# import torch
# from torchvision.models.detection import maskrcnn_resnet50_fpn
# from torchvision.transforms import functional as F
# from PIL import Image
# import numpy as np
# from time import time
# from torch.cuda.amp import autocast
# from torch.utils.data import DataLoader, Dataset

# # Check for GPU availability
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# # Load the pretrained Mask R-CNN model
# model = maskrcnn_resnet50_fpn(pretrained=True)
# model = model.to(device)  # Move the model to GPU
# model.eval()  # Set the model to evaluation mode


# def save_image_paths_to_json(input_dir, json_path):
#     """
#     Traverse the input directory and save all image paths to a JSON file.
#     """
#     all_images = []
#     for root, _, files in os.walk(input_dir):
#         for file in sorted(files):  # Ensure consistent order
#             if file.lower().endswith((".jpg", ".jpeg", ".png")):
#                 all_images.append(os.path.join(root, file))
#     with open(json_path, 'w') as json_file:
#         json.dump(all_images, json_file)
#     print(f"Saved {len(all_images)} image paths to {json_path}")


# def load_image_paths_from_json(json_path):
#     """
#     Load image paths from a JSON file.
#     """
#     if not os.path.exists(json_path):
#         raise FileNotFoundError(f"JSON file not found: {json_path}")
#     with open(json_path, 'r') as json_file:
#         all_images = json.load(json_file)
#     print(f"Loaded {len(all_images)} image paths from {json_path}")
#     return all_images


# def filter_images_by_directory(image_paths):
#     """
#     Return all image paths without filtering.
#     """
#     return image_paths  # No filtering, return all image paths


# def mask_humans(image, predictions, threshold=0.5, original_size=None):
#     """
#     Mask human figures in the input image and restore to original size if specified.
#     """
#     image_array = np.array(image)
#     masks = predictions['masks']
#     labels = predictions['labels']
#     scores = predictions['scores']

#     for i in range(len(masks)):
#         if labels[i] == 1 and scores[i] > threshold:  # Label 1 in COCO is "person"
#             binary_mask = masks[i, 0].cpu().numpy() > 0.5
#             binary_mask = Image.fromarray(binary_mask).resize(
#                 (image_array.shape[1], image_array.shape[0]), Image.NEAREST
#             )
#             binary_mask = np.array(binary_mask)
#             image_array[binary_mask] = [255, 255, 255]

#     masked_image = Image.fromarray(image_array)
#     if original_size:
#         masked_image = masked_image.resize(original_size, Image.ANTIALIAS)
#     return masked_image


# class ImageDataset(Dataset):
#     """
#     Custom Dataset for loading images and resizing.
#     """
#     def __init__(self, image_paths, resize_to):
#         self.image_paths = image_paths
#         self.resize_to = resize_to

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, idx):
#         path = self.image_paths[idx]
#         try:
#             image = Image.open(path).convert("RGB")
#             original_size = image.size
#             image_resized = image.resize((self.resize_to, self.resize_to), Image.ANTIALIAS)
#             return F.to_tensor(image_resized), original_size, path
#         except Exception as e:
#             print(f"Error loading image: {path}, skipping. Error: {e}")
#             return None


# def custom_collate_fn(batch):
#     """
#     Custom collate function to handle None values in DataLoader.
#     """
#     batch = [item for item in batch if item is not None]  # Filter out None values
#     images, original_sizes, paths = zip(*batch)
#     return torch.stack(images), original_sizes, paths


# def process_and_mask_images(input_dir, json_path, output_dir, batch_size=4, threshold=0.5, resize_to=800, num_workers=4):
#     """
#     Process images using CPU for preprocessing and GPU for inference.
#     """
#     if not os.path.exists(json_path):
#         save_image_paths_to_json(input_dir, json_path)
    
#     all_images = load_image_paths_from_json(json_path)
#     filtered_images = filter_images_by_directory(all_images)
#     dataset = ImageDataset(filtered_images, resize_to)
#     dataloader = DataLoader(
#         dataset,
#         batch_size=batch_size,
#         num_workers=num_workers,  # Use multiple CPU workers for preprocessing
#         pin_memory=True,
#         collate_fn=custom_collate_fn
#     )

#     start_time = time()
#     processed_images = 0
#     total_images = len(filtered_images)

#     for images, original_sizes, paths in dataloader:
#         images = images.to(device)  # Move batch to GPU
#         with autocast():  # Mixed precision inference
#             with torch.no_grad():
#                 predictions = model(images)

#         for path, preds, original_size in zip(paths, predictions, original_sizes):
#             original_image = Image.open(path).convert("RGB")
#             masked_image = mask_humans(original_image, preds, threshold, original_size=original_size)

#             relative_path = os.path.relpath(path, input_dir)
#             output_path = os.path.join(output_dir, relative_path)
#             os.makedirs(os.path.dirname(output_path), exist_ok=True)
#             masked_image.save(output_path)

#         processed_images += len(paths)
#         elapsed_time = time() - start_time
#         images_per_minute = (processed_images / elapsed_time) * 60
#         remaining_images = total_images - processed_images
#         eta = remaining_images / images_per_minute

#         print(
#             f"Processed {processed_images}/{total_images} images "
#             f"({(processed_images / total_images) * 100:.2f}%). "
#             f"Speed: {images_per_minute:.2f} images/min. ETA: {eta:.2f} minutes."
#         )
#     torch.cuda.empty_cache()


# # Set paths for input, output, and JSON
# input_directory = "/scratch/supalami/ProjectDataSplitwise/extracted_frames_mk_val_videos"
# json_file_path = "/scratch/supalami/ProjectDataSplitwise/image_paths_validation.json"
# output_directory = "/scratch/supalami/ProjectDataSplitwise/masked_extracted_frames_validation"

# # Process and mask images using GPU for inference and CPU for preprocessing
# process_and_mask_images(
#     input_directory,
#     json_file_path,
#     output_directory,
#     batch_size=120,
#     threshold=0.5,
#     resize_to=256,
#     num_workers=os.cpu_count()  # Use all available CPU cores
# )


In [None]:
# # Clear unused variables
# import gc
# gc.collect()
# torch.cuda.empty_cache()