In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import os

# Function to convert RLE-encoded string to mask
def rleToMask(rleString, height, width):
    rows, cols = height, width
    rleNumbers = [int(numstring) for numstring in rleString.split(' ')]
    rlePairs = np.array(rleNumbers).reshape(-1, 2)
    img = np.zeros(rows * cols, dtype=np.uint8)
    for index, length in rlePairs:
        index -= 1
        img[index:index + length] = 255
    img = img.reshape(cols, rows)
    img = img.T
    return img

# Read CSV file
csv_path = '/Users/claustrum/Library/CloudStorage/OneDrive-UHN/archive/nucleiKaggle_2018/stage2_final.csv'
print(f"Reading CSV file from: {csv_path}")
df = pd.read_csv(csv_path)

# Output directory on desktop
output_dir = os.path.join(os.path.expanduser('~'), 'scratch', 'stage2_masks')
print(f"Output directory: {output_dir}")

# Counter for number of masks created
num_masks_created = 0

# Counter for number of directories created
num_dirs_created = 0

# Iterate over each row
for index, row in df.iterrows():
    image_id = row['ImageId']
    encoded_pixels = row['EncodedPixels']
    height = row['Height']
    width = row['Width']
    print(f"Processing image: {image_id}, Height: {height}, Width: {width}")

    # Create directory for ImageId if it doesn't exist
    image_dir = os.path.join(output_dir, image_id)
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
        num_dirs_created += 1
    
    # Convert RLE-encoded mask to binary mask image
    mask = rleToMask(encoded_pixels, height, width)

    # Create PIL image from mask
    mask_image = Image.fromarray(mask, mode='L')

    # Save mask image as PNG in respective directory with sequential numbering
    mask_filename = f'{image_id}_{num_masks_created + 1}.png'
    file_path = os.path.join(image_dir, mask_filename)
    print(f"Saving mask image to: {file_path}")
    mask_image.save(file_path)

    # Increment counters
    num_masks_created += 1

print(f"Number of masks created: {num_masks_created}")
print(f"Number of directories created: {num_dirs_created}")
print("Mask creation process completed.")


Reading CSV file from: /Users/claustrum/Library/CloudStorage/OneDrive-UHN/archive/nucleiKaggle_2018/submission.csv
Output directory: /Users/claustrum/Library/CloudStorage/OneDrive-UHN/archive/nucleiKaggle_2018/maskrcnn_validation_masks


KeyError: 'Height'