# Task
Extract individual payment slips from the image `/content/Slip.jpeg` using instance segmentation and save each extracted slip as a separate image file in a newly created directory.

## Load Image and Deep Learning Model




In [6]:
from PIL import Image
import torchvision.transforms as T
import torchvision

# 1. Load the image and force it to 3-channel RGB so the array/tensor
#    conversions further down always see exactly three colour channels.
image_path = '/content/Slip.jpeg'
image = Image.open(image_path).convert("RGB")
print(f"Image '{image_path}' loaded successfully.")

# 2. and 3. Instantiate a pre-trained Mask R-CNN model.
# The `weights=` enum replaces the deprecated `pretrained=True` flag
# (torchvision >= 0.13); DEFAULT selects the same COCO-trained checkpoint
# without emitting a deprecation warning.
# NOTE(review): these weights are trained on generic COCO categories, not on
# payment slips specifically - detections are only a rough proxy; confirm the
# results visually or fine-tune for production use.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(
    weights=torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.DEFAULT
)
print("Mask R-CNN model loaded with pre-trained weights.")

# 4. Set the model to evaluation mode (inference behaviour for layers such as
#    batch-norm; the detector then returns predictions instead of losses).
model.eval()
print("Model set to evaluation mode.")

Image '/content/Slip.jpeg' loaded successfully.




Mask R-CNN model loaded with pre-trained weights.
Model set to evaluation mode.


In [7]:
import torch

# Preprocessing pipeline: a single ToTensor step wrapped in a Compose so that
# further transforms can be appended later if needed.
transform = T.Compose([T.ToTensor()])
img_tensor = transform(image)

# Run the detector with gradient tracking disabled. The model is called with a
# list of image tensors, so the single image is wrapped in a one-element list.
with torch.no_grad():
    prediction = model([img_tensor])

# prediction[0] is the result dict for our one input image; later cells read
# its 'boxes', 'labels', 'scores' and 'masks' entries.
num_detections = len(prediction[0]['labels'])
print(f"Inference completed. Found {num_detections} potential objects.")


Inference completed. Found 5 potential objects.


In [8]:
import numpy as np
import os
from PIL import Image
from skimage.transform import resize

def _cut_out_instance(img_np, mask, box):
    """Crop one detected instance out of the image as an RGBA array.

    Args:
        img_np: Source image as an (H, W, 3) uint8 array.
        mask: Boolean (H, W) array, True where the instance is present.
        box: Sequence ``(x1, y1, x2, y2)`` of (possibly fractional) pixel
            coordinates of the instance's bounding box.

    Returns:
        A (h, w, 4) uint8 array holding the box-sized crop, where pixels
        outside the mask are fully transparent black, or ``None`` when the
        box has no area inside the image.
    """
    height, width = img_np.shape[:2]

    # Round outward (floor the top-left, ceil the bottom-right) so the crop
    # never drops the mask's last row/column, then clamp to the image so the
    # image slice, mask slice and output buffer always share one shape.
    x1 = max(0, int(np.floor(box[0])))
    y1 = max(0, int(np.floor(box[1])))
    x2 = min(width, int(np.ceil(box[2])))
    y2 = min(height, int(np.ceil(box[3])))
    if x2 <= x1 or y2 <= y1:
        return None

    roi = img_np[y1:y2, x1:x2]
    cropped_mask = mask[y1:y2, x1:x2]

    # Start fully transparent; copy RGB and set alpha opaque only where the
    # mask is True.
    rgba = np.zeros((y2 - y1, x2 - x1, 4), dtype=np.uint8)
    rgba[cropped_mask, :3] = roi[cropped_mask]
    rgba[cropped_mask, 3] = 255
    return rgba


# 1. Define a confidence threshold for filtering predictions
score_threshold = 0.1  # Deliberately low so that more candidate objects are kept

# Filter predictions based on confidence score (compute the selector once)
keep = prediction[0]['scores'] > score_threshold
masks = prediction[0]['masks'][keep]
boxes = prediction[0]['boxes'][keep]
labels = prediction[0]['labels'][keep]
scores = prediction[0]['scores'][keep]

print(f"Found {len(boxes)} objects above confidence threshold {score_threshold}.")

# Create a directory to save the extracted slips
output_dir = "payment_slips_extracted"
os.makedirs(output_dir, exist_ok=True)
print(f"Output directory '{output_dir}' created or already exists.")

# The image-to-array conversion does not depend on the detection, so do it once.
img_np = np.array(image)

# Process each detected object
for i in range(len(boxes)):
    # Masks come back as soft (float) masks with a singleton channel axis;
    # index that axis explicitly (squeeze() could also collapse a real
    # size-1 spatial dimension) and binarise at 0.5.
    mask = (masks[i, 0] > 0.5).cpu().numpy()

    segmented_image_data = _cut_out_instance(img_np, mask, boxes[i].tolist())
    if segmented_image_data is None:
        # A zero-area crop cannot be saved as an image; skip it explicitly.
        print(f"Skipping object {i+1}: bounding box is empty after clamping to the image.")
        continue

    # A 4-channel uint8 array is interpreted by PIL as RGBA; PNG keeps the alpha.
    extracted_slip = Image.fromarray(segmented_image_data)

    # Save the extracted slip (numbering follows the detection index)
    output_filename = os.path.join(output_dir, f"slip_{i+1}.png")
    extracted_slip.save(output_filename)
    print(f"Extracted slip {i+1} saved to {output_filename}")

print("All relevant payment slips extracted and saved.")


Found 3 objects above confidence threshold 0.1.
Output directory 'payment_slips_extracted' created or already exists.
Extracted slip 1 saved to payment_slips_extracted/slip_1.png
Extracted slip 2 saved to payment_slips_extracted/slip_2.png
Extracted slip 3 saved to payment_slips_extracted/slip_3.png
All relevant payment slips extracted and saved.
