STEP 2 --> Object Extraction and Storage

In [None]:
#importing all the required libraries
import torch
import torchvision
import torchvision.transforms as T
from PIL import Image
import os
import pandas as pd
import uuid

In [None]:
# Load torchvision's Mask R-CNN (ResNet-50 + FPN backbone) with pre-trained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour of
# a `weights=` argument — confirm against the installed torchvision version.
model2 = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# Switch the model to evaluation mode. eval() returns the model, so as the last
# expression of the cell it is also what gets displayed as the cell output.
model2.eval()



MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [None]:
# Preprocessing pipeline: a Compose wrapping a single ToTensor step. It is applied
# to the loaded PIL image to produce the tensor that is passed to the model.
transform = T.Compose([T.ToTensor()])

In [None]:
# File-system locations used by the rest of this step.
# NOTE(review): `i` is not defined in the visible part of this notebook — presumably
# it is an image index left over from an earlier cell; confirm that it still exists
# after Restart Kernel -> Run All, otherwise this line raises NameError.
# Input image to segment.
img = f"{i}.jpg"
# Directory that receives one PNG per detected object.
output = "segmented_objects"
# CSV file that receives one metadata row per saved object.
object_file = "objects.csv"

In [None]:
# Create the output directory if it is missing. exist_ok=True turns the call into a
# no-op when the directory already exists, so this cell is safe to re-run.
os.makedirs(output, exist_ok=True)

In [None]:
# Load the input image and turn it into the tensor the model consumes.
# The file is opened in a `with` block so its handle is released deterministically
# once the pixel data has been read. convert("RGB") returns a new, fully loaded
# image, so `image` stays usable after the source file has been closed.
with Image.open(img) as source_image:
    image = source_image.convert("RGB")
tensor = transform(image)

In [None]:
# Run Mask R-CNN on the image with gradient tracking switched off (inference only).
# The model is called with a list holding the single image tensor; the result for
# that image is the first entry of what the model returns.
with torch.no_grad():
    batch_predictions = model2([tensor])
predictions = batch_predictions[0]

In [None]:
# Generate a fresh random UUID (as a string) that identifies the source image;
# it is stored alongside every object extracted from that image.
master_id = str(uuid.uuid4())
# Accumulates one metadata record (a dict) per extracted object.
objects = []

In [None]:

# Cut every detected object out of the image and save it as its own PNG,
# collecting one metadata record per object.

# Reset the record list here so that re-running this cell does not append
# duplicate rows for objects that were already processed.
objects = []

# Black canvas used wherever the mask is off. Image.composite builds a new image
# and leaves its inputs untouched, so a single canvas is created up front and
# shared by all objects instead of being rebuilt on every iteration.
background = Image.new("RGB", image.size)

for idx, mask in enumerate(predictions['masks']):
    # Take the first slice of the mask and rescale it to 8-bit values so PIL can
    # use it as a blending mask.
    # Assumes each mask is (1, H, W) with values in [0, 1] — TODO confirm against
    # the torchvision Mask R-CNN output format.
    mask = mask[0].mul(255).byte().cpu().numpy()

    # Generating unique ID for the object
    object_id = str(uuid.uuid4())

    mask_img = Image.fromarray(mask)
    # Original pixels where the mask is set, black canvas everywhere else.
    segmented_img = Image.composite(image, background, mask_img)
    # NOTE(review): the file name depends only on the loop index, so objects from
    # another image written into the same directory would overwrite these files —
    # confirm whether object_id should be part of the name.
    output_path = os.path.join(output, f"object_{idx}.png")
    segmented_img.save(output_path)
    objects.append({
        "object_id": object_id,
        "master_id": master_id,
        "file_path": output_path
    })

In [None]:
# Persist the per-object metadata as a CSV file.
# The column list is passed explicitly so the file always carries the same header,
# even when no objects were detected. Without it, an empty record list yields a
# header-less file that pd.read_csv cannot parse.
object_df = pd.DataFrame(objects, columns=["object_id", "master_id", "file_path"])
object_df.to_csv(object_file, index=False)

print(f"Segmented objects saved in '{output}' and metadata saved in '{object_file}'")


Segmented objects saved in 'segmented_objects' and metadata saved in 'objects.csv'
