In [1]:
import os
# Expose only physical GPU 6 to this process. This is assigned in the first
# cell, before torch is imported, so the restriction is in place before any
# CUDA initialisation can happen.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

## Import necessary Modules

In [2]:
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image
import transforms as T
import matplotlib.pyplot as plt
import numpy as np

## Define helper function to load the model

In [8]:
def get_model_instance_segmentation(num_classes):
    """Build a Mask R-CNN (ResNet-50 + FPN) with heads sized for ``num_classes``.

    The network is created with ``pretrained=False``, i.e. no COCO detection
    weights are loaded here; the caller is expected to load a trained
    checkpoint into the returned model afterwards.

    Args:
        num_classes: Number of output classes for both the box classifier and
            the mask predictor (background counts as one class).

    Returns:
        The model with its box and mask predictors replaced and
        ``roi_heads.detections_per_img`` set to 1.
    """
    model = maskrcnn_resnet50_fpn(pretrained=False)
    roi_heads = model.roi_heads

    # Box head: keep the existing input width, swap in a classifier/regressor
    # sized for `num_classes`.
    box_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same idea, with a 256-channel hidden layer.
    mask_in_channels = roi_heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    roi_heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes
    )

    # Cap the post-processing output at one detection per input image.
    roi_heads.detections_per_img = 1

    return model

## Load the model

In [9]:
# Two output classes: the single object class plus background.
num_classes = 2
model = get_model_instance_segmentation(num_classes)

# map_location="cpu" makes the checkpoint loadable no matter which device it
# was saved from: without it, tensors saved on a GPU are restored onto that
# same device index, which fails on CPU-only hosts or when that index is not
# visible to this process. The model itself lives on the CPU at this point, so
# the loaded values end up in the same place either way.
state_dict = torch.load("weights/25e_mrcnn_precise.pth", map_location="cpu")
model.load_state_dict(state_dict)

# Switch to inference mode; as the last expression this also displays the
# model architecture as the cell output.
model.eval()



MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu):

In [10]:
import json
import torch

# Expected inputs: an image "./dataset/img.png" and its annotation file
# "./dataset/img.json" (paths are relative to the current working directory).

# Parse the annotation file; the boxes live under the 'bboxes' key.
with open('./dataset/img.json', 'r') as json_file:
    data = json.load(json_file)

bboxes = data['bboxes']

# Original author's note: 20 boxes per bin (not enforced by this code).
# Each entry is unpacked as [x, y, w, h] and converted to the corner form
# [x_min, y_min, x_max, y_max].
input_boxes = [[x, y, x + w, y + h] for x, y, w, h in bboxes]

In [12]:
from PIL import Image

# Location of the image the boxes refer to.
image_path = './dataset/img.png'

# Load the image and force 3-channel RGB in one step, so every crop taken
# from it later has the same channel layout regardless of the file's mode.
image = Image.open(image_path).convert('RGB')

In [18]:
from tqdm import tqdm
from torchvision.transforms import ToTensor

In [None]:
from tqdm import tqdm
from torchvision.transforms import ToTensor
import numpy as np

to_tensor = ToTensor()

# Keep at most one detection per crop (each crop is expected to hold one object).
model.roi_heads.detections_per_img = 1

# Run inference on whatever device the model's weights currently live on, so
# this cell works unchanged for a CPU model and for one moved to a GPU.
device = next(model.parameters()).device

# One uint8 mask per entry of `input_boxes`, in the same order.
predictions = []

with torch.no_grad():
    for box in tqdm(input_boxes, desc="Generating masks", ncols=100):
        cropped_image = image.crop(box)
        image_tensor = to_tensor(cropped_image).unsqueeze(0).to(device)

        # Run inference
        prediction = model(image_tensor)

        masks = prediction[0]['masks']
        if masks.shape[0] == 0:
            # The model found nothing in this crop. Indexing [0, 0] would raise
            # IndexError, so store an all-zero mask of the crop's size instead;
            # this keeps `predictions` aligned with `input_boxes`.
            crop_width, crop_height = cropped_image.size
            mask = np.zeros((crop_height, crop_width), dtype=np.uint8)
        else:
            # First detection, single mask channel; scale the values by 255
            # and store as uint8 on the host. No resizing is applied here.
            mask = masks[0, 0].mul(255).byte().cpu().numpy()
        predictions.append(mask)