In [1]:
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [2]:
# Load the input image from disk.
image_path = 'Woman_at_Beach.png'
image = cv2.imread(image_path)
# cv2.imread reports failure (missing file, unreadable format) by returning
# None instead of raising, so stop here with a clear message rather than
# failing later inside the predictor.
if image is None:
    raise FileNotFoundError(
        "cv2.imread could not load {!r}; check the path and file format".format(image_path)
    )

# Create a Detectron2 configuration object.
cfg = get_cfg()

# Look up the model zoo's YAML config file for the Faster R-CNN model and
# merge its settings into the current configuration.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))

# ROI_HEADS.SCORE_THRESH_TEST is the confidence threshold applied to detections.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # Set a confidence threshold for detections
cfg.MODEL.DEVICE = 'cpu'  # Use CPU for inference

# Point the config at the pre-trained weights: get_checkpoint_url returns the
# URL of the checkpoint that belongs to the same model zoo entry as the config.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

# Perform object detection; `outputs` is indexed by "instances" in later cells.
outputs = predictor(image)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [None]:
# Visualize all detections on top of the input image.
# The channel axis is reversed (image[:, :, ::-1]) before the image is handed
# to Visualizer, and reversed again on the rendered result before it is shown
# with cv2.imshow, i.e. the image is swapped between BGR and RGB channel order.
rgb_image = image[:, :, ::-1]
# MetadataCatalog gives access to the metadata registered for a dataset; here
# the first training dataset named in the config.
dataset_metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
v = Visualizer(rgb_image, dataset_metadata, scale=1.2)

predictions_cpu = outputs["instances"].to("cpu")
out = v.draw_instance_predictions(predictions_cpu)

annotated_image = out.get_image()[:, :, ::-1]
cv2.imshow("Object Detection", annotated_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [1]:
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Load the input image from disk.
image_path = 'Woman_at_Beach.png'
image = cv2.imread(image_path)
# cv2.imread reports failure (missing file, unreadable format) by returning
# None instead of raising, so stop here with a clear message rather than
# failing later inside the predictor.
if image is None:
    raise FileNotFoundError(
        "cv2.imread could not load {!r}; check the path and file format".format(image_path)
    )

# Create a Detectron2 configuration object.
cfg = get_cfg()

# Load a specific configuration file for a model (Faster R-CNN in this case)
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))

# Set a confidence threshold for detections (0.05 here, so low-scoring
# detections are kept as well).
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
cfg.MODEL.DEVICE = 'cpu'  # Use CPU for inference

# Load the pre-trained Detectron2 model weights for the same model zoo entry.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

# Perform object detection
outputs = predictor(image)

# Print the class names available for filtering the detections.
print("Available Thing Classes:", MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Available Thing Classes: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [2]:
# Move the predicted instances to the CPU and fetch the dataset metadata once;
# the metadata is reused for both the class lookup and the visualizer.
instances = outputs["instances"].to("cpu")
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

# Only boats are filtered here. The printed class list contains no "hat"
# entry, so thing_classes.index("hat") would raise ValueError.
boat_class_name = "boat"
boat_class_id = metadata.thing_classes.index(boat_class_name)

# Keep only the instances whose predicted class is "boat".
boat_instances = instances[instances.pred_classes == boat_class_id]

# Visualize the results for boats (channel axis reversed before drawing and
# reversed again before display).
v_boats = Visualizer(image[:, :, ::-1], metadata, scale=1.2)
out_boats = v_boats.draw_instance_predictions(boat_instances)
cv2.imshow("Boats Detection", out_boats.get_image()[:, :, ::-1])

# Keep the window open for up to 30 seconds. cv2.waitKey takes its delay in
# milliseconds, so 30 s is 30000; the previous value of 15 closed the window
# after 15 ms, before it could be seen.
cv2.waitKey(30000)

# Close the window
cv2.destroyAllWindows()