<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">



Now is a good time to confirm that we have the right versions of the libraries at our disposal.

In [2]:
import torch, detectron2
!nvcc --version
# Keep only the first two dot-separated fields of the torch version string,
# and take whatever follows the last "+" as the CUDA build tag.
TORCH_VERSION = ".".join(torch.__version__.split(".", 2)[:2])
CUDA_VERSION = torch.__version__.rpartition("+")[2]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_May__3_19:00:59_Pacific_Daylight_Time_2022
Cuda compilation tools, release 11.7, V11.7.64
Build cuda_11.7.r11.7/compiler.31294372_0
torch:  2.0 ; cuda:  cu117
detectron2: 0.6


In [3]:
# COMMON LIBRARIES
import os
import cv2

from datetime import datetime

# DATA SET PREPARATION AND LOADING
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

# VISUALIZATION
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode


# CONFIGURATION
from detectron2 import model_zoo
from detectron2.config import get_cfg

# EVALUATION
from detectron2.engine import DefaultPredictor

# TRAINING
from detectron2.engine import DefaultTrainer

## COCO Format Dataset

### Register

When you use Detectron2 with a custom dataset, you need to [register the dataset](https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html#register-a-coco-format-dataset) before you can train a model on it.

In [4]:
# TRAIN SET
TRAIN_DATA_SET_NAME = "v1-train"
TRAIN_DATA_SET_IMAGES_DIR_PATH = "./datasets/v1/train/"
TRAIN_DATA_SET_ANN_FILE_PATH = "./datasets/v1/train/_annotations.coco.json"

# VALID SET
VALID_DATA_SET_NAME = "v1-valid"
VALID_DATA_SET_IMAGES_DIR_PATH = "./datasets/v1/valid/"
VALID_DATA_SET_ANN_FILE_PATH = "./datasets/v1/valid/_annotations.coco.json"

# Register both splits through one code path instead of two copy-pasted calls.
# Registering a name that is already in the catalog fails, so names that are
# already present are skipped: this keeps the cell safe to re-run without
# restarting the kernel.
# NOTE: because of that skip, editing the paths above and re-running this cell
# does not update an existing registration; restart the kernel in that case.
for data_set_name, ann_file_path, images_dir_path in (
    (TRAIN_DATA_SET_NAME, TRAIN_DATA_SET_ANN_FILE_PATH, TRAIN_DATA_SET_IMAGES_DIR_PATH),
    (VALID_DATA_SET_NAME, VALID_DATA_SET_ANN_FILE_PATH, VALID_DATA_SET_IMAGES_DIR_PATH),
):
    if data_set_name in DatasetCatalog.list():
        continue
    register_coco_instances(
        name=data_set_name,
        metadata={},
        json_file=ann_file_path,
        image_root=images_dir_path,
    )

We can now use [MetadataCatalog](https://detectron2.readthedocs.io/en/latest/modules/data.html#detectron2.data.MetadataCatalog) to confirm that our custom datasets were correctly registered.

Train

In [5]:
# Names known to MetadataCatalog that begin with the train-set name.
[name for name in MetadataCatalog.list() if name.startswith("v1-train")]

['v1-train']

Valid

In [6]:
# Names known to MetadataCatalog that begin with the validation-set name.
[name for name in MetadataCatalog.list() if name.startswith("v1-valid")]

['v1-valid']

## Train Model Using Custom COCO Format Dataset 

### Configuration

In [18]:
# HYPERPARAMETERS
# The architecture name selects the model-zoo config file and is also reused
# as a component of the output directory below.
ARCHITECTURE = "mask_rcnn_R_50_FPN_3x"
CONFIG_FILE_PATH = "COCO-InstanceSegmentation/{}.yaml".format(ARCHITECTURE)
NUM_CLASSES = 67
BASE_LR = 0.001
MAX_ITER = 25000
EVAL_PERIOD = 200


# OUTPUT DIR
# One directory per execution of this cell:
#   v1-train/<architecture>/<YYYY-mm-dd-HH-MM-SS at the time the cell runs>
OUTPUT_DIR_PATH = os.path.join(
    "v1-train",
    ARCHITECTURE,
    f"{datetime.now():%Y-%m-%d-%H-%M-%S}",
)

os.makedirs(OUTPUT_DIR_PATH, exist_ok=True)

In [19]:
# Build a config object and merge the model-zoo file for CONFIG_FILE_PATH into
# it; every assignment below overrides a single key of that merged config.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(CONFIG_FILE_PATH))

# MODEL
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG_FILE_PATH)
# Alternative kept for reference: use a checkpoint written by an earlier run
# instead of the model-zoo checkpoint.
# cfg.MODEL.WEIGHTS = "./v1-train/mask_rcnn_R_50_FPN_3x/2023-08-08-11-01-05/model_final.pth"
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64

# DATA
cfg.DATASETS.TRAIN = ("v1-train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.INPUT.MASK_FORMAT = "bitmask"

# SOLVER
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = BASE_LR
cfg.SOLVER.MAX_ITER = MAX_ITER
cfg.SOLVER.CHECKPOINT_PERIOD = 500

# EVALUATION / OUTPUT
# NOTE(review): DATASETS.TEST is empty above, so this evaluation period
# presumably has nothing to evaluate during training - confirm whether a
# test split was meant to be listed.
cfg.TEST.EVAL_PERIOD = EVAL_PERIOD
cfg.OUTPUT_DIR = OUTPUT_DIR_PATH

### Training

In [20]:
# Build the trainer from the config assembled above.
trainer = DefaultTrainer(cfg)

# resume=False: presumably initialises from cfg.MODEL.WEIGHTS rather than
# continuing from a checkpoint already saved in cfg.OUTPUT_DIR - confirm
# against the Detectron2 documentation.
trainer.resume_or_load(resume=False)

trainer.train()

[32m[08/09 00:07:47 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (68, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (68,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (268, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (268,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (67, 256, 1

[32m[08/09 00:07:47 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[08/09 00:10:03 d2.utils.events]: [0m eta: 1 day, 22:34:01  iter: 19  total_loss: 5.869  loss_cls: 4.405  loss_box_reg: 0.7057  loss_mask: 0.6942  loss_rpn_cls: 0.03901  loss_rpn_loc: 0.0265    time: 6.6447  last_time: 5.8330  data_time: 0.1659  last_data_time: 0.0071   lr: 1.9981e-05  max_mem: 8233M
[32m[08/09 00:12:04 d2.utils.events]: [0m eta: 1 day, 20:11:57  iter: 39  total_loss: 5.456  loss_cls: 4.021  loss_box_reg: 0.671  loss_mask: 0.6912  loss_rpn_cls: 0.05591  loss_rpn_loc: 0.02282    time: 6.3325  last_time: 5.3720  data_time: 0.0318  last_data_time: 0.0058   lr: 3.9961e-05  max_mem: 8233M
[32m[08/09 00:14:24 d2.utils.events]: [0m eta: 1 day, 21:19:19  iter: 59  total_loss: 4.709  loss_cls: 3.206  loss_box_reg: 0.6841  loss_mask: 0.6802  loss_rpn_cls: 0.05116  loss_rpn_loc: 0.03188    time: 6.5559  last_time: 8.5301  data_time: 0.0436  last_data_time: 0.0090   lr: 5.9941e-05  max_mem

### Evaluation

In [16]:
# Test-time score threshold for the ROI heads.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Point the shared cfg at the final checkpoint of one specific earlier run
# (the run directory is hard-coded, not taken from cfg.OUTPUT_DIR).
cfg.MODEL.WEIGHTS = os.path.join(
    "./v1-train/mask_rcnn_R_50_FPN_3x/2023-08-08-13-25-14/",
    "model_final.pth",
)

predictor = DefaultPredictor(cfg)

[32m[08/08 14:53:32 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./v1-train/mask_rcnn_R_50_FPN_3x/2023-08-08-13-25-14/model_final.pth ...


In [17]:
import os
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog
import cv2
import numpy as np

# Run the predictor over every image of the validation split, save one
# annotated image per input and a text report with per-class detection counts.

# Determine the next output folder number: the first ./results/output<N>
# that does not exist yet, so earlier runs are never overwritten.
results_root = "./results"
output_folder_count = 1
while os.path.exists(os.path.join(results_root, f"output{output_folder_count}")):
    output_folder_count += 1

# Create a new output folder for this run
output_dir = os.path.join(results_root, f"output{output_folder_count}")
os.makedirs(output_dir, exist_ok=True)

# The catalogs are keyed by the *registered dataset name*, not by a directory
# path: asking DatasetCatalog for an unregistered key fails, and metadata
# fetched under an unregistered key carries no class names.
dataset_valid = DatasetCatalog.get(VALID_DATA_SET_NAME)
# Load the dataset's metadata (class names used for labels and for the report)
metadata = MetadataCatalog.get(VALID_DATA_SET_NAME)
thing_classes = metadata.thing_classes

# Create a text file to save results
txt_filename = os.path.join(output_dir, "results.txt")

# An explicit encoding keeps the report independent of the platform default,
# which matters if file or class names contain non-ASCII characters.
with open(txt_filename, "w", encoding="utf-8") as txt_file:
    for idx, d in enumerate(dataset_valid):
        img = cv2.imread(d["file_name"])
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; record it and move on to the next image.
            txt_file.write(f"Image name: {d['file_name']}\n")
            txt_file.write("Predictions: <image could not be read>\n\n")
            continue

        outputs = predictor(img)
        # Move the predictions to the CPU once and reuse them for both the
        # drawing and the per-class counts.
        instances = outputs["instances"].to("cpu")

        # img[:, :, ::-1] reverses the channel axis of the image read by cv2
        # before handing it to the Visualizer.
        visualizer = Visualizer(
            img[:, :, ::-1],
            metadata=metadata,  # Pass the metadata for proper class name mapping
            scale=0.8,
            instance_mode=ColorMode.IMAGE
        )

        # Draw instance predictions with class names
        out = visualizer.draw_instance_predictions(instances)

        # <original image name without extension>_output_<idx>.jpg
        output_filename = os.path.splitext(os.path.basename(d["file_name"]))[0]
        output_filename = f"{output_filename}_output_{idx}.jpg"
        output_filename = os.path.join(output_dir, output_filename)

        # Reverse the channel axis again before writing with cv2.
        cv2.imwrite(output_filename, out.get_image()[:, :, ::-1])

        # Write results to the text file
        txt_file.write(f"Image name: {d['file_name']}\n")
        txt_file.write("Predictions: ")

        # Get the predicted class indices
        pred_classes = instances.pred_classes.numpy()

        # Count detections per class index; minlength gives every known class
        # a slot, including classes with no detections in this image.
        class_counts = np.bincount(pred_classes, minlength=len(thing_classes))

        # Keep only the classes that were detected, paired with their names
        non_zero_classes = [
            (class_name, count)
            for class_name, count in zip(thing_classes, class_counts)
            if count > 0
        ]

        # Write the non-zero class predictions to the text file
        predictions = [f"{count} {class_name}" for class_name, count in non_zero_classes]
        txt_file.write(", ".join(predictions))
        txt_file.write("\n\n")



print("Visualized images and results saved in", output_dir)


Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[08/08 14:53:33 d2.data.datasets.coco]: [0mLoaded 71 images in COCO format from ./datasets/v1/valid/_annotations.coco.json


Visualized images and results saved in ./results\output15
