# MaskRCNN

In [18]:
import cv2
import time
import torch
from flame.core.model.maskRCNN.mask_rcnn import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
from flame.core.model.maskRCNN.mask_rcnn import maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights

In [29]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = maskrcnn_resnet50_fpn_v2(
    weights=None,
    box_score_thresh=0.5,
    box_nms_thresh=0.5
)
model.eval().to(device)
print(f'Params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

Params: 46357361


In [24]:
image = cv2.imread('/home/phungpx/Downloads/dog.jpg')
sample = torch.from_numpy(image).to(device)
sample = sample.float().div(255.)
sample = sample.permute(2, 0, 1)

In [25]:
t1 = time.time()
with torch.no_grad():
    preds = model([sample])
t2 = time.time()
print(t2 - t1)

4.918933629989624


In [26]:
preds

[{'boxes': tensor([[126.4080,  77.7413, 559.8017, 508.7804]]),
  'labels': tensor([18]),
  'scores': tensor([0.9962]),
  'masks': tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            ...,
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.]]]])}]

In [37]:
from flame.core.model.maskRCNN.rpn import AnchorGenerator
from flame.core.model.maskRCNN.faster_rcnn import FasterRCNN
from flame.core.model.maskRCNN.faster_rcnn import FastRCNNPredictor

In [None]:
model = FasterRCNN()

# Faster RCNN

In [43]:
import cv2
import time
import torch
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights

In [93]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = fasterrcnn_mobilenet_v3_large_fpn(
    weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1,
    box_score_thresh=0.05,
    box_nms_thresh=0.5
)
model.eval().to(device)
print(f'Params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

Params: 19327458


In [94]:
image = cv2.imread('/home/phungpx/Downloads/dog.jpg')
sample = torch.from_numpy(image).to(device)
sample = sample.float().div(255.)
sample = sample.permute(2, 0, 1)

In [95]:
t1 = time.time()
with torch.no_grad():
    preds = model([sample])
t2 = time.time()
print(t2 - t1)

1.329390048980713


In [96]:
preds

[{'boxes': tensor([[471.9238,  46.5055, 908.0916, 567.8863],
          [468.8514,  40.9219, 813.2957, 569.1475]]),
  'labels': tensor([18, 23]),
  'scores': tensor([0.9712, 0.2047])}]

# Custome FasterRCNN

In [63]:
import cv2
import time
import torch
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from flame.core.model.maskRCNN.backbones.mobilenetv3 import MobileNet_V3_Large_Weights

# backbone
# FPN
# Anchor Generator
# RoIAlign
from torchvision.ops import MultiScaleRoIAlign

In [65]:
trainable_backbone_layers = _validate_trainable_layers(True, None, 6, 3)
backbone = mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.IMAGENET1K_V1)
backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)

In [69]:
dummy = torch.FloatTensor(2, 3, 224, 224)
output = backbone(dummy)
for i, j in output.items():
    print(i, j.shape)

0 torch.Size([2, 256, 7, 7])
1 torch.Size([2, 256, 7, 7])
pool torch.Size([2, 256, 4, 4])


In [58]:
sizes = (128, 256, 512)

In [59]:
if not isinstance(sizes[0], (list, tuple)):
    # TODO change this
    sizes = tuple((s,) for s in sizes)

In [60]:
sizes

((128,), (256,), (512,))

In [62]:
anchor = AnchorGenerator()

In [3]:
from torch import nn
from flame.core.model.maskRCNN.faster_rcnn import FasterRCNN
from flame.core.model.maskRCNN.backbones.mobilenetv3 import mobilenet_v3_large, MobileNet_V3_Large_Weights
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights
from flame.core.model.maskRCNN.backbones.backbone_utils import _mobilenet_extractor, _validate_trainable_layers
from flame.core.model.maskRCNN.anchor import AnchorGenerator
from flame.core.model.maskRCNN.functions._utils import _ovewrite_value_param
from flame.core.model.maskRCNN.functions import misc as misc_nn_ops
from typing import Optional, Tuple, Any

class FasterRCNNMobileNetV3LargeFPN(nn.Module):
    def __init__(
        self,
        weights: Optional[FasterRCNN_MobileNet_V3_Large_FPN_Weights] = None,
        weights_backbone: Optional[MobileNet_V3_Large_Weights] = None,
        num_classes: int = None,
        progress: bool = True,
        trainable_backbone_layers: Optional[int] = None,
        # Anchors parameters
        anchor_sizes: Tuple[Tuple[int]] = ((32, 64, 128, 256, 512,),) * 3,
        aspect_ratios: Tuple[Tuple[float]] = ((0.5, 1.0, 2.0),) * 3,
        # transform parameters
        min_size: int = 320,
        max_size: int = 640,
        # RPN parameters
        rpn_pre_nms_top_n_test: int = 150,
        rpn_post_nms_top_n_test: int = 150,
        rpn_nms_thresh: float = 0.7,
        rpn_score_thresh: float = 0.05,
        # Box parameters
        box_score_thresh: float = 0.05,
        box_nms_thresh: float = 0.5,
        **kwargs: Any,
    ):
        super(FasterRCNNMobileNetV3LargeFPN, self).__init__()
        weights = FasterRCNN_MobileNet_V3_Large_FPN_Weights.verify(weights)
        weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)

        if weights is not None:
            weights_backbone = None
            num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
        elif num_classes is None:
            num_classes = 91

        is_trained = weights is not None or weights_backbone is not None
        trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 6, 3)
        norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

        backbone = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
        backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)

        self.model = FasterRCNN(
            backbone,
            num_classes,
            rpn_anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios),
            min_size=min_size,
            max_size=max_size,
            # RPN parameters
            rpn_pre_nms_top_n_test=rpn_pre_nms_top_n_test,
            rpn_post_nms_top_n_test=rpn_post_nms_top_n_test,
            rpn_nms_thresh=rpn_nms_thresh,
            rpn_score_thresh=rpn_score_thresh,
            # Box parameters
            box_score_thresh=box_score_thresh,
            box_nms_thresh=box_nms_thresh,
            **kwargs
        )

        if weights is not None:
            self.model.load_state_dict(weights.get_state_dict(progress=progress))

    def state_dict(self):
        return self.model.state_dict()

    def load_state_dict(self, state_dict):
        self.model.load_state_dict(state_dict)

    def forward(self, x, targets=None):
        return self.model(x, targets)

In [15]:
device = 'cpu'
model = FasterRCNNMobileNetV3LargeFPN(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1)
model.eval().to(device)
print(f'Params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

Params: 19327458


In [14]:
import cv2
import torch
image = cv2.imread('/home/phungpx/Downloads/dog.jpg')
sample = torch.from_numpy(image).to(device)
sample = sample.float().div(255.)
sample = sample.permute(2, 0, 1)

In [10]:
import time
t1 = time.time()
with torch.no_grad():
    preds = model([sample])
t2 = time.time()
print(t2 - t1)

0.3305342197418213


In [11]:
preds

[{'boxes': tensor([[472.7997,  37.1727, 904.9170, 561.7517],
          [464.8203,  44.0201, 920.6664, 561.7022],
          [475.1435,  46.0952, 888.1178, 562.9543]]),
  'labels': tensor([20, 18, 16]),
  'scores': tensor([0.8297, 0.4276, 0.0503])}]

In [16]:
from torchvision.models import mobilenet

In [17]:
mobilenet.__dict__

{'__name__': 'torchvision.models.mobilenet',
 '__doc__': None,
 '__package__': 'torchvision.models',
 '__loader__': <_frozen_importlib_external.SourceFileLoader at 0x7f3bdf99c370>,
 '__spec__': ModuleSpec(name='torchvision.models.mobilenet', loader=<_frozen_importlib_external.SourceFileLoader object at 0x7f3bdf99c370>, origin='/home/phungpx/anaconda3/envs/vtcc/lib/python3.9/site-packages/torchvision/models/mobilenet.py'),
 '__file__': '/home/phungpx/anaconda3/envs/vtcc/lib/python3.9/site-packages/torchvision/models/mobilenet.py',
 '__cached__': '/home/phungpx/anaconda3/envs/vtcc/lib/python3.9/site-packages/torchvision/models/__pycache__/mobilenet.cpython-39.pyc',
 '__builtins__': {'__name__': 'builtins',
  '__doc__': "Built-in functions, exceptions, and other objects.\n\nNoteworthy: None is the `nil' object; Ellipsis represents `...' in slices.",
  '__package__': '',
  '__loader__': _frozen_importlib.BuiltinImporter,
  '__spec__': ModuleSpec(name='builtins', loader=<class '_frozen_impo

In [18]:
from flame.core.model.maskRCNN.backbones import mobilenetv3

In [19]:
mobilenetv3.__dict__

{'__name__': 'flame.core.model.maskRCNN.backbones.mobilenetv3',
 '__doc__': None,
 '__package__': 'flame.core.model.maskRCNN.backbones',
 '__loader__': <_frozen_importlib_external.SourceFileLoader at 0x7f3be062b8b0>,
 '__spec__': ModuleSpec(name='flame.core.model.maskRCNN.backbones.mobilenetv3', loader=<_frozen_importlib_external.SourceFileLoader object at 0x7f3be062b8b0>, origin='/media/phungpx/WORKSPACE/PROJECTS/PHUNGPX/object_detection_pytorch/maskrcnn_pytorch/flame/core/model/maskRCNN/backbones/mobilenetv3.py'),
 '__file__': '/media/phungpx/WORKSPACE/PROJECTS/PHUNGPX/object_detection_pytorch/maskrcnn_pytorch/flame/core/model/maskRCNN/backbones/mobilenetv3.py',
 '__cached__': '/media/phungpx/WORKSPACE/PROJECTS/PHUNGPX/object_detection_pytorch/maskrcnn_pytorch/flame/core/model/maskRCNN/backbones/__pycache__/mobilenetv3.cpython-39.pyc',
 '__builtins__': {'__name__': 'builtins',
  '__doc__': "Built-in functions, exceptions, and other objects.\n\nNoteworthy: None is the `nil' object; El

In [20]:
import torch.fx

In [21]:
from torch import Tensor