# Faster RCNN

In [1]:
import cv2
import time
import json
import torch
from typing import List, Dict
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from flame.core.model.maskRCNN.faster_rcnn import fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights

# device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# samples (for both mode, eval and train)
images = [
    cv2.imread('/home/phungpx/Downloads/dog.jpg'),
    cv2.imread('/home/phungpx/Downloads/cat.jpg'),
]

samples: List[torch.Tensor] = []
for image in images:
    sample = torch.from_numpy(image).to(device)
    sample = sample.float().div(255.)
    sample = sample.permute(2, 0, 1)
    samples.append(sample)

# targets (for training mode)
targets: List[Dict[str, torch.Tensor]] = [
    {
        'labels': torch.tensor([18], dtype=torch.int64, device=device),
        'boxes': torch.tensor([[475,  43, 907, 566]], dtype=torch.float32, device=device),
    },
    {
        'labels': torch.tensor([17], dtype=torch.int64, device=device),
        'boxes': torch.tensor([[43,  47, 700, 672]], dtype=torch.float32, device=device),
    },
]

In [2]:
# model
model = fasterrcnn_mobilenet_v3_large_fpn(
    weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1,
    box_score_thresh=0.2,
    box_nms_thresh=0.2,
)

print(f'Params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

Params: 19327458


In [3]:
# evaluation mode
model.eval().to(device)

t1 = time.time()
with torch.no_grad():
    preds = model(samples)
t2 = time.time()

print(f'evaluation time: {t2 - t1}s')
print('predictions:')
preds

evaluation time: 1.1534547805786133s
predictions:


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[{'boxes': tensor([[473.4747,  45.6446, 908.4516, 567.7006]]),
  'labels': tensor([18]),
  'scores': tensor([0.9831])},
 {'boxes': tensor([[ 43.1124,  46.5310, 699.1204, 671.2537]]),
  'labels': tensor([17]),
  'scores': tensor([0.9989])}]

In [4]:
# training mode
model.train().to(device)

t1 = time.time()
losses = model(samples, targets)
t2 = time.time()

print(f'training time: {t2 - t1}s')
print('losses:')
losses

training time: 1.0749309062957764s
losses:


{'loss_classifier': tensor(0.1329, grad_fn=<NllLossBackward0>),
 'loss_box_reg': tensor(0.1349, grad_fn=<DivBackward0>),
 'loss_objectness': tensor(0.0031, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_rpn_box_reg': tensor(0.0414, grad_fn=<DivBackward0>)}

# MaskRCNN

In [1]:
import cv2
import time
import json
import torch
from typing import List, Dict

from flame.core.model.maskrcnn_resnet50_fpn_v2 import MaskRCNNResNet50FPNV2

In [2]:
device = 'cpu'

# model
model = MaskRCNNResNet50FPNV2(
    num_classes=3,
    box_score_thresh=0.2,
    box_nms_thresh=0.2,
)

print(f'Params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}')

Params: 45883745


In [3]:
# samples
samples: List[torch.Tensor] = [
    torch.FloatTensor(size=(3, 800, 800), device=device),
    # torch.FloatTensor(size=(3, 800, 1000), device=device),
]

# targets
targets: List[Dict[str, torch.Tensor]] = [
    {
        'labels': torch.tensor([1], dtype=torch.int64, device=device),
        'boxes': torch.tensor([[0, 0, 1, 1]], dtype=torch.float32, device=device),
        'masks': torch.zeros(size=(1, 800, 800), device=device),
    },
    # {
    #     'labels': torch.tensor([2], dtype=torch.int64, device=device),
    #     'boxes': torch.tensor([[0, 0, 1, 1]], dtype=torch.float32, device=device),
    #     'masks': torch.zeros(size=(1, 800, 1000), device=device),
    # }

]

In [4]:
# training mode
model.train().to(device)

t1 = time.time()
losses = model(samples, targets)
t2 = time.time()

print(f'training time: {t2 - t1}s')
print('losses:')
losses

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


bbbbbb
training time: 1.518348217010498s
losses:


{'loss_classifier': tensor(1.0802, grad_fn=<NllLossBackward0>),
 'loss_box_reg': tensor(9.6563e-06, grad_fn=<DivBackward0>),
 'loss_mask': tensor(0.5961, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_objectness': tensor(0.6931, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
 'loss_rpn_box_reg': tensor(0.9987, grad_fn=<DivBackward0>)}

In [7]:
from prettytable import PrettyTable
verbose = PrettyTable(losses.keys())  # heading of table
verbose.add_row([loss.item() for loss in losses.values()])
print(verbose)

+-----------------+--------------+-----------+-----------------+------------------+
| loss_classifier | loss_box_reg | loss_mask | loss_objectness | loss_rpn_box_reg |
+-----------------+--------------+-----------+-----------------+------------------+
|       nan       |     nan      |    nan    |       nan       |       nan        |
+-----------------+--------------+-----------+-----------------+------------------+
