Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Benchmarks: support to run maskrcnn with or without IPEX. (#1308)
Browse files Browse the repository at this point in the history
  • Loading branch information
haojinIntel committed Apr 13, 2023
1 parent 8ecef24 commit 3ad167b
Show file tree
Hide file tree
Showing 14 changed files with 739 additions and 69 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@
from ..utils.comm import synchronize
from ..utils.timer import Timer, get_time_str
from .bbox_aug import im_detect_bbox_aug
import intel_extension_for_pytorch as ipex
import numpy as np
# from maskrcnn_benchmark.engine.utils_vis import draw, make_dot

use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def compute_on_dataset(model, data_loader, device, bbox_aug, timer=None, bf16=False, bf32=False, jit=False, iterations=-1, iter_warmup=-1, enable_profiling=False):
model.eval()
Expand All @@ -48,15 +51,16 @@ def compute_on_dataset(model, data_loader, device, bbox_aug, timer=None, bf16=Fa
print('Evaluating MaskRCNN: Steps per Epoch {} total Steps {}'.format(steps_per_epoch, total_steps))

model = model.to(memory_format=torch.channels_last)
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model.backbone = ipex.optimize(model.backbone, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
else:
model.backbone = ipex.optimize(model.backbone, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
if use_ipex:
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model.backbone = ipex.optimize(model.backbone, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
else:
model.backbone = ipex.optimize(model.backbone, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)

with torch.cpu.amp.autocast(enabled=bf16), torch.no_grad():
# generate trace model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
from maskrcnn_benchmark.utils.metric_logger import MetricLogger
from maskrcnn_benchmark.engine.inference import inference
from ..utils.timer import Timer, get_time_str
import intel_extension_for_pytorch as ipex

use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def reduce_loss_dict(loss_dict):
"""
Expand Down Expand Up @@ -94,11 +98,12 @@ def do_train(
iou_types = iou_types + ("keypoints",)
dataset_names = cfg.DATASETS.TEST

if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer, inplace=True, auto_kernel_selection=True)
else:
model, optimizer = ipex.optimize(model, dtype=torch.bfloat16 if bf16 else torch.float32, optimizer=optimizer, inplace=True)
if use_ipex:
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer, inplace=True, auto_kernel_selection=True)
else:
model, optimizer = ipex.optimize(model, dtype=torch.bfloat16 if bf16 else torch.float32, optimizer=optimizer, inplace=True)

for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
import torch.nn.functional as F
from torch import nn

# from intel_extension_for_pytorch.nn.modules._roi_align import RoIAlign as ROIAlign

from .utils import cat

# CloudTik patch start
use_ipex = False
import os
if os.environ.get('USE_IPEX') == "1":
from intel_extension_for_pytorch.nn.modules._roi_align import RoIAlign as ROIAlign
use_ipex = True
else:
from torchvision.ops.roi_align import RoIAlign as ROIAlign
# CloudTik patch end

class LevelMapper(object):
    """Map each RoI to the FPN pyramid level it should be pooled from.

    Implements the assignment heuristic from Eqn.(1) of the FPN paper:
    larger boxes are assigned to coarser pyramid levels.
    """

    def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6):
        """
        Arguments:
            k_min (int): index of the finest pyramid level
            k_max (int): index of the coarsest pyramid level
            canonical_scale (int): reference box scale (s0 in the paper)
            canonical_level (int): level a canonical-scale box maps to
            eps (float): guard against log2(0) for degenerate boxes
        """
        self.k_min = k_min
        self.k_max = k_max
        self.s0 = canonical_scale
        self.lvl0 = canonical_level
        self.eps = eps

    def __call__(self, boxlists):
        """
        Arguments:
            boxlists (list[BoxList])
        Returns:
            int64 Tensor of zero-based level offsets in [0, k_max - k_min].
        """
        # A box's "scale" is the square root of its area.
        box_scales = torch.sqrt(cat([boxlist.area() for boxlist in boxlists]))

        # Eqn.(1) in the FPN paper, floored and clamped to the valid levels.
        raw_levels = torch.floor(self.lvl0 + torch.log2(box_scales / self.s0 + self.eps))
        clamped = torch.clamp(raw_levels, min=self.k_min, max=self.k_max)
        return clamped.to(torch.int64) - self.k_min


class Pooler(nn.Module):
    """
    Pooler for Detection with or without FPN.
    It currently hard-code ROIAlign in the implementation,
    but that can be made more generic later on.
    Also, the requirement of passing the scales is not strictly necessary, as they
    can be inferred from the size of the feature map / size of original image,
    which is available thanks to the BoxList.
    """

    def __init__(self, output_size, scales, sampling_ratio):
        """
        Arguments:
            output_size (list[tuple[int]] or list[int]): output size for the pooled region
            scales (list[float]): scales for each Pooler
            sampling_ratio (int): sampling ratio for ROIAlign
        """
        super(Pooler, self).__init__()
        # One ROIAlign op per pyramid level.
        self.poolers = nn.ModuleList(
            [
                ROIAlign(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio)
                for scale in scales
            ]
        )
        self.output_size = output_size
        # The network always downsamples by a factor of 2 at each level, so
        # the pyramid level indices can be recovered from the scales.
        lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item()
        lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item()
        self.map_levels = LevelMapper(lvl_min, lvl_max)

    def convert_to_roi_format(self, boxes):
        """Concatenate per-image boxes into (batch_idx, x1, y1, x2, y2) rows."""
        concat_boxes = cat([b.bbox for b in boxes], dim=0)
        device = concat_boxes.device
        dtype = concat_boxes.dtype
        # Prefix every row with the index of the image it came from.
        batch_ids = cat(
            [
                torch.full((len(b), 1), i, dtype=dtype, device=device)
                for i, b in enumerate(boxes)
            ],
            dim=0,
        )
        return torch.cat([batch_ids, concat_boxes], dim=1)

    def forward(self, x, boxes):
        """
        Arguments:
            x (list[Tensor]): feature maps for each level
            boxes (list[BoxList]): boxes to be used to perform the pooling operation.
        Returns:
            result (Tensor)
        """
        rois = self.convert_to_roi_format(boxes)

        # Single-level case: no level assignment needed.
        if len(self.poolers) == 1:
            return self.poolers[0](x[0], rois)

        levels = self.map_levels(boxes)

        first_feature = x[0]
        dtype = first_feature.dtype
        device = first_feature.device
        out_size = self.output_size[0]
        # Pre-allocate the full output; each level fills in its own rows.
        result = torch.zeros(
            (len(rois), first_feature.shape[1], out_size, out_size),
            dtype=dtype,
            device=device,
        )
        for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)):
            idx_in_level = torch.nonzero(levels == level).squeeze(1)
            pooled = pooler(per_level_feature, rois[idx_in_level])
            result[idx_in_level] = pooled.to(dtype)

        return result


def make_pooler(cfg, head_name):
    """Build a Pooler from the config section named *head_name*.

    Arguments:
        cfg: global config; cfg.MODEL[head_name] must expose
            POOLER_RESOLUTION, POOLER_SCALES and POOLER_SAMPLING_RATIO.
        head_name (str): key of the head's config section under cfg.MODEL.
    Returns:
        Pooler configured for that head.
    """
    head_cfg = cfg.MODEL[head_name]
    resolution = head_cfg.POOLER_RESOLUTION
    return Pooler(
        output_size=(resolution, resolution),
        scales=head_cfg.POOLER_SCALES,
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist
from maskrcnn_benchmark.modeling.box_coder import BoxCoder

import intel_extension_for_pytorch as ipex
box_head_nms = torch.ops.torch_ipex.box_head_nms
# CloudTik patch start
import os
use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
box_head_nms = torch.ops.torch_ipex.box_head_nms
use_ipex = True
#import intel_extension_for_pytorch as ipex
#box_head_nms = torch.ops.torch_ipex.box_head_nms
# CloudTik patch end

class PostProcessor(nn.Module):
"""
Expand Down Expand Up @@ -86,13 +94,32 @@ def forward(self, x, boxes):
# if not self.bbox_aug_enabled: # If bbox aug is enabled, we will do it later
# boxlist = self.filter_results(boxlist, num_classes)
# results.append(boxlist)
new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh, self.nms, self.detections_per_img, num_classes)
for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
boxlist_for_class.add_field("scores", score)
boxlist_for_class.add_field("labels", label)
results.append(boxlist_for_class)

# CloudTik patch start
if use_ipex:
new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh,
self.nms, self.detections_per_img, num_classes)
for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
boxlist_for_class.add_field("scores", score)
boxlist_for_class.add_field("labels", label)
results.append(boxlist_for_class)
else:
for prob, boxes_per_img, image_shape in zip(
class_prob, proposals, image_shapes
):
boxlist = self.prepare_boxlist(boxes_per_img, prob, image_shape)
boxlist = boxlist.clip_to_image(remove_empty=False)
if not self.bbox_aug_enabled: # If bbox aug is enabled, we will do it later
boxlist = self.filter_results(boxlist, num_classes)
results.append(boxlist)
# new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh,
# self.nms, self.detections_per_img, num_classes)
# for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
# boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
# boxlist_for_class.add_field("scores", score)
# boxlist_for_class.add_field("labels", label)
# results.append(boxlist_for_class)
# CloudTik patch end
return results

def prepare_boxlist(self, boxes, scores, image_shape):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@
from ..utils import cat
from .utils import permute_and_flatten

import intel_extension_for_pytorch as ipex
rpn_nms = torch.ops.torch_ipex.rpn_nms
# CloudTik patch start
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms
import os
use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
rpn_nms = torch.ops.torch_ipex.rpn_nms
use_ipex = True
#import intel_extension_for_pytorch as ipex
#rpn_nms = torch.ops.torch_ipex.rpn_nms
# CloudTik patch end

class RPNPostProcessor(torch.nn.Module):
"""
Expand Down Expand Up @@ -123,13 +132,35 @@ def forward_for_single_feature_map(self, anchors, objectness, box_regression):
# score_field="objectness",
# )
# result.append(boxlist)
new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh, self.post_nms_top_n)

for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
result.append(boxlist)

# CloudTik patch start
if use_ipex:
new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh,
self.post_nms_top_n)

for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
result.append(boxlist)
else:
for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
boxlist = boxlist.clip_to_image(remove_empty=False)
boxlist = remove_small_boxes(boxlist, self.min_size)
boxlist = boxlist_nms(
boxlist,
self.nms_thresh,
max_proposals=self.post_nms_top_n,
score_field="objectness",
)
result.append(boxlist)
# new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh, self.post_nms_top_n)
#
# for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
# boxlist = BoxList(proposal, im_shape, mode="xyxy")
# boxlist.add_field("objectness", score)
# result.append(boxlist)
# CloudTik patch end
return result

def forward(self, anchors, objectness, box_regression, targets=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@

from .bounding_box import BoxList

import intel_extension_for_pytorch as ipex
use_ipex = False
import os
import torchvision
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def _box_nms(dets, scores, threshold, sorted=False):
return torch.ops.torch_ipex.nms(dets, scores, threshold, sorted)
if use_ipex:
return torch.ops.torch_ipex.nms(dets, scores, threshold, sorted)
else:
return torchvision.ops.nms(dets, scores, threshold)

def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ else
fi

export DNNL_PRIMITIVE_CACHE_CAPACITY=1024
export KMP_BLOCKTIME=1
export KMP_AFFINITY=granularity=fine,compact,1,0

export TRAIN=0

Expand All @@ -80,15 +78,9 @@ BATCH_SIZE=1

rm -rf ${OUTPUT_DIR}/maskrcnn_${PRECISION}_inference_realtime*

# check if stoch PYT or IPEX is installed on the system
IPEX_ARGS=""
pip list | grep intel-extension-for-pytorch
if [[ "$?" == 0 ]]; then
IPEX_ARGS="-m intel_extension_for_pytorch.cpu.launch \
--enable_jemalloc --latency_mode"
fi

python ${IPEX_ARGS} \
cloudtik-ml-run \
--enable_jemalloc \
--latency_mode \
${MODEL_DIR}/models/object_detection/pytorch/maskrcnn/maskrcnn-benchmark/tools/test_net.py \
$ARGS \
--iter-warmup 20 \
Expand Down
Loading

0 comments on commit 3ad167b

Please sign in to comment.