Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Benchmarks: support to run maskrcnn with or without IPEX. (#1308)
Browse files Browse the repository at this point in the history
  • Loading branch information
haojinIntel committed Apr 13, 2023
1 parent 8ecef24 commit 3ad167b
Show file tree
Hide file tree
Showing 14 changed files with 739 additions and 69 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@
from ..utils.comm import synchronize
from ..utils.timer import Timer, get_time_str
from .bbox_aug import im_detect_bbox_aug
import intel_extension_for_pytorch as ipex
import numpy as np
# from maskrcnn_benchmark.engine.utils_vis import draw, make_dot

use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def compute_on_dataset(model, data_loader, device, bbox_aug, timer=None, bf16=False, bf32=False, jit=False, iterations=-1, iter_warmup=-1, enable_profiling=False):
model.eval()
Expand All @@ -48,15 +51,16 @@ def compute_on_dataset(model, data_loader, device, bbox_aug, timer=None, bf16=Fa
print('Evaluating MaskRCNN: Steps per Epoch {} total Steps {}'.format(steps_per_epoch, total_steps))

model = model.to(memory_format=torch.channels_last)
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model.backbone = ipex.optimize(model.backbone, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
else:
model.backbone = ipex.optimize(model.backbone, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
if use_ipex:
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model.backbone = ipex.optimize(model.backbone, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.float32, inplace=True, auto_kernel_selection=True)
else:
model.backbone = ipex.optimize(model.backbone, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.rpn = ipex.optimize(model.rpn, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)
model.roi_heads = ipex.optimize(model.roi_heads, dtype=torch.bfloat16 if bf16 else torch.float32, inplace=True)

with torch.cpu.amp.autocast(enabled=bf16), torch.no_grad():
# generate trace model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
from maskrcnn_benchmark.utils.metric_logger import MetricLogger
from maskrcnn_benchmark.engine.inference import inference
from ..utils.timer import Timer, get_time_str
import intel_extension_for_pytorch as ipex

use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def reduce_loss_dict(loss_dict):
"""
Expand Down Expand Up @@ -94,11 +98,12 @@ def do_train(
iou_types = iou_types + ("keypoints",)
dataset_names = cfg.DATASETS.TEST

if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer, inplace=True, auto_kernel_selection=True)
else:
model, optimizer = ipex.optimize(model, dtype=torch.bfloat16 if bf16 else torch.float32, optimizer=optimizer, inplace=True)
if use_ipex:
if bf32:
ipex.set_fp32_math_mode(mode=ipex.FP32MathMode.BF32, device="cpu")
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer, inplace=True, auto_kernel_selection=True)
else:
model, optimizer = ipex.optimize(model, dtype=torch.bfloat16 if bf16 else torch.float32, optimizer=optimizer, inplace=True)

for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import torch
import torch.nn.functional as F
from torch import nn

# from intel_extension_for_pytorch.nn.modules._roi_align import RoIAlign as ROIAlign

from .utils import cat

# CloudTik patch start
use_ipex = False
import os
if os.environ.get('USE_IPEX') == "1":
from intel_extension_for_pytorch.nn.modules._roi_align import RoIAlign as ROIAlign
use_ipex = True
else:
from torchvision.ops.roi_align import RoIAlign as ROIAlign
# CloudTik patch end

class LevelMapper(object):
    """Map each RoI to the FPN pyramid level it should be pooled from.

    Implements the assignment heuristic from Eqn.(1) of the FPN paper:
    larger boxes are assigned to coarser pyramid levels.
    """

    def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6):
        """
        Arguments:
            k_min (int): index of the finest pyramid level
            k_max (int): index of the coarsest pyramid level
            canonical_scale (int): reference box scale (s0 in the paper)
            canonical_level (int): level a canonical-scale box maps to
            eps (float): guard against log2(0) for degenerate boxes
        """
        self.k_min = k_min
        self.k_max = k_max
        self.s0 = canonical_scale
        self.lvl0 = canonical_level
        self.eps = eps

    def __call__(self, boxlists):
        """
        Arguments:
            boxlists (list[BoxList])
        Returns:
            int64 Tensor of zero-based level offsets in [0, k_max - k_min].
        """
        # A box's "scale" is the square root of its area.
        box_scales = torch.sqrt(cat([boxlist.area() for boxlist in boxlists]))

        # Eqn.(1) in the FPN paper, floored and clamped to the valid levels.
        raw_levels = torch.floor(self.lvl0 + torch.log2(box_scales / self.s0 + self.eps))
        clamped = torch.clamp(raw_levels, min=self.k_min, max=self.k_max)
        return clamped.to(torch.int64) - self.k_min


class Pooler(nn.Module):
    """
    Pooler for Detection with or without FPN.
    It currently hard-code ROIAlign in the implementation,
    but that can be made more generic later on.
    Also, the requirement of passing the scales is not strictly necessary, as they
    can be inferred from the size of the feature map / size of original image,
    which is available thanks to the BoxList.
    """

    def __init__(self, output_size, scales, sampling_ratio):
        """
        Arguments:
            output_size (list[tuple[int]] or list[int]): output size for the pooled region
            scales (list[float]): scales for each Pooler
            sampling_ratio (int): sampling ratio for ROIAlign
        """
        super(Pooler, self).__init__()
        # One ROIAlign op per pyramid level.
        self.poolers = nn.ModuleList(
            [
                ROIAlign(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio)
                for scale in scales
            ]
        )
        self.output_size = output_size
        # The network always downsamples by a factor of 2 at each level, so
        # the pyramid level indices can be recovered from the scales.
        lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item()
        lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item()
        self.map_levels = LevelMapper(lvl_min, lvl_max)

    def convert_to_roi_format(self, boxes):
        """Concatenate per-image boxes into (batch_idx, x1, y1, x2, y2) rows."""
        concat_boxes = cat([b.bbox for b in boxes], dim=0)
        device = concat_boxes.device
        dtype = concat_boxes.dtype
        # Prefix every row with the index of the image it came from.
        batch_ids = cat(
            [
                torch.full((len(b), 1), i, dtype=dtype, device=device)
                for i, b in enumerate(boxes)
            ],
            dim=0,
        )
        return torch.cat([batch_ids, concat_boxes], dim=1)

    def forward(self, x, boxes):
        """
        Arguments:
            x (list[Tensor]): feature maps for each level
            boxes (list[BoxList]): boxes to be used to perform the pooling operation.
        Returns:
            result (Tensor)
        """
        rois = self.convert_to_roi_format(boxes)

        # Single-level case: no level assignment needed.
        if len(self.poolers) == 1:
            return self.poolers[0](x[0], rois)

        levels = self.map_levels(boxes)

        first_feature = x[0]
        dtype = first_feature.dtype
        device = first_feature.device
        out_size = self.output_size[0]
        # Pre-allocate the full output; each level fills in its own rows.
        result = torch.zeros(
            (len(rois), first_feature.shape[1], out_size, out_size),
            dtype=dtype,
            device=device,
        )
        for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)):
            idx_in_level = torch.nonzero(levels == level).squeeze(1)
            pooled = pooler(per_level_feature, rois[idx_in_level])
            result[idx_in_level] = pooled.to(dtype)

        return result


def make_pooler(cfg, head_name):
    """Build a Pooler from the config section named *head_name*.

    Arguments:
        cfg: global config; cfg.MODEL[head_name] must expose
            POOLER_RESOLUTION, POOLER_SCALES and POOLER_SAMPLING_RATIO.
        head_name (str): key of the head's config section under cfg.MODEL.
    Returns:
        Pooler configured for that head.
    """
    head_cfg = cfg.MODEL[head_name]
    resolution = head_cfg.POOLER_RESOLUTION
    return Pooler(
        output_size=(resolution, resolution),
        scales=head_cfg.POOLER_SCALES,
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,16 @@
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist
from maskrcnn_benchmark.modeling.box_coder import BoxCoder

import intel_extension_for_pytorch as ipex
box_head_nms = torch.ops.torch_ipex.box_head_nms
# CloudTik patch start
import os
use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
box_head_nms = torch.ops.torch_ipex.box_head_nms
use_ipex = True
#import intel_extension_for_pytorch as ipex
#box_head_nms = torch.ops.torch_ipex.box_head_nms
# CloudTik patch end

class PostProcessor(nn.Module):
"""
Expand Down Expand Up @@ -86,13 +94,32 @@ def forward(self, x, boxes):
# if not self.bbox_aug_enabled: # If bbox aug is enabled, we will do it later
# boxlist = self.filter_results(boxlist, num_classes)
# results.append(boxlist)
new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh, self.nms, self.detections_per_img, num_classes)
for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
boxlist_for_class.add_field("scores", score)
boxlist_for_class.add_field("labels", label)
results.append(boxlist_for_class)

# CloudTik patch start
if use_ipex:
new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh,
self.nms, self.detections_per_img, num_classes)
for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
boxlist_for_class.add_field("scores", score)
boxlist_for_class.add_field("labels", label)
results.append(boxlist_for_class)
else:
for prob, boxes_per_img, image_shape in zip(
class_prob, proposals, image_shapes
):
boxlist = self.prepare_boxlist(boxes_per_img, prob, image_shape)
boxlist = boxlist.clip_to_image(remove_empty=False)
if not self.bbox_aug_enabled: # If bbox aug is enabled, we will do it later
boxlist = self.filter_results(boxlist, num_classes)
results.append(boxlist)
# new_boxes, new_scores, new_labels = box_head_nms(proposals, class_prob, image_shapes, self.score_thresh,
# self.nms, self.detections_per_img, num_classes)
# for box, score, label, image_shape in zip(new_boxes, new_scores, new_labels, image_shapes):
# boxlist_for_class = BoxList(box, image_shape, mode="xyxy")
# boxlist_for_class.add_field("scores", score)
# boxlist_for_class.add_field("labels", label)
# results.append(boxlist_for_class)
# CloudTik patch end
return results

def prepare_boxlist(self, boxes, scores, image_shape):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@
from ..utils import cat
from .utils import permute_and_flatten

import intel_extension_for_pytorch as ipex
rpn_nms = torch.ops.torch_ipex.rpn_nms
# CloudTik patch start
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_nms
import os
use_ipex = False
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
rpn_nms = torch.ops.torch_ipex.rpn_nms
use_ipex = True
#import intel_extension_for_pytorch as ipex
#rpn_nms = torch.ops.torch_ipex.rpn_nms
# CloudTik patch end

class RPNPostProcessor(torch.nn.Module):
"""
Expand Down Expand Up @@ -123,13 +132,35 @@ def forward_for_single_feature_map(self, anchors, objectness, box_regression):
# score_field="objectness",
# )
# result.append(boxlist)
new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh, self.post_nms_top_n)

for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
result.append(boxlist)

# CloudTik patch start
if use_ipex:
new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh,
self.post_nms_top_n)

for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
result.append(boxlist)
else:
for proposal, score, im_shape in zip(proposals, objectness, image_shapes):
boxlist = BoxList(proposal, im_shape, mode="xyxy")
boxlist.add_field("objectness", score)
boxlist = boxlist.clip_to_image(remove_empty=False)
boxlist = remove_small_boxes(boxlist, self.min_size)
boxlist = boxlist_nms(
boxlist,
self.nms_thresh,
max_proposals=self.post_nms_top_n,
score_field="objectness",
)
result.append(boxlist)
# new_proposal, new_score = rpn_nms(proposals, objectness, image_shapes, self.min_size, self.nms_thresh, self.post_nms_top_n)
#
# for proposal, score, im_shape in zip(new_proposal, new_score, image_shapes):
# boxlist = BoxList(proposal, im_shape, mode="xyxy")
# boxlist.add_field("objectness", score)
# result.append(boxlist)
# CloudTik patch end
return result

def forward(self, anchors, objectness, box_regression, targets=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@

from .bounding_box import BoxList

import intel_extension_for_pytorch as ipex
use_ipex = False
import os
import torchvision
if os.environ.get('USE_IPEX') == "1":
import intel_extension_for_pytorch as ipex
use_ipex = True

def _box_nms(dets, scores, threshold, sorted=False):
return torch.ops.torch_ipex.nms(dets, scores, threshold, sorted)
if use_ipex:
return torch.ops.torch_ipex.nms(dets, scores, threshold, sorted)
else:
return torchvision.ops.nms(dets, scores, threshold)

def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ else
fi

export DNNL_PRIMITIVE_CACHE_CAPACITY=1024
export KMP_BLOCKTIME=1
export KMP_AFFINITY=granularity=fine,compact,1,0

export TRAIN=0

Expand All @@ -80,15 +78,9 @@ BATCH_SIZE=1

rm -rf ${OUTPUT_DIR}/maskrcnn_${PRECISION}_inference_realtime*

# check if stoch PYT or IPEX is installed on the system
IPEX_ARGS=""
pip list | grep intel-extension-for-pytorch
if [[ "$?" == 0 ]]; then
IPEX_ARGS="-m intel_extension_for_pytorch.cpu.launch \
--enable_jemalloc --latency_mode"
fi

python ${IPEX_ARGS} \
cloudtik-ml-run \
--enable_jemalloc \
--latency_mode \
${MODEL_DIR}/models/object_detection/pytorch/maskrcnn/maskrcnn-benchmark/tools/test_net.py \
$ARGS \
--iter-warmup 20 \
Expand Down
Loading

0 comments on commit 3ad167b

Please sign in to comment.