In [1]:
import cv2
import torch
from captum.attr import (DeepLift, DeepLiftShap, GradientShap,
                         IntegratedGradients, LayerConductance,
                         NeuronConductance, NoiseTunnel, LayerGradientXActivation)
from detectron2 import model_zoo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.modeling import build_model

# from modified_rcnn import ModifiedGeneralizedRCNN
from modified_fast_rcnn_output_layers import ModifiedFastRCNNOutputLayers
from modified_image_list import ModifiedImageList
from types import MethodType

# Path of the image used for the experiment below.
IMAGE_PATH = '000000000001.jpg'

img = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of later in torch.from_numpy.
if img is None:
    raise FileNotFoundError(f"cv2.imread could not read image file {IMAGE_PATH!r}")

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# build and load faster rcnn model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
# Keep the config's device in sync with `device` so build_model does not try
# to place the model on CUDA when the CPU fallback was selected.
cfg.MODEL.DEVICE = device.type

model = build_model(cfg).to(device).eval()
# Wrap the stock box predictor in the project's modified output layers.
# NOTE(review): the predictor is wrapped *before* the checkpoint is loaded
# further down; whether the checkpoint's box_predictor keys still match depends
# on how ModifiedFastRCNNOutputLayers registers the wrapped module - confirm.
model.roi_heads.box_predictor = ModifiedFastRCNNOutputLayers(model.roi_heads.box_predictor)

def new_preprocess_image(self, batched_inputs: torch.Tensor):
    """
    Normalize every input image and batch the results.

    Each element obtained by iterating ``batched_inputs`` is moved to
    ``self.device`` and standardized as ``(x - self.pixel_mean) / self.pixel_std``.
    The normalized tensors are then handed to ``ModifiedImageList.from_tensors``
    together with ``self.backbone.size_divisibility``, and whatever that call
    returns is returned unchanged.

    Args:
        batched_inputs: iterable of image tensors (annotated as one
            ``torch.Tensor``, which iterates over its first dimension).

    Returns:
        The object built by ``ModifiedImageList.from_tensors``.
    """
    # Debug trace of the container type received.
    print(type(batched_inputs))

    normalized = []
    for image in batched_inputs:
        on_device = image.to(self.device)
        normalized.append((on_device - self.pixel_mean) / self.pixel_std)

    # ModifiedImageList is the project's extension of detectron2's ImageList.
    return ModifiedImageList.from_tensors(normalized, self.backbone.size_divisibility)

def _new_postprocess(instances, batched_inputs: torch.Tensor, image_sizes):
    """
    Rescale the output instances to the target size.

    Every per-image result is passed to detectron2's ``detector_postprocess``
    with the height and width taken from the matching ``image_sizes`` entry,
    and the outcome is wrapped in a ``{"instances": ...}`` dict.

    Args:
        instances: iterable of per-image results.
        batched_inputs: the batched inputs. Their content is not read; they
            only take part in the ``zip`` (which stops at the shortest of the
            three iterables) and in the debug trace.
        image_sizes: iterable of indexable sizes; index 0 is used as the
            height and index 1 as the width.

    Returns:
        A list with one ``{"instances": processed_result}`` dict per image.
    """
    # Imported once per call rather than once per loop iteration.
    from detectron2.modeling.postprocessing import detector_postprocess

    # note: private function; subject to changes
    print(type(batched_inputs))  # debug trace of the container type received

    processed_results = []
    for results_per_image, _, image_size in zip(instances, batched_inputs, image_sizes):
        height, width = image_size[0], image_size[1]
        processed_results.append(
            {"instances": detector_postprocess(results_per_image, height, width)}
        )
    return processed_results
    
# Patch the model instance so it accepts a plain batched tensor as input.
model.preprocess_image = MethodType(new_preprocess_image, model)
# _new_postprocess takes no `self`. Storing it as a staticmethod keeps both
# `Class._postprocess(...)` and `instance._postprocess(...)` call styles working;
# a bare function on the class would receive the instance as its first argument.
# Note that this patches the class, i.e. every instance of it.
model.__class__._postprocess = staticmethod(_new_postprocess)
# Replace forward_with_given_boxes with a pass-through returning its second
# argument. Bound to roi_heads (the object that owns the attribute).
model.roi_heads.forward_with_given_boxes = MethodType(lambda self, x, y: y, model.roi_heads)

modified = model

DetectionCheckpointer(modified).load(cfg.MODEL.WEIGHTS)
modified.to(device)

# define input and baseline
# cv2 yields an HWC uint8 array; convert to CHW float so the input has the same
# dtype as the baselines below (presumably needed by the Captum attribution
# methods imported above - gradients require floating-point inputs). The
# normalisation in new_preprocess_image gives the same values either way.
input_ = torch.from_numpy(img).permute(2, 0, 1).float().to(device)
input_ = torch.stack([input_, input_])  # [2, 3, H, W]
baseline = torch.zeros_like(input_)
# Five low-amplitude random baselines whose C/H/W follow the actual image
# instead of a hard-coded 3x480x640.
baseline_dist = torch.randn(5, *input_.shape[1:]).to(device) * 0.001

modified.roi_heads.box_predictor.class_scores_only = True
outputs = modified.inference(input_, do_postprocess=False)

<class 'torch.Tensor'>
Inside ModifiedImageList.from_tensors
2
[2, 3, tensor(480), tensor(640)] 0.0
torch.Size([3, 480, 640])
torch.Size([2, 3, 480, 640])
PREDICTIONS: [torch.Size([2000, 81]), torch.Size([2000, 320])]


In [2]:
# Sum the first output over dim 0, keeping that dim so the result has a leading size-1 axis.
outputs[0].sum(dim=0, keepdim=True)

tensor([[2.0602e-05, 7.7820e-06, 1.7247e+00, 2.3876e-05, 1.1636e-05, 6.1807e-04,
         5.7153e-05, 1.0312e+00, 8.1349e-05, 6.5970e-05, 2.1995e-06, 1.9053e-05,
         9.7847e-01, 3.1735e-05, 8.6988e-07, 5.1343e-07, 1.2946e-06, 7.2711e-06,
         3.3631e-06, 1.4810e-05, 1.0711e-05, 3.5918e-06, 3.0762e-06, 7.7639e-07,
         1.2731e-05, 8.8409e-05, 7.5949e-06, 6.7203e-07, 1.1000e-05, 8.6329e-07,
         1.0507e-06, 1.0290e-06, 2.2424e-06, 2.3222e-06, 9.7615e-07, 6.3849e-07,
         1.0389e-06, 3.4104e-06, 1.4213e-06, 1.1426e-06, 6.0348e-07, 2.5471e-06,
         5.5576e-07, 4.0875e-07, 4.9753e-07, 1.3100e-06, 3.6242e-07, 6.9711e-07,
         4.0807e-07, 5.6906e-07, 2.2819e-07, 4.4449e-07, 5.0250e-07, 4.3888e-07,
         5.9598e-07, 4.4450e-07, 6.0057e-05, 5.9240e-06, 3.0881e-06, 2.8856e-06,
         1.6135e-06, 1.3102e-06, 4.9715e-06, 1.2347e-06, 9.8407e-07, 5.3022e-07,
         7.2162e-07, 2.9664e-06, 2.0972e-06, 1.9974e-06, 2.1424e-06, 1.6975e-06,
         4.1199e-06, 2.0110e

In [3]:
# Sum the second output over dim 0, keeping that dim so the result has a leading size-1 axis.
outputs[1].sum(dim=0, keepdim=True)

tensor([[2.0602e-05, 7.7820e-06, 1.7247e+00, 2.3876e-05, 1.1636e-05, 6.1807e-04,
         5.7153e-05, 1.0312e+00, 8.1349e-05, 6.5970e-05, 2.1995e-06, 1.9053e-05,
         9.7847e-01, 3.1735e-05, 8.6988e-07, 5.1343e-07, 1.2946e-06, 7.2711e-06,
         3.3631e-06, 1.4810e-05, 1.0711e-05, 3.5918e-06, 3.0762e-06, 7.7639e-07,
         1.2731e-05, 8.8409e-05, 7.5949e-06, 6.7203e-07, 1.1000e-05, 8.6329e-07,
         1.0507e-06, 1.0290e-06, 2.2424e-06, 2.3222e-06, 9.7615e-07, 6.3849e-07,
         1.0389e-06, 3.4104e-06, 1.4213e-06, 1.1426e-06, 6.0348e-07, 2.5471e-06,
         5.5576e-07, 4.0875e-07, 4.9753e-07, 1.3100e-06, 3.6242e-07, 6.9711e-07,
         4.0807e-07, 5.6906e-07, 2.2819e-07, 4.4449e-07, 5.0250e-07, 4.3888e-07,
         5.9598e-07, 4.4450e-07, 6.0057e-05, 5.9240e-06, 3.0881e-06, 2.8856e-06,
         1.6135e-06, 1.3102e-06, 4.9715e-06, 1.2347e-06, 9.8407e-07, 5.3022e-07,
         7.2162e-07, 2.9664e-06, 2.0972e-06, 1.9974e-06, 2.1424e-06, 1.6975e-06,
         4.1199e-06, 2.0110e