In [1]:
import json
import os
import random
from typing import Any, Dict, List, Tuple, Callable

import cv2
import detectron2.data.transforms as T
import numpy as np
import torch
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import CfgNode
from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_test_loader
from detectron2.modeling import build_model
from detectron2.structures import Boxes, pairwise_iou
from detectron2.utils.visualizer import Visualizer
from torch.nn import functional as F
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
os.environ['QT_QPA_PLATFORM']='offscreen'

## Configurations

In [2]:
from detectron2.config import get_cfg
print("Preparing config file...")
cfg = get_cfg()
cfg.merge_from_file('../configs/faster_rcnn_bet365.yaml')
cfg.MODEL.WEIGHTS = '../rcnn_bet365.pth'

# Modify config
cfg = cfg.clone()  # cfg can be modified by model
# To generate more dense proposals
cfg.MODEL.RPN.NMS_THRESH = 0.9
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 3000

# Init model
model = build_model(cfg)
model.eval()

# Load weights
checkpointer = DetectionCheckpointer(model)
checkpointer.load(cfg.MODEL.WEIGHTS)

# data augmentation
aug = T.ResizeShortestEdge(
    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
)


Preparing config file...


In [30]:
model.preprocess_image

<bound method GeneralizedRCNN.preprocess_image of GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
    

## Save transformed ground-truth result

In [4]:
os.chdir('../')

In [6]:
from detectron2.data import DatasetMapper, MetadataCatalog, build_detection_test_loader
from detectron2_1.datasets import BenignMapper
from tqdm import tqdm

dataset_mapper = BenignMapper(cfg, is_train=False)
data_loader = build_detection_test_loader(
    cfg, cfg.DATASETS.TEST[0], mapper=dataset_mapper
)

In [27]:
def save_gt_dicts(batched_inputs, json_file):

    if os.path.exists(json_file):
        with open(json_file, 'r') as handle:
            json_dict = json.load(handle)
    else:
        json_dict = {"images": [],  "annotations": [], "categories": [{"id": 1, "name": "box"}, {"id": 2, "name": "logo"}]}

    height, width = batched_inputs[0]["instances"].image_size
    filename = '/'.join(batched_inputs[0]['file_name'].split('/')[-2:])
    image_id = batched_inputs[0]["image_id"]
    image = {
        "file_name": filename,
        "height": height,
        "width": width,
        "id": image_id,
    }
    json_dict["images"].append(image)

    ## get gt_box annotations
    for i, b in enumerate(batched_inputs[0]['instances'].gt_boxes):
        x1, y1, x2, y2 = int(b[0]), int(b[1]), int(b[2]), int(b[3])
        width = x2 - x1
        height = y2 - y1

        category_id = batched_inputs[0]['instances'].gt_classes[i].item()+1
        id_annot = json_dict['annotations'][-1]['id']+1 if len(json_dict["annotations"])!=0 else 0

        ann = {
            "area": width * height,
            "image_id": image_id,
            "bbox": [x1, y1, width, height],
            "category_id": category_id,
            "id": id_annot, # need to be continuous
            "iscrowd": 0
            }
        json_dict["annotations"].append(ann)

    ## write to json file
    with open(json_file, "w") as f:
        json.dump(json_dict, f)



In [31]:
for i, batch in tqdm(enumerate(data_loader)):
    save_gt_dicts(batch, 'data/benign_data/coco_perturbgt_test.json')

1579it [07:55,  3.32it/s]


In [21]:
instances.image_size

(749, 1333)

In [16]:
batch[0]

{'file_name': 'data/benign_data/benign_database/demotywatory.pl/shot.png',
 'height': 768,
 'width': 1366,
 'image_id': 0,
 'image': tensor([[[23, 24, 24,  ..., 23, 23, 23],
          [22, 23, 23,  ..., 22, 23, 24],
          [21, 21, 21,  ..., 21, 23, 24],
          ...,
          [25, 25, 29,  ..., 30, 30, 26],
          [25, 33, 26,  ..., 29, 27, 25],
          [25, 31, 30,  ..., 32, 31, 30]],
 
         [[22, 23, 23,  ..., 22, 22, 22],
          [21, 22, 22,  ..., 21, 22, 23],
          [20, 20, 20,  ..., 20, 22, 23],
          ...,
          [25, 25, 28,  ..., 29, 29, 26],
          [25, 33, 26,  ..., 28, 27, 25],
          [25, 30, 29,  ..., 31, 30, 29]],
 
         [[31, 32, 32,  ..., 31, 31, 31],
          [30, 31, 31,  ..., 30, 31, 32],
          [29, 29, 29,  ..., 29, 31, 32],
          ...,
          [36, 36, 40,  ..., 41, 41, 37],
          [36, 44, 37,  ..., 40, 38, 36],
          [36, 42, 41,  ..., 43, 42, 41]]], dtype=torch.uint8),
 'instances': Instances(num_instances=2

In [8]:
original_image = 'data/benign_data/benign_database/a-golf.net/shot.png'
original_image = cv2.imread(original_image)
with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
    # Apply pre-processing to image.
#     if input_format == "RGB":
#         # whether the model expects BGR inputs or RGB
#         original_image = original_image[:, :, ::-1]
    height, width = original_image.shape[:2]
    image = aug.get_transform(original_image).apply_image(original_image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

    inputs = {"image": image, "height": height, "width": width}
#     predictions = model([inputs])[0]

In [15]:
predictions

{'instances': Instances(num_instances=9, image_height=1787, image_width=1366, fields=[pred_boxes: Boxes(tensor([[3.9013e+02, 3.6795e+02, 6.6060e+02, 4.0883e+02],
         [3.9256e+02, 3.1160e+02, 6.6270e+02, 3.5157e+02],
         [1.1783e+01, 6.4615e+00, 1.3082e+02, 4.8582e+01],
         [1.8938e+02, 5.5595e+01, 3.8157e+02, 1.0328e+02],
         [2.0437e+02, 2.7017e+02, 6.6726e+02, 3.0292e+02],
         [1.1258e+02, 1.4594e+00, 4.8668e+02, 4.9569e+01],
         [6.7648e+02, 1.4749e+03, 9.0259e+02, 1.5013e+03],
         [4.3797e+02, 1.4726e+03, 6.6176e+02, 1.5030e+03],
         [1.1484e+03, 4.5038e-01, 1.3660e+03, 5.0969e+01]], device='cuda:0')), scores: tensor([0.9912, 0.9910, 0.9066, 0.7832, 0.7655, 0.7319, 0.2090, 0.1220, 0.0753],
        device='cuda:0'), pred_classes: tensor([0, 0, 1, 1, 0, 1, 0, 0, 1], device='cuda:0')])}

In [9]:
## preprocess image
images = model.preprocess_image([inputs])

# Get features
features = model.backbone(images.tensor)

# Get bounding box proposals
proposals, _ = model.proposal_generator(images, features, None)
proposal_boxes = proposals[0].proposal_boxes

def _get_roi_prediction(model, features, proposal_boxes):
    # Get proposal boxes' classification scores
    roi_heads = model.roi_heads
    features = [features[f] for f in roi_heads.box_in_features]
    box_features = roi_heads.box_pooler(features, [proposal_boxes])

    box_features = roi_heads.box_head(box_features)
    logits, proposal_deltas = roi_heads.box_predictor(box_features)
    del box_features
    
    return logits, proposal_deltas

In [13]:
type(features)

dict

In [11]:
type(images)

detectron2.structures.image_list.ImageList

In [17]:
image.shape

torch.Size([3, 1047, 800])

In [18]:
logits, proposal_delta = _get_roi_prediction(model, features, proposal_boxes)

In [19]:
startind = torch.argmax(logits, dim=1)
torch.nn.functional.one_hot(startind)

tensor([[1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        ...,
        [0, 0, 1],
        [0, 0, 1],
        [0, 0, 1]], device='cuda:0')

In [23]:
from collections import Counter
Counter(startind.detach().cpu().numpy())

Counter({0: 88, 1: 100, 2: 2812})

In [21]:
Boxes(proposal_delta[:, :4]))

detectron2.structures.boxes.Boxes

In [17]:
proposal_boxes

Boxes(tensor([[226.4303, 183.2361, 387.0454, 205.8438],
        [228.3489, 215.5953, 385.8726, 239.3732],
        [  6.9006,   5.5424,  78.0141,  28.2409],
        ...,
        [175.8883, 169.7452, 392.4770, 519.8912],
        [440.4531, 433.6590, 662.4514, 748.8353],
        [269.0080, 596.0848, 537.9231, 953.0919]], device='cuda:0'))

In [2]:
round(12.23)

12

In [8]:
onehot_tile = onehot.repeat_interleave(4, dim=1)

regbox = torch.mul(proposal_delta, onehot_tile)
regbox = regbox[regbox!=0].view(-1, 4)

RuntimeError: CUDA error: an illegal memory access was encountered

In [15]:
regbox

tensor([[-0.1108,  0.1453,  0.3384,  0.2832],
        [-0.0285, -0.0651,  0.3444,  0.2503],
        [-0.1576,  0.1342,  0.4125,  0.3486],
        ...,
        [ 0.5305, -0.6265, -1.1865,  0.0277],
        [ 0.9470,  0.0714, -1.2006,  0.1700],
        [-0.3846, -1.1510, -0.8399, -0.0300]], device='cuda:0',
       grad_fn=<ViewBackward>)

In [283]:
regbox.shape

torch.Size([3000, 4])

In [284]:
regbox = regbox.detach()

In [285]:
regbox

tensor([[-0.1108,  0.1453,  0.3384,  0.2832],
        [-0.0285, -0.0651,  0.3444,  0.2503],
        [-0.1576,  0.1342,  0.4125,  0.3486],
        ...,
        [ 0.5305, -0.6265, -1.1865,  0.0277],
        [ 0.9470,  0.0714, -1.2006,  0.1700],
        [-0.3846, -1.1510, -0.8399, -0.0300]], device='cuda:0')

## Initial perturbation

In [69]:
noise = torch.normal(0, 0.1, size=inputs['image'].shape)

inputs_noise = inputs.copy()

inputs_noise['image'] = inputs['image'] + noise

In [262]:
## preprocess image
images = model.preprocess_image([inputs_noise])

# Record gradients for image
images.tensor.requires_grad = True

# Get features
features = model.backbone(images.tensor)

# Get bounding box proposals
proposals, _ = model.proposal_generator(images, features, None)
proposal_boxes = proposals[0].proposal_boxes

# Get classification logits
predictions = _get_roi_prediction(model, features, proposal_boxes)

logits, proposal_deltas = predictions

scores = model.roi_heads.box_predictor.predict_probs(
    predictions, proposals
)[0]

In [263]:
startind = torch.argmax(scores, dim=1)-1
onehot = make_one_hot(startind, 2)
onehot_tile = onehot.repeat_interleave(4, dim=1)

proposal_deltas = torch.mul(proposal_deltas, onehot_tile)
proposal_deltas = proposal_deltas[proposal_deltas!=0].view(-1, 4)

In [264]:
proposal_deltas

tensor([[-0.1111,  0.1455,  0.3384,  0.2839],
        [-0.0276, -0.0647,  0.3481,  0.2524],
        [-0.1579,  0.1343,  0.4133,  0.3497],
        ...,
        [-0.5342,  1.0924,  0.0970,  1.0639],
        [-0.5792,  0.1298,  0.8606, -1.1056],
        [-0.8079,  0.3824, -0.2702,  0.1062]], device='cuda:0',
       grad_fn=<ViewBackward>)

In [25]:
cfg.MODEL.ROI_HEADS.NUM_CLASSES

2

In [31]:
torch.nn.functional.one_hot(torch.tensor([[1, 2, 1, 1]]), num_classes=5)

tensor([[[0, 1, 0, 0, 0],
         [0, 0, 1, 0, 0],
         [0, 1, 0, 0, 0],
         [0, 1, 0, 0, 0]]])

In [14]:
labels = torch.tensor([0, 1, 0, 1])

In [16]:
# FIXME Make this more efficient / vectorized?
adv_labels = torch.zeros_like(labels)
for i in range(len(labels)):
    # Include background class: self.n_classes
    incorrect_labels = [l for l in range(3) if l != labels[i]]
    adv_labels[i] = random.choice(incorrect_labels)


In [17]:
adv_labels

tensor([2, 2, 2, 0])

0