In [1]:
import os
import cv2
import torch
from nanodet.util import cfg, load_config, Logger

# Select GPUs by PCI bus order and expose only device 0 to this process.
os.environ.update(
    {
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

# Enable cuDNN and its benchmark mode.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

# Device handle shared by the rest of the notebook.
device = torch.device("cuda")

In [2]:
# Demo assets used by the cells below. All three paths are relative, so they
# are resolved against the kernel's current working directory.
config_path = 'demo/nanodet-plus-m_416.yml'
model_path = 'demo/nanodet-plus-m_416_checkpoint.ckpt'
image_path = 'demo/000252.jpg'

In [3]:
# Load the config file at `config_path` into the shared `cfg` object.
load_config(cfg, config_path)
# NOTE(review): the first positional argument (-1) is presumably a rank /
# local-rank value -- confirm against the Logger signature.
logger = Logger(-1, use_tensorboard=False)

FileNotFoundError: [Errno 2] No such file or directory: 'demo/nanodet-plus-m_416.yml'

In [None]:
from nanodet.model.arch import build_model
from nanodet.util import Logger, cfg, load_config, load_model_weight
from nanodet.data.transform import Pipeline
from nanodet.data.collate import naive_collate
from nanodet.data.batch_process import stack_batch_img


class Predictor(object):
    """Single-image inference helper around a NanoDet model.

    Builds the model described by ``cfg.model``, loads the checkpoint weights
    and keeps the validation pre-processing pipeline so that ``inference`` can
    be called with either an image path or an already-loaded image array.
    """

    def __init__(self, cfg, model_path, logger, device="cuda:0"):
        """Build the model, load its weights and set up pre-processing.

        Args:
            cfg: Loaded config; ``cfg.model`` and ``cfg.data.val`` are read.
            model_path: Checkpoint path handed to ``torch.load``.
            logger: Logger passed through to ``load_model_weight``.
            device: Device the model and the input tensors are moved to.
        """
        self.cfg = cfg
        self.device = device
        model = build_model(cfg.model)
        # Keep the checkpoint storages where they were deserialized; the model
        # itself is moved to `device` below.
        ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
        load_model_weight(model, ckpt, logger)

        self.model = model.to(device).eval()
        self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)

    def inference(self, img, verbose=False):
        """Run the detector on one image.

        Args:
            img: Either a path to an image file (read with ``cv2.imread``) or
                an image array whose first two axes are height and width.
            verbose: When true, print the intermediate image shapes and the
                warp matrix produced by the pipeline (debugging aid).

        Returns:
            A ``(meta, results)`` tuple: the collated meta dict fed to the
            model and whatever ``self.model.inference`` returned for it.

        Raises:
            OSError: If ``img`` is a path that ``cv2.imread`` cannot read.
        """
        img_info = {"id": 0}
        if isinstance(img, str):
            img_path = img
            img_info["file_name"] = os.path.basename(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports failure by returning None, not by raising.
                raise OSError("cv2.imread could not read image: {!r}".format(img_path))
            if verbose:
                print(img.shape)
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        meta = dict(img_info=img_info, raw_img=img, img=img)

        # meta["img"] must be numpy in pipeline
        meta = self.pipeline(None, meta, self.cfg.data.val.input_size)
        if verbose:
            print(meta["img"].shape)

        # HWC -> CHW, then move to the configured device as float32. Using
        # `.to(device, dtype=...)` instead of `torch.cuda.FloatTensor` keeps
        # the tensor on `self.device` (also works for CPU / non-default GPUs).
        meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(
            self.device, dtype=torch.float32
        )
        if verbose:
            print(meta["warp_matrix"])

        # collate list of meta into a single meta
        meta_list = [meta]
        meta = naive_collate(meta_list)

        # just stacks the tensors up
        meta["img"] = stack_batch_img(meta["img"], divisible=32)

        with torch.no_grad():
            results = self.model.inference(meta)
        return meta, results

    def visualize(self, dets, meta, class_names, score_thres, wait=0):
        """Draw detections on the raw image via the model head and time it.

        Args:
            dets: Detections forwarded to ``self.model.head.show_result``.
            meta: Collated meta dict; ``meta["raw_img"][0]`` is drawn on.
            class_names: Class names forwarded to ``show_result``.
            score_thres: Score threshold forwarded to ``show_result``.
            wait: Unused; kept so existing callers keep working.

        Returns:
            The value returned by ``self.model.head.show_result``.
        """
        # Local import: `time` is not imported by the notebook's import cell.
        import time

        time1 = time.time()
        result_img = self.model.head.show_result(
            meta["raw_img"][0], dets, class_names, score_thres=score_thres, show=True
        )
        print("viz time: {:.3f}s".format(time.time() - time1))
        return result_img
    
# Shared Predictor instance reused by the inference and attribution cells below.
predictor = Predictor(cfg, model_path, logger, device=device)

In [None]:
cfg.data.val.pipeline, cfg.data.val.keep_ratio

In [None]:
cfg.data.val.input_size

In [None]:
cfg.data.val

In [None]:
meta, res = predictor.inference(image_path)

In [None]:
meta["img"][0][0]

In [None]:
torch.Size([1, 333, 500, 3])

In [None]:
print(type(meta["img"]))

# Raw forward pass (no post-processing) on the already pre-processed batch.
preds = predictor.model(meta["img"])

num_classes = 80
reg_max = 7

# The last axis is split into `num_classes` channels followed by
# 4 * (reg_max + 1) channels.
split_sizes = [num_classes, 4 * (reg_max + 1)]
cls_scores, bbox_preds = torch.split(preds, split_sizes, dim=-1)

# Sigmoid of the first batch element's scores, reduced with max over its
# leading axis.
first_item_probs = torch.sigmoid(cls_scores)[0]
values, indices = first_item_probs.max(dim=0)
values

In [None]:
0.4030977785587311
0.7854804992675781
0.7465116381645203
0.3727158010005951

In [None]:
res

In [None]:
from IPython.display import display
from PIL import Image

def cv2_imshow(a, convert_bgr_to_rgb=True):
    """A replacement for cv2.imshow() for use in Jupyter notebooks.

    The array is clipped to [0, 255], cast to uint8, optionally converted from
    OpenCV's BGR(A) channel order to RGB(A), and rendered inline through
    ``IPython.display.display`` as a PIL image.

    Args:
        a: np.ndarray. shape (N, M) or (N, M, 1) is an NxM grayscale image. shape
            (N, M, 3) is an NxM BGR color image. shape (N, M, 4) is an NxM BGRA color
            image.
        convert_bgr_to_rgb: switch to convert BGR to RGB channel.
    """
    a = a.clip(0, 255).astype('uint8')
    if a.ndim == 3 and a.shape[2] == 1:
        # Drop the singleton channel axis so (N, M, 1) is handled like (N, M):
        # a 1-channel array is valid input neither for the BGR->RGB conversion
        # below nor for Image.fromarray.
        a = a[:, :, 0]
    # cv2 stores colors as BGR; convert to RGB
    if convert_bgr_to_rgb and a.ndim == 3:
        if a.shape[2] == 4:
            a = cv2.cvtColor(a, cv2.COLOR_BGRA2RGBA)
        else:
            a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
    display(Image.fromarray(a))

In [None]:
from nanodet.util import overlay_bbox_cv

# Draw the detections of image id 0 that score above 0.35 onto the raw image.
result = overlay_bbox_cv(
    meta["raw_img"][0],
    res[0],
    cfg.class_names,
    score_thresh=0.35,
)

# Scale factors of 1.0 keep the original size; change fx / fy to zoom.
scaled_result = cv2.resize(result, None, fx=1.0, fy=1.0)
cv2_imshow(scaled_result)

In [None]:
type(res)

In [None]:
# Gather the last element of every box (its score) across all labels of image id 0.
scores = [bbox[-1] for label in res[0] for bbox in res[0][label]]

# Echo only the scores above the 0.35 threshold.
for score in scores:
    if score > 0.35:
        print(score)

## Captum

In [None]:
# TODO: Wrapper model should take in tensor, return tensor

In [None]:
class WrapperModel(torch.nn.Module):
    """Tensor-in / tensor-out adapter around a predictor for attribution tools.

    For every image in the batch the wrapped predictor's ``inference`` is run
    and the scores of all returned boxes are summed per class, giving one row
    of per-class totals per image.

    Note: the image is detached and converted to numpy before inference and
    the scores are turned into Python floats, so the output carries no
    autograd graph back to the input.
    """

    def __init__(self, predictor, device):
        """Store the predictor to wrap.

        Args:
            predictor: Object exposing ``inference(img) -> (meta, outputs)``.
            device: Kept for callers; the output follows the input's device.
        """
        super().__init__()
        self.model = predictor
        self.device = device

    def forward(self, batched_imgs):
        """Return per-class summed box scores for every image in the batch.

        Args:
            batched_imgs: Tensor of shape (batch, channels, height, width).

        Returns:
            Float32 tensor of shape (batch, num_classes) on the same device as
            ``batched_imgs``; an empty (0, 0) tensor for an empty batch.
        """
        rows = []
        for chw_img in batched_imgs:
            # The predictor reads height/width from shape[:2], so hand it a
            # contiguous HWC array rather than the CHW slice of the batch.
            hwc_img = chw_img.detach().permute(1, 2, 0).contiguous().cpu().numpy()
            # Use the wrapped predictor, not a notebook-level global.
            _, outputs = self.model.inference(hwc_img)

            for j in range(len(outputs)):
                per_class = outputs[j]
                # Index 4 of each box is its score.
                rows.append(
                    [
                        float(sum(box[4] for box in per_class[k]))
                        for k in range(len(per_class))
                    ]
                )

        if not rows:
            return torch.zeros((0, 0), dtype=torch.float32, device=batched_imgs.device)
        return torch.tensor(rows, dtype=torch.float32, device=batched_imgs.device)

In [None]:
from captum.attr import (Deconvolution, DeepLift, DeepLiftShap,
                         FeatureAblation, GradientShap, GuidedBackprop,
                         GuidedGradCam, InputXGradient, IntegratedGradients,
                         Occlusion, Saliency)

wrapper = WrapperModel(predictor, device)
pred_class = 7  # output index (class) whose score is attributed

img = cv2.imread(image_path)
if img is None:
    # cv2.imread reports failure by returning None, not by raising.
    raise OSError("cv2.imread could not read image: {!r}".format(image_path))

# HWC image -> 1 x C x H x W float32 tensor on `device`. `.to(device, dtype)`
# replaces the CUDA-only `torch.cuda.FloatTensor` cast so the cell follows
# whatever `device` is configured.
input_ = (
    torch.from_numpy(img)
    .permute(2, 0, 1)
    .unsqueeze(0)
    .to(device=device, dtype=torch.float32)
)

# Integrated Gradients
# NOTE(review): IntegratedGradients needs gradients of the output w.r.t. the
# input, but WrapperModel.forward detaches the image and goes through numpy.
# Confirm this runs; a perturbation-based method (e.g. Occlusion) may be
# required until the wrapper is differentiable.
ig = IntegratedGradients(wrapper)
attributions, delta = ig.attribute(input_,
                                target=pred_class,
                                return_convergence_delta=True)
print('Integrated Gradients Convergence Delta:', delta)
print(attributions)

In [None]:
input_.shape