In [1]:
import os
import cv2
import torch
from nanodet.util import cfg, load_config, Logger

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

device = torch.device('cuda')

torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

In [37]:
config_path = 'demo/nanodet-plus-m_416.yml'
model_path = 'demo/nanodet-plus-m_416_checkpoint.ckpt'
image_path = 'demo/000252.jpg'

load_config(cfg, config_path)
logger = Logger(-1, use_tensorboard=False)

In [86]:
from nanodet.model.arch import build_model
from nanodet.util import Logger, cfg, load_config, load_model_weight
from nanodet.data.transform import Pipeline
from nanodet.data.collate import naive_collate
from nanodet.data.batch_process import stack_batch_img
import numpy as np

class WrapperModel(torch.nn.Module):
    def __init__(self, cfg, model_path, logger, device="cuda:0"):
        super().__init__()

        self.cfg = cfg
        self.device = device
        
        self.num_classes = cfg['model']['arch']['head']['num_classes']
        self.reg_max = cfg['model']['arch']['head']['reg_max']
        model = build_model(cfg.model)
        ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
        load_model_weight(model, ckpt, logger)
        self.model = model.to(device).eval()

    def forward(self, tensor_img):
        if len(tensor_img.shape) == 4:
            list_max_cls_scores = []
            for i in range(tensor_img.shape[0]):
                numpy_img = tensor_img[i].permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)

                from nanodet.data.transform.warp import ShapeTransform
                from nanodet.data.transform.color import color_aug_and_norm
                import functools

                meta = ShapeTransform(cfg.data.val.keep_ratio, **cfg.data.val.pipeline)({"img": numpy_img}, cfg.data.val.input_size)
                meta = functools.partial(color_aug_and_norm, kwargs=cfg.data.val.pipeline)(meta)

                processed_tensor_img = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(device).type(torch.cuda.FloatTensor).unsqueeze(0)

                preds = self.model(processed_tensor_img)

                cls_scores = preds.split(
                    [self.num_classes, 4 * (self.reg_max + 1)], dim=-1
                )[0]

                max_cls_scores = torch.max(cls_scores.sigmoid()[0], dim=0)[0]

                list_max_cls_scores.append(max_cls_scores)
            return torch.stack(list_max_cls_scores)
    
wrapper = WrapperModel(cfg, model_path, logger, device=device)

model size is  1.0x
init weights...
=> loading pretrained model https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
Finish initialize NanoDet-Plus Head.


In [87]:
img = cv2.imread(image_path)

input_   = torch.from_numpy(img).permute(2,0,1).to(device).type(torch.cuda.FloatTensor).unsqueeze(0)

print(input_.shape)

torch.Size([1, 3, 333, 500])


In [90]:
model(input_)>0.35

tensor([ True, False,  True, False, False,  True, False,  True, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False],
       device='cuda:0')

In [91]:
[i for i, x in enumerate(model(input_)>0.35) if x]

[0, 2, 5, 7]

In [88]:
from captum.attr import (Deconvolution, DeepLift, DeepLiftShap,
                         FeatureAblation, GradientShap, GuidedBackprop,
                         GuidedGradCam, InputXGradient, IntegratedGradients,
                         Occlusion, Saliency)

pred_class = 7
img = cv2.imread(image_path)
input_   = torch.from_numpy(img).permute(2,0,1).unsqueeze(0).to(device).type(torch.cuda.FloatTensor)

# Integrated Gradients
ig = IntegratedGradients(wrapper)
attributions, delta = ig.attribute(input_,
                                target=pred_class,
                                return_convergence_delta=True)
print('Integrated Gradients Convergence Delta:', delta)
print(attributions)

RuntimeError: CUDA out of memory. Tried to allocate 96.00 MiB (GPU 0; 4.00 GiB total capacity; 2.62 GiB already allocated; 0 bytes free; 2.83 GiB reserved in total by PyTorch)