import torch
import torchvision
import torch_tensorrt
import shutil

torch.set_printoptions(linewidth=shutil.get_terminal_size().columns - 1, edgeitems=10)  # Set nicer formatting.
torch_tensorrt.logging.set_reportable_log_level(torch_tensorrt.logging.Level.Graph)
torch.manual_seed(0)
DEVICE = torch.device("cuda:0")


class Deformable_Convolution(torch.nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, dilation=1, groups=1, offset_groups=1):
        super().__init__()
        # Deformable convolution
        offset_channels = 2 * kernel_size * kernel_size
        self.conv2d_offset = torch.nn.Conv2d(
            in_channels,
            offset_channels * offset_groups,
            kernel_size=kernel_size,
            stride=stride,
            padding=dilation,
            dilation=dilation,
        )
        self.conv2d = torchvision.ops.DeformConv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=dilation,
            dilation=dilation,
            groups=groups,
            bias=False,
        )

    def forward(self, x):
        offset = self.conv2d_offset(x)
        x = self.conv2d(x, offset)
        return x


class MyModel(torch.nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.input_size = [3, 4, 4]
        self.rpn_num_features = 5
        self.backbone = torch.nn.Sequential(
            Deformable_Convolution(3, 3),  # Keep a deformable convolution from my original repro script to show that it isn't an issue now.
        )
        self.make_scores = torch.nn.Conv2d(in_channels=3, out_channels=2 * self.rpn_num_features, kernel_size=1, stride=1, padding=0)
        self.make_boxes = torch.nn.Conv2d(in_channels=3, out_channels=4 * self.rpn_num_features, kernel_size=1, stride=1, padding=0)

        # The following forces an RPN anchor to always be true to avoid a convolution on an empty tensor at a later stage in the original model.
        self.register_buffer('false_anchor_score', torch.tensor((-10, 10), dtype=torch.float).unsqueeze(dim=0))
        self.anchor_always_index = torch.rand(self.rpn_num_features).max(dim=0).indices
        # self.anchor_always_index = self.anchor_always_index.item()
        # Using this instead avoids the original error
        #   RuntimeError: [Error thrown at core/partitioning/shape_analysis.cpp:68] Expected ivalues_maps.count(input) to be true but got false
        #   Could not find torch::jit::Value* 61 produced from %x.1 : Tensor, %61 : Long(requires_grad=0, device=cpu) = prim::Param() in lowering graph for mini graph input.
        # but gives
        #   RuntimeError: [Error thrown at ./core/conversion/var/Var_inl.h:37] Expected isIValue() to be true but got false
        #   Requested unwrapping of arg assuming it was an IValue, however arg type is nvinfer1::ITensor

    def fwd_rpn(self, x):
        scores = self.make_scores(x).reshape(x.shape[0], 2, -1).transpose(-2, -1)
        scores[:, self.anchor_always_index, :] = self.false_anchor_score
        # scores[:, self.anchor_always_index, :] = torch.tensor((-10, 10), dtype=torch.float, device=scores.device).unsqueeze(dim=0)  # This avoids the "arg type is nvinfer1::ITensor" error.
        boxes = self.make_boxes(x).reshape(x.shape[0], 4, -1).transpose(-2, -1)
        return scores, boxes

    def forward(self, x):
        x = self.backbone(x)
        scores, boxes = self.fwd_rpn(x)
        return x, scores, boxes


if __name__ == "__main__":
    model = MyModel().eval().to(DEVICE)
    SHAPE2 = (1, *model.input_size)
    tensor = torch.randn(SHAPE2, dtype=torch.float32, device=DEVICE)

    # Reference run with the eager PyTorch model.
    with torch.inference_mode():
        out, scores, boxes = model(tensor)

    model_trt = torch_tensorrt.compile(
        model,
        inputs=[torch_tensorrt.Input(shape=SHAPE2)],
        enabled_precisions={torch.float},
        truncate_long_and_double=True,
    )

    # Run the TensorRT-compiled module and compare against the eager output.
    with torch.inference_mode():
        out_trt, scores_trt, boxes_trt = model_trt(tensor)

    assert torch.max(torch.abs(out - out_trt)) < 1e-6
    print("success")