Debug error for vision maskrcnn #170

Closed · anijain2305 opened this issue Apr 25, 2022 · 4 comments
@anijain2305 (Contributor)

TorchScript error; the repro is below:

import torch
from torch.nn import *

# FX-generated graph module extracted from torchvision's Mask R-CNN.
class FxModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('_tensor_constant0', torch.zeros([], dtype=torch.int64))
        self.register_buffer('_tensor_constant1', torch.zeros([], dtype=torch.int64))
        self.register_buffer('_tensor_constant2', torch.zeros([], dtype=torch.int64))
        self.register_buffer('_tensor_constant3', torch.zeros([], dtype=torch.int64))
        self.register_buffer('_tensor_constant4', torch.zeros([], dtype=torch.int64))
        self.register_buffer('_tensor_constant5', torch.zeros([4, 1], dtype=torch.int64))
        self.register_buffer('_tensor_constant6', torch.zeros([4, 359613], dtype=torch.int64))

    def forward(self, primals_1, primals_2):
        # Reshape the flat [1438452, 1] input to [4, 359613], then split dim 1
        # into five per-level chunks (269952 + 67488 + 16872 + 4218 + 1083 = 359613).
        view = torch.ops.aten.view(primals_2, [4, 359613]);  primals_2 = None
        split_with_sizes = torch.ops.aten.split_with_sizes(view, [269952, 67488, 16872, 4218, 1083], 1)
        getitem = split_with_sizes[0]
        getitem_1 = split_with_sizes[1]
        getitem_2 = split_with_sizes[2]
        getitem_3 = split_with_sizes[3]
        getitem_4 = split_with_sizes[4];  split_with_sizes = None
        # Per level: take the top-1000 values along dim 1 and keep only the indices.
        topk = torch.ops.aten.topk(getitem, 1000, 1);  getitem = None
        getitem_6 = topk[1];  topk = None
        _tensor_constant0 = self._tensor_constant0
        add = torch.ops.aten.add(getitem_6, _tensor_constant0);  getitem_6 = _tensor_constant0 = None
        topk_1 = torch.ops.aten.topk(getitem_1, 1000, 1);  getitem_1 = None
        getitem_8 = topk_1[1];  topk_1 = None
        _tensor_constant1 = self._tensor_constant1
        add_1 = torch.ops.aten.add(getitem_8, _tensor_constant1);  getitem_8 = _tensor_constant1 = None
        topk_2 = torch.ops.aten.topk(getitem_2, 1000, 1);  getitem_2 = None
        getitem_10 = topk_2[1];  topk_2 = None
        _tensor_constant2 = self._tensor_constant2
        add_2 = torch.ops.aten.add(getitem_10, _tensor_constant2);  getitem_10 = _tensor_constant2 = None
        topk_3 = torch.ops.aten.topk(getitem_3, 1000, 1);  getitem_3 = None
        getitem_12 = topk_3[1];  topk_3 = None
        _tensor_constant3 = self._tensor_constant3
        add_3 = torch.ops.aten.add(getitem_12, _tensor_constant3);  getitem_12 = _tensor_constant3 = None
        topk_4 = torch.ops.aten.topk(getitem_4, 1000, 1);  getitem_4 = None
        getitem_14 = topk_4[1];  topk_4 = None
        _tensor_constant4 = self._tensor_constant4
        add_4 = torch.ops.aten.add(getitem_14, _tensor_constant4);  getitem_14 = _tensor_constant4 = None
        # Concatenate the five [4, 1000] per-level index tensors into a [4, 5000] gather index.
        cat = torch.ops.aten.cat([add, add_1, add_2, add_3, add_4], 1);  add = add_1 = add_2 = add_3 = add_4 = None
        _tensor_constant5 = self._tensor_constant5
        # Advanced indexing: [4, 1] row indices broadcast against the [4, 5000] column indices.
        index = torch.ops.aten.index(view, [_tensor_constant5, cat]);  view = _tensor_constant5 = None
        _tensor_constant6 = self._tensor_constant6
        _tensor_constant5_1 = self._tensor_constant5
        index_1 = torch.ops.aten.index(_tensor_constant6, [_tensor_constant5_1, cat]);  _tensor_constant6 = _tensor_constant5_1 = None
        _tensor_constant5_2 = self._tensor_constant5
        index_2 = torch.ops.aten.index(primals_1, [_tensor_constant5_2, cat]);  primals_1 = _tensor_constant5_2 = cat = None
        sigmoid = torch.ops.aten.sigmoid(index);  index = None
        select_1 = torch.ops.aten.select(index_2, 0, 1)
        select_5 = torch.ops.aten.select(sigmoid, 0, 1)
        return [select_1, select_5, ]


sizes = [torch.Size([4, 359613, 4]), torch.Size([1438452, 1])]
dtypes = [torch.float32, torch.float32]
inps = [torch.empty(size, dtype=dtype, device="cuda") for (size, dtype) in zip(sizes, dtypes)]


mod = FxModule().cuda()
ref = mod(*inps)

# Scripting the module is what triggers the reported TorchScript error.
ts_mod = torch.jit.script(mod)
res = ts_mod(*inps)
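
Not part of the original repro, but once the scripted call succeeds, a quick sanity check is to compare the scripted output against eager (a sketch; note the repro feeds uninitialized torch.empty tensors, so torch.randn inputs would give a more meaningful comparison):

# Hypothetical follow-up check, not in the original report.
for r, s in zip(ref, res):
    torch.testing.assert_close(s, r)  # scripted output should match eager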
@anijain2305 (Contributor, Author)

cc @eellison, can you please take a look?

@eellison (Contributor)

@anijain2305 this didn't repro for me as of 3326fa60ccfb7c717ecfacbb2a216544561f08d2 on master. In general, do you mind posting a dump of the error when you file an issue?
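
A minimal way to produce such a dump (a sketch built around the repro above; the actual error text was never posted, so this only shows the capture pattern):

import traceback

try:
    ts_mod = torch.jit.script(FxModule().cuda())
    ts_mod(*inps)
except Exception:
    # Paste the full traceback into the issue.
    traceback.print_exc()

# Environment details help as well:
#   python -m torch.utils.collect_env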

@anijain2305 (Contributor, Author)

I see. I will build PyTorch from master and retry.

Yeah, I will start including the error log as well, starting with the next issue.
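
One quick way to confirm which commit a local build corresponds to (a sketch; git_version is populated on source and nightly builds):

import torch

print(torch.__version__)          # release / dev tag
print(torch.version.git_version)  # exact commit hash the build was produced from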

@anijain2305 (Contributor, Author)

The issue is resolved on PyTorch master. Closing.
