Skip to content

torch.topk() cuda version is not working correctly #82878

@hadleyhzy34

Description

@hadleyhzy34

🐛 Describe the bug

import torch
import pdb

def pair_muln_selection(src_points, ref_points, log_n_affinity,
                        num_neighbors=5, num_pairs=512, sigma=0.1):
    """Select the highest-scoring pairs of (src, ref) correspondences.

    For every candidate correspondence ``(s, r)`` and every combination of
    the ``i``-th nearest neighbour of ``s`` and the ``j``-th nearest
    neighbour of ``r``, the score is::

        log_n_affinity[s, r]
        * clamp(1 - ((d_src[s, i] - d_ref[r, j]) / sigma) ** 2, min=0)
        * log_n_affinity[src_nbr[s, i], ref_nbr[r, j]]

    where ``d_src`` / ``d_ref`` are the distances from a point to its
    neighbours. The ``num_pairs`` largest scores are selected and decoded
    back into point indices.

    Args:
        src_points: ``(n_s, d)`` tensor of source points.
        ref_points: ``(n_r, d)`` tensor of reference points.
        log_n_affinity: ``(n_s, n_r)`` tensor of correspondence scores.
        num_neighbors: neighbours considered per point (default 5). Each
            point set must contain at least ``num_neighbors + 1`` points.
        num_pairs: number of top-scoring pairs to return (default 512).
            Clamped to the number of available scores when the inputs are
            too small to provide that many.
        sigma: tolerance on the neighbour-distance difference (default 0.1).

    Returns:
        Tuple ``(first_node_src, first_node_ref, second_node_src,
        second_node_ref)`` of int64 index tensors, one entry per selected
        pair, on the same device as ``log_n_affinity``.
    """
    # Allocate on the device the inputs already live on instead of
    # hard-coding CUDA, so the function also runs on CPU tensors.
    device = log_n_affinity.device
    k = num_neighbors

    # Pairwise distance matrices within each point set.
    ref_dist = torch.cdist(ref_points, ref_points)  # (n_r, n_r)
    src_dist = torch.cdist(src_points, src_points)  # (n_s, n_s)

    # k + 1 smallest distances per row; the first column is dropped
    # (presumably the point itself, at distance 0).
    ref_ne_idx = ref_dist.topk(k + 1, dim=-1, largest=False)[1][:, 1:]  # (n_r, k)
    src_ne_idx = src_dist.topk(k + 1, dim=-1, largest=False)[1][:, 1:]  # (n_s, k)

    # Distance from each point to each of its neighbours; broadcasting
    # replaces the explicit repeat() copies.
    src_ne_dist = torch.linalg.norm(
        src_points[src_ne_idx] - src_points.unsqueeze(1), dim=-1)  # (n_s, k)
    ref_ne_dist = torch.linalg.norm(
        ref_points[ref_ne_idx] - ref_points.unsqueeze(1), dim=-1)  # (n_r, k)

    n_s, n_r = log_n_affinity.size()
    pair_affinity = torch.zeros((n_s, n_r, k, k), device=device)  # (n_s, n_r, k, k)
    sigma_sq = sigma ** 2
    for i in range(k):
        # Rows of the affinity matrix gathered for the i-th src neighbour;
        # depends only on i, so it is hoisted out of the inner loop.
        affinity_rows = log_n_affinity[src_ne_idx[:, i]]  # (n_s, n_r)
        for j in range(k):
            diff = src_ne_dist[:, i].unsqueeze(-1) - ref_ne_dist[:, j].unsqueeze(0)  # (n_s, n_r)
            weight = torch.clamp(1 - diff ** 2 / sigma_sq, min=0.)  # (n_s, n_r)
            pair_affinity[:, :, i, j] = (
                log_n_affinity * weight * affinity_rows[:, ref_ne_idx[:, j]])

    # NOTE(review): this is the call the surrounding report says raises
    # "CUDA error: an illegal memory access" on torch 1.12.0+cu116 for
    # 7000 x 7000 inputs; the author's experiment was to run the topk on
    # pair_affinity.cpu() and move the indices back to the GPU.
    flat_scores = pair_affinity.view(-1)
    top_k = min(num_pairs, flat_scores.numel())
    _, indices = torch.topk(flat_scores, top_k, dim=-1)

    # Decode flat indices of the (n_s, n_r, k, k) tensor.
    slots = k * k
    within_src = indices % (n_r * slots)
    first_node_src = (indices // (n_r * slots)).long()  # candidate first src idx
    first_node_ref = (within_src // slots).long()  # candidate first ref idx
    second_idx = within_src % slots
    second_slot_src = (second_idx // k).long()  # neighbour slot of the src point
    second_slot_ref = second_idx % k  # neighbour slot of the ref point

    # Map neighbour slots back to actual point indices.
    second_node_src = src_ne_idx[first_node_src, second_slot_src]
    second_node_ref = ref_ne_idx[first_node_ref, second_slot_ref]
    return first_node_src, first_node_ref, second_node_src, second_node_ref

if __name__ == '__main__':
    # Reproduction driver: random CUDA inputs with 7000 points per set.
    cuda = torch.device('cuda')
    num_points = 7000

    src_points = torch.randn((num_points, 3), device=cuda)
    ref_points = torch.randn((num_points, 3), device=cuda)
    log_n_affinity = torch.randn((num_points, num_points), device=cuda)

    # The four returned index tensors are discarded; only the call matters.
    _, _, _, _ = pair_muln_selection(src_points, ref_points, log_n_affinity)

Versions

hardware: rtx3090 + intel12700 + CUDA Version 11.6
The script works fine with torch 1.7.1+cu110.
It does not work correctly with the following torch versions:
torch 1.12.0+cu116
torchaudio 0.12.0+cu116
torchvision 0.13.0+cu116

"RuntimeError: CUDA error: an illegal memory access was encountered" is raised while the topk function is running. The example above is extracted from my project and should reproduce the same error.

cc @ngimel

Metadata

Metadata

Assignees

No one assigned

    Labels

    module: cuda — Related to torch.cuda, and CUDA support in general
    triaged — This issue has been looked at by a team member, and triaged and prioritized into an appropriate module

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions