Skip to content

[MLIR][OpenMP][Libomptarget][AMDGPU] omp.target access memory raise errors #76577

@EllisLambda

Description

@EllisLambda

The MLIR and LLVM toolchain was built with 8c6172b. Use mlir-translate --mlir-to-llvmir| clang++ -c -x ir -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx1103 to generate static lib

module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.target = #omp.target<target_cpu = "gfx1103", target_features = "">} {
  llvm.func @llvm_omp_target_alloc_device(i64, i32) -> !llvm.ptr
  llvm.func @omp_get_default_device() -> i32
  llvm.func @_QQmain_omp_outline_1() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost)>} {
    %0 = llvm.mlir.zero : !llvm.ptr
    %1 = llvm.call @omp_get_default_device() : () -> i32
    %2 = llvm.getelementptr %0[67108864] : (!llvm.ptr) -> !llvm.ptr, f64
    %3 = llvm.ptrtoint %2 : !llvm.ptr to i64
    %4 = llvm.call @llvm_omp_target_alloc_device(%3, %1) : (i64, i32) -> !llvm.ptr
    %5 = llvm.call @llvm_omp_target_alloc_device(%3, %1) : (i64, i32) -> !llvm.ptr
    %6 = omp.map_info var_ptr(%4 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr
    %7 = omp.map_info var_ptr(%5 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr
    omp.target map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) {
    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
      %8 = llvm.mlir.constant(0 : index) : i64
      %9 = llvm.mlir.constant(1 : index) : i64
      %10 = llvm.mlir.constant(8192 : index) : i64
      omp.teams {
        omp.parallel {
          omp.wsloop for  (%arg2, %arg3, %arg4, %arg5) : i64 = (%8, %8, %8, %8) to (%10, %10, %10, %10) step (%9, %9, %9, %9) {
            %11 = llvm.mul %arg2, %10  : i64
            %12 = llvm.add %11, %arg3  : i64
            %13 = llvm.load %arg0 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %14 = llvm.load %arg1 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %15 = llvm.mul %arg3, %10  : i64
            %16 = llvm.add %15, %arg2  : i64
            %17 = llvm.getelementptr %arg1[%16] : (!llvm.ptr, i64) -> !llvm.ptr, f64
            %18 = llvm.load %17 {alignment = 8 : i64} : !llvm.ptr -> vector<16xf64>
            %19 = llvm.fmul %13, %14  : vector<16xf64>
            %20 = llvm.fdiv %14, %18  : vector<16xf64>
            %21 = llvm.fadd %19, %20  : vector<16xf64>
            %22 = llvm.getelementptr %arg1[%12] : (!llvm.ptr, i64) -> !llvm.ptr, f64
            llvm.store %21, %22 {alignment = 8 : i64} : vector<16xf64>, !llvm.ptr
            omp.terminator
          }
          omp.terminator
        }
        omp.terminator
      }
      omp.terminator
    }
    omp.barrier
    llvm.return
  }
  llvm.func @_mlir_ciface__QQmain_omp_outline_1() attributes {llvm.emit_c_interface} {
    llvm.call @_QQmain_omp_outline_1() : () -> ()
    llvm.return
  }
}

Using C program to call the function and build with clang args -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx1103 it's normal when running on the CPU host, but on AMDGPU it raise errors even if replace llvm_omp_target_alloc_device to llvm.alloc and with a smaller size:

Libomptarget error: Host ptr 0x0000560ed81500a1 does not have a matching target pointer.
Libomptarget error: Consult https://openmp.llvm.org/design/Runtimes.html for debugging options.
Libomptarget error: Source location information not present. Compile with -g or -gline-tables-only.
Libomptarget fatal error 1: failure of target construct while offloading is mandatory

The C code has similar function works normally on the AMDGPU:

int main() {
    double* a = (double*)llvm_omp_target_alloc_device(4096*4096*sizeof(double) , omp_get_default_device());
    double* b = (double*)llvm_omp_target_alloc_device(4096*4096*sizeof(double) , omp_get_default_device());
    #pragma omp target teams map(tofrom: a, b)
    #pragma omp parallel for
        for(int i=0; i<4096; i++){
            for(int j=0; j<4096; j++){
                for (int k=0; k<4096; k++){
                a[i*4096+j] = i * j;
                b[j*4096+k] = j / i;
                }
            }
        }
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions