diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 4fe83d41c3b96..609a1fc9fb02c 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -557,8 +557,8 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter, mlir::Value src = op.getSrc(); if (srcTy.isInteger(1)) { // i1 is not a supported type in the descriptor and it is actually coming - // from a LOGICAL constant. Store it as a fir.logical. - srcTy = fir::LogicalType::get(rewriter.getContext(), 4); + // from a LOGICAL constant. Use the destination type to avoid mismatch. + srcTy = dstEleTy; src = createConvertOp(rewriter, loc, srcTy, src); addr = builder.createTemporary(loc, srcTy); fir::StoreOp::create(builder, loc, src, addr); @@ -650,7 +650,7 @@ struct CUFDataTransferOpConversion if (fir::isa_trivial(srcTy) && !fir::isa_trivial(dstTy)) { // Initialization of an array from a scalar value should be implemented - // via a kernel launch. Use the flan runtime via the Assign function + // via a kernel launch. Use the flang runtime via the Assign function // until we have more infrastructure. mlir::Value src = emboxSrc(rewriter, op, symtab); mlir::Value dst = emboxDst(rewriter, op, symtab); diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index a724d9f681fb6..669300cf64737 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -463,13 +463,13 @@ func.func @_QPlogical_cst() { } // CHECK-LABEL: func.func @_QPlogical_cst() -// CHECK: %[[DESC:.*]] = fir.alloca !fir.box> -// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<4> -// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<4> -// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref> -// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref>) -> !fir.box> -// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref>> -// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref>>) -> !fir.ref> +// CHECK: %[[DESC:.*]] = fir.alloca !fir.box> +// CHECK: %[[CONST:.*]] = fir.alloca !fir.logical<1> +// CHECK: %[[CONV:.*]] = fir.convert %false : (i1) -> !fir.logical<1> +// CHECK: fir.store %[[CONV]] to %[[CONST]] : !fir.ref> +// CHECK: %[[EMBOX:.*]] = fir.embox %[[CONST]] : (!fir.ref>) -> !fir.box> +// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref>> +// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> () func.func @_QPcallkernel(%arg0: !fir.box>> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "b"}, %arg2: !fir.ref {fir.bindc_name = "c"}) { @@ -603,5 +603,53 @@ func.func @_QPsub20() { // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[BOX_ALLOCA]] : (!fir.ref>) -> !fir.ref> // CHECK: fir.call @_FortranACUFDataTransferCstDesc(%13, %[[BOX_NONE]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> () +func.func @_QPsub28() { + %0 = fir.dummy_scope : !fir.dscope + %1 = cuf.alloc !fir.box>>> {bindc_name = "id2", data_attr = #cuf.cuda, uniq_name = "_QFsub28Eid2"} -> !fir.ref>>>> + %2 = fir.zero_bits !fir.heap>> + %c0 = arith.constant 0 : index + %3 = fir.shape %c0, %c0 : (index, index) -> !fir.shape<2> + %4 = fir.embox %2(%3) {allocator_idx = 2 : i32} : (!fir.heap>>, !fir.shape<2>) -> !fir.box>>> + fir.store %4 to %1 : !fir.ref>>>> + %5:2 = hlfir.declare %1 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub28Eid2"} : (!fir.ref>>>>) -> (!fir.ref>>>>, !fir.ref>>>>) + %c1 = arith.constant 1 : index + %c10_i32 = arith.constant 10 : i32 + %c0_i32 = arith.constant 0 : i32 + %6 = fir.convert %5#0 : (!fir.ref>>>>) -> !fir.ref> + %7 = fir.convert %c1 : (index) -> i64 + %8 = fir.convert %c10_i32 : (i32) -> i64 + fir.call @_FortranAAllocatableSetBounds(%6, %c0_i32, %7, %8) fastmath : (!fir.ref>, i32, i64, i64) -> () + %c1_0 = arith.constant 1 : index + %c10_i32_1 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %9 = fir.convert %5#0 : (!fir.ref>>>>) -> !fir.ref> + %10 = fir.convert %c1_0 : (index) -> i64 + %11 = fir.convert %c10_i32_1 : (i32) -> i64 + fir.call @_FortranAAllocatableSetBounds(%9, %c1_i32, %10, %11) fastmath : (!fir.ref>, i32, i64, i64) -> () + %12 = cuf.allocate %5#0 : !fir.ref>>>> {data_attr = #cuf.cuda} -> i32 + %false = arith.constant false + cuf.data_transfer %false to %5#0 {transfer_kind = #cuf.cuda_transfer} : i1, !fir.ref>>>> + %13 = fir.load %5#0 : !fir.ref>>>> + %14 = fir.box_addr %13 : (!fir.box>>>) -> !fir.heap>> + %15 = fir.convert %14 : (!fir.heap>>) -> i64 + %c0_i64 = arith.constant 0 : i64 + %16 = arith.cmpi ne, %15, %c0_i64 : i64 + fir.if %16 { + %17 = cuf.deallocate %5#0 : !fir.ref>>>> {data_attr = #cuf.cuda} -> i32 + } + cuf.free %5#0 : !fir.ref>>>> {data_attr = #cuf.cuda} + return +} + +// CHECK-LABEL: func.func @_QPsub28() +// CHECK: %[[DESC:.*]] = fir.alloca !fir.box> +// CHECK: %[[L8:.*]] = fir.alloca !fir.logical<8> +// CHECK: %[[FALSE:.*]] = fir.convert %false{{.*}} : (i1) -> !fir.logical<8> +// CHECK: fir.store %[[FALSE]] to %[[L8]] : !fir.ref> +// CHECK: %[[EMBOX:.*]] = fir.embox %[[L8]] : (!fir.ref>) -> !fir.box> +// CHECK: fir.store %[[EMBOX]] to %[[DESC]] : !fir.ref>> +// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref>>) -> !fir.ref> +// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> () + } // end of module