Skip to content

Commit

Permalink
[Flang] Lower the transpose intrinsic
Browse files Browse the repository at this point in the history
Tranpose intrinsic performs the transpose matrix operation for arrays
of rank 2. The intrinsic is lowered to a runtime call.

This is part of the upstreaming effort from the fir-dev branch in [1].
[1] https://github.com/flang-compiler/f18-llvm-project

Reviewed By: clementval

Differential Revision: https://reviews.llvm.org/D121895

Co-authored-by: Valentin Clement <clementval@gmail.com>
Co-authored-by: Jean Perier <jperier@nvidia.com>
  • Loading branch information
3 people committed Mar 17, 2022
1 parent 67e9151 commit e525245
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
31 changes: 31 additions & 0 deletions flang/lib/Lower/IntrinsicCall.cpp
Expand Up @@ -505,6 +505,8 @@ struct IntrinsicLibrary {
void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTransfer(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTranspose(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
Expand Down Expand Up @@ -805,6 +807,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genTransfer,
{{{"source", asAddr}, {"mold", asAddr}, {"size", asValue}}},
/*isElemental=*/false},
{"transpose",
&I::genTranspose,
{{{"matrix", asAddr}}},
/*isElemental=*/false},
{"trim", &I::genTrim, {{{"string", asAddr}}}, /*isElemental=*/false},
{"ubound",
&I::genUbound,
Expand Down Expand Up @@ -3063,6 +3069,30 @@ IntrinsicLibrary::genUbound(mlir::Type resultType,
return mlir::Value();
}

// TRANSPOSE
fir::ExtendedValue
IntrinsicLibrary::genTranspose(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {

assert(args.size() == 1);

// Handle source argument
mlir::Value source = builder.createBox(loc, args[0]);

// Create mutable fir.box to be passed to the runtime for the result.
mlir::Type resultArrayType = builder.getVarLenSeqTy(resultType, 2);
fir::MutableBoxValue resultMutableBox =
fir::factory::createTempMutableBox(builder, loc, resultArrayType);
mlir::Value resultIrBox =
fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
// Call runtime. The runtime is allocating the result.
fir::runtime::genTranspose(builder, loc, resultIrBox, source);
// Read result from mutable fir.box and add it to the list of temps to be
// finalized by the StatementContext.
return readAndAddCleanUp(resultMutableBox, resultType,
"unexpected result for TRANSPOSE");
}

// TRIM
fir::ExtendedValue
IntrinsicLibrary::genTrim(mlir::Type resultType,
Expand All @@ -3080,6 +3110,7 @@ IntrinsicLibrary::genTrim(mlir::Type resultType,
// finalized by the StatementContext.
return readAndAddCleanUp(resultMutableBox, resultType, "TRIM");
}

// UNPACK
fir::ExtendedValue
IntrinsicLibrary::genUnpack(mlir::Type resultType,
Expand Down
24 changes: 24 additions & 0 deletions flang/test/Lower/Intrinsics/transpose.f90
@@ -0,0 +1,24 @@
! RUN: bbc -emit-fir %s -o - | FileCheck %s

! CHECK-LABEL: func @_QPtranspose_test(
! CHECK-SAME: %[[source:.*]]: !fir.ref<!fir.array<2x3xf32>>{{.*}}) {
subroutine transpose_test(mat)
! CHECK: %[[resultDescr:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xf32>>>
real :: mat(2,3)
call bar_transpose_test(transpose(mat))
! CHECK: %[[sourceBox:.*]] = fir.embox %[[source]]({{.*}}) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.box<!fir.array<2x3xf32>>
! CHECK: %[[zeroArray:.*]] = fir.zero_bits !fir.heap<!fir.array<?x?xf32>
! CHECK: %[[c0:.*]] = arith.constant 0 : index
! CHECK: %[[shapeResult:.*]] = fir.shape %[[c0]], %[[c0]] : (index, index) -> !fir.shape<2>
! CHECK: %[[resultBox:.*]] = fir.embox %[[zeroArray]](%[[shapeResult]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
! CHECK: fir.store %[[resultBox]] to %[[resultDescr]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
! CHECK: %[[resultOpaque:.*]] = fir.convert %[[resultDescr]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<none>>
! CHECK: %[[sourceOpaque:.*]] = fir.convert %[[sourceBox]] : (!fir.box<!fir.array<2x3xf32>>) -> !fir.box<none>
! CHECK: %{{.*}} = fir.call @_FortranATranspose(%[[resultOpaque]], %[[sourceOpaque]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
! CHECK: %[[tmp1:.*]] = fir.load %[[resultDescr]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
! CHECK: %[[tmp2:.*]] = fir.box_addr %[[tmp1]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
! CHECK: %[[tmp3:.*]] = fir.convert %[[tmp2]] : (!fir.heap<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<3x2xf32>>
! CHECK: fir.call @_QPbar_transpose_test(%[[tmp3]]) : (!fir.ref<!fir.array<3x2xf32>>) -> ()
! CHECK: fir.freemem %[[tmp2]] : <!fir.array<?x?xf32>
end subroutine

0 comments on commit e525245

Please sign in to comment.