773 changes: 773 additions & 0 deletions flang/lib/Optimizer/Transforms/StackArrays.cpp

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@
! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler
! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! CHECK-NEXT: -fopenacc Enable OpenACC
! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! CHECK-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated
! CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! CHECK-NEXT: -help Display available options
Expand Down
4 changes: 4 additions & 0 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@
! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics
! HELP-NEXT: -fno-integrated-as Disable the integrated assembler
! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-NEXT: -fopenacc Enable OpenACC
! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-NEXT: -help Display available options
Expand Down Expand Up @@ -133,10 +135,12 @@
! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager
! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode
! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros
! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default)
! HELP-FC1-NEXT: -fopenacc Enable OpenACC
! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
! HELP-FC1-NEXT: -fpass-plugin=<dsopath> Load pass plugin from a dynamic shared object file (only with new pass manager).
! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated
! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size
! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages
! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV.
! HELP-FC1-NEXT: -help Display available options
Expand Down
20 changes: 15 additions & 5 deletions flang/test/Driver/fast_math.f90
Original file line number Diff line number Diff line change
@@ -1,25 +1,35 @@
! Test for correct forwarding of fast-math flags from the compiler driver to the
! frontend driver

! -Ofast => -ffast-math -O3
! -Ofast => -ffast-math -O3 -fstack-arrays
! RUN: %flang -Ofast -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST %s
! CHECK-OFAST: -fc1
! CHECK-OFAST-SAME: -ffast-math
! CHECK-OFAST-SAME: -fstack-arrays
! CHECK-OFAST-SAME: -O3

! TODO: update once -fstack-arrays is added
! RUN: %flang -fstack-arrays -fsyntax-only %s -o %t 2>&1 \
! RUN: %flang -fstack-arrays -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-STACK-ARRAYS %s
! CHECK-STACK-ARRAYS: warning: argument unused during compilation: '-fstack-arrays'
! CHECK-STACK-ARRAYS: -fc1
! CHECK-STACK-ARRAYS-SAME: -fstack-arrays

! -Ofast -fno-fast-math => -O3
! -Ofast -fno-fast-math => -O3 -fstack-arrays
! RUN: %flang -Ofast -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-FAST %s
! CHECK-OFAST-NO-FAST: -fc1
! CHECK-OFAST-NO-FAST-NOT: -ffast-math
! CHECK-OFAST-NO-FAST-SAME: -fstack-arrays
! CHECK-OFAST-NO-FAST-SAME: -O3

! -Ofast -fno-stack-arrays => -O3 -ffast-math
! RUN: %flang -Ofast -fno-stack-arrays -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-SA %s
! CHECK-OFAST-NO-SA: -fc1
! CHECK-OFAST-NO-SA-SAME: -ffast-math
! CHECK-OFAST-NO-SA-NOT: -fstack-arrays
! CHECK-OFAST-NO-SA-SAME: -O3

! -ffast-math => -ffast-math
! RUN: %flang -ffast-math -fsyntax-only -### %s -o %t 2>&1 \
! RUN: | FileCheck --check-prefix=CHECK-FFAST %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ subroutine allocation(x)
! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {uniq_name = "_QFallocationEx.alloc"}
! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?x!fir.char<1,?>>(%[[VAL_2]] : index), %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFallocationEx.alloc"}
! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) typeparams %[[VAL_2]] : (!fir.heap<!fir.array<?x!fir.char<1,?>>>, !fir.shape<1>, index) -> !fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>
! CHECK: fir.store %[[VAL_17]] to %[[VAL_3]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.char<1,?>>>>>
Expand Down Expand Up @@ -84,7 +84,7 @@ subroutine alloc_comp(x)
! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_10:.*]] = arith.cmpi sgt, %[[VAL_8]], %[[VAL_9]] : index
! CHECK: %[[VAL_11:.*]] = arith.select %[[VAL_10]], %[[VAL_8]], %[[VAL_9]] : index
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {uniq_name = "_QEa.alloc"}
! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_11]] {fir.must_be_heap = true, uniq_name = "_QEa.alloc"}
! CHECK: %[[VAL_13:.*]] = fir.shape %[[VAL_11]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_14:.*]] = fir.embox %[[VAL_12]](%[[VAL_13]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
! CHECK: fir.store %[[VAL_14]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/Intrinsics/c_loc.f90
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ subroutine c_loc_arraysection()
! CHECK: %[[VAL_2:.*]] = fir.zero_bits !fir.ptr<i32>
! CHECK: fir.store %[[VAL_2]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> {bindc_name = "ptr", uniq_name = "_QFc_loc_non_save_pointer_scalarEptr"}
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
! CHECK: %[[VAL_4:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFc_loc_non_save_pointer_scalarEi.alloc"}
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<i32>) -> !fir.ptr<i32>
! CHECK: fir.store %[[VAL_5]] to %[[VAL_1]] : !fir.ref<!fir.ptr<i32>>
! CHECK: %[[VAL_6:.*]] = arith.constant 10 : i32
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/Intrinsics/system_clock.f90
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ subroutine ss(count)
! CHECK: %[[V_6:[0-9]+]] = fir.alloca i64 {bindc_name = "count_rate_", fir.target, uniq_name = "_QFssEcount_rate_"}
! CHECK: %[[V_7:[0-9]+]] = fir.convert %[[V_6]] : (!fir.ref<i64>) -> !fir.ptr<i64>
! CHECK: fir.store %[[V_7]] to %[[V_4]] : !fir.ref<!fir.ptr<i64>>
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {uniq_name = "_QFssEcount_max.alloc"}
! CHECK: %[[V_8:[0-9]+]] = fir.allocmem i64 {fir.must_be_heap = true, uniq_name = "_QFssEcount_max.alloc"}
! CHECK: fir.store %[[V_8]] to %[[V_1]] : !fir.ref<!fir.heap<i64>>
! CHECK: %[[V_9:[0-9]+]] = fir.load %[[V_4]] : !fir.ref<!fir.ptr<i64>>
! CHECK: %[[V_10:[0-9]+]] = fir.load %[[V_1]] : !fir.ref<!fir.heap<i64>>
Expand Down
154 changes: 154 additions & 0 deletions flang/test/Transforms/stack-arrays.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s

! In order to verify the whole MLIR pipeline, make the driver generate LLVM IR.
! This only verifies that -fstack-arrays enables the stack-arrays pass, so only
! the first example is checked.
! RUN: %flang_fc1 -emit-llvm -o - -fstack-arrays %s | FileCheck --check-prefix=LLVM-IR %s

! Check the simple array value copy case: the assignment reads and writes
! sections of the same fixed-size array. The expectations after this subroutine
! require a fir.alloca of !fir.array<4xi32> and no fir.allocmem/fir.freemem.
subroutine array_value_copy_simple(arr)
integer, intent(inout) :: arr(4)
arr(3:4) = arr(1:2)
end subroutine
! CHECK-LABEL: func.func @_QParray_value_copy_simple(%arg0: !fir.ref<!fir.array<4xi32>>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<4xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

! LLVM-IR: array_value_copy_simple
! LLVM-IR-NOT: malloc
! LLVM-IR-NOT: free
! LLVM-IR: alloca [4 x i32]
! LLVM-IR-NOT: malloc
! LLVM-IR-NOT: free
! LLVM-IR: ret void
! LLVM-IR-NEXT: }

! Check the complex array value copy case: the elements are a derived type with
! an allocatable component and the dummy argument is assumed-shape. The
! expectations after the module require a fir.alloca of a dynamically sized
! array of that derived type and no fir.allocmem/fir.freemem.
module stuff
type DerivedWithAllocatable
integer, dimension(:), allocatable :: dat
end type

contains
subroutine array_value_copy_complex(arr)
type(DerivedWithAllocatable), intent(inout) :: arr(:)
arr(3:4) = arr(1:2)
end subroutine
end module
! CHECK: func.func
! CHECK-SAME: array_value_copy_complex
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<?x!fir.type<_QMstuffTderivedwithallocatable
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

! Pass a named constant array as an actual argument. The expectations after
! this subroutine require a fir.alloca of !fir.array<100xi32> and no
! fir.allocmem/fir.freemem.
subroutine parameter_array_init
integer, parameter :: p(100) = 42
call use_p(p)
end subroutine
! CHECK: func.func
! CHECK-SAME: parameter_array_init
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: fir.alloca !fir.array<100xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

! Pass a vector-subscripted section, x(v), to a dummy declared assumed-shape by
! an explicit interface. The expectations after this subroutine require a
! fir.alloca of !fir.array<?xf32> ahead of the call to takes_box and no
! fir.allocmem/fir.freemem.
subroutine test_vector_subscripted_section_to_box(v, x)
interface
subroutine takes_box(y)
real :: y(:)
end subroutine
end interface

integer :: v(:)
real :: x(:)
call takes_box(x(v))
end subroutine
! CHECK: func.func
! CHECK-SAME: test_vector_subscripted_section_to_box
! CHECK-NOT: fir.allocmem
! CHECK: fir.alloca !fir.array<?xf32>
! CHECK-NOT: fir.allocmem
! CHECK: fir.call @_QPtakes_box
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

! Pass a parenthesized array: (x) is an expression rather than a variable. The
! expectations after this subroutine require a fir.alloca of
! !fir.array<100xi32> ahead of the call to bar and no fir.allocmem/fir.freemem.
subroutine call_parenthesized_arg(x)
integer :: x(100)
call bar((x))
end subroutine
! CHECK: func.func
! CHECK-SAME: call_parenthesized_arg
! CHECK-NOT: fir.allocmem
! CHECK: fir.alloca !fir.array<100xi32>
! CHECK-NOT: fir.allocmem
! CHECK: fir.call @_QPbar
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }

! WHERE/ELSEWHERE construct assigning to an allocatable array. Only the
! presence of the function and its return are verified for now; see the TODO
! after this subroutine.
subroutine where_allocatable_assignments(a, b)
integer :: a(:)
integer, allocatable :: b(:)
where(b > 0)
b = a
elsewhere
b(:) = 0
end where
end subroutine
! TODO: broken: passing allocation through fir.result
! CHECK: func.func
! CHECK-SAME: where_allocatable_assignments
! CHECK: return
! CHECK-NEXT: }

! Assign an array constructor built from five external function calls to a
! fixed-size array. Only the presence of the function and its return are
! verified for now; see the TODO after this subroutine.
subroutine array_constructor(a, b)
real :: a(5), b
real, external :: f
a = [f(b), f(b+1), f(b+2), f(b+5), f(b+11)]
end subroutine
! TODO: broken: realloc
! CHECK: func.func
! CHECK-SAME: array_constructor
! CHECK: return
! CHECK-NEXT: }

! Assign an implied-do array constructor whose length depends on the dummy
! argument n. Only the presence of the function and its return are verified
! for now; see the TODO after this subroutine.
subroutine sequence(seq, n)
integer :: n, seq(n)
seq = [(i,i=1,n)]
end subroutine
! TODO: broken: realloc
! CHECK: func.func
! CHECK-SAME: sequence
! CHECK: return
! CHECK-NEXT: }

! Loop whose body assigns a reversed section of x to another section of x and
! then conditionally executes STOP. n = k*m = 100000000, the same extent as the
! !fir.array<100000000xi32> in the expectations after this subroutine, which
! require that fir.alloca to be the first operation in the function and no
! fir.allocmem/fir.freemem to remain.
subroutine CFGLoop(x)
integer, parameter :: k = 100, m=1000000, n = k*m
integer :: x(n)
logical :: has_error

do i=0,m-1
x(k*i+1:k*(i+1)) = x(k*(i+1):k*i+1:-1)
if (has_error(x, k)) stop
end do
end subroutine
! CHECK: func.func
! CHECK-SAME: cfgloop
! CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<100000000xi32>
! CHECK-NOT: fir.allocmem
! CHECK-NOT: fir.freemem
! CHECK: return
! CHECK-NEXT: }
309 changes: 309 additions & 0 deletions flang/test/Transforms/stack-arrays.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
// RUN: fir-opt --stack-arrays %s | FileCheck %s

// Simplest transformation: a constant-size heap allocation that is freed
// unconditionally before the only return. The expectations after this function
// require a single fir.alloca with the fir.freemem removed.
func.func @simple() {
%0 = fir.allocmem !fir.array<42xi32>
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @simple() {
// CHECK-NEXT: fir.alloca !fir.array<42xi32>
// CHECK-NEXT: return
// CHECK-NEXT: }

// Check fir.must_be_heap allocations are not moved. Apart from the
// fir.must_be_heap attribute this is the same body as @simple; the
// expectations after this function require the allocmem/freemem pair to be
// left unchanged.
func.func @must_be_heap() {
%0 = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @must_be_heap() {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32> {fir.must_be_heap = true}
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: return
// CHECK-NEXT: }

// Check the data-flow-analysis can detect cases where we aren't sure if memory
// is freed by the end of the function. Here the fir.freemem appears only in
// the "then" region of a fir.if, so the "else" path reaches the return without
// freeing. The expectations after this function require the fir.allocmem to
// be left in place.
func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
%7 = arith.constant 42 : index
%8 = fir.allocmem !fir.array<?xi32>, %7 {uniq_name = "_QFdfa1Earr.alloc"}
%9 = fir.load %arg0 : !fir.ref<!fir.logical<4>>
%10 = fir.convert %9 : (!fir.logical<4>) -> i1
fir.if %10 {
fir.freemem %8 : !fir.heap<!fir.array<?xi32>>
} else {
// nothing is freed on this path
}
return
}
// CHECK: func.func @dfa1(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "cond"}) {
// CHECK-NEXT: %[[C42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[C42]] {uniq_name = "_QFdfa1Earr.alloc"}
// CHECK-NEXT: %[[LOGICAL:.*]] = fir.load %arg0 : !fir.ref<!fir.logical<4>>
// CHECK-NEXT: %[[BOOL:.*]] = fir.convert %[[LOGICAL]] : (!fir.logical<4>) -> i1
// CHECK-NEXT: fir.if %[[BOOL]] {
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: } else {
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }

// Check scf.if (fir.if is not considered a branch operation). Same shape as
// @dfa1: the fir.freemem appears only in the "then" region, and the
// expectations after this function require the fir.allocmem to be left in
// place.
func.func @dfa2(%arg0: i1) {
%a = fir.allocmem !fir.array<1xi8>
scf.if %arg0 {
fir.freemem %a : !fir.heap<!fir.array<1xi8>>
} else {
// nothing is freed on this path
}
return
}
// CHECK: func.func @dfa2(%arg0: i1) {
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<1xi8>
// CHECK-NEXT: scf.if %arg0 {
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<1xi8>>
// CHECK-NEXT: } else {
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }

// check the alloca is placed after all operands become available. The size
// operand %3 is defined by the arith.addi, and the expectations after this
// function require the fir.alloca to follow that operation directly.
func.func @placement1() {
// do some stuff with other ssa values
%1 = arith.constant 1 : index
%2 = arith.constant 2 : index
%3 = arith.addi %1, %2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
return
}
// CHECK: func.func @placement1() {
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[ARG:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[ARG]]
// CHECK-NEXT: return
// CHECK-NEXT: }

// check that if there are no operands, then the alloca is placed early. The
// allocation has a constant shape, and the expectations after this function
// require the fir.alloca to be the first operation, ahead of the arith ops.
func.func @placement2() {
// do some stuff with other ssa values
%1 = arith.constant 1 : index
%2 = arith.constant 2 : index
%3 = arith.addi %1, %2 : index
// the allocation below takes no SSA operands
%4 = fir.allocmem !fir.array<42xi32>
// ...
fir.freemem %4 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @placement2() {
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[TWO:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[ONE]], %[[TWO]] : index
// CHECK-NEXT: return
// CHECK-NEXT: }

// check that stack allocations which must be placed in loops use stacksave.
// The size operand %3 is computed inside the fir.do_loop body. The
// expectations after this function require an llvm.stacksave call before the
// fir.alloca and an llvm.stackrestore call before the loop's fir.result.
func.func @placement3() {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
fir.result %3, %c1_i32 : index, i32
}
return
}
// CHECK: func.func @placement3() {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: fir.do_loop
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]])
// CHECK-NEXT: fir.result
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }

// check that stack save/restore are used in CFG loops. ^bb1 branches back to
// itself through the cf.cond_br, and the allocation and free are both in that
// block. The expectations after this function require an llvm.stacksave call
// before the fir.alloca and an llvm.stackrestore call before the cf.cond_br.
func.func @placement4(%arg0 : i1) {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
cf.br ^bb1
^bb1:
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
cf.cond_br %arg0, ^bb1, ^bb2
^bb2:
return
}
// CHECK: func.func @placement4(%arg0: i1) {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave() : () -> !fir.ref<i8>
// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: fir.call @llvm.stackrestore(%[[SP]]) : (!fir.ref<i8>) -> ()
// CHECK-NEXT: cf.cond_br %arg0, ^bb1, ^bb2
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }

// check that stacksave is not used when there is an intervening alloca. Same
// loop as @placement3 except for the fir.alloca between the allocmem and the
// freemem. The expectations after this function require the loop body to be
// left unchanged.
func.func @placement5() {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
%0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// intervening stack allocation between the allocmem and the freemem
%5 = fir.alloca i32
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
fir.result %3, %c1_i32 : index, i32
}
return
}
// CHECK: func.func @placement5() {
// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
// CHECK-NEXT: fir.do_loop
// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SUM]]
// CHECK-NEXT: %[[IDX:.*]] = fir.alloca i32
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: fir.result
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }

// check that stack save/restore are not used when the fir.allocmem and
// fir.freemem are in different blocks. The allocation is in ^bb1 and is freed
// in both ^bb2 and ^bb3, each of which branches back to ^bb1. The expectations
// after this function require the function to be left unchanged.
func.func @placement6(%arg0: i1) {
%c1 = arith.constant 1 : index
%c1_i32 = fir.convert %c1 : (index) -> i32
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
cf.br ^bb1
^bb1:
%3 = arith.addi %c1, %c2 : index
// operand is now available
%4 = fir.allocmem !fir.array<?xi32>, %3
// ...
cf.cond_br %arg0, ^bb2, ^bb3
^bb2:
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
cf.br ^bb1
^bb3:
// ...
fir.freemem %4 : !fir.heap<!fir.array<?xi32>>
cf.br ^bb1
}
// CHECK: func.func @placement6(%arg0: i1) {
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[c1_i32:.*]] = fir.convert %[[c1]] : (index) -> i32
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[c10:.*]] = arith.constant 10 : index
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: %[[ADD:.*]] = arith.addi %[[c1]], %[[c2]] : index
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[ADD]]
// CHECK-NEXT: cf.cond_br %arg0, ^bb2, ^bb3
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: ^bb3:
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xi32>>
// CHECK-NEXT: cf.br ^bb1
// CHECK-NEXT: }

// Check multiple returns, where the memory is always freed: both ^bb1 and
// ^bb2 free the allocation before returning. The expectations after this
// function require a fir.alloca in the entry block and both fir.freemem
// operations removed.
func.func @returns(%arg0: i1) {
%0 = fir.allocmem !fir.array<42xi32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
^bb2:
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
}
// CHECK: func.func @returns(%[[COND:.*]]: i1) {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.alloca !fir.array<42xi32>
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: return
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }

// Check multiple returns, where the memory is not freed on one branch: ^bb1
// frees the allocation but ^bb2 returns without freeing it. The expectations
// after this function require the function to be left unchanged.
func.func @returns2(%arg0: i1) {
%0 = fir.allocmem !fir.array<42xi32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
fir.freemem %0 : !fir.heap<!fir.array<42xi32>>
return
^bb2:
// no fir.freemem on this path
return
}
// CHECK: func.func @returns2(%[[COND:.*]]: i1) {
// CHECK-NEXT: %[[ALLOC:.*]] = fir.allocmem !fir.array<42xi32>
// CHECK-NEXT: cf.cond_br %[[COND]], ^bb1, ^bb2
// CHECK-NEXT: ^bb1:
// CHECK-NEXT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: return
// CHECK-NEXT: ^bb2:
// CHECK-NEXT: return
// CHECK-NEXT: }

// Check allocations are not moved outside of an omp region. The allocation
// and its free are both nested inside omp.sections/omp.section. The
// expectations after this function currently require the allocmem/freemem
// pair to stay unchanged inside the omp.section.
func.func @omp_placement1() {
omp.sections {
omp.section {
%mem = fir.allocmem !fir.array<42xi32>
fir.freemem %mem : !fir.heap<!fir.array<42xi32>>
omp.terminator
}
omp.terminator
}
return
}
// CHECK: func.func @omp_placement1() {
// CHECK-NEXT: omp.sections {
// CHECK-NEXT: omp.section {
// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
// TODO: this allocation should be moved to the stack. Unfortunately, the data
// flow analysis fails to propagate the lattice out of the omp region to the
// return statement.
// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: return
// CHECK-NEXT: }