31 changes: 31 additions & 0 deletions flang/test/Lower/OpenMP/copyin-order.f90
@@ -0,0 +1,31 @@
!RUN: bbc -fopenmp -emit-hlfir -o - %s | FileCheck %s

!https://github.com/llvm/llvm-project/issues/91205

!CHECK: omp.parallel if(%{{[0-9]+}} : i1) {
!CHECK: %[[THP1:[0-9]+]] = omp.threadprivate %{{[0-9]+}}#1
!CHECK: %[[DCL1:[0-9]+]]:2 = hlfir.declare %[[THP1]] {uniq_name = "_QFcopyin_scalar_arrayEx1"}
!CHECK: %[[LD1:[0-9]+]] = fir.load %{{[0-9]+}}#0
!CHECK: hlfir.assign %[[LD1]] to %[[DCL1]]#0 temporary_lhs
!CHECK: %[[THP2:[0-9]+]] = omp.threadprivate %{{[0-9]+}}#1
!CHECK: %[[SHP2:[0-9]+]] = fir.shape %c{{[0-9]+}}
!CHECK: %[[DCL2:[0-9]+]]:2 = hlfir.declare %[[THP2]](%[[SHP2]]) {uniq_name = "_QFcopyin_scalar_arrayEx2"}
!CHECK: hlfir.assign %{{[0-9]+}}#0 to %[[DCL2]]#0 temporary_lhs
!CHECK: omp.barrier
!CHECK: fir.call @_QPsub1(%[[DCL1]]#1, %[[DCL2]]#1)
!CHECK: omp.terminator
!CHECK: }

subroutine copyin_scalar_array()
integer(kind=4), save :: x1
integer(kind=8), save :: x2(10)
!$omp threadprivate(x1, x2)

! Have x1 appear before x2 in the AST node for the `parallel` construct,
! but at the same time have them in a different order in `copyin`.
!$omp parallel if (x1 .eq. x2(1)) copyin(x2, x1)
call sub1(x1, x2)
!$omp end parallel

end

104 changes: 51 additions & 53 deletions flang/test/Lower/OpenMP/sections.f90
@@ -10,12 +10,12 @@
!CHECK: %[[COUNT_DECL:.*]]:2 = hlfir.declare %[[COUNT]] {uniq_name = "_QFEcount"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[ETA:.*]] = fir.alloca f32 {bindc_name = "eta", uniq_name = "_QFEeta"}
!CHECK: %[[CONST_1:.*]] = arith.constant 4 : i64
!CHECK: %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
!CHECK: %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"}
!CHECK: %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.sections allocate(%[[CONST_1]] : i64 -> %[[COUNT_DECL]]#1 : !fir.ref<i32>) {
!CHECK: omp.section {
!CHECK: %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
!CHECK: %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"}
!CHECK: %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[CONST5:.*]] = arith.constant 5 : i32
!CHECK: hlfir.assign %[[CONST5]] to %[[COUNT_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: %[[TEMP_COUNT:.*]] = fir.load %[[COUNT_DECL]]#0 : !fir.ref<i32>
@@ -26,21 +26,13 @@
!CHECK: omp.terminator
!CHECK: }
!CHECK: omp.section {
!CHECK: %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
!CHECK: %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"}
!CHECK: %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_DOUBLE_COUNT_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_DOUBLE_COUNT_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: omp.section {
!CHECK: %[[PRIVATE_ETA:.*]] = fir.alloca f32 {bindc_name = "eta", pinned, uniq_name = "_QFEeta"}
!CHECK: %[[PRIVATE_ETA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ETA]] {uniq_name = "_QFEeta"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[PRIVATE_DOUBLE_COUNT:.*]] = fir.alloca i32 {bindc_name = "double_count", pinned, uniq_name = "_QFEdouble_count"}
!CHECK: %[[PRIVATE_DOUBLE_COUNT_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_DOUBLE_COUNT]] {uniq_name = "_QFEdouble_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_ETA_DECL]]#0 : !fir.ref<f32>
!CHECK: %[[CONST:.*]] = arith.constant 7.000000e+00 : f32
!CHECK: %[[RESULT:.*]] = arith.subf %[[TEMP]], %[[CONST]] {{.*}}: f32
@@ -88,12 +80,12 @@ end program sample

!CHECK: func @_QPfirstprivate(%[[ARG:.*]]: !fir.ref<f32> {fir.bindc_name = "alpha"}) {
!CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
!CHECK: %[[PRIVATE_ALPHA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ALPHA]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[ARG_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_ALPHA_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: %[[PRIVATE_ALPHA:.*]] = fir.alloca f32 {bindc_name = "alpha", pinned, uniq_name = "_QFfirstprivateEalpha"}
!CHECK: %[[PRIVATE_ALPHA_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_ALPHA]] {uniq_name = "_QFfirstprivateEalpha"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[ARG_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_ALPHA_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: omp.terminator
@@ -126,11 +118,11 @@ subroutine lastprivate()
integer :: x
!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivateEx"}
!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.sections {
!$omp sections lastprivate(x)
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[CONST10:.*]] = arith.constant 10 : i32
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[RESULT:.*]] = arith.muli %[[CONST10]], %[[TEMP]] : i32
@@ -141,17 +133,12 @@
x = x * 10

!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: }
!CHECK: omp.terminator
!CHECK: }
!$omp section
@@ -160,14 +147,14 @@
!CHECK: }
!$omp end sections

!CHECK: omp.sections {
!$omp sections firstprivate(x) lastprivate(x)
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
!CHECK: omp.sections {
!$omp sections firstprivate(x) lastprivate(x)
!CHECK: omp.section {
!CHECK: %[[CONST:.*]] = arith.constant 10 : i32
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
@@ -177,20 +164,12 @@
!$omp section
x = x * 10
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: }
!CHECK: omp.terminator
!CHECK: }
!$omp section
@@ -199,14 +178,14 @@
!CHECK: }
!$omp end sections

!CHECK: omp.sections nowait {
!$omp sections firstprivate(x) lastprivate(x)
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
!CHECK: omp.sections nowait {
!$omp sections firstprivate(x) lastprivate(x)
!CHECK: omp.section {
!CHECK: %[[CONST:.*]] = arith.constant 10 : i32
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[RESULT:.*]] = arith.muli %[[CONST]], %[[TEMP]] : i32
@@ -216,33 +195,25 @@
!$omp section
x = x * 10
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: %[[CONST:.*]] = arith.constant 1 : i32
!CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32
!CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref<i32>
!CHECK: %[[TRUE:.*]] = arith.constant true
!CHECK: fir.if %[[TRUE]] {
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.barrier
!CHECK: }
!CHECK: omp.terminator
!CHECK: }
!$omp section
x = x + 1
!CHECK: omp.terminator
!CHECK: }
!CHECK: omp.barrier
!$omp end sections nowait

!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: cf.br ^bb1
!CHECK: ^bb1: // pred: ^bb0
!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
@@ -257,22 +228,49 @@
!CHECK: }
!CHECK: return
!CHECK: }

!$omp sections lastprivate(x)
!$omp section
goto 30
30 x = x + 1
!$omp end sections
end subroutine

!CHECK-LABEL: func @_QPlastprivate2
!CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlastprivate2Ex"}
!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFlastprivate2Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[Y:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFlastprivate2Ey"}
!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFlastprivate2Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivate2Ex"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivate2Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate2Ey"}
!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y]] {uniq_name = "_QFlastprivate2Ey"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: %[[TEMP2:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[TEMP2]] to %[[Y_DECL]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: omp.terminator
!CHECK: }
!CHECK: omp.terminator
!CHECK: }
subroutine lastprivate2()
integer :: x, y

!$omp sections lastprivate(x) lastprivate(y)
!$omp section
x = y + 1
!$omp end sections
end subroutine

!CHECK-LABEL: func @_QPunstructured_sections_privatization
subroutine unstructured_sections_privatization()
!CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFunstructured_sections_privatizationEx"}
!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: cf.br ^bb1
!CHECK: ^bb1: // pred: ^bb0
!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
Expand All @@ -288,12 +286,12 @@ subroutine unstructured_sections_privatization()
goto 40
40 x = x + 1
!$omp end sections
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: %[[PRIVATE_X:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFunstructured_sections_privatizationEx"}
!CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFunstructured_sections_privatizationEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: omp.sections {
!CHECK: omp.section {
!CHECK: cf.br ^bb1
!CHECK: ^bb1:
!CHECK: %[[INNER_PRIVATE_X:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
2 changes: 1 addition & 1 deletion flang/test/Semantics/cuf13.cuf
@@ -1,4 +1,4 @@
! RUN: %flang -fc1 -x cuda -fdebug-unparse %s | FileCheck %s
! RUN: %flang_fc1 -x cuda -fdebug-unparse %s | FileCheck %s

module matching
interface sub
54 changes: 51 additions & 3 deletions lld/MachO/ObjC.cpp
@@ -186,13 +186,26 @@ ObjcCategoryChecker::ObjcCategoryChecker()
roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
methodLayout(target->wordSize) {}

// \p r must point to an offset within a cstring section.
// \p r must point to an offset within a CStringInputSection or a
// ConcatInputSection.
static StringRef getReferentString(const Reloc &r) {
if (auto *isec = r.referent.dyn_cast<InputSection *>())
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);

auto *sym = cast<Defined>(r.referent.get<Symbol *>());
return cast<CStringInputSection>(sym->isec())
->getStringRefAtOffset(sym->value + r.addend);
auto *symIsec = sym->isec();
auto symOffset = sym->value + r.addend;

if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
return s->getStringRefAtOffset(symOffset);

if (isa<ConcatInputSection>(symIsec)) {
auto strData = symIsec->data.slice(symOffset);
const char *pszData = reinterpret_cast<const char *>(strData.data());
return StringRef(pszData, strnlen(pszData, strData.size()));
}

llvm_unreachable("unknown reference section in getReferentString");
}

void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -420,6 +433,7 @@ class ObjcCategoryMerger {
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);

void eraseISec(ConcatInputSection *isec);
void removeRefsToErasedIsecs();
void eraseMergedCategories();

void generateCatListForNonErasedCategories(
@@ -478,6 +492,8 @@ class ObjcCategoryMerger {
std::vector<ConcatInputSection *> &allInputSections;
// Map of base class Symbol to list of InfoInputCategory's for it
DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
// Set for tracking InputSections erased via eraseISec
DenseSet<InputSection *> erasedIsecs;

// Normally, the binary data comes from the input files, but since we're
// generating binary data ourselves, we use the below array to store it in.
@@ -518,6 +534,8 @@ void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
Symbol *
ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
uint32_t offset) {
if (!isec)
return nullptr;
const Reloc *reloc = isec->getRelocAt(offset);

if (!reloc)
@@ -1141,6 +1159,8 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
}

void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
erasedIsecs.insert(isec);

isec->live = false;
for (auto &sym : isec->symbols)
sym->used = false;
@@ -1175,6 +1195,7 @@ void ObjcCategoryMerger::eraseMergedCategories() {
continue;

eraseISec(catInfo.catBodyIsec);

tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
catLayout.instanceMethodsOffset);
@@ -1188,6 +1209,33 @@
catLayout.instancePropsOffset);
}
}

removeRefsToErasedIsecs();
}

// The compiler may generate references to categories inside the addrsig
// section. This function will erase these references.
void ObjcCategoryMerger::removeRefsToErasedIsecs() {
for (InputSection *isec : inputSections) {
if (isec->getName() != section_names::addrSig)
continue;

auto removeRelocs = [this](Reloc &r) {
auto *isec = dyn_cast_or_null<ConcatInputSection>(
r.referent.dyn_cast<InputSection *>());
if (!isec) {
Defined *sym =
dyn_cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>());
if (sym)
isec = dyn_cast<ConcatInputSection>(sym->isec());
}
if (!isec)
return false;
return erasedIsecs.count(isec) > 0;
};

llvm::erase_if(isec->relocs, removeRelocs);
}
}

void ObjcCategoryMerger::doMerge() {
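getReferentString above now also accepts strings living in a ConcatInputSection (the accompanying test moves a category name from a cstring_literals section into __DATA,__objc_const), reading at most to the end of the section data. A standalone sketch of that bounded-read pattern, with the vector and offset as hypothetical stand-ins for symIsec->data and symOffset; strnlen is POSIX:

#include <cstring>
#include <string>
#include <vector>

// Read a NUL-terminated string at `offset` without running past the end of
// the section data -- the StringRef(pszData, strnlen(...)) pattern above.
static std::string referentString(const std::vector<char> &data,
                                  size_t offset) {
  const char *p = data.data() + offset;
  return std::string(p, strnlen(p, data.size() - offset));
}

Unlike strlen, strnlen stops at the section boundary even when the data is not NUL-terminated.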
5 changes: 4 additions & 1 deletion lld/test/MachO/objc-category-merging-extern-class-minimal.s
@@ -118,7 +118,7 @@ __OBJC_$_CATEGORY_MyBaseClass_$_Category01:
.quad 0
.long 64 ; 0x40
.space 4
.section __TEXT,__objc_classname,cstring_literals
.section __DATA,__objc_const
l_OBJC_CLASS_NAME_.1: ; @OBJC_CLASS_NAME_.1
.asciz "Category02"
.section __TEXT,__objc_methname,cstring_literals
@@ -153,3 +153,6 @@ L_OBJC_IMAGE_INFO:
.long 0
.long 96
.subsections_via_symbols

.addrsig
.addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01
12 changes: 5 additions & 7 deletions lldb/source/API/CMakeLists.txt
@@ -23,13 +23,13 @@ endif()
# Target to generate SBLanguages.h from Dwarf.def.
set(sb_languages_file
${CMAKE_CURRENT_BINARY_DIR}/../../include/lldb/API/SBLanguages.h)
add_custom_target(
lldb-sbapi-dwarf-enums
"${Python3_EXECUTABLE}"
add_custom_command(
COMMENT "Generating SBLanguages.h from Dwarf.def"
COMMAND "${Python3_EXECUTABLE}"
${LLDB_SOURCE_DIR}/scripts/generate-sbapi-dwarf-enum.py
${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
-o ${sb_languages_file}
BYPRODUCTS ${sb_languages_file}
OUTPUT ${sb_languages_file}
DEPENDS ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat/Dwarf.def
WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
)
@@ -113,9 +113,7 @@ add_lldb_library(liblldb SHARED ${option_framework}
SystemInitializerFull.cpp
${lldb_python_wrapper}
${lldb_lua_wrapper}

DEPENDS
lldb-sbapi-dwarf-enums
${sb_languages_file}

LINK_LIBS
lldbBreakpoint
50 changes: 0 additions & 50 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp
@@ -23,26 +23,6 @@ llvm::StringRef DW_TAG_value_to_name(dw_tag_t tag) {
return s_unknown_tag_name;
}

const char *DW_AT_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::AttributeString(val);
if (llvmstr.empty()) {
snprintf(invalid, sizeof(invalid), "Unknown DW_AT constant: 0x%x", val);
return invalid;
}
return llvmstr.data();
}

const char *DW_FORM_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::FormEncodingString(val);
if (llvmstr.empty()) {
snprintf(invalid, sizeof(invalid), "Unknown DW_FORM constant: 0x%x", val);
return invalid;
}
return llvmstr.data();
}

const char *DW_OP_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::OperationEncodingString(val);
@@ -53,35 +33,5 @@ const char *DW_OP_value_to_name(uint32_t val) {
return llvmstr.data();
}

const char *DW_ATE_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::AttributeEncodingString(val);
if (llvmstr.empty()) {
snprintf(invalid, sizeof(invalid), "Unknown DW_ATE constant: 0x%x", val);
return invalid;
}
return llvmstr.data();
}

const char *DW_LANG_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::LanguageString(val);
if (llvmstr.empty()) {
snprintf(invalid, sizeof(invalid), "Unknown DW_LANG constant: 0x%x", val);
return invalid;
}
return llvmstr.data();
}

const char *DW_LNS_value_to_name(uint32_t val) {
static char invalid[100];
llvm::StringRef llvmstr = llvm::dwarf::LNStandardString(val);
if (llvmstr.empty()) {
snprintf(invalid, sizeof(invalid), "Unknown DW_LNS constant: 0x%x", val);
return invalid;
}
return llvmstr.data();
}

} // namespace dwarf
} // namespace lldb_private::plugin
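The wrappers deleted above each forwarded to an llvm::dwarf::*String helper, formatting unknown values into a function-local static buffer (not thread-safe). Callers can use the LLVM routines directly; a minimal sketch of the replacement pattern, with the fallback text chosen here purely for illustration:

#include <cstdint>
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

// Print a DW_AT_* name, handling unknown values at the call site instead of
// in a shared static buffer.
static void printAttribute(llvm::raw_ostream &os, uint32_t val) {
  llvm::StringRef name = llvm::dwarf::AttributeString(val);
  if (name.empty())
    os << llvm::format("DW_AT constant: 0x%x", val); // illustrative fallback
  else
    os << name;
}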
12 changes: 0 additions & 12 deletions lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h
@@ -15,22 +15,10 @@
namespace lldb_private::plugin {
namespace dwarf {

typedef uint32_t DRC_class; // Holds DRC_* class bitfields

llvm::StringRef DW_TAG_value_to_name(dw_tag_t tag);

const char *DW_AT_value_to_name(uint32_t val);

const char *DW_FORM_value_to_name(uint32_t val);

const char *DW_OP_value_to_name(uint32_t val);

const char *DW_ATE_value_to_name(uint32_t val);

const char *DW_LANG_value_to_name(uint32_t val);

const char *DW_LNS_value_to_name(uint32_t val);

} // namespace dwarf
} // namespace lldb_private::plugin

34 changes: 15 additions & 19 deletions lldb/tools/lldb-dap/lldb-dap.cpp
@@ -2774,32 +2774,28 @@ void request_dataBreakpointInfo(const llvm::json::Object &request) {
: "evaluation failed");
} else {
uint64_t load_addr = value.GetValueAsUnsigned();
addr = llvm::utohexstr(load_addr);
lldb::SBMemoryRegionInfo region;
lldb::SBError err =
g_dap.target.GetProcess().GetMemoryRegionInfo(load_addr, region);
if (err.Success()) {
if (!(region.IsReadable() || region.IsWritable())) {
body.try_emplace("dataId", nullptr);
body.try_emplace("description",
"memory region for address " + addr +
" has no read or write permissions");
} else {
lldb::SBData data = value.GetPointeeData();
if (data.IsValid())
size = llvm::utostr(data.GetByteSize());
else {
lldb::SBData data = value.GetPointeeData();
if (data.IsValid()) {
size = llvm::utostr(data.GetByteSize());
addr = llvm::utohexstr(load_addr);
lldb::SBMemoryRegionInfo region;
lldb::SBError err =
g_dap.target.GetProcess().GetMemoryRegionInfo(load_addr, region);
// Only lldb-server supports "qMemoryRegionInfo". So, don't fail this
// request if SBProcess::GetMemoryRegionInfo returns an error.
if (err.Success()) {
if (!(region.IsReadable() || region.IsWritable())) {
body.try_emplace("dataId", nullptr);
body.try_emplace("description",
"unable to get byte size for expression: " +
name.str());
"memory region for address " + addr +
" has no read or write permissions");
}
}
} else {
body.try_emplace("dataId", nullptr);
body.try_emplace("description",
"unable to get memory region info for address " +
addr);
"unable to get byte size for expression: " +
name.str());
}
}
} else {
4 changes: 1 addition & 3 deletions llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -261,7 +261,7 @@ function(llvm_ExternalProject_Add name source_dir)
set(sysroot_arg -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
endif()

if(CMAKE_CROSSCOMPILING OR _cmake_system_name STREQUAL AIX)
if(CMAKE_CROSSCOMPILING)
set(compiler_args -DCMAKE_ASM_COMPILER=${CMAKE_ASM_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -273,8 +273,6 @@ function(llvm_ExternalProject_Add name source_dir)
-DCMAKE_OBJDUMP=${CMAKE_OBJDUMP}
-DCMAKE_STRIP=${CMAKE_STRIP}
-DCMAKE_READELF=${CMAKE_READELF})
endif()
if(CMAKE_CROSSCOMPILING)
set(llvm_config_path ${LLVM_CONFIG_PATH})

if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
402 changes: 402 additions & 0 deletions llvm/docs/DirectX/DXContainer.rst

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions llvm/docs/DirectXUsage.rst
@@ -14,6 +14,7 @@ User Guide for the DirectX Target
:hidden:

DirectX/DXILArchitecture
DirectX/DXContainer

Introduction
============
@@ -81,6 +82,8 @@ code generation targets in LLVM, the LLVM codebase uses a more neutral name,
The ``DXContainer`` format is sparsely documented in the functional
specification, but a reference implementation exists in the
`DirectXShaderCompiler. <https://github.com/microsoft/DirectXShaderCompiler>`_.
The format is documented in the LLVM project docs as well (see
:doc:`DirectX/DXContainer`).

Support for generating ``DXContainer`` files in LLVM, is being added to the LLVM
MC layer for object streamers and writers, and to the Object and ObjectYAML
12 changes: 12 additions & 0 deletions llvm/include/llvm/Analysis/CFG.h
@@ -96,6 +96,18 @@ bool isPotentiallyReachableFromMany(
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);

/// Determine whether there is potentially a path from at least one block in
/// 'Worklist' to at least one block in 'StopSet' within a single function
/// without passing through any of the blocks in 'ExclusionSet'. Returns false
/// only if we can prove that once any block in 'Worklist' has been reached then
/// no blocks in 'StopSet' can be executed without passing through any blocks in
/// 'ExclusionSet'. Conservatively returns true.
bool isManyPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist,
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);

/// Return true if the control flow in \p RPOTraversal is irreducible.
///
/// This is a generic implementation to detect CFG irreducibility based on loop
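A sketch of how the new many-to-many query can be used, mirroring the CoroFrame change later in this PR; the helper name and parameters are illustrative, not part of the API:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// May any lifetime.start block reach a suspend block without first passing
// through a lifetime.end block? `true` is the conservative answer.
static bool lifetimeMayCrossSuspend(ArrayRef<BasicBlock *> StartBBs,
                                    ArrayRef<const BasicBlock *> SuspendBBs,
                                    ArrayRef<BasicBlock *> EndBBs,
                                    const DominatorTree *DT) {
  SmallVector<BasicBlock *> Worklist(StartBBs.begin(), StartBBs.end());
  SmallPtrSet<const BasicBlock *, 4> StopSet(SuspendBBs.begin(),
                                             SuspendBBs.end());
  SmallPtrSet<BasicBlock *, 4> ExclusionSet(EndBBs.begin(), EndBBs.end());
  return isManyPotentiallyReachableFromMany(Worklist, StopSet, &ExclusionSet,
                                            DT);
}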
32 changes: 25 additions & 7 deletions llvm/lib/Analysis/CFG.cpp
@@ -134,10 +134,21 @@ bool llvm::isPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist, const BasicBlock *StopBB,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
return isManyPotentiallyReachableFromMany(
Worklist, llvm::SmallPtrSet<const BasicBlock *, 1>{StopBB}, ExclusionSet,
DT, LI);
}

bool llvm::isManyPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist,
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
const LoopInfo *LI) {
// When a stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
DT = nullptr;
llvm::DenseMap<const BasicBlock *, bool> StopBBReachable;
for (auto *BB : StopSet)
StopBBReachable[BB] = DT && DT->isReachableFromEntry(BB);

// We can't skip directly from a block that dominates the stop block if the
// exclusion block is potentially in between.
@@ -155,19 +166,23 @@
}
}

const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
llvm::DenseMap<const BasicBlock *, const Loop *> StopLoops;
for (auto *StopBB : StopSet)
StopLoops[StopBB] = LI ? getOutermostLoop(LI, StopBB) : nullptr;

unsigned Limit = DefaultMaxBBsToExplore;
SmallPtrSet<const BasicBlock*, 32> Visited;
do {
BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
continue;
if (BB == StopBB)
if (StopSet.contains(BB))
return true;
if (ExclusionSet && ExclusionSet->count(BB))
continue;
if (DT && DT->dominates(BB, StopBB))
if (DT && llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
return StopBBReachable[StopBB] && DT->dominates(BB, StopBB);
}))
return true;

const Loop *Outer = nullptr;
@@ -179,7 +194,10 @@
// excluded block. Clear Outer so we process BB's successors.
if (LoopsWithHoles.count(Outer))
Outer = nullptr;
if (StopLoop && Outer == StopLoop)
if (llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
const Loop *StopLoop = StopLoops[StopBB];
return StopLoop && StopLoop == Outer;
}))
return true;
}

99 changes: 32 additions & 67 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11880,47 +11880,6 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
return true;
}

/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
BlockSize == 128) &&
"Only possible block sizes for REV are: 16, 32, 64, 128");

unsigned EltSz = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;

if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;

for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}

return true;
}

static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}

/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
@@ -12585,15 +12544,16 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
}

if (isREVMask(ShuffleMask, VT, 64))
unsigned NumElts = VT.getVectorNumElements();
unsigned EltSize = VT.getScalarSizeInBits();
if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);

if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size())) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
@@ -12615,15 +12575,15 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}

unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
if (isZIPMask(ShuffleMask, NumElts, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
if (isTRNMask(ShuffleMask, NumElts, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
@@ -12655,7 +12615,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
SrcLane -= NumElts;
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);

@@ -12675,7 +12635,6 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,

// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
@@ -14126,16 +14085,20 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
int DummyInt;
unsigned DummyUnsigned;

return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
unsigned EltSize = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isREVMask(M, EltSize, NumElts, 64) ||
isREVMask(M, EltSize, NumElts, 32) ||
isREVMask(M, EltSize, NumElts, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRNMask(M, NumElts, DummyUnsigned) ||
isUZPMask(M, NumElts, DummyUnsigned) ||
isZIPMask(M, NumElts, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isINSMask(M, NumElts, DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}

@@ -27486,15 +27449,15 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
return convertFromScalableVector(DAG, VT, Op);
}

unsigned EltSize = VT.getScalarSizeInBits();
for (unsigned LaneSize : {64U, 32U, 16U}) {
if (isREVMask(ShuffleMask, VT, LaneSize)) {
if (isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), LaneSize)) {
EVT NewVT =
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
unsigned RevOp;
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 8)
if (EltSize == 8)
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
else if (EltSz == 16)
else if (EltSize == 16)
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
else
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
@@ -27506,8 +27469,8 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
}
}

if (Subtarget->hasSVE2p1() && VT.getScalarSizeInBits() == 64 &&
isREVMask(ShuffleMask, VT, 128)) {
if (Subtarget->hasSVE2p1() && EltSize == 64 &&
isREVMask(ShuffleMask, EltSize, VT.getVectorNumElements(), 128)) {
if (!VT.isFloatingPoint())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::REVD_MERGE_PASSTHRU);

@@ -27519,11 +27482,12 @@
}

unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));

if (isTRNMask(ShuffleMask, VT, WhichResult)) {
if (isTRNMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
@@ -27566,11 +27530,12 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
return convertFromScalableVector(DAG, VT, Op);
}

if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
if (isZIPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult) &&
WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));

if (isUZPMask(ShuffleMask, VT, WhichResult)) {
if (isUZPMask(ShuffleMask, VT.getVectorNumElements(), WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
53 changes: 48 additions & 5 deletions llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6588,7 +6588,7 @@ static const unsigned PerfectShuffleTable[6561 + 1] = {
835584U, // <u,u,u,u>: Cost 0 copy LHS
0};

static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
inline unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");

// Special case zero-cost nop copies, from either LHS or RHS.
@@ -6623,8 +6623,8 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
/// Return true for zip1 or zip2 masks of the form:
/// <0, 8, 1, 9, 2, 10, 3, 11> or
/// <4, 12, 5, 13, 6, 14, 7, 15>
inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
inline bool isZIPMask(ArrayRef<int> M, unsigned NumElts,
unsigned &WhichResultOut) {
if (NumElts % 2 != 0)
return false;
// Check the first non-undef element for which half to use.
@@ -6656,8 +6656,8 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
/// Return true for uzp1 or uzp2 masks of the form:
/// <0, 2, 4, 6, 8, 10, 12, 14> or
/// <1, 3, 5, 7, 9, 11, 13, 15>
inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
unsigned &WhichResultOut) {
// Check the first non-undef element for which half to use.
unsigned WhichResult = 2;
for (unsigned i = 0; i != NumElts; i++) {
@@ -6680,6 +6680,49 @@ inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
return true;
}

/// Return true for trn1 or trn2 masks of the form:
/// <0, 8, 2, 10, 4, 12, 6, 14> or
/// <1, 9, 3, 11, 5, 13, 7, 15>
inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
unsigned &WhichResult) {
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}

/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
inline bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
BlockSize == 128) &&
"Only possible block sizes for REV are: 16, 32, 64, 128");

unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSize;

if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
return false;

for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}

return true;
}

} // namespace llvm

#endif
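With the helpers now taking a raw element count (plus element size for isREVMask) instead of an EVT, they can be shared between SelectionDAG and GlobalISel. A quick sanity check with concrete v4i32 masks, matching the doc comments above; assumes this header is included:

#include <cassert>

static void checkShuffleMaskHelpers() {
  unsigned Which;
  int Zip1[] = {0, 4, 1, 5}; // interleave the low halves -> zip1
  assert(llvm::isZIPMask(Zip1, /*NumElts=*/4, Which) && Which == 0);
  int Uzp2[] = {1, 3, 5, 7}; // odd elements of both inputs -> uzp2
  assert(llvm::isUZPMask(Uzp2, /*NumElts=*/4, Which) && Which == 1);
  int Trn1[] = {0, 4, 2, 6}; // even lanes interleaved -> trn1
  assert(llvm::isTRNMask(Trn1, /*NumElts=*/4, Which) && Which == 0);
  int Rev[] = {1, 0, 3, 2};  // 32-bit elements swapped within 64-bit blocks
  assert(llvm::isREVMask(Rev, /*EltSize=*/32, /*NumElts=*/4, /*BlockSize=*/64));
}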
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3968,8 +3968,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
if (LT.second.isFixedLengthVector() &&
LT.second.getVectorNumElements() == Mask.size() &&
(Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
(isZIPMask(Mask, LT.second, Unused) ||
isUZPMask(Mask, LT.second, Unused) ||
(isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
// Check for non-zero lane splats
all_of(drop_begin(Mask),
[&Mask](int M) { return M < 0 || M == Mask[0]; })))
79 changes: 2 additions & 77 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -20,6 +20,7 @@
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64LegalizerInfo.h"
@@ -77,50 +78,6 @@ struct ShuffleVectorPseudo {
ShuffleVectorPseudo() = default;
};

/// Check if a vector shuffle corresponds to a REV instruction with the
/// specified blocksize.
bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");

unsigned BlockElts = M[0] + 1;

// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSize;

if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
return false;

for (unsigned i = 0; i < NumElts; ++i) {
// Ignore undef indices.
if (M[i] < 0)
continue;
if (static_cast<unsigned>(M[i]) !=
(i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}

return true;
}

/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
bool isTRNMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
(M[i + 1] >= 0 &&
static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
return false;
}
return true;
}

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
Expand Down Expand Up @@ -163,38 +120,6 @@ std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
return std::make_pair(ReverseExt, Imm);
}

/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
// Skip undef indices.
if (M[i] < 0)
continue;
if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
return false;
}
return true;
}

/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
bool isZipMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
if (NumElts % 2 != 0)
return false;

// 0 means use ZIP1, 1 means use ZIP2.
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
(M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}

/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
Expand Down Expand Up @@ -308,7 +233,7 @@ bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
Register Dst = MI.getOperand(0).getReg();
unsigned NumElts = MRI.getType(Dst).getNumElements();
if (!isZipMask(ShuffleMask, NumElts, WhichResult))
if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
return false;
unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
Register V1 = MI.getOperand(1).getReg();
18 changes: 13 additions & 5 deletions llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -14,6 +14,9 @@
#include "RISCVInstrInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

@@ -32,6 +35,12 @@ class RISCVDeadRegisterDefinitions : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();
MachineFunctionPass::getAnalysisUsage(AU);
}

@@ -51,9 +60,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
LLVM_DEBUG(dbgs() << "***** RISCVDeadRegisterDefinitions *****\n");

bool MadeChange = false;
@@ -77,10 +86,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
// We should not have any relevant physreg defs that are replacable by
// zero before register allocation. So we just check for dead vreg defs.
Register Reg = MO.getReg();
if (!Reg.isVirtual() || (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
if (!Reg.isVirtual() || !MO.isDead())
continue;
LLVM_DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
MI.print(dbgs()));
@@ -89,8 +96,9 @@
LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
}
assert(LIS.hasInterval(Reg));
LIS.removeInterval(Reg);
MO.setReg(RISCV::X0);
MO.setIsDead();
LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
MI.print(dbgs()));
++NumDeadDefsReplaced;
4 changes: 1 addition & 3 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1600,9 +1600,7 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
continue;
}

Register RegDef = MI.getOperand(0).getReg();
assert(RegDef == RISCV::X0 || RegDef.isVirtual());
if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
if (!MI.getOperand(0).isDead())
Used.demandVL();

if (NextMI) {
9 changes: 6 additions & 3 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -390,13 +390,19 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}

bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

@@ -536,9 +542,6 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVInsertVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
addPass(createRISCVInsertReadWriteCSRPass());
addPass(createRISCVInsertWriteVXRMPass());
}
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86.td
@@ -873,6 +873,7 @@ def ProcessorFeatures {
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
TuningSlowDivide64,
TuningInsertVZEROUPPER,
TuningNoDomainDelayMov];

10 changes: 6 additions & 4 deletions llvm/lib/Target/X86/X86CallingConv.td
@@ -1063,11 +1063,13 @@ def CC_X86_64_Preserve_None : CallingConv<[
// - R10 'nest' parameter
// - RBX base pointer
// - R16 - R31 these are not available everywhere
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D,
R11D, R12D, R13D, R14D, R15D, EAX]>>,
// Use non-volatile registers first, so functions using this convention can
// call "normal" functions without saving and restoring incoming values:
CCIfType<[i32], CCAssignToReg<[R12D, R13D, R14D, R15D, EDI, ESI,
EDX, ECX, R8D, R9D, R11D, EAX]>>,

CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8, R9,
R11, R12, R13, R14, R15, RAX]>>,
CCIfType<[i64], CCAssignToReg<[R12, R13, R14, R15, RDI, RSI,
RDX, RCX, R8, R9, R11, RAX]>>,

// Otherwise it's the same as the regular C calling convention.
CCDelegateTo<CC_X86_64_C>
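The reordering matters because preserve_none arguments now arrive first in registers that the regular C convention treats as callee-saved (r12-r15): a preserve_none function can then call a normal function without spilling its own incoming arguments. An illustrative use, assuming Clang's __attribute__((preserve_none)) spelling for this convention:

void log_step(long v); // regular C calling convention

__attribute__((preserve_none)) void next_state(long a, long b) {
  log_step(a);     // a and b arrive in r12/r13, which log_step must preserve,
  log_step(a + b); // so no save/restore of the incoming values is needed
}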
@@ -1073,7 +1073,8 @@ void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
CI->getType());
Value *VR = ConstantInt::get(CI->getType(), RHS[i]);
Value *VR =
ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
if (i < N - 1)
B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
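The cast above fixes a signedness bug in the strcmp/strncmp inliner (StrNCmpInliner is part of AggressiveInstCombine): the C library compares bytes as unsigned char, but RHS[i] is a plain char, so a byte such as 0x80 would be sign-extended into the constant and could flip the comparison result on targets where char is signed. A scalar demonstration of the difference:

#include <cstdio>

int main() {
  char rhs = '\x80'; // -128 where char is signed
  int wrong = 'A' - rhs;                             // 65 - (-128) = 193
  int right = 'A' - static_cast<unsigned char>(rhs); // 65 - 128 = -63
  std::printf("%d %d\n", wrong, right); // opposite signs, opposite verdicts
  return 0;
}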
60 changes: 43 additions & 17 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
@@ -1440,17 +1441,22 @@ namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
const coro::Shape &CoroShape,
const SuspendCrossingInfo &Checker,
bool ShouldUseLifetimeStartInfo)
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
: PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker),
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {
for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends)
CoroSuspendBBs.insert(SuspendInst->getParent());
}

void visit(Instruction &I) {
Users.insert(&I);
Base::visit(I);
// If the pointer is escaped prior to CoroBegin, we have to assume it would
// be written into before CoroBegin as well.
if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
if (PI.isEscaped() &&
!DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) {
MayWriteBeforeCoroBegin = true;
}
}
@@ -1553,10 +1559,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// When we found the lifetime markers refers to a
// subrange of the original alloca, ignore the lifetime
// markers to avoid misleading the analysis.
if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
!Offset.isZero())
if (!IsOffsetKnown || !Offset.isZero())
return Base::visitIntrinsicInst(II);
switch (II.getIntrinsicID()) {
default:
return Base::visitIntrinsicInst(II);
LifetimeStarts.insert(&II);
case Intrinsic::lifetime_start:
LifetimeStarts.insert(&II);
LifetimeStartBBs.push_back(II.getParent());
break;
case Intrinsic::lifetime_end:
LifetimeEndBBs.insert(II.getParent());
break;
}
}

void visitCallBase(CallBase &CB) {
@@ -1586,14 +1601,17 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {

private:
const DominatorTree &DT;
const CoroBeginInst &CoroBegin;
const coro::Shape &CoroShape;
const SuspendCrossingInfo &Checker;
// All alias to the original AllocaInst, created before CoroBegin and used
// after CoroBegin. Each entry contains the instruction and the offset in the
// original Alloca. They need to be recreated after CoroBegin off the frame.
DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
SmallVector<BasicBlock *> LifetimeStartBBs{};
SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs{};
SmallPtrSet<const BasicBlock *, 2> CoroSuspendBBs{};
bool MayWriteBeforeCoroBegin{false};
bool ShouldUseLifetimeStartInfo{true};

@@ -1605,10 +1623,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// every basic block that uses the pointer to see if they cross suspension
// points. The uses cover both direct uses as well as indirect uses.
if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
for (auto *I : Users)
for (auto *S : LifetimeStarts)
if (Checker.isDefinitionAcrossSuspend(*S, I))
return true;
// If there is no explicit lifetime.end, then assume the address can
// cross suspension points.
if (LifetimeEndBBs.empty())
return true;

// If there is a path from a lifetime.start to a suspend without a
// corresponding lifetime.end, then the alloca's lifetime persists
// beyond that suspension point and the alloca must go on the frame.
llvm::SmallVector<BasicBlock *> Worklist(LifetimeStartBBs);
if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs,
&LifetimeEndBBs, &DT))
return true;
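
The new query deserves a plain restatement: the alloca must go on the coroutine frame if any suspend block is reachable from any lifetime.start block along a CFG path that never passes through a lifetime.end block. Below is a block-granular BFS sketch of that predicate; the real call, isManyPotentiallyReachableFromMany from llvm/Analysis/CFG.h, additionally takes the DominatorTree to prune the walk, and in-block instruction order is ignored in this sketch.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// Block-granular sketch: does any CFG path lead from a lifetime.start
// block to a suspend block without first hitting a lifetime.end block?
static bool reachesSuspendWithoutEnd(
    ArrayRef<BasicBlock *> Starts,
    const SmallPtrSetImpl<const BasicBlock *> &Suspends,
    const SmallPtrSetImpl<BasicBlock *> &Ends) {
  SmallVector<BasicBlock *> Worklist(Starts.begin(), Starts.end());
  SmallPtrSet<BasicBlock *, 16> Visited;
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    if (!Visited.insert(BB).second)
      continue;
    if (Ends.contains(BB))
      continue; // the lifetime provably ended on this path
    if (Suspends.contains(BB))
      return true; // still-live alloca crosses a suspend
    for (BasicBlock *Succ : successors(BB))
      Worklist.push_back(Succ);
  }
  return false;
}

The coro-lifetime-end.ll tests added later in this patch exercise the interesting outcomes: no lifetime.end at all, a lifetime.end only after coro.end, and a lifetime.end on just one branch.
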

// Addresses are guaranteed to be identical after every lifetime.start so
// we cannot use the local stack if the address escaped and there is a
// suspend point between lifetime markers. This should also cover the
Expand Down Expand Up @@ -1646,13 +1673,13 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
}

void handleMayWrite(const Instruction &I) {
if (!DT.dominates(&CoroBegin, &I))
if (!DT.dominates(CoroShape.CoroBegin, &I))
MayWriteBeforeCoroBegin = true;
}

bool usedAfterCoroBegin(Instruction &I) {
for (auto &U : I.uses())
if (DT.dominates(&CoroBegin, U))
if (DT.dominates(CoroShape.CoroBegin, U))
return true;
return false;
}
Expand All @@ -1661,7 +1688,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// We track all aliases created prior to CoroBegin but used after.
// These aliases may need to be recreated after CoroBegin if the alloca
// needs to live on the frame.
if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I))
return;

if (!IsOffsetKnown) {
Expand Down Expand Up @@ -2830,8 +2857,7 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
bool ShouldUseLifetimeStartInfo =
(Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce);
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
*Shape.CoroBegin, Checker,
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT, Shape, Checker,
ShouldUseLifetimeStartInfo};
Visitor.visitPtr(*AI);
if (!Visitor.getShouldLiveOnFrame())
Expand Down
20 changes: 14 additions & 6 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13169,6 +13169,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
} else {
assert(E->State == TreeEntry::StridedVectorize &&
"Expected either strided or conseutive stores.");
if (!E->ReorderIndices.empty()) {
SI = cast<StoreInst>(E->Scalars[E->ReorderIndices.front()]);
Ptr = SI->getPointerOperand();
}
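
Restating the fix: when the tree entry carries a reorder permutation, the strided store must be anchored at the pointer of the element the permutation places first, not at the last scalar visited. The store_reverse test near the end of this patch pins this down: the corrected base is element 7 rather than element 4, keeping the -8 stride. A toy model of the index choice, with illustrative names:

#include <cstddef>
#include <vector>

// Toy model, not SLP itself: pick the scalar index whose pointer anchors
// a strided store. With a reorder, the permutation's front element is the
// one written first, so its address is the base.
static std::size_t stridedStoreBaseIndex(
    const std::vector<std::size_t> &ReorderIndices,
    std::size_t LastVisitedIndex) {
  return ReorderIndices.empty() ? LastVisitedIndex : ReorderIndices.front();
}
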
Align CommonAlignment = computeCommonAlignment<StoreInst>(E->Scalars);
Type *StrideTy = DL->getIndexType(SI->getPointerOperandType());
auto *Inst = Builder.CreateIntrinsic(
Expand Down Expand Up @@ -15163,14 +15167,18 @@ bool BoUpSLP::collectValuesToDemote(
"Expected min/max intrinsics only.");
unsigned SignBits = OrigBitWidth - BitWidth;
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
nullptr, DT) &&
(!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL)) ||
unsigned Op0SignBits = ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
nullptr, DT);
unsigned Op1SignBits = ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
nullptr, DT);
return SignBits <= Op0SignBits &&
((SignBits != Op0SignBits &&
!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(0), Mask,
SimplifyQuery(*DL))) &&
SignBits <= ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
nullptr, DT) &&
(!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL)) ||
SignBits <= Op1SignBits &&
((SignBits != Op1SignBits &&
!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
});
};
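
As I read the tightened condition: truncating an operand of a signed min/max is still allowed when it has spare sign bits and is possibly negative, but an operand with exactly the required number of sign bits must now prove its dropped high bits are zero; the old code accepted !isKnownNonNegative alone, which the new smin-signed-zextended.ll test below appears to guard against. A toy restatement with precomputed inputs (names are mine):

// Per-operand legality for demoting a signed min/max from OrigBitWidth
// down to BitWidth, where SignBits = OrigBitWidth - BitWidth and
// OpHighBitsZero stands for MaskedValueIsZero over bits
// [BitWidth - 1, OrigBitWidth).
static bool minMaxOperandDemotable(unsigned SignBits, unsigned OpSignBits,
                                   bool OpKnownNonNegative,
                                   bool OpHighBitsZero) {
  if (SignBits > OpSignBits)
    return false; // truncation would drop live sign bits
  // With only exactly the required sign bits, "possibly negative" is no
  // longer enough on its own; the dropped bits must be provably zero.
  return (SignBits != OpSignBits && !OpKnownNonNegative) || OpHighBitsZero;
}
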
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_ZIP2 %v1, %v2
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<2 x s64>) = COPY $q0
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/arm64-uzp.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s

define <8 x i8> @vuzpi8(<8 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: vuzpi8:
Expand Down
137 changes: 96 additions & 41 deletions llvm/test/CodeGen/AArch64/arm64-zip.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for shuffle_zip1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip2
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shuffle_zip3

define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipi8:
Expand Down Expand Up @@ -223,12 +228,20 @@ define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
}

define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine2_v16i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine2_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip1.8b v2, v0, v1
; CHECK-GI-NEXT: zip2.8b v0, v0, v1
; CHECK-GI-NEXT: mov.d v2[1], v0[0]
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand All @@ -247,12 +260,20 @@ define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
}

define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine2_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine2_v8i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: zip1.8h v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine2_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip1.4h v2, v0, v1
; CHECK-GI-NEXT: zip2.4h v0, v0, v1
; CHECK-GI-NEXT: mov.d v2[1], v0[0]
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Expand All @@ -271,12 +292,20 @@ define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
}

define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine2_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: zip1.4s v0, v0, v1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine2_v4i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: zip1.4s v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine2_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip1.2s v2, v0, v1
; CHECK-GI-NEXT: zip2.2s v0, v0, v1
; CHECK-GI-NEXT: mov.d v2[1], v0[0]
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
%4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
%5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Expand All @@ -295,12 +324,20 @@ define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
}

define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine2_v16i8_undef:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: zip1.16b v0, v0, v1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine2_v16i8_undef:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip1.8b v2, v0, v1
; CHECK-GI-NEXT: zip2.8b v0, v0, v1
; CHECK-GI-NEXT: mov.d v2[1], v0[0]
; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand All @@ -320,29 +357,47 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {

; FIXME: This could be zip1 too; the 8,0,9,1,... pattern is handled.
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8first:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT: adrp x8, .LCPI25_0
; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine_v8i16_8first:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI25_0
; CHECK-SD-NEXT: fmov d2, d0
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0]
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_v8i16_8first:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT: adrp x8, .LCPI25_0
; CHECK-GI-NEXT: fmov d31, d1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <16 x i8> %3
}


; FIXME: This could be zip1 too; the 8,0,9,1,... pattern is handled.
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v8i16_8firstundef:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-NEXT: adrp x8, .LCPI26_0
; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-NEXT: ret
; CHECK-SD-LABEL: combine_v8i16_8firstundef:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI26_0
; CHECK-SD-NEXT: fmov d2, d0
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: combine_v8i16_8firstundef:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT: adrp x8, .LCPI26_0
; CHECK-GI-NEXT: fmov d31, d1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT: ret
%3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
ret <16 x i8> %3
}
Expand Down
510 changes: 361 additions & 149 deletions llvm/test/CodeGen/AArch64/neon-perm.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Merge Base Offset
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Dead register definitions
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: Detect Dead Lanes
Expand Down Expand Up @@ -144,6 +143,7 @@
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: RISC-V Dead register definitions
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfadd.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: # implicit-def: $x10
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfsub.vv v9, v8, v9
; CHECK-NEXT: vfmul.vv v8, v9, v8
; CHECK-NEXT: # implicit-def: $x10
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/bypass-slow-division-64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; Intel
; RUN: llc < %s -mtriple=x86_64-- -mcpu=nehalem | FileCheck %s --check-prefixes=CHECK,FAST-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=nehalem | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
Expand Down
50 changes: 34 additions & 16 deletions llvm/test/CodeGen/X86/preserve_nonecc_call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ define void @caller1(ptr %a) {
; CHECK-NEXT: .cfi_offset %r13, -32
; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %r15, -16
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: callq callee@PLT
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 40
Expand Down Expand Up @@ -61,17 +62,17 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movq %r13, %r12
; CHECK-NEXT: movq %r14, %r13
; CHECK-NEXT: movq %r15, %r14
; CHECK-NEXT: movq %rdi, %r15
; CHECK-NEXT: movq %rsi, %rdi
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: movq %rcx, %rdx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: movq %r9, %r8
; CHECK-NEXT: movq %r11, %r9
; CHECK-NEXT: movq %r12, %r11
; CHECK-NEXT: movq %r13, %r12
; CHECK-NEXT: movq %r14, %r13
; CHECK-NEXT: movq %r15, %r14
; CHECK-NEXT: movq %rax, %r15
; CHECK-NEXT: movq %rax, %r11
; CHECK-NEXT: callq callee_with_many_param2@PLT
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
Expand All @@ -98,17 +99,17 @@ define i64 @caller3() {
; CHECK-NEXT: .cfi_offset %r13, -32
; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %r15, -16
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: movl $2, %esi
; CHECK-NEXT: movl $3, %edx
; CHECK-NEXT: movl $4, %ecx
; CHECK-NEXT: movl $5, %r8d
; CHECK-NEXT: movl $6, %r9d
; CHECK-NEXT: movl $7, %r11d
; CHECK-NEXT: movl $8, %r12d
; CHECK-NEXT: movl $9, %r13d
; CHECK-NEXT: movl $10, %r14d
; CHECK-NEXT: movl $11, %r15d
; CHECK-NEXT: movl $1, %r12d
; CHECK-NEXT: movl $2, %r13d
; CHECK-NEXT: movl $3, %r14d
; CHECK-NEXT: movl $4, %r15d
; CHECK-NEXT: movl $5, %edi
; CHECK-NEXT: movl $6, %esi
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: movl $8, %ecx
; CHECK-NEXT: movl $9, %r8d
; CHECK-NEXT: movl $10, %r9d
; CHECK-NEXT: movl $11, %r11d
; CHECK-NEXT: movl $12, %eax
; CHECK-NEXT: callq callee_with_many_param@PLT
; CHECK-NEXT: popq %rbx
Expand All @@ -125,3 +126,20 @@ define i64 @caller3() {
%ret = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12)
ret i64 %ret
}

; Non-volatile registers are used to pass the first few parameters.
declare void @boring()
declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr)
define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15) {
; CHECK-LABEL: entry:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq boring@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: jmp continuation@PLT # TAILCALL
call void @boring()
musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15)
ret void
}
21 changes: 21 additions & 0 deletions llvm/test/CodeGen/X86/preserve_nonecc_call_win.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-pc-windows-msvc -mcpu=corei7 < %s | FileCheck %s

; Non-volatile registers are used to pass the first few parameters.
declare void @boring()
declare preserve_nonecc void @continuation(ptr, ptr, ptr, ptr, ptr, ptr)
define preserve_nonecc void @entry(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi) {
; CHECK-LABEL: entry:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .seh_stackalloc 40
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: callq boring
; CHECK-NEXT: nop
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: jmp continuation # TAILCALL
; CHECK-NEXT: .seh_endproc
call void @boring()
musttail call preserve_nonecc void @continuation(ptr %r12, ptr %r13, ptr %r14, ptr %r15, ptr %rdi, ptr %rsi)
ret void
}
38 changes: 38 additions & 0 deletions llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ declare i32 @strcmp(ptr nocapture, ptr nocapture)

@s2 = constant [2 x i8] c"a\00"
@s3 = constant [3 x i8] c"ab\00"
@s3ff = constant [3 x i8] c"\FE\FF\00"

define i1 @test_strncmp_1(ptr %s) {
; CHECK-LABEL: define i1 @test_strncmp_1(
Expand Down Expand Up @@ -214,3 +215,40 @@ entry:
%cmp = icmp sle i32 %call, 0
ret i1 %cmp
}

define i1 @test_strcmp_4(ptr %s) {
; CHECK-LABEL: define i1 @test_strcmp_4(
; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB_0:%.*]]
; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 254, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB_1:%.*]]
; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 255, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB_2:%.*]]
; CHECK: sub_2:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB_0]] ], [ [[TMP7]], [[SUB_1]] ], [ [[TMP12]], [[SUB_2]] ]
; CHECK-NEXT: br label [[ENTRY_TAIL:%.*]]
; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(3) @s3ff, ptr nonnull dereferenceable(1) %s)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
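
The expansion above can be cross-checked against plain byte semantics: each sub_N block subtracts the zero-extended byte of %s from the corresponding constant byte of @s3ff (254 for '\FE', 255 for '\FF', 0 for the terminating NUL) and exits on the first nonzero difference. A reference version in C++ (hypothetical helper name):

// Reference semantics of the per-byte chain the pass emits for
// strcmp(@s3ff, %s); bytes compare as unsigned values.
static int strcmp_s3ff_ref(const unsigned char *s) {
  int d = 254 - s[0]; // '\xFE'
  if (d != 0)
    return d;
  d = 255 - s[1]; // '\xFF'
  if (d != 0)
    return d;
  return 0 - s[2]; // terminating NUL
}
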
142 changes: 142 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare ptr @malloc(i64)

%i8.array = type { [100 x i8] }
declare void @consume.i8.array(ptr)

@testbool = external local_unnamed_addr global i8, align 1

; testval does not contain an explicit lifetime end. We must assume that it may
; live across suspension.
define void @HasNoLifetimeEnd() presplitcoroutine {
; CHECK-LABEL: define void @HasNoLifetimeEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}

define void @LifetimeEndAfterCoroEnd() presplitcoroutine {
; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
ret void
}

define void @BranchWithoutLifetimeEnd() presplitcoroutine {
; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[TESTVAL:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[TESTVAL]])
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @testbool, align 1
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR1]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%0 = load i8, ptr @testbool, align 1
%tobool = trunc nuw i8 %0 to i1
br i1 %tobool, label %if.then, label %if.end

if.then:
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
br label %if.end

if.end:
%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}


declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
declare ptr @llvm.coro.begin(token, ptr writeonly) #3
declare ptr @llvm.coro.frame() #5
declare i8 @llvm.coro.suspend(token, i1) #3
declare i1 @llvm.coro.end(ptr, i1, token) #3
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #4
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #4
40 changes: 40 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s

define <4 x i32> @test(i16 %0, i16 %1) {
; CHECK-LABEL: define <4 x i32> @test(
; CHECK-SAME: i16 [[TMP0:%.*]], i16 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[CONV15_I:%.*]] = sext i16 [[TMP0]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[CONV15_I]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP5]], <4 x i32> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i32> [[TMP8]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
;
entry:
%conv13.1.i = zext i16 %1 to i32
%not.i = xor i32 %conv13.1.i, -1
%cond19.i = tail call i32 @llvm.smax.i32(i32 %not.i, i32 0)
%conv21.i = and i32 %cond19.i, 65535
%not.1.i = xor i32 %conv13.1.i, -1
%conv15.i = sext i16 %0 to i32
%cond19.1.i = tail call i32 @llvm.smax.i32(i32 %not.1.i, i32 %conv15.i)
%conv21.1.i = and i32 %cond19.1.i, 65535
%not.2.i = xor i32 %conv13.1.i, -1
%cond19.2.i = tail call i32 @llvm.smax.i32(i32 %not.2.i, i32 %conv15.i)
%conv21.2.i = and i32 %cond19.2.i, 65535
%conv13.3.i = zext i16 0 to i32
%not.3.i = xor i32 %conv13.3.i, -1
%cond19.3.i = tail call i32 @llvm.smax.i32(i32 %not.3.i, i32 %conv15.i)
%conv21.3.i = and i32 %cond19.3.i, 65535
%ins1 = insertelement <4 x i32> poison, i32 %conv21.i, i32 0
%ins2 = insertelement <4 x i32> %ins1, i32 %conv21.1.i, i32 1
%ins3 = insertelement <4 x i32> %ins2, i32 %conv21.2.i, i32 2
%ins4 = insertelement <4 x i32> %ins3, i32 %conv21.3.i, i32 3
ret <4 x i32> %ins4
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ define void @store_reverse(ptr %p3) {
; CHECK-LABEL: @store_reverse(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[P3:%.*]], i64 8
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P3]], i64 7
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P3]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX14]], i64 -8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> [[TMP2]], ptr align 8 [[ARRAYIDX2]], i64 -8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
; CHECK-NEXT: ret void
;
entry:
Expand Down
9 changes: 8 additions & 1 deletion mlir/lib/Target/Cpp/TranslateToCpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,16 @@ static bool shouldBeInlined(ExpressionOp expressionOp) {
if (!result.hasOneUse())
return false;

Operation *user = *result.getUsers().begin();

// Do not inline expressions used by subscript operations, since the
// way the subscript operation translation is implemented requires that
// variables be materialized.
if (isa<emitc::SubscriptOp>(user))
return false;

// Do not inline expressions used by other expressions, as any desired
// expression folding was taken care of by transformations.
Operation *user = *result.getUsers().begin();
return !user->getParentOfType<ExpressionOp>();
}

Expand Down
15 changes: 15 additions & 0 deletions mlir/test/Target/Cpp/expressions.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,18 @@ func.func @expression_with_address_taken(%arg0: i32, %arg1: i32, %arg2: !emitc.p
}
return %c : i1
}

// CPP-DEFAULT: int32_t expression_with_subscript_user(void* [[VAL_1:v.+]])
// CPP-DEFAULT-NEXT: int64_t [[VAL_2:v.+]] = 0;
// CPP-DEFAULT-NEXT: int32_t* [[VAL_3:v.+]] = (int32_t*) [[VAL_1]];
// CPP-DEFAULT-NEXT: return [[VAL_3]][[[VAL_2]]];

func.func @expression_with_subscript_user(%arg0: !emitc.ptr<!emitc.opaque<"void">>) -> i32 {
%c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
%0 = emitc.expression : !emitc.ptr<i32> {
%0 = emitc.cast %arg0 : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<i32>
emitc.yield %0 : !emitc.ptr<i32>
}
%1 = emitc.subscript %0[%c0] : (!emitc.ptr<i32>, i64) -> i32
return %1 : i32
}
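
Rendered as compilable C++, the CPP-DEFAULT checks above show exactly the materialization the TranslateToCpp change forces: the cast is emitted into a named variable so the subscript can be printed against it. The body below is transcribed from the CHECK lines; the include and exact spelling are mine:

#include <cstdint>

// What the new test pins down: v3 is materialized (not inlined into the
// subscript), so the emitter can print v3[v2].
int32_t expression_with_subscript_user(void *v1) {
  int64_t v2 = 0;
  int32_t *v3 = (int32_t *)v1;
  return v3[v2];
}
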
12 changes: 1 addition & 11 deletions openmp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,7 @@ option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF)

# Header install location
if(${OPENMP_STANDALONE_BUILD})
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
execute_process(
OUTPUT_STRIP_TRAILING_WHITESPACE
COMMAND ${CMAKE_CXX_COMPILER} --print-resource-dir
RESULT_VARIABLE COMMAND_RETURN_CODE
OUTPUT_VARIABLE COMPILER_RESOURCE_DIR
)
set(LIBOMP_HEADERS_INSTALL_PATH "${COMPILER_RESOURCE_DIR}/include")
else()
set(LIBOMP_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}")
endif()
set(LIBOMP_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}")
else()
include(GetClangResourceDir)
get_clang_resource_dir(LIBOMP_HEADERS_INSTALL_PATH SUBDIR include)
Expand Down
15 changes: 13 additions & 2 deletions utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,12 @@ objc_library(
"//conditions:default": ["@platforms//:incompatible"],
}),
deps = [
":Headers",
":HostMacOSXHeaders",
":HostMacOSXPrivateHeaders",
":Utility",
"//llvm:Support",
"//llvm:TargetParser",
],
)

Expand Down Expand Up @@ -561,7 +565,10 @@ cc_library(
"//llvm:TargetParser",
"//llvm:config",
] + select({
"@platforms//os:macos": [":HostMacOSXObjCXX"],
"@platforms//os:macos": [
":HostMacOSXObjCXX",
":HostMacOSXPrivateHeaders",
],
"//conditions:default": [],
}),
)
Expand Down Expand Up @@ -873,7 +880,11 @@ cc_binary(
"@platforms//os:macos": [],
"//conditions:default": ["@platforms//:incompatible"],
}),
deps = [":DebugServerCommon"],
deps = [
":DebugServerCommon",
":DebugServerCommonHeaders",
":DebugServerCommonMacOSXHeaders",
],
)

cc_binary(
Expand Down
17 changes: 15 additions & 2 deletions utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,12 @@ objc_library(
"@platforms//os:macos": [],
"//conditions:default": ["@platforms//:incompatible"],
}),
deps = [":PluginPlatformMacOSXObjCXXHeaders"],
deps = [
":PluginPlatformMacOSXObjCXXHeaders",
"//lldb:Host",
"//lldb:HostMacOSXPrivateHeaders",
"//llvm:Support",
],
)

cc_library(
Expand All @@ -275,14 +280,18 @@ cc_library(
"//lldb:Core",
"//lldb:Headers",
"//lldb:Host",
"//lldb:HostMacOSXPrivateHeaders",
"//lldb:InterpreterHeaders",
"//lldb:SymbolHeaders",
"//lldb:TargetHeaders",
"//lldb:Utility",
"//llvm:Support",
"//llvm:TargetParser",
] + select({
"@platforms//os:macos": [":PluginPlatformMacOSXObjCXX"],
"@platforms//os:macos": [
":PluginPlatformMacOSXObjCXX",
":PluginPlatformMacOSXObjCXXHeaders",
],
"//conditions:default": [],
}),
)
Expand Down Expand Up @@ -1748,6 +1757,10 @@ cc_library(
"//lldb:Host",
"//lldb:HostMacOSXPrivateHeaders",
"//lldb:Symbol",
"//lldb:SymbolHeaders",
"//lldb:TargetHeaders",
"//lldb:Utility",
"//llvm:Support",
],
)

Expand Down