361 changes: 270 additions & 91 deletions flang/lib/Lower/OpenMP/OpenMP.cpp

Large diffs are not rendered by default.

49 changes: 0 additions & 49 deletions flang/module/ieee_arithmetic.f90
@@ -161,33 +161,19 @@ end function ieee_round_ne
G(1) G(2) G(4) G(8) G(16)
#define SPECIFICS_L(G) \
G(1) G(2) G(4) G(8)

#if FLANG_SUPPORT_R16
#if __x86_64__
#define SPECIFICS_R(G) \
G(2) G(3) G(4) G(8) G(10) G(16)
#else
#define SPECIFICS_R(G) \
G(2) G(3) G(4) G(8) G(16)
#endif
#else
#if __x86_64__
#define SPECIFICS_R(G) \
G(2) G(3) G(4) G(8) G(10)
#else
#define SPECIFICS_R(G) \
G(2) G(3) G(4) G(8)
#endif
#endif

#define SPECIFICS_II(G) \
G(1,1) G(1,2) G(1,4) G(1,8) G(1,16) \
G(2,1) G(2,2) G(2,4) G(2,8) G(2,16) \
G(4,1) G(4,2) G(4,4) G(4,8) G(4,16) \
G(8,1) G(8,2) G(8,4) G(8,8) G(8,16) \
G(16,1) G(16,2) G(16,4) G(16,8) G(16,16)

#if FLANG_SUPPORT_R16
#if __x86_64__
#define SPECIFICS_RI(G) \
G(2,1) G(2,2) G(2,4) G(2,8) G(2,16) \
@@ -204,24 +190,7 @@ end function ieee_round_ne
G(8,1) G(8,2) G(8,4) G(8,8) G(8,16) \
G(16,1) G(16,2) G(16,4) G(16,8) G(16,16)
#endif
#else
#if __x86_64__
#define SPECIFICS_RI(G) \
G(2,1) G(2,2) G(2,4) G(2,8) \
G(3,1) G(3,2) G(3,4) G(3,8) \
G(4,1) G(4,2) G(4,4) G(4,8) \
G(8,1) G(8,2) G(8,4) G(8,8) \
G(10,1) G(10,2) G(10,4) G(10,8)
#else
#define SPECIFICS_RI(G) \
G(2,1) G(2,2) G(2,4) G(2,8) \
G(3,1) G(3,2) G(3,4) G(3,8) \
G(4,1) G(4,2) G(4,4) G(4,8) \
G(8,1) G(8,2) G(8,4) G(8,8)
#endif
#endif

#if FLANG_SUPPORT_R16
#if __x86_64__
#define SPECIFICS_RR(G) \
G(2,2) G(2,3) G(2,4) G(2,8) G(2,10) G(2,16) \
@@ -238,22 +207,6 @@ end function ieee_round_ne
G(8,2) G(8,3) G(8,4) G(8,8) G(8,16) \
G(16,2) G(16,3) G(16,4) G(16,8) G(16,16)
#endif
#else
#if __x86_64__
#define SPECIFICS_RR(G) \
G(2,2) G(2,3) G(2,4) G(2,8) G(2,10) \
G(3,2) G(3,3) G(3,4) G(3,8) G(3,10) \
G(4,2) G(4,3) G(4,4) G(4,8) G(4,10) \
G(8,2) G(8,3) G(8,4) G(8,8) G(8,10) \
G(10,2) G(10,3) G(10,4) G(10,8) G(10,10)
#else
#define SPECIFICS_RR(G) \
G(2,2) G(2,3) G(2,4) G(2,8) \
G(3,2) G(3,3) G(3,4) G(3,8) \
G(4,2) G(4,3) G(4,4) G(4,8) \
G(8,2) G(8,3) G(8,4) G(8,8)
#endif
#endif

#define IEEE_CLASS_R(XKIND) \
elemental type(ieee_class_type) function ieee_class_a##XKIND(x); \
@@ -509,10 +462,8 @@ end function ieee_real_a##AKIND##_i##KKIND;
interface ieee_real
SPECIFICS_I(IEEE_REAL_I)
SPECIFICS_R(IEEE_REAL_R)
#if FLANG_SUPPORT_R16
SPECIFICS_II(IEEE_REAL_II)
SPECIFICS_RI(IEEE_REAL_RI)
#endif
end interface ieee_real
public :: ieee_real
#undef IEEE_REAL_I
10 changes: 0 additions & 10 deletions flang/test/CMakeLists.txt
@@ -11,16 +11,6 @@ llvm_canonicalize_cmake_booleans(

set(FLANG_TOOLS_DIR ${FLANG_BINARY_DIR}/bin)

# Check if 128-bit float computations can be done via long double
check_cxx_source_compiles(
"#include <cfloat>
#if LDBL_MANT_DIG != 113
#error LDBL_MANT_DIG != 113
#endif
int main() { return 0; }
"
HAVE_LDBL_MANT_DIG_113)

# FIXME In out-of-tree builds, "SHLIBDIR" is undefined and passing it to
# `configure_lit_site_cfg` leads to a configuration error. This is currently
# only required by plugins/examples, which are not supported in out-of-tree
1 change: 0 additions & 1 deletion flang/test/Evaluate/fold-out_of_range.f90
@@ -1,7 +1,6 @@
! RUN: %python %S/test_folding.py %s %flang_fc1 -pedantic -triple x86_64-unknown-linux-gnu
! UNSUPPORTED: system-windows
! REQUIRES: target=x86_64{{.*}}
! REQUIRES: flang-supports-f128-math
! Tests folding of OUT_OF_RANGE().
module m
integer(1), parameter :: i1v(*) = [ -huge(1_1) - 1_1, huge(1_1) ]
1 change: 0 additions & 1 deletion flang/test/Evaluate/folding07.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: %python %S/test_folding.py %s %flang_fc1
! Test numeric model inquiry intrinsics

1 change: 0 additions & 1 deletion flang/test/Lower/Intrinsics/ieee_class_queries.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: bbc -emit-fir -o - %s | FileCheck %s

! CHECK-LABEL: func @_QQmain
1 change: 0 additions & 1 deletion flang/test/Lower/Intrinsics/ieee_unordered.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s

! CHECK-LABEL: func @_QQmain
100 changes: 100 additions & 0 deletions flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
@@ -0,0 +1,100 @@
! This test checks lowering of OpenMP DISTRIBUTE PARALLEL DO SIMD composite
! constructs.

! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_num_threads(
subroutine distribute_parallel_do_simd_num_threads()
!$omp teams

! CHECK: omp.parallel num_threads({{.*}}) private({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.simd {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd num_threads(10)
do index_ = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams
end subroutine distribute_parallel_do_simd_num_threads

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_dist_schedule(
subroutine distribute_parallel_do_simd_dist_schedule()
!$omp teams

! CHECK: omp.parallel private({{.*}}) {
! CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size({{.*}}) {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.simd {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd dist_schedule(static, 4)
do index_ = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams
end subroutine distribute_parallel_do_simd_dist_schedule

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_schedule(
subroutine distribute_parallel_do_simd_schedule()
!$omp teams

! CHECK: omp.parallel private({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop schedule(static = {{.*}}) {
! CHECK-NEXT: omp.simd {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd schedule(static, 4)
do index_ = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams
end subroutine distribute_parallel_do_simd_schedule

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_simdlen(
subroutine distribute_parallel_do_simd_simdlen()
!$omp teams

! CHECK: omp.parallel private({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.simd simdlen(4) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd simdlen(4)
do index_ = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams
end subroutine distribute_parallel_do_simd_simdlen

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_simd_private(
subroutine distribute_parallel_do_simd_private()
! CHECK: %[[INDEX_ALLOC:.*]] = fir.alloca i32
! CHECK: %[[INDEX:.*]]:2 = hlfir.declare %[[INDEX_ALLOC]]
! CHECK: %[[X_ALLOC:.*]] = fir.alloca i64
! CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_ALLOC]]
integer(8) :: x

! CHECK: omp.teams {
!$omp teams

! CHECK: omp.parallel private(@{{.*}} %[[X]]#0 -> %[[X_ARG:.*]] : !fir.ref<i64>,
! CHECK-SAME: @{{.*}} %[[INDEX]]#0 -> %[[INDEX_ARG:.*]] : !fir.ref<i32>) {
! CHECK: %[[X_PRIV:.*]]:2 = hlfir.declare %[[X_ARG]]
! CHECK: %[[INDEX_PRIV:.*]]:2 = hlfir.declare %[[INDEX_ARG]]
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.simd {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd private(x)
do index_ = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams
end subroutine distribute_parallel_do_simd_private
79 changes: 79 additions & 0 deletions flang/test/Lower/OpenMP/distribute-parallel-do.f90
@@ -0,0 +1,79 @@
! This test checks lowering of OpenMP DISTRIBUTE PARALLEL DO composite
! constructs.

! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_num_threads(
subroutine distribute_parallel_do_num_threads()
!$omp teams

! CHECK: omp.parallel num_threads({{.*}}) private({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do num_threads(10)
do index_ = 1, 10
end do
!$omp end distribute parallel do

!$omp end teams
end subroutine distribute_parallel_do_num_threads

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_dist_schedule(
subroutine distribute_parallel_do_dist_schedule()
!$omp teams

! CHECK: omp.parallel private({{.*}}) {
! CHECK: omp.distribute dist_schedule_static dist_schedule_chunk_size({{.*}}) {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do dist_schedule(static, 4)
do index_ = 1, 10
end do
!$omp end distribute parallel do

!$omp end teams
end subroutine distribute_parallel_do_dist_schedule

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_ordered(
subroutine distribute_parallel_do_ordered()
!$omp teams

! CHECK: omp.parallel private({{.*}}) {
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop ordered(1) {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do ordered(1)
do index_ = 1, 10
end do
!$omp end distribute parallel do

!$omp end teams
end subroutine distribute_parallel_do_ordered

! CHECK-LABEL: func.func @_QPdistribute_parallel_do_private(
subroutine distribute_parallel_do_private()
! CHECK: %[[INDEX_ALLOC:.*]] = fir.alloca i32
! CHECK: %[[INDEX:.*]]:2 = hlfir.declare %[[INDEX_ALLOC]]
! CHECK: %[[X_ALLOC:.*]] = fir.alloca i64
! CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_ALLOC]]
integer(8) :: x

! CHECK: omp.teams {
!$omp teams

! CHECK: omp.parallel private(@{{.*}} %[[X]]#0 -> %[[X_ARG:.*]] : !fir.ref<i64>,
! CHECK-SAME: @{{.*}} %[[INDEX]]#0 -> %[[INDEX_ARG:.*]] : !fir.ref<i32>) {
! CHECK: %[[X_PRIV:.*]]:2 = hlfir.declare %[[X_ARG]]
! CHECK: %[[INDEX_PRIV:.*]]:2 = hlfir.declare %[[INDEX_ARG]]
! CHECK: omp.distribute {
! CHECK-NEXT: omp.wsloop {
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do private(x)
do index_ = 1, 10
end do
!$omp end distribute parallel do

!$omp end teams
end subroutine distribute_parallel_do_private
769 changes: 695 additions & 74 deletions flang/test/Lower/OpenMP/if-clause.f90

Large diffs are not rendered by default.

95 changes: 89 additions & 6 deletions flang/test/Lower/OpenMP/loop-compound.f90
@@ -8,13 +8,40 @@ program main
integer :: i

! TODO When composite constructs are supported add:
! - DISTRIBUTE PARALLEL DO SIMD
! - DISTRIBUTE PARALLEL DO
! - TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD
! - TARGET TEAMS DISTRIBUTE PARALLEL DO
! - TASKLOOP SIMD
! - TEAMS DISTRIBUTE PARALLEL DO SIMD
! - TEAMS DISTRIBUTE PARALLEL DO

! ----------------------------------------------------------------------------
! DISTRIBUTE PARALLEL DO
! ----------------------------------------------------------------------------
!$omp teams

! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do
do i = 1, 10
end do
!$omp end distribute parallel do

!$omp end teams

! ----------------------------------------------------------------------------
! DISTRIBUTE PARALLEL DO SIMD
! ----------------------------------------------------------------------------
!$omp teams

! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.simd
! CHECK-NEXT: omp.loop_nest
!$omp distribute parallel do simd
do i = 1, 10
end do
!$omp end distribute parallel do simd

!$omp end teams

! ----------------------------------------------------------------------------
! DISTRIBUTE SIMD
@@ -113,6 +140,35 @@ program main
end do
!$omp end target teams distribute

! ----------------------------------------------------------------------------
! TARGET TEAMS DISTRIBUTE PARALLEL DO
! ----------------------------------------------------------------------------
! CHECK: omp.target
! CHECK: omp.teams
! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.loop_nest
!$omp target teams distribute parallel do
do i = 1, 10
end do
!$omp end target teams distribute parallel do

! ----------------------------------------------------------------------------
! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD
! ----------------------------------------------------------------------------
! CHECK: omp.target
! CHECK: omp.teams
! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.simd
! CHECK-NEXT: omp.loop_nest
!$omp target teams distribute parallel do simd
do i = 1, 10
end do
!$omp end target teams distribute parallel do simd

! ----------------------------------------------------------------------------
! TARGET TEAMS DISTRIBUTE SIMD
! ----------------------------------------------------------------------------
@@ -137,6 +193,33 @@ program main
end do
!$omp end teams distribute

! ----------------------------------------------------------------------------
! TEAMS DISTRIBUTE PARALLEL DO
! ----------------------------------------------------------------------------
! CHECK: omp.teams
! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.loop_nest
!$omp teams distribute parallel do
do i = 1, 10
end do
!$omp end teams distribute parallel do

! ----------------------------------------------------------------------------
! TEAMS DISTRIBUTE PARALLEL DO SIMD
! ----------------------------------------------------------------------------
! CHECK: omp.teams
! CHECK: omp.parallel
! CHECK: omp.distribute
! CHECK-NEXT: omp.wsloop
! CHECK-NEXT: omp.simd
! CHECK-NEXT: omp.loop_nest
!$omp teams distribute parallel do simd
do i = 1, 10
end do
!$omp end teams distribute parallel do simd

! ----------------------------------------------------------------------------
! TEAMS DISTRIBUTE SIMD
! ----------------------------------------------------------------------------
14 changes: 7 additions & 7 deletions flang/test/Lower/OpenMP/parallel-reduction3.f90
@@ -69,19 +69,19 @@
! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32
! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box<!fir.array<?xi32>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_16]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_18:.*]] = arith.constant 100 : i32
! CHECK: %[[VAL_19:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_16]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxi32 %[[VAL_14]] -> %[[VAL_20:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_21:.*]]) : i32 = (%[[VAL_17]]) to (%[[VAL_18]]) inclusive step (%[[VAL_19]]) {
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref<i32>
! CHECK: fir.store %[[VAL_21]] to %[[VAL_16]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_16]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index
! CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1>
14 changes: 7 additions & 7 deletions flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -79,18 +79,18 @@ subroutine reduce(r)
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_6]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_Uxf64 %[[VAL_4]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_11:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) {
! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> (!fir.ref<!fir.box<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.array<?xf64>>>)
! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
! CHECK: fir.store %[[VAL_11]] to %[[VAL_6]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f64
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<!fir.box<!fir.array<?xf64>>>
! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
18 changes: 9 additions & 9 deletions flang/test/Lower/OpenMP/wsloop-reduction-array.f90
@@ -73,24 +73,24 @@ program reduce
! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: hlfir.assign %[[VAL_16]] to %[[VAL_19]] : i32, !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] : i32
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
18 changes: 9 additions & 9 deletions flang/test/Lower/OpenMP/wsloop-reduction-array2.f90
@@ -73,23 +73,23 @@ program reduce
! CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_4]]) {uniq_name = "_QFEr"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
! CHECK: fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_6:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<2xi32>>
! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<2xi32>>
! CHECK: fir.store %[[VAL_6]] to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_10:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_11:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_9]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_2xi32 %[[VAL_7]] -> %[[VAL_13:.*]] : !fir.ref<!fir.box<!fir.array<2xi32>>>) {
! CHECK-NEXT: omp.loop_nest (%[[VAL_14:.*]]) : i32 = (%[[VAL_10]]) to (%[[VAL_11]]) inclusive step (%[[VAL_12]]) {
! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEr"} : (!fir.ref<!fir.box<!fir.array<2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<2xi32>>>, !fir.ref<!fir.box<!fir.array<2xi32>>>)
! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.ref<i32>
! CHECK: fir.store %[[VAL_14]] to %[[VAL_9]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_18:.*]] = hlfir.designate %[[VAL_16]] (%[[VAL_17]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_20]] : i32
! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_23:.*]] = arith.constant 1 : index
@@ -99,7 +99,7 @@ program reduce
! CHECK: %[[VAL_26:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_27:.*]] = hlfir.designate %[[VAL_25]] (%[[VAL_26]]) : (!fir.box<!fir.array<2xi32>>, index) -> !fir.ref<i32>
! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ref<i32>
! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_28]], %[[VAL_29]] : i32
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref<!fir.box<!fir.array<2xi32>>>
! CHECK: %[[VAL_32:.*]] = arith.constant 2 : index
22 changes: 11 additions & 11 deletions flang/test/Lower/OpenMP/wsloop-reduction-multiple-clauses.f90
@@ -109,23 +109,23 @@ program main
! CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f64
! CHECK: hlfir.assign %[[VAL_10]] to %[[VAL_4]]#0 : f64, !fir.ref<!fir.array<3x3xf64>>
! CHECK: omp.parallel {
! CHECK: %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_13:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>>
! CHECK: %[[VAL_14:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
! CHECK: fir.store %[[VAL_13]] to %[[VAL_14]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>
! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_4]]#0(%[[VAL_3]]) : (!fir.ref<!fir.array<3x3xf64>>, !fir.shape<2>) -> !fir.box<!fir.array<3x3xf64>>
! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box<!fir.array<3x3xf64>>
! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>
! CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", pinned, {{.*}}}
! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_13]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_16:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_17:.*]] = arith.constant 1 : i32
! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_14]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
! CHECK: omp.wsloop reduction(@add_reduction_f64 %[[VAL_8]]#0 -> %[[VAL_18:.*]] : !fir.ref<f64>, byref @add_reduction_byref_box_3x3xf64 %[[VAL_12]] -> %[[VAL_19:.*]] : !fir.ref<!fir.box<!fir.array<3x3xf64>>>) {
! CHECK: omp.loop_nest (%[[VAL_20:.*]]) : i32 = (%[[VAL_15]]) to (%[[VAL_16]]) inclusive step (%[[VAL_17]]) {
! CHECK: %[[VAL_21:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEscalar"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_19]] {uniq_name = "_QFEarray"} : (!fir.ref<!fir.box<!fir.array<3x3xf64>>>) -> (!fir.ref<!fir.box<!fir.array<3x3xf64>>>, !fir.ref<!fir.box<!fir.array<3x3xf64>>>)
! CHECK: fir.store %[[VAL_20]] to %[[VAL_12]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
! CHECK: fir.store %[[VAL_20]] to %[[VAL_14]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i32) -> f64
! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_21]]#0 : f64, !fir.ref<f64>
! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_26:.*]] = arith.constant 1 : i32
! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_25]], %[[VAL_26]] : i32
! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> f64
@@ -134,7 +134,7 @@ program main
! CHECK: %[[VAL_31:.*]] = arith.constant 1 : index
! CHECK: %[[VAL_32:.*]] = hlfir.designate %[[VAL_29]] (%[[VAL_30]], %[[VAL_31]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64>
! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_32]] : f64, !fir.ref<f64>
! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_34:.*]] = arith.constant 2 : i32
! CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_33]], %[[VAL_34]] : i32
! CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (i32) -> f64
@@ -143,7 +143,7 @@ program main
! CHECK: %[[VAL_39:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_37]] (%[[VAL_38]], %[[VAL_39]]) : (!fir.box<!fir.array<3x3xf64>>, index, index) -> !fir.ref<f64>
! CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_40]] : f64, !fir.ref<f64>
! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
! CHECK: %[[VAL_42:.*]] = arith.constant 3 : i32
! CHECK: %[[VAL_43:.*]] = arith.addi %[[VAL_41]], %[[VAL_42]] : i32
! CHECK: %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> f64
2 changes: 1 addition & 1 deletion flang/test/Lower/common-block.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: bbc %s -o - | tco | FileCheck %s
! RUN: %flang -emit-llvm -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s

@@ -79,3 +78,4 @@ subroutine s7()
real(16) r16
common /co1/ r16
end subroutine

1 change: 0 additions & 1 deletion flang/test/Semantics/kinds03.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: %python %S/test_symbols.py %s %flang_fc1
!DEF: /MainProgram1/ipdt DerivedType
!DEF: /MainProgram1/ipdt/k TypeParam INTEGER(4)
1 change: 0 additions & 1 deletion flang/test/Semantics/modfile26.f90
@@ -1,4 +1,3 @@
! REQUIRES: flang-supports-f128-math
! RUN: %python %S/test_modfile.py %s %flang_fc1
! Intrinsics SELECTED_INT_KIND, SELECTED_REAL_KIND, PRECISION, RANGE,
! RADIX, DIGITS
1 change: 0 additions & 1 deletion flang/test/Semantics/realkinds-aarch64-01.f90
@@ -1,5 +1,4 @@
! REQUIRES: aarch64-registered-target
! REQUIRES: flang-supports-f128-math
! RUN: %python %S/test_modfile.py %s %flang_fc1 -triple aarch64-unknown-linux-gnu

module m1
3 changes: 1 addition & 2 deletions flang/test/lit.cfg.py
@@ -216,9 +216,8 @@
# Add features and substitutions to test F128 math support.
# %f128-lib substitution may be used to generate check prefixes
# for LIT tests checking for F128 library support.
if config.flang_runtime_f128_math_lib or config.have_ldbl_mant_dig_113:
config.available_features.add("flang-supports-f128-math")
if config.flang_runtime_f128_math_lib:
config.available_features.add("flang-supports-f128-math")
config.available_features.add(
"flang-f128-math-lib-" + config.flang_runtime_f128_math_lib
)
1 change: 0 additions & 1 deletion flang/test/lit.site.cfg.py.in
@@ -31,7 +31,6 @@ if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"):
else:
config.openmp_module_dir = None
config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@"
config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@"

import lit.llvm
lit.llvm.initialize(lit_config, config)
25 changes: 1 addition & 24 deletions flang/tools/f18/CMakeLists.txt
@@ -31,25 +31,6 @@ set(MODULES_WITHOUT_IMPLEMENTATION

set(MODULES ${MODULES_WITH_IMPLEMENTATION} ${MODULES_WITHOUT_IMPLEMENTATION})

# Check if 128-bit float computations can be done via long double.
check_cxx_source_compiles(
"#include <cfloat>
#if LDBL_MANT_DIG != 113
#error LDBL_MANT_DIG != 113
#endif
int main() { return 0; }
"
HAVE_LDBL_MANT_DIG_113)

# Figure out whether we can support REAL(KIND=16)
if (FLANG_RUNTIME_F128_MATH_LIB)
set(FLANG_SUPPORT_R16 "1")
elseif (HAVE_LDBL_MANT_DIG_113)
set(FLANG_SUPPORT_R16 "1")
else()
set(FLANG_SUPPORT_R16 "0")
endif()

# Init variable to hold extra object files coming from the Fortran modules;
# these module files will be contributed from the CMakeLists in flang/tools/f18.
set(module_objects "")
@@ -95,10 +76,6 @@ if (NOT CMAKE_CROSSCOMPILING)
endif()
endif()

set(decls "")
if (FLANG_SUPPORT_R16)
set(decls "-DFLANG_SUPPORT_R16")
endif()

# Some modules have an implementation part that needs to be added to the
# FortranRuntime library.
@@ -115,7 +92,7 @@ if (NOT CMAKE_CROSSCOMPILING)
# TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
add_custom_command(OUTPUT ${base}.mod ${object_output}
COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
COMMAND flang-new ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
COMMAND flang-new ${opts} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
${FLANG_SOURCE_DIR}/module/${filename}.f90
DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
)
33 changes: 20 additions & 13 deletions libc/src/string/memory_utils/x86_64/inline_memcpy.h
@@ -69,14 +69,21 @@ inline_memcpy_x86_avx_ge64(Ptr __restrict dst, CPtr __restrict src,
return builtin::Memcpy<64>::loop_and_tail(dst, src, count);
}

[[maybe_unused]] LIBC_INLINE void inline_memcpy_prefetch(Ptr __restrict dst,
CPtr __restrict src,
size_t distance) {
prefetch_to_local_cache(src + distance);
prefetch_for_write(dst + distance);
}

[[maybe_unused]] LIBC_INLINE void
inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst,
CPtr __restrict src, size_t count) {
using namespace LIBC_NAMESPACE::x86;
prefetch_to_local_cache(src + K_ONE_CACHELINE);
inline_memcpy_prefetch(dst, src, K_ONE_CACHELINE);
if (count <= 128)
return builtin::Memcpy<64>::head_tail(dst, src, count);
prefetch_to_local_cache(src + K_TWO_CACHELINES);
inline_memcpy_prefetch(dst, src, K_TWO_CACHELINES);
// Aligning 'dst' on a 32B boundary.
builtin::Memcpy<32>::block(dst, src);
align_to_next_boundary<32, Arg::Dst>(dst, src, count);
@@ -90,17 +97,17 @@ inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst,
if (count < 352) {
// Two cache lines at a time.
while (offset + K_TWO_CACHELINES + 32 <= count) {
prefetch_to_local_cache(src + offset + K_ONE_CACHELINE);
prefetch_to_local_cache(src + offset + K_TWO_CACHELINES);
inline_memcpy_prefetch(dst, src, offset + K_ONE_CACHELINE);
inline_memcpy_prefetch(dst, src, offset + K_TWO_CACHELINES);
builtin::Memcpy<K_TWO_CACHELINES>::block_offset(dst, src, offset);
offset += K_TWO_CACHELINES;
}
} else {
// Three cache lines at a time.
while (offset + K_THREE_CACHELINES + 32 <= count) {
prefetch_to_local_cache(src + offset + K_ONE_CACHELINE);
prefetch_to_local_cache(src + offset + K_TWO_CACHELINES);
prefetch_to_local_cache(src + offset + K_THREE_CACHELINES);
inline_memcpy_prefetch(dst, src, offset + K_ONE_CACHELINE);
inline_memcpy_prefetch(dst, src, offset + K_TWO_CACHELINES);
inline_memcpy_prefetch(dst, src, offset + K_THREE_CACHELINES);
// It is likely that this copy will be turned into a 'rep;movsb' on
// non-AVX machines.
builtin::Memcpy<K_THREE_CACHELINES>::block_offset(dst, src, offset);
@@ -120,11 +127,11 @@ inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst,
inline_memcpy_x86_avx_ge64_sw_prefetching(Ptr __restrict dst,
CPtr __restrict src, size_t count) {
using namespace LIBC_NAMESPACE::x86;
prefetch_to_local_cache(src + K_ONE_CACHELINE);
inline_memcpy_prefetch(dst, src, K_ONE_CACHELINE);
if (count <= 128)
return builtin::Memcpy<64>::head_tail(dst, src, count);
prefetch_to_local_cache(src + K_TWO_CACHELINES);
prefetch_to_local_cache(src + K_THREE_CACHELINES);
inline_memcpy_prefetch(dst, src, K_TWO_CACHELINES);
inline_memcpy_prefetch(dst, src, K_THREE_CACHELINES);
if (count < 256)
return builtin::Memcpy<128>::head_tail(dst, src, count);
// Aligning 'dst' on a 32B boundary.
@@ -139,9 +146,9 @@ inline_memcpy_x86_avx_ge64_sw_prefetching(Ptr __restrict dst,
// - count >= 128.
while (offset + K_THREE_CACHELINES + 64 <= count) {
// Three cache lines at a time.
prefetch_to_local_cache(src + offset + K_ONE_CACHELINE);
prefetch_to_local_cache(src + offset + K_TWO_CACHELINES);
prefetch_to_local_cache(src + offset + K_THREE_CACHELINES);
inline_memcpy_prefetch(dst, src, offset + K_ONE_CACHELINE);
inline_memcpy_prefetch(dst, src, offset + K_TWO_CACHELINES);
inline_memcpy_prefetch(dst, src, offset + K_THREE_CACHELINES);
builtin::Memcpy<K_THREE_CACHELINES>::block_offset(dst, src, offset);
offset += K_THREE_CACHELINES;
}
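For context, the change above folds each pair of source/destination prefetches into a single inline_memcpy_prefetch(dst, src, distance) call that issues a read prefetch on the source and a write prefetch on the destination. Below is a minimal standalone sketch of that pairing; it is illustrative only and not the libc implementation — the prefetch_pair and copy_with_prefetch names, the 64-byte cache-line constant, and the scalar copy loop are assumptions made for the example.

#include <cstddef>

// Illustrative helper: prefetch the source line for reading and the
// destination line for writing, 'distance' bytes ahead of the cursor.
static inline void prefetch_pair(char *dst, const char *src, size_t distance) {
  __builtin_prefetch(src + distance, /*rw=*/0, /*locality=*/3);
  __builtin_prefetch(dst + distance, /*rw=*/1, /*locality=*/3);
}

// Illustrative copy loop that prefetches one cache line ahead of the block
// currently being copied, then copies the remaining tail without prefetching.
void copy_with_prefetch(char *dst, const char *src, size_t count) {
  constexpr size_t kCacheline = 64;
  size_t offset = 0;
  for (; offset + kCacheline <= count; offset += kCacheline) {
    prefetch_pair(dst, src, offset + kCacheline);
    for (size_t i = 0; i < kCacheline; ++i)
      dst[offset + i] = src[offset + i];
  }
  for (; offset < count; ++offset)
    dst[offset] = src[offset];
}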
28 changes: 0 additions & 28 deletions libcxx/include/cmath
@@ -554,20 +554,6 @@ using ::scalbnl _LIBCPP_USING_IF_EXISTS;
using ::tgammal _LIBCPP_USING_IF_EXISTS;
using ::truncl _LIBCPP_USING_IF_EXISTS;

template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isnan(_A1 __lcpp_x) _NOEXCEPT {
#if __has_builtin(__builtin_isnan)
return __builtin_isnan(__lcpp_x);
#else
return isnan(__lcpp_x);
#endif
}

template <class _A1, __enable_if_t<!is_floating_point<_A1>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isnan(_A1 __lcpp_x) _NOEXCEPT {
return std::isnan(__lcpp_x);
}

template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isinf(_A1 __lcpp_x) _NOEXCEPT {
#if __has_builtin(__builtin_isinf)
@@ -582,20 +568,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isinf(_A1 __lcpp_x) _NO
return std::isinf(__lcpp_x);
}

template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isfinite(_A1 __lcpp_x) _NOEXCEPT {
#if __has_builtin(__builtin_isfinite)
return __builtin_isfinite(__lcpp_x);
#else
return isfinite(__lcpp_x);
#endif
}

template <class _A1, __enable_if_t<!is_floating_point<_A1>::value, int> = 0>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool __constexpr_isfinite(_A1 __lcpp_x) _NOEXCEPT {
return __builtin_isfinite(__lcpp_x);
}

#if _LIBCPP_STD_VER >= 20
template <typename _Fp>
_LIBCPP_HIDE_FROM_ABI constexpr _Fp __lerp(_Fp __a, _Fp __b, _Fp __t) noexcept {
50 changes: 24 additions & 26 deletions libcxx/include/complex
@@ -1019,9 +1019,9 @@ inline _LIBCPP_HIDE_FROM_ABI typename __libcpp_complex_overload_traits<_Tp>::_Co

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta = _Tp()) {
if (std::__constexpr_isnan(__rho) || std::signbit(__rho))
if (std::isnan(__rho) || std::signbit(__rho))
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
if (std::__constexpr_isnan(__theta)) {
if (std::isnan(__theta)) {
if (std::__constexpr_isinf(__rho))
return complex<_Tp>(__rho, __theta);
return complex<_Tp>(__theta, __theta);
Expand All @@ -1032,10 +1032,10 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> polar(const _Tp& __rho, const _Tp& __theta =
return complex<_Tp>(_Tp(NAN), _Tp(NAN));
}
_Tp __x = __rho * std::cos(__theta);
if (std::__constexpr_isnan(__x))
if (std::isnan(__x))
__x = 0;
_Tp __y = __rho * std::sin(__theta);
if (std::__constexpr_isnan(__y))
if (std::isnan(__y))
__y = 0;
return complex<_Tp>(__x, __y);
}
@@ -1062,10 +1062,8 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sqrt(const complex<_Tp>& __x) {
return complex<_Tp>(_Tp(INFINITY), __x.imag());
if (std::__constexpr_isinf(__x.real())) {
if (__x.real() > _Tp(0))
return complex<_Tp>(
__x.real(), std::__constexpr_isnan(__x.imag()) ? __x.imag() : std::copysign(_Tp(0), __x.imag()));
return complex<_Tp>(
std::__constexpr_isnan(__x.imag()) ? __x.imag() : _Tp(0), std::copysign(__x.real(), __x.imag()));
return complex<_Tp>(__x.real(), std::isnan(__x.imag()) ? __x.imag() : std::copysign(_Tp(0), __x.imag()));
return complex<_Tp>(std::isnan(__x.imag()) ? __x.imag() : _Tp(0), std::copysign(__x.real(), __x.imag()));
}
return std::polar(std::sqrt(std::abs(__x)), std::arg(__x) / _Tp(2));
}
@@ -1080,9 +1078,9 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> exp(const complex<_Tp>& __x) {
}
if (std::__constexpr_isinf(__x.real())) {
if (__x.real() < _Tp(0)) {
if (!std::__constexpr_isfinite(__i))
if (!std::isfinite(__i))
__i = _Tp(1);
} else if (__i == 0 || !std::__constexpr_isfinite(__i)) {
} else if (__i == 0 || !std::isfinite(__i)) {
if (std::__constexpr_isinf(__i))
__i = _Tp(NAN);
return complex<_Tp>(__x.real(), __i);
@@ -1131,13 +1129,13 @@
_LIBCPP_HIDE_FROM_ABI complex<_Tp> asinh(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return __x;
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.real(), std::copysign(__pi * _Tp(0.25), __x.imag()));
return complex<_Tp>(__x.real(), std::copysign(_Tp(0), __x.imag()));
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (__x.imag() == 0)
@@ -1156,7 +1154,7 @@
_LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return complex<_Tp>(std::abs(__x.real()), __x.imag());
if (std::__constexpr_isinf(__x.imag())) {
if (__x.real() > 0)
@@ -1168,7 +1166,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acosh(const complex<_Tp>& __x) {
return complex<_Tp>(-__x.real(), std::copysign(__pi, __x.imag()));
return complex<_Tp>(__x.real(), std::copysign(_Tp(0), __x.imag()));
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(std::abs(__x.imag()), __x.real());
return complex<_Tp>(__x.real(), __x.real());
@@ -1187,12 +1185,12 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.imag())) {
return complex<_Tp>(std::copysign(_Tp(0), __x.real()), std::copysign(__pi / _Tp(2), __x.imag()));
}
if (std::__constexpr_isnan(__x.imag())) {
if (std::isnan(__x.imag())) {
if (std::__constexpr_isinf(__x.real()) || __x.real() == 0)
return complex<_Tp>(std::copysign(_Tp(0), __x.real()), __x.imag());
return complex<_Tp>(__x.imag(), __x.imag());
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
return complex<_Tp>(__x.real(), __x.real());
}
if (std::__constexpr_isinf(__x.real())) {
@@ -1209,11 +1207,11 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> atanh(const complex<_Tp>& __x) {

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real()) && !std::__constexpr_isfinite(__x.imag()))
if (std::__constexpr_isinf(__x.real()) && !std::isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.real() == 0 && !std::__constexpr_isfinite(__x.imag()))
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return complex<_Tp>(__x.real(), _Tp(NAN));
if (__x.imag() == 0 && !std::__constexpr_isfinite(__x.real()))
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return __x;
return complex<_Tp>(std::sinh(__x.real()) * std::cos(__x.imag()), std::cosh(__x.real()) * std::sin(__x.imag()));
}
@@ -1222,13 +1220,13 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> sinh(const complex<_Tp>& __x) {

template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real()) && !std::__constexpr_isfinite(__x.imag()))
if (std::__constexpr_isinf(__x.real()) && !std::isfinite(__x.imag()))
return complex<_Tp>(std::abs(__x.real()), _Tp(NAN));
if (__x.real() == 0 && !std::__constexpr_isfinite(__x.imag()))
if (__x.real() == 0 && !std::isfinite(__x.imag()))
return complex<_Tp>(_Tp(NAN), __x.real());
if (__x.real() == 0 && __x.imag() == 0)
return complex<_Tp>(_Tp(1), __x.imag());
if (__x.imag() == 0 && !std::__constexpr_isfinite(__x.real()))
if (__x.imag() == 0 && !std::isfinite(__x.real()))
return complex<_Tp>(std::abs(__x.real()), __x.imag());
return complex<_Tp>(std::cosh(__x.real()) * std::cos(__x.imag()), std::sinh(__x.real()) * std::sin(__x.imag()));
}
@@ -1238,11 +1236,11 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> cosh(const complex<_Tp>& __x) {
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> tanh(const complex<_Tp>& __x) {
if (std::__constexpr_isinf(__x.real())) {
if (!std::__constexpr_isfinite(__x.imag()))
if (!std::isfinite(__x.imag()))
return complex<_Tp>(std::copysign(_Tp(1), __x.real()), _Tp(0));
return complex<_Tp>(std::copysign(_Tp(1), __x.real()), std::copysign(_Tp(0), std::sin(_Tp(2) * __x.imag())));
}
if (std::__constexpr_isnan(__x.real()) && __x.imag() == 0)
if (std::isnan(__x.real()) && __x.imag() == 0)
return __x;
_Tp __2r(_Tp(2) * __x.real());
_Tp __2i(_Tp(2) * __x.imag());
@@ -1267,7 +1265,7 @@ template <class _Tp>
_LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
const _Tp __pi(atan2(+0., -0.));
if (std::__constexpr_isinf(__x.real())) {
if (std::__constexpr_isnan(__x.imag()))
if (std::isnan(__x.imag()))
return complex<_Tp>(__x.imag(), __x.real());
if (std::__constexpr_isinf(__x.imag())) {
if (__x.real() < _Tp(0))
@@ -1278,7 +1276,7 @@ _LIBCPP_HIDE_FROM_ABI complex<_Tp> acos(const complex<_Tp>& __x) {
return complex<_Tp>(__pi, std::signbit(__x.imag()) ? -__x.real() : __x.real());
return complex<_Tp>(_Tp(0), std::signbit(__x.imag()) ? __x.real() : -__x.real());
}
if (std::__constexpr_isnan(__x.real())) {
if (std::isnan(__x.real())) {
if (std::__constexpr_isinf(__x.imag()))
return complex<_Tp>(__x.real(), -__x.imag());
return complex<_Tp>(__x.real(), __x.real());
8 changes: 0 additions & 8 deletions libcxx/include/string
@@ -3462,14 +3462,6 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocat

// find

template <class _Traits>
struct _LIBCPP_HIDDEN __traits_eq {
typedef typename _Traits::char_type char_type;
_LIBCPP_HIDE_FROM_ABI bool operator()(const char_type& __x, const char_type& __y) _NOEXCEPT {
return _Traits::eq(__x, __y);
}
};

template <class _CharT, class _Traits, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::size_type
basic_string<_CharT, _Traits, _Allocator>::find(const value_type* __s, size_type __pos, size_type __n) const _NOEXCEPT {
2 changes: 0 additions & 2 deletions libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp
@@ -20,9 +20,7 @@

#include "test_macros.h"

static_assert(std::__constexpr_isnan(0.) == false, "");
static_assert(std::__constexpr_isinf(0.0) == false, "");
static_assert(std::__constexpr_isfinite(0.0) == true, "");

int main(int, char**)
{
@@ -7,7 +7,6 @@


class TestDAP_redirection_to_console(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test(self):
"""
Without proper stderr and stdout redirection, the following code would throw an
2 changes: 0 additions & 2 deletions lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py
@@ -12,7 +12,6 @@


class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
@skipIfLLVMTargetMissing("X86")
def test_core_file(self):
current_dir = os.path.dirname(__file__)
@@ -58,7 +57,6 @@ def test_core_file(self):
self.dap_server.request_next(threadId=32259)
self.assertEqual(self.get_stackFrames(), expected_frames)

@skipIfWindows
@skipIfLLVMTargetMissing("X86")
def test_core_file_source_mapping(self):
"""Test that sourceMap property is correctly applied when loading a core"""
30 changes: 10 additions & 20 deletions lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -9,10 +9,10 @@
import lldbdap_testcase
import time
import os
import re


class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_default(self):
"""
Tests the default launch of a simple program. No arguments,
@@ -27,7 +27,6 @@ def test_default(self):
lines = output.splitlines()
self.assertIn(program, lines[0], "make sure program path is in first argument")

@skipIfWindows
def test_termination(self):
"""
Tests the correct termination of lldb-dap upon a 'disconnect'
@@ -47,7 +46,6 @@ def test_termination(self):
# Check the return code
self.assertEqual(self.dap_server.process.poll(), 0)

@skipIfWindows
def test_stopOnEntry(self):
"""
Tests the default launch of a simple program that stops at the
@@ -66,7 +64,6 @@ def test_stopOnEntry(self):
reason, "breakpoint", 'verify stop isn\'t "main" breakpoint'
)

@skipIfWindows
def test_cwd(self):
"""
Tests the default launch of a simple program with a current working
@@ -92,15 +89,17 @@ def test_cwd(self):
)
self.assertTrue(found, "verified program working directory")

@skipIfWindows
def test_debuggerRoot(self):
"""
Tests the "debuggerRoot" will change the working directory of
the lldb-dap debug adaptor.
"""
program = self.getBuildArtifact("a.out")
program_parent_dir = os.path.realpath(os.path.dirname(os.path.dirname(program)))
commands = ["platform shell echo cwd = $PWD"]

var = "%cd%" if lldbplatformutil.getHostPlatform() == "windows" else "$PWD"
commands = [f"platform shell echo cwd = {var}"]

self.build_and_launch(
program, debuggerRoot=program_parent_dir, initCommands=commands
)
@@ -114,14 +113,13 @@ def test_debuggerRoot(self):
found = True
self.assertEqual(
program_parent_dir,
line[len(prefix) :],
line.strip()[len(prefix) :],
"lldb-dap working dir '%s' == '%s'"
% (program_parent_dir, line[6:]),
% (program_parent_dir, line[len(prefix) :]),
)
self.assertTrue(found, "verified lldb-dap working directory")
self.continue_to_exit()

@skipIfWindows
def test_sourcePath(self):
"""
Tests the "sourcePath" will set the target.source-map.
@@ -146,7 +144,6 @@ def test_sourcePath(self):
self.assertTrue(found, 'found "sourcePath" in console output')
self.continue_to_exit()

@skipIfWindows
def test_disableSTDIO(self):
"""
Tests the default launch of a simple program with STDIO disabled.
@@ -182,7 +179,6 @@ def test_shellExpandArguments_enabled(self):
quote_path, line, 'verify "%s" expanded to "%s"' % (glob, program)
)

@skipIfWindows
def test_shellExpandArguments_disabled(self):
"""
Tests the default launch of a simple program with shell expansion
@@ -204,7 +200,6 @@ def test_shellExpandArguments_disabled(self):
quote_path, line, 'verify "%s" stayed to "%s"' % (glob, glob)
)

@skipIfWindows
def test_args(self):
"""
Tests launch of a simple program with arguments
@@ -229,7 +224,6 @@ def test_args(self):
'arg[%i] "%s" not in "%s"' % (i + 1, quoted_arg, lines[i]),
)

@skipIfWindows
def test_environment(self):
"""
Tests launch of a simple program with environment variables
@@ -258,7 +252,6 @@ def test_environment(self):
found, '"%s" must exist in program environment (%s)' % (var, lines)
)

@skipIfWindows
@skipIf(
archs=["arm", "aarch64"]
) # failed run https://lab.llvm.org/buildbot/#/builders/96/builds/6933
@@ -344,7 +337,6 @@ def test_commands(self):
self.verify_commands("exitCommands", output, exitCommands)
self.verify_commands("terminateCommands", output, terminateCommands)

@skipIfWindows
def test_extra_launch_commands(self):
"""
Tests the "launchCommands" with extra launching settings
@@ -409,7 +401,6 @@ def test_extra_launch_commands(self):
output = self.get_console(timeout=lldbdap_testcase.DAPTestCaseBase.timeoutval)
self.verify_commands("exitCommands", output, exitCommands)

@skipIfWindows
def test_failing_launch_commands(self):
"""
Tests "launchCommands" failures prevents a launch.
@@ -418,7 +409,8 @@ def test_failing_launch_commands(self):
program = self.getBuildArtifact("a.out")

# Run an invalid launch command, in this case a bad path.
launchCommands = ['!target create "/bad/path%s"' % (program)]
bad_path = os.path.join("bad", "path")
launchCommands = ['!target create "%s%s"' % (bad_path, program)]

initCommands = ["target list", "platform list"]
preRunCommands = ["image list a.out", "image dump sections a.out"]
@@ -447,9 +439,8 @@ def test_failing_launch_commands(self):
# Verify all "launchCommands" were found in console output
# The launch should fail due to the invalid command.
self.verify_commands("launchCommands", output, launchCommands)
self.assertRegex(output, r"bad/path/.*does not exist")
self.assertRegex(output, re.escape(bad_path) + r".*does not exist")

@skipIfWindows
@skipIfNetBSD # Hangs on NetBSD as well
@skipIf(archs=["arm", "aarch64"], oslist=["linux"])
def test_terminate_commands(self):
Expand All @@ -476,7 +467,6 @@ def test_terminate_commands(self):
)
self.verify_commands("terminateCommands", output, terminateCommands)

@skipIfWindows
def test_version(self):
"""
Tests that "initialize" response contains the "version" string the same
4 changes: 4 additions & 0 deletions lldb/test/API/tools/lldb-dap/launch/main.c
@@ -1,6 +1,10 @@
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <direct.h>
#else
#include <unistd.h>
#endif

int main(int argc, char const *argv[], char const *envp[]) {
for (int i = 0; i < argc; ++i)
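The new guarded includes in main.c exist so the test program can call the platform's directory APIs on both Windows and POSIX hosts. Below is a minimal sketch of the usual pattern those headers enable; it is an assumption for illustration only (the test's actual calls are not shown in this hunk), and the portable_getcwd name is invented for the example.

#include <cstdio>
#ifdef _WIN32
#include <direct.h> // provides _getcwd on Windows
#define portable_getcwd _getcwd
#else
#include <unistd.h> // provides getcwd on POSIX
#define portable_getcwd getcwd
#endif

int main() {
  char buf[1024];
  // Both variants return a pointer to buf on success and NULL on failure.
  if (portable_getcwd(buf, sizeof(buf)))
    std::printf("cwd = %s\n", buf);
  return 0;
}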
1 change: 1 addition & 0 deletions llvm/benchmarks/CMakeLists.txt
@@ -5,3 +5,4 @@ set(LLVM_LINK_COMPONENTS
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)
63 changes: 63 additions & 0 deletions llvm/benchmarks/FormatVariadicBM.cpp
@@ -0,0 +1,63 @@
//===- FormatVariadicBM.cpp - formatv() benchmark ---------- --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "benchmark/benchmark.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <string>
#include <vector>

using namespace llvm;
using namespace std;

// Generate a list of format strings that have `NumReplacements` replacements
// by permuting the replacements and some literal text.
static vector<string> getFormatStrings(int NumReplacements) {
vector<string> Components;
for (int I = 0; I < NumReplacements; I++)
Components.push_back("{" + to_string(I) + "}");
// Intersperse these with some other literal text (_).
const string_view Literal = "____";
for (char C : Literal)
Components.push_back(string(1, C));

vector<string> Formats;
do {
string Concat;
for (const string &C : Components)
Concat += C;
Formats.emplace_back(Concat);
} while (next_permutation(Components.begin(), Components.end()));
return Formats;
}

// Generate the set of formats to exercise outside the benchmark code.
static const vector<vector<string>> Formats = {
getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
getFormatStrings(4), getFormatStrings(5),
};

// Benchmark formatv() for a variety of format strings and 1-5 replacements.
static void BM_FormatVariadic(benchmark::State &state) {
for (auto _ : state) {
for (const string &Fmt : Formats[0])
formatv(Fmt.c_str(), 1).str();
for (const string &Fmt : Formats[1])
formatv(Fmt.c_str(), 1, 2).str();
for (const string &Fmt : Formats[2])
formatv(Fmt.c_str(), 1, 2, 3).str();
for (const string &Fmt : Formats[3])
formatv(Fmt.c_str(), 1, 2, 3, 4).str();
for (const string &Fmt : Formats[4])
formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
}
}

BENCHMARK(BM_FormatVariadic);

BENCHMARK_MAIN();
2 changes: 1 addition & 1 deletion llvm/docs/CodeOfConduct.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Unported License`_.

.. _Django Project: https://www.djangoproject.com/conduct/
.. _Speak Up! project: http://speakup.io/coc.html
.. _sexual and gender-based violence:
.. _sexual and gender-based violence: https://hr.un.org/sites/hr.un.org/files/SEA%20Glossary%20%20%5BSecond%20Edition%20-%202017%5D%20-%20English_0.pdf
.. _hate crimes: https://hatecrime.osce.org
.. _hate speech: https://www.un.org/en/genocideprevention/documents/UN%20Strategy%20and%20Plan%20of%20Action%20on%20Hate%20Speech%2018%20June%20SYNOPSIS.pdf
.. _Creative Commons Attribution 3.0 Unported License: http://creativecommons.org/licenses/by/3.0/
39 changes: 22 additions & 17 deletions llvm/include/llvm/Support/FormatVariadic.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,23 +67,20 @@ class formatv_object_base {
protected:
StringRef Fmt;
ArrayRef<support::detail::format_adapter *> Adapters;

static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad);

static std::pair<ReplacementItem, StringRef>
splitLiteralAndReplacement(StringRef Fmt);
bool Validate;

formatv_object_base(StringRef Fmt,
ArrayRef<support::detail::format_adapter *> Adapters)
: Fmt(Fmt), Adapters(Adapters) {}
ArrayRef<support::detail::format_adapter *> Adapters,
bool Validate)
: Fmt(Fmt), Adapters(Adapters), Validate(Validate) {}

formatv_object_base(formatv_object_base const &rhs) = delete;
formatv_object_base(formatv_object_base &&rhs) = default;

public:
void format(raw_ostream &S) const {
for (auto &R : parseFormatString(Fmt)) {
const auto Replacements = parseFormatString(Fmt, Adapters.size(), Validate);
for (const auto &R : Replacements) {
if (R.Type == ReplacementType::Empty)
continue;
if (R.Type == ReplacementType::Literal) {
Expand All @@ -101,9 +98,10 @@ class formatv_object_base {
Align.format(S, R.Options);
}
}
static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt);

static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec);
// Parse and optionally validate format string (in debug builds).
static SmallVector<ReplacementItem, 2>
parseFormatString(StringRef Fmt, size_t NumArgs, bool Validate);

std::string str() const {
std::string Result;
Expand Down Expand Up @@ -149,8 +147,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
};

public:
formatv_object(StringRef Fmt, Tuple &&Params)
: formatv_object_base(Fmt, ParameterPointers),
formatv_object(StringRef Fmt, Tuple &&Params, bool Validate)
: formatv_object_base(Fmt, ParameterPointers, Validate),
Parameters(std::move(Params)) {
ParameterPointers = std::apply(create_adapters(), Parameters);
}
Expand Down Expand Up @@ -247,15 +245,22 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
// assertion. Otherwise, it will try to do something reasonable, but in general
// the details of what that is are undefined.
//

// formatv() with validation enabled or disabled by the first argument.
template <typename... Ts>
inline auto formatv(const char *Fmt, Ts &&...Vals)
inline auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
-> formatv_object<decltype(std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...))> {
using ParamTuple = decltype(std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...));
return formatv_object<ParamTuple>(
Fmt, std::make_tuple(support::detail::build_format_adapter(
std::forward<Ts>(Vals))...));
auto Params = std::make_tuple(
support::detail::build_format_adapter(std::forward<Ts>(Vals))...);
return formatv_object<ParamTuple>(Fmt, std::move(Params), Validate);
}

// formatv() with validation enabled.
template <typename... Ts> inline auto formatv(const char *Fmt, Ts &&...Vals) {
return formatv<Ts...>(true, Fmt, std::forward<Ts>(Vals)...);
}

} // end namespace llvm
Expand Down
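
As a quick illustration of the two entry points declared above (this sketch is not part of the patch; the values and printed text are made up), the default formatv() keeps validation on, while the new bool-prefixed overload turns it off explicitly:

#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Default entry point: validation enabled (effective wherever the
  // validation code is compiled in, i.e. asserts builds by default).
  outs() << formatv("{0} + {1} = {2}", 1, 2, 3) << "\n";

  // New overload: the leading bool disables validation, e.g. for format
  // strings assembled at runtime whose arity is not statically known.
  outs() << formatv(false, "{0} and {1}", "lhs", "rhs") << "\n";
  return 0;
}
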
6 changes: 2 additions & 4 deletions llvm/lib/Analysis/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,6 @@ add_llvm_component_library(LLVMAnalysis
TargetParser
)

include(CheckCXXSymbolExists)
check_cxx_symbol_exists(logf128 math.h HAS_LOGF128)
if(HAS_LOGF128)
target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
if(LLVM_HAS_LOGF128)
target_compile_definitions(LLVMAnalysis PRIVATE HAS_LOGF128)
endif()
15 changes: 7 additions & 8 deletions llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2061,10 +2061,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();

bool PrevInstInSameSection =
(!PrevInstBB ||
PrevInstBB->getSectionID() == MI->getParent()->getSectionID());
if (DL == PrevInstLoc && PrevInstInSameSection) {
bool PrevInstInDiffBB = PrevInstBB && PrevInstBB != MI->getParent();
if (DL == PrevInstLoc && !PrevInstInDiffBB) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
Expand Down Expand Up @@ -2093,8 +2091,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// possibly debug information; we want it to have a source location.
// - Instruction is at the top of a block; we don't want to inherit the
// location from the physically previous (maybe unrelated) block.
if (UnknownLocations == Enable || PrevLabel ||
(PrevInstBB && PrevInstBB != MI->getParent())) {
if (UnknownLocations == Enable || PrevLabel || PrevInstInDiffBB) {
// Preserve the file and column numbers, if we can, to save space in
// the encoded line table.
// Do not update PrevInstLoc, it remembers the last non-0 line.
Expand All @@ -2119,9 +2116,11 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
PrologEndLoc = DebugLoc();
}
// If the line changed, we call that a new statement; unless we went to
// line 0 and came back, in which case it is not a new statement.
// line 0 and came back, in which case it is not a new statement. We also
// mark is_stmt for the first non-0 line in each BB, in case a predecessor BB
// ends with a different line.
unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
if (DL.getLine() && DL.getLine() != OldLine)
if (DL.getLine() && (DL.getLine() != OldLine || PrevInstInDiffBB))
Flags |= DWARF2_FLAG_IS_STMT;

const MDNode *Scope = DL.getScope();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7031,7 +7031,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// If ISD::IS_FPCLASS should be expanded, do it right now, because the
// expansion can use illegal types. Making expansion early allows
// legalizing these types prior to selection.
if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
if (!TLI.isOperationLegal(ISD::IS_FPCLASS, ArgVT) &&
!TLI.isOperationCustom(ISD::IS_FPCLASS, ArgVT)) {
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
setValue(&I, Result);
return;
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/IR/BasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
Dest->adoptDbgRecords(Src, Last, true);
// adoptDbgRecords will release any trailers.
if (Dest == end()) {
      // Absorb the trailing markers from Src.
assert(FromLast == Src->getTrailingDbgRecords());
createMarker(Dest)->absorbDebugValues(*FromLast, true);
FromLast->eraseFromParent();
Src->deleteTrailingDbgRecords();
} else {
// adoptDbgRecords will release any trailers.
Dest->adoptDbgRecords(Src, Last, true);
}
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?
Expand Down
85 changes: 72 additions & 13 deletions llvm/lib/Support/FormatVariadic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ static std::optional<AlignStyle> translateLocChar(char C) {
LLVM_BUILTIN_UNREACHABLE;
}

bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
size_t &Align, char &Pad) {
Where = AlignStyle::Right;
Align = 0;
Pad = ' ';
Expand All @@ -35,8 +35,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,

if (Spec.size() > 1) {
// A maximum of 2 characters at the beginning can be used for something
// other
// than the width.
// other than the width.
// If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
// contains the width.
// Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
Expand All @@ -55,8 +54,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
return !Failed;
}

std::optional<ReplacementItem>
formatv_object_base::parseReplacementItem(StringRef Spec) {
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
StringRef RepString = Spec.trim("{}");

// If the replacement sequence does not start with a non-negative integer,
Expand All @@ -82,15 +80,14 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
RepString = StringRef();
}
RepString = RepString.trim();
if (!RepString.empty()) {
assert(false && "Unexpected characters found in replacement string!");
}
assert(RepString.empty() &&
"Unexpected characters found in replacement string!");

return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
}

std::pair<ReplacementItem, StringRef>
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
static std::pair<ReplacementItem, StringRef>
splitLiteralAndReplacement(StringRef Fmt) {
while (!Fmt.empty()) {
// Everything up until the first brace is a literal.
if (Fmt.front() != '{') {
Expand Down Expand Up @@ -143,15 +140,77 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
return std::make_pair(ReplacementItem{Fmt}, StringRef());
}

#ifndef NDEBUG
#define ENABLE_VALIDATION 1
#else
#define ENABLE_VALIDATION 0 // Set to 1 to conveniently enable validation in release mode.
#endif

SmallVector<ReplacementItem, 2>
formatv_object_base::parseFormatString(StringRef Fmt) {
formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
bool Validate) {
SmallVector<ReplacementItem, 2> Replacements;
ReplacementItem I;

#if ENABLE_VALIDATION
const StringRef SavedFmtStr = Fmt;
size_t NumExpectedArgs = 0;
#endif

while (!Fmt.empty()) {
ReplacementItem I;
std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
if (I.Type != ReplacementType::Empty)
Replacements.push_back(I);
#if ENABLE_VALIDATION
if (I.Type == ReplacementType::Format)
NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
#endif
}

#if ENABLE_VALIDATION
if (!Validate)
return Replacements;

// Perform additional validation. Verify that the number of arguments matches
// the number of replacement indices and that there are no holes in the
// replacement indices.

// When validation fails, return an array of replacement items that
  // will print an error message as the output of this formatv() (used when
// validation is enabled in release mode).
auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
return SmallVector<ReplacementItem, 2>{
ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
};

if (NumExpectedArgs != NumArgs) {
errs() << formatv(
"Expected {0} Args, but got {1} for format string '{2}'\n",
NumExpectedArgs, NumArgs, SavedFmtStr);
assert(0 && "Invalid formatv() call");
return getErrorReplacements("Unexpected number of arguments");
}

// Find the number of unique indices seen. All replacement indices
// are < NumExpectedArgs.
SmallVector<bool> Indices(NumExpectedArgs);
size_t Count = 0;
for (const ReplacementItem &I : Replacements) {
if (I.Type != ReplacementType::Format || Indices[I.Index])
continue;
Indices[I.Index] = true;
++Count;
}

if (Count != NumExpectedArgs) {
errs() << formatv(
"Replacement field indices cannot have holes for format string '{0}'\n",
SavedFmtStr);
assert(0 && "Invalid format string");
return getErrorReplacements("Replacement indices have holes");
}
#endif // ENABLE_VALIDATION
return Replacements;
}

Expand Down
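
To make the new checks concrete, here is a hedged sketch (not from the patch) of calls that the validation above rejects; with validation compiled in, the mismatches below print the "Invalid formatv() call" text and trip the assert in asserts builds:

#include "llvm/Support/FormatVariadic.h"

using namespace llvm;

void validationExamples() {
  // Argument-count mismatch: the string references two arguments ({0}, {1})
  // but three values are supplied.
  (void)formatv("{0} {1}", 1, 2, 3).str();

  // Index hole: indices 0 and 2 are used but index 1 never is, so the
  // replacement indices are not contiguous.
  (void)formatv("{0} {2}", "a", "b", "c").str();

  // Opting out via the new bool overload restores the old, unchecked
  // behavior for both strings above.
  (void)formatv(false, "{0} {1}", 1, 2, 3).str();
}
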
1 change: 1 addition & 0 deletions llvm/lib/Support/Z3Solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ using namespace llvm;
#include "llvm/ADT/Twine.h"

#include <set>
#include <unordered_map>

#include <z3.h>

Expand Down
63 changes: 62 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1073,6 +1073,33 @@ static bool isAllActivePredicate(Value *Pred) {
m_ConstantInt<AArch64SVEPredPattern::all>()));
}

// Simplify a unary operation where the predicate has all inactive lanes by
// replacing the instruction with its operand.
static std::optional<Instruction *>
instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
bool hasInactiveVector) {
int PredOperand = hasInactiveVector ? 1 : 0;
int ReplaceOperand = hasInactiveVector ? 0 : 1;
if (match(II.getOperand(PredOperand), m_ZeroInt())) {
IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
return IC.eraseInstFromFunction(II);
}
return std::nullopt;
}

// Simplify a unary operation where the predicate has all inactive lanes, or
// replace the unused first operand with undef when all lanes are active.
static std::optional<Instruction *>
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
if (isAllActivePredicate(II.getOperand(1)) &&
!isa<llvm::UndefValue>(II.getOperand(0)) &&
!isa<llvm::PoisonValue>(II.getOperand(0))) {
Value *Undef = llvm::UndefValue::get(II.getType());
return IC.replaceOperand(II, 0, Undef);
}
return instCombineSVENoActiveReplace(IC, II, true);
}

// Erase unary operation where predicate has all inactive lanes
static std::optional<Instruction *>
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
Expand Down Expand Up @@ -2109,7 +2136,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;

case Intrinsic::aarch64_sve_fcvt_bf16f32:
case Intrinsic::aarch64_sve_fcvt_f16f32:
case Intrinsic::aarch64_sve_fcvt_f16f64:
case Intrinsic::aarch64_sve_fcvt_f32f16:
case Intrinsic::aarch64_sve_fcvt_f32f64:
case Intrinsic::aarch64_sve_fcvt_f64f16:
case Intrinsic::aarch64_sve_fcvt_f64f32:
case Intrinsic::aarch64_sve_fcvtlt_f32f16:
case Intrinsic::aarch64_sve_fcvtlt_f64f32:
case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
case Intrinsic::aarch64_sve_fcvtnt_f16f32:
case Intrinsic::aarch64_sve_fcvtnt_f32f64:
case Intrinsic::aarch64_sve_fcvtx_f32f64:
case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
case Intrinsic::aarch64_sve_fcvtzs:
case Intrinsic::aarch64_sve_fcvtzs_i32f16:
case Intrinsic::aarch64_sve_fcvtzs_i32f64:
case Intrinsic::aarch64_sve_fcvtzs_i64f16:
case Intrinsic::aarch64_sve_fcvtzs_i64f32:
case Intrinsic::aarch64_sve_fcvtzu:
case Intrinsic::aarch64_sve_fcvtzu_i32f16:
case Intrinsic::aarch64_sve_fcvtzu_i32f64:
case Intrinsic::aarch64_sve_fcvtzu_i64f16:
case Intrinsic::aarch64_sve_fcvtzu_i64f32:
case Intrinsic::aarch64_sve_scvtf:
case Intrinsic::aarch64_sve_scvtf_f16i32:
case Intrinsic::aarch64_sve_scvtf_f16i64:
case Intrinsic::aarch64_sve_scvtf_f32i64:
case Intrinsic::aarch64_sve_scvtf_f64i32:
case Intrinsic::aarch64_sve_ucvtf:
case Intrinsic::aarch64_sve_ucvtf_f16i32:
case Intrinsic::aarch64_sve_ucvtf_f16i64:
case Intrinsic::aarch64_sve_ucvtf_f32i64:
case Intrinsic::aarch64_sve_ucvtf_f64i32:
return instCombineSVEAllOrNoActiveUnary(IC, II);
case Intrinsic::aarch64_sve_st1_scatter:
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ struct AMDGPULowerBufferFatPointersPass
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;

void initializeGCNDPPCombinePass(PassRegistry &);
extern char &GCNDPPCombineID;
void initializeGCNDPPCombineLegacyPass(PassRegistry &);
extern char &GCNDPPCombineLegacyID;

void initializeSIFoldOperandsLegacyPass(PassRegistry &);
extern char &SIFoldOperandsLegacyID;
Expand Down
17 changes: 11 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,12 +426,17 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
// default unless marked custom/legal.
setOperationAction(
ISD::IS_FPCLASS,
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
Custom);
setOperationAction(ISD::IS_FPCLASS,
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,
MVT::v16f64},
Custom);

if (isTypeLegal(MVT::f16))
setOperationAction(ISD::IS_FPCLASS,
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},
Custom);

// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,5 @@ MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
#undef MACHINE_FUNCTION_PASS
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,7 @@ template <> struct DOTGraphTraits<SplitGraph> : public DefaultDOTGraphTraits {
case SplitGraph::EdgeKind::IndirectCall:
return "style=\"dashed\"";
}
llvm_unreachable("Unknown SplitGraph::EdgeKind enum");
}
};

Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
Expand Down Expand Up @@ -403,7 +404,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeR600VectorRegMergerPass(*PR);
initializeGlobalISel(*PR);
initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
initializeGCNDPPCombinePass(*PR);
initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeSILowerWWMCopiesPass(*PR);
Expand Down Expand Up @@ -1273,7 +1274,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsLegacyID);
if (EnableDPPCombine)
addPass(&GCNDPPCombineID);
addPass(&GCNDPPCombineLegacyID);
addPass(&SILoadStoreOptimizerID);
if (isPassEnabled(EnableSDWAPeephole)) {
addPass(&SIPeepholeSDWAID);
Expand Down
55 changes: 41 additions & 14 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//

#include "GCNDPPCombine.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Expand All @@ -51,7 +52,7 @@ STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");

namespace {

class GCNDPPCombine : public MachineFunctionPass {
class GCNDPPCombine {
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const GCNSubtarget *ST;
Expand All @@ -76,12 +77,18 @@ class GCNDPPCombine : public MachineFunctionPass {

bool combineDPPMov(MachineInstr &MI) const;

int getDPPOp(unsigned Op, bool IsShrinkable) const;
bool isShrinkable(MachineInstr &MI) const;

public:
bool run(MachineFunction &MF);
};

class GCNDPPCombineLegacy : public MachineFunctionPass {
public:
static char ID;

GCNDPPCombine() : MachineFunctionPass(ID) {
initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
}
GCNDPPCombineLegacy() : MachineFunctionPass(ID) {}

bool runOnMachineFunction(MachineFunction &MF) override;

Expand All @@ -96,22 +103,19 @@ class GCNDPPCombine : public MachineFunctionPass {
return MachineFunctionProperties()
.set(MachineFunctionProperties::Property::IsSSA);
}

private:
int getDPPOp(unsigned Op, bool IsShrinkable) const;
bool isShrinkable(MachineInstr &MI) const;
};

} // end anonymous namespace

INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
INITIALIZE_PASS(GCNDPPCombineLegacy, DEBUG_TYPE, "GCN DPP Combine", false,
false)

char GCNDPPCombine::ID = 0;
char GCNDPPCombineLegacy::ID = 0;

char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
char &llvm::GCNDPPCombineLegacyID = GCNDPPCombineLegacy::ID;

FunctionPass *llvm::createGCNDPPCombinePass() {
return new GCNDPPCombine();
return new GCNDPPCombineLegacy();
}

bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
Expand Down Expand Up @@ -749,9 +753,16 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return !Rollback;
}

bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool GCNDPPCombineLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

return GCNDPPCombine().run(MF);
}

bool GCNDPPCombine::run(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
if (!ST->hasDPP() || skipFunction(MF.getFunction()))
if (!ST->hasDPP())
return false;

MRI = &MF.getRegInfo();
Expand Down Expand Up @@ -781,3 +792,19 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
}
return Changed;
}

PreservedAnalyses GCNDPPCombinePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &) {
if (MF.getFunction().hasOptNone())
return PreservedAnalyses::all();

MFPropsModifier _(*this, MF);

bool Changed = GCNDPPCombine().run(MF);
if (!Changed)
return PreservedAnalyses::all();

auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
return PA;
}
28 changes: 28 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//=======--- GCNDPPCombine.h - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
#define LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {
class GCNDPPCombinePass : public PassInfoMixin<GCNDPPCombinePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MAM);

MachineFunctionProperties getRequiredProperties() {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNDPPCOMBINE_H
7 changes: 7 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,6 +1221,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::ppcf128, Custom);
}

// 128 bit shifts can be accomplished via 3 instructions for SHL and
Expand Down Expand Up @@ -11479,6 +11480,12 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
uint64_t RHSC = Op.getConstantOperandVal(1);
SDLoc Dl(Op);
FPClassTest Category = static_cast<FPClassTest>(RHSC);
if (LHS.getValueType() == MVT::ppcf128) {
// The higher part determines the value class.
LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
DAG.getConstant(1, Dl, MVT::i32));
}

return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrAliases.td
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,10 @@ def : InstAlias<"flush", (FLUSH), 0>;
// unimp -> unimp 0
def : InstAlias<"unimp", (UNIMP 0), 0>;

// Not in the spec, but we follow the Solaris behavior of keeping `illtrap`
// interchangeable with `unimp` at all times.
def : MnemonicAlias<"illtrap", "unimp">;

def : MnemonicAlias<"iflush", "flush">;

def : MnemonicAlias<"stub", "stb">;
Expand Down
19 changes: 17 additions & 2 deletions llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2963,14 +2963,29 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx512_vpermilvar_ps_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(32);
if (IC.SimplifyDemandedBits(&II, 1, APInt(32, 0b00011), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx512_vpermilvar_pd_512: {
if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}

KnownBits KnownMask(64);
if (IC.SimplifyDemandedBits(&II, 1, APInt(64, 0b00010), KnownMask))
return &II;
break;
}

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6529,6 +6529,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
Value *Op2 = I->getOperand(1);
if (!isa<Constant>(Op2) && PSE.getSE()->isSCEVable(Op2->getType()) &&
isa<SCEVConstant>(PSE.getSCEV(Op2))) {
Op2 = cast<SCEVConstant>(PSE.getSCEV(Op2))->getValue();
}
auto Op2Info = TTI.getOperandInfo(Op2);
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
Legal->isInvariant(Op2))
Expand Down
67 changes: 33 additions & 34 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4757,13 +4757,12 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
});
});
const unsigned AbsoluteDiff = std::abs(*Diff);
if (IsPossibleStrided &&
(IsAnyPointerUsedOutGraph ||
((Sz > MinProfitableStridedLoads ||
(AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
has_single_bit(AbsoluteDiff))) &&
AbsoluteDiff > Sz) ||
*Diff == -(static_cast<int>(Sz) - 1))) {
if (IsPossibleStrided && (IsAnyPointerUsedOutGraph ||
((Sz > MinProfitableStridedLoads ||
(AbsoluteDiff <= MaxProfitableLoadStride * Sz &&
has_single_bit(AbsoluteDiff))) &&
AbsoluteDiff > Sz) ||
*Diff == -(static_cast<int>(Sz) - 1))) {
int Stride = *Diff / static_cast<int>(Sz - 1);
if (*Diff == Stride * static_cast<int>(Sz - 1)) {
Align Alignment =
Expand All @@ -4778,8 +4777,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
if (Ptr == PtrN)
Dist = *Diff;
else if (Ptr != Ptr0)
Dist =
*getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist ||
Expand Down Expand Up @@ -4822,14 +4820,14 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
if (VectorizedCnt == VL.size() / VF) {
            // Compare masked gather cost and loads + insertsubvector costs.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, PointerOps, PointerOps.front(), Instruction::GetElementPtr,
CostKind, ScalarTy, VecTy);
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, PointerOps, PointerOps.front(),
Instruction::GetElementPtr, CostKind, ScalarTy, VecTy);
InstructionCost MaskedGatherCost =
TTI.getGatherScatterOpCost(
Instruction::Load, VecTy,
cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
TTI.getGatherScatterOpCost(Instruction::Load, VecTy,
cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment,
CostKind) +
VectorGEPCost - ScalarGEPCost;
InstructionCost VecLdCost = 0;
auto *SubVecTy = getWidenedType(ScalarTy, VF);
Expand All @@ -4853,23 +4851,23 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::Load,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getStridedMemoryOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
LI0->getPointerOperand(),
/*VariableMask=*/false,
CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::ScatterVectorize: {
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::GetElementPtr,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getGatherScatterOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
LI0->getPointerOperand(), Instruction::GetElementPtr, CostKind,
ScalarTy, SubVecTy);
VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy,
LI0->getPointerOperand(),
/*VariableMask=*/false,
CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::Gather:
Expand All @@ -4880,8 +4878,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
for (int Idx : seq<int>(0, VL.size()))
ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
VecLdCost +=
::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
ShuffleMask, CostKind, I * VF, SubVecTy);
::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy, ShuffleMask,
CostKind, I * VF, SubVecTy);
}
// If masked gather cost is higher - better to vectorize, so
// consider it as a gather node. It will be better estimated
Expand All @@ -4897,10 +4895,9 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
// increases the cost.
Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
bool ProfitableGatherPointers =
L && Sz > 2 &&
static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
})) <= Sz / 2;
L && Sz > 2 && static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
})) <= Sz / 2;
if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
auto *GEP = dyn_cast<GetElementPtrInst>(P);
return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||
Expand Down Expand Up @@ -9465,6 +9462,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
if (isa<CmpInst>(VL.front()))
ScalarTy = VL.front()->getType();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; FIXME: Start == End for access group with AddRec.
define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_checks_with_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.iv = getelementptr inbounds i32, ptr %P, i32 %iv
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP1]]:
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: ((4 * %y) + %P))
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group [[GRP2]]:
; CHECK-NEXT: (Low: %S High: (4 + %S))
; CHECK-NEXT: Member: %S
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
br label %loop

loop:
%iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
%gep.iv = getelementptr inbounds i32, ptr %P, i32 %iv
%l = load i32, ptr %S
store i32 %l, ptr %gep.iv, align 4
%iv.next = add nsw i32 %iv, 1
%c.2 = icmp slt i32 %iv.next, %l
br i1 %c.2, label %loop, label %exit

exit:
ret void
}

; FIXME: Start > End for access group with AddRec.
define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i32 %y) {
; CHECK-LABEL: 'runtime_check_with_symbolic_max_btc_neg_2'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.iv = getelementptr inbounds i32, ptr %P, i32 %iv
; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
; CHECK-NEXT: ptr %S
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP3]]:
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: (-4 + (4 * %y) + %P))
; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
; CHECK-NEXT: Group [[GRP4]]:
; CHECK-NEXT: (Low: %S High: (4 + %S))
; CHECK-NEXT: Member: %S
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
br label %loop

loop:
%iv = phi i32 [ %y, %entry ], [ %iv.next, %loop ]
%gep.iv = getelementptr inbounds i32, ptr %P, i32 %iv
%l = load i32, ptr %S
store i32 %l, ptr %gep.iv, align 4
%iv.next = add nsw i32 %iv, 1
%a = and i32 %l, -2
%c.2 = icmp slt i32 %iv.next, %a
br i1 %c.2, label %loop, label %exit

exit:
ret void
}

define i32 @check_no_dep_via_bounds_compare_symbolic_max_btc_neg_1(ptr %P, i32 %x, i32 %y) {
; CHECK-LABEL: 'check_no_dep_via_bounds_compare_symbolic_max_btc_neg_1'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i64 0, ptr %gep.iv, align 4 ->
; CHECK-NEXT: %l.2 = load i32, ptr %gep.P.4, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %l = load i32, ptr %gep.iv, align 4 ->
; CHECK-NEXT: store i64 0, ptr %gep.iv, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {(8 + (8 * %y) + %P),+,8}<%loop> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%gep.P.4 = getelementptr inbounds nuw i8, ptr %P, i32 4
%gep.P.8 = getelementptr inbounds nuw i8, ptr %P, i32 8
br label %loop

loop:
%1 = phi i32 [ %x, %entry ], [ %sel, %loop.latch ]
%iv = phi i32 [ %y, %entry ], [ %iv.next, %loop.latch ]
%gep.iv = getelementptr inbounds i64, ptr %gep.P.8, i32 %iv
%l = load i32, ptr %gep.iv, align 4
%c.1 = icmp eq i32 %l, 3
br i1 %c.1, label %loop.latch, label %if.then

if.then: ; preds = %for.body
store i64 0, ptr %gep.iv, align 4
%l.2 = load i32, ptr %gep.P.4
br label %loop.latch

loop.latch:
%sel = phi i32 [ %l.2, %if.then ], [ %1, %loop ]
%iv.next = add nsw i32 %iv, 1
%c.2 = icmp slt i32 %iv.next, %sel
br i1 %c.2, label %loop, label %exit

exit:
%res = phi i32 [ %iv.next, %loop.latch ]
ret i32 %res
}
115 changes: 0 additions & 115 deletions llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll

This file was deleted.
