| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,262 @@ | ||
| ! RUN: %flang_fc1 -fopenmp -emit-llvm %s -o - | FileCheck %s | ||
|
|
||
| ! Combinational testing of control flow graph and builder insertion points | ||
| ! in mlir-to-llvm conversion: | ||
| ! - mixing multiple delayed privatizations and multiple reductions | ||
| ! - multiple blocks in the private alloc region | ||
| ! - private alloc region has to read from the mold variable | ||
| ! - firstprivate | ||
| ! - multiple blocks in the private copy region | ||
| ! - multiple blocks in the reduction init region | ||
| ! - reduction init region has to read from the mold variable | ||
| ! - re-used omp.private ops | ||
| ! - re-used omp.reduction.declare ops | ||
| ! - unstructured code inside of the parallel region | ||
| ! - needs private dealloc region, and this has multiple blocks | ||
| ! - needs reduction cleanup region, and this has multiple blocks | ||
|
|
||
| ! This maybe belongs in the mlir tests, but what we are doing here is complex | ||
| ! enough that I find the kind of minimised mlir code preferred by mlir reviewers | ||
| ! hard to read without some fortran here for reference. Nothing like this would | ||
| ! be generated by other upstream users of the MLIR OpenMP dialect. | ||
|
|
||
| subroutine worst_case(a, b, c, d) | ||
| real, allocatable :: a(:), b(:), c(:), d(:) | ||
| integer i | ||
|
|
||
| !$omp parallel firstprivate(a,b) reduction(+:c,d) | ||
| if (sum(a) == 1) stop 1 | ||
| !$omp end parallel | ||
| end subroutine | ||
|
|
||
| ! CHECK-LABEL: define internal void @worst_case_..omp_par | ||
| ! CHECK-NEXT: omp.par.entry: | ||
| ! [reduction alloc regions inlined here] | ||
| ! CHECK: br label %omp.private.latealloc | ||
|
|
||
| ! CHECK: omp.private.latealloc: ; preds = %omp.par.entry | ||
| ! CHECK-NEXT: br label %omp.private.alloc5 | ||
|
|
||
| ! CHECK: omp.private.alloc5: ; preds = %omp.private.latealloc | ||
| ! [begin private alloc for first var] | ||
| ! [read the length from the mold argument] | ||
| ! [if it is non-zero...] | ||
| ! CHECK: br i1 {{.*}}, label %omp.private.alloc6, label %omp.private.alloc7 | ||
|
|
||
| ! CHECK: omp.private.alloc7: ; preds = %omp.private.alloc5 | ||
| ! [finish private alloc for first var with zero extent] | ||
| ! CHECK: br label %omp.private.alloc8 | ||
|
|
||
| ! CHECK: omp.private.alloc8: ; preds = %omp.private.alloc6, %omp.private.alloc7 | ||
| ! CHECK-NEXT: br label %omp.region.cont4 | ||
|
|
||
| ! CHECK: omp.region.cont4: ; preds = %omp.private.alloc8 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.private.alloc | ||
|
|
||
| ! CHECK: omp.private.alloc: ; preds = %omp.region.cont4 | ||
| ! [begin private alloc for second var] | ||
| ! [read the length from the mold argument] | ||
| ! [if it is non-zero...] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.private.alloc1, label %omp.private.alloc2 | ||
|
|
||
| ! CHECK: omp.private.alloc2: ; preds = %omp.private.alloc | ||
| ! [finish private alloc for second var with zero extent] | ||
| ! CHECK: br label %omp.private.alloc3 | ||
|
|
||
| ! CHECK: omp.private.alloc3: ; preds = %omp.private.alloc1, %omp.private.alloc2 | ||
| ! CHECK-NEXT: br label %omp.region.cont | ||
|
|
||
| ! CHECK: omp.region.cont: ; preds = %omp.private.alloc3 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.private.copy | ||
|
|
||
| ! CHECK: omp.private.copy: ; preds = %omp.region.cont | ||
| ! CHECK-NEXT: br label %omp.private.copy10 | ||
|
|
||
| ! CHECK: omp.private.copy10: ; preds = %omp.private.copy | ||
| ! [begin firstprivate copy for first var] | ||
| ! [read the length, is it non-zero?] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.private.copy11, label %omp.private.copy12 | ||
|
|
||
| ! CHECK: omp.private.copy12: ; preds = %omp.private.copy11, %omp.private.copy10 | ||
| ! CHECK-NEXT: br label %omp.region.cont9 | ||
|
|
||
| ! CHECK: omp.region.cont9: ; preds = %omp.private.copy12 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.private.copy14 | ||
|
|
||
| ! CHECK: omp.private.copy14: ; preds = %omp.region.cont9 | ||
| ! [begin firstprivate copy for second var] | ||
| ! [read the length, is it non-zero?] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.private.copy15, label %omp.private.copy16 | ||
|
|
||
| ! CHECK: omp.private.copy16: ; preds = %omp.private.copy15, %omp.private.copy14 | ||
| ! CHECK-NEXT: br label %omp.region.cont13 | ||
|
|
||
| ! CHECK: omp.region.cont13: ; preds = %omp.private.copy16 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.reduction.init | ||
|
|
||
| ! CHECK: omp.reduction.init: ; preds = %omp.region.cont13 | ||
| ! [deferred stores for results of reduction alloc regions] | ||
| ! CHECK: br label %[[VAL_96:.*]] | ||
|
|
||
| ! CHECK: omp.reduction.neutral: ; preds = %omp.reduction.init | ||
| ! [start of reduction initialization region] | ||
| ! [null check:] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral18, label %omp.reduction.neutral19 | ||
|
|
||
| ! CHECK: omp.reduction.neutral19: ; preds = %omp.reduction.neutral | ||
| ! [malloc and assign the default value to the reduction variable] | ||
| ! CHECK: br label %omp.reduction.neutral20 | ||
|
|
||
| ! CHECK: omp.reduction.neutral20: ; preds = %omp.reduction.neutral18, %omp.reduction.neutral19 | ||
| ! CHECK-NEXT: br label %omp.region.cont17 | ||
|
|
||
| ! CHECK: omp.region.cont17: ; preds = %omp.reduction.neutral20 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.reduction.neutral22 | ||
|
|
||
| ! CHECK: omp.reduction.neutral22: ; preds = %omp.region.cont17 | ||
| ! [start of reduction initialization region] | ||
| ! [null check:] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.reduction.neutral23, label %omp.reduction.neutral24 | ||
|
|
||
| ! CHECK: omp.reduction.neutral24: ; preds = %omp.reduction.neutral22 | ||
| ! [malloc and assign the default value to the reduction variable] | ||
| ! CHECK: br label %omp.reduction.neutral25 | ||
|
|
||
| ! CHECK: omp.reduction.neutral25: ; preds = %omp.reduction.neutral23, %omp.reduction.neutral24 | ||
| ! CHECK-NEXT: br label %omp.region.cont21 | ||
|
|
||
| ! CHECK: omp.region.cont21: ; preds = %omp.reduction.neutral25 | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: br label %omp.par.region | ||
|
|
||
| ! CHECK: omp.par.region: ; preds = %omp.region.cont21 | ||
| ! CHECK-NEXT: br label %omp.par.region27 | ||
|
|
||
| ! CHECK: omp.par.region27: ; preds = %omp.par.region | ||
| ! [call SUM runtime function] | ||
| ! [if (sum(a) == 1)] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.par.region28, label %omp.par.region29 | ||
|
|
||
| ! CHECK: omp.par.region29: ; preds = %omp.par.region27 | ||
| ! CHECK-NEXT: br label %omp.region.cont26 | ||
|
|
||
| ! CHECK: omp.region.cont26: ; preds = %omp.par.region28, %omp.par.region29 | ||
| ! [omp parallel region done, call into the runtime to complete reduction] | ||
| ! CHECK: %[[VAL_233:.*]] = call i32 @__kmpc_reduce( | ||
| ! CHECK: switch i32 %[[VAL_233]], label %reduce.finalize [ | ||
| ! CHECK-NEXT: i32 1, label %reduce.switch.nonatomic | ||
| ! CHECK-NEXT: i32 2, label %reduce.switch.atomic | ||
| ! CHECK-NEXT: ] | ||
|
|
||
| ! CHECK: reduce.switch.atomic: ; preds = %omp.region.cont26 | ||
| ! CHECK-NEXT: unreachable | ||
|
|
||
| ! CHECK: reduce.switch.nonatomic: ; preds = %omp.region.cont26 | ||
| ! CHECK-NEXT: %[[red_private_value_0:.*]] = load ptr, ptr %{{.*}}, align 8 | ||
| ! CHECK-NEXT: br label %omp.reduction.nonatomic.body | ||
|
|
||
| ! [various blocks implementing the reduction] | ||
|
|
||
| ! CHECK: omp.region.cont35: ; preds = | ||
| ! CHECK-NEXT: %{{.*}} = phi ptr | ||
| ! CHECK-NEXT: call void @__kmpc_end_reduce( | ||
| ! CHECK-NEXT: br label %reduce.finalize | ||
|
|
||
| ! CHECK: reduce.finalize: ; preds = | ||
| ! CHECK-NEXT: br label %omp.par.pre_finalize | ||
|
|
||
| ! CHECK: omp.par.pre_finalize: ; preds = %reduce.finalize | ||
| ! CHECK-NEXT: %{{.*}} = load ptr, ptr | ||
| ! CHECK-NEXT: br label %omp.reduction.cleanup | ||
|
|
||
| ! CHECK: omp.reduction.cleanup: ; preds = %omp.par.pre_finalize | ||
| ! [null check] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup41, label %omp.reduction.cleanup42 | ||
|
|
||
| ! CHECK: omp.reduction.cleanup42: ; preds = %omp.reduction.cleanup41, %omp.reduction.cleanup | ||
| ! CHECK-NEXT: br label %omp.region.cont40 | ||
|
|
||
| ! CHECK: omp.region.cont40: ; preds = %omp.reduction.cleanup42 | ||
| ! CHECK-NEXT: %{{.*}} = load ptr, ptr | ||
| ! CHECK-NEXT: br label %omp.reduction.cleanup44 | ||
|
|
||
| ! CHECK: omp.reduction.cleanup44: ; preds = %omp.region.cont40 | ||
| ! [null check] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.reduction.cleanup45, label %omp.reduction.cleanup46 | ||
|
|
||
| ! CHECK: omp.reduction.cleanup46: ; preds = %omp.reduction.cleanup45, %omp.reduction.cleanup44 | ||
| ! CHECK-NEXT: br label %omp.region.cont43 | ||
|
|
||
| ! CHECK: omp.region.cont43: ; preds = %omp.reduction.cleanup46 | ||
| ! CHECK-NEXT: br label %omp.private.dealloc | ||
|
|
||
| ! CHECK: omp.private.dealloc: ; preds = %omp.region.cont43 | ||
| ! [null check] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.private.dealloc48, label %omp.private.dealloc49 | ||
|
|
||
| ! CHECK: omp.private.dealloc49: ; preds = %omp.private.dealloc48, %omp.private.dealloc | ||
| ! CHECK-NEXT: br label %omp.region.cont47 | ||
|
|
||
| ! CHECK: omp.region.cont47: ; preds = %omp.private.dealloc49 | ||
| ! CHECK-NEXT: br label %omp.private.dealloc51 | ||
|
|
||
| ! CHECK: omp.private.dealloc51: ; preds = %omp.region.cont47 | ||
| ! [null check] | ||
| ! CHECK: br i1 %{{.*}}, label %omp.private.dealloc52, label %omp.private.dealloc53 | ||
|
|
||
| ! CHECK: omp.private.dealloc53: ; preds = %omp.private.dealloc52, %omp.private.dealloc51 | ||
| ! CHECK-NEXT: br label %omp.region.cont50 | ||
|
|
||
| ! CHECK: omp.region.cont50: ; preds = %omp.private.dealloc53 | ||
| ! CHECK-NEXT: br label %omp.par.outlined.exit.exitStub | ||
|
|
||
| ! CHECK: omp.private.dealloc52: ; preds = %omp.private.dealloc51 | ||
| ! [dealloc memory] | ||
| ! CHECK: br label %omp.private.dealloc53 | ||
|
|
||
| ! CHECK: omp.private.dealloc48: ; preds = %omp.private.dealloc | ||
| ! [dealloc memory] | ||
| ! CHECK: br label %omp.private.dealloc49 | ||
|
|
||
| ! CHECK: omp.reduction.cleanup45: ; preds = %omp.reduction.cleanup44 | ||
| ! CHECK-NEXT: call void @free( | ||
| ! CHECK-NEXT: br label %omp.reduction.cleanup46 | ||
|
|
||
| ! CHECK: omp.reduction.cleanup41: ; preds = %omp.reduction.cleanup | ||
| ! CHECK-NEXT: call void @free( | ||
| ! CHECK-NEXT: br label %omp.reduction.cleanup42 | ||
|
|
||
| ! CHECK: omp.par.region28: ; preds = %omp.par.region27 | ||
| ! CHECK-NEXT: call {} @_FortranAStopStatement | ||
|
|
||
| ! CHECK: omp.reduction.neutral23: ; preds = %omp.reduction.neutral22 | ||
| ! [source length was zero: finish initializing array] | ||
| ! CHECK: br label %omp.reduction.neutral25 | ||
|
|
||
| ! CHECK: omp.reduction.neutral18: ; preds = %omp.reduction.neutral | ||
| ! [source length was zero: finish initializing array] | ||
| ! CHECK: br label %omp.reduction.neutral20 | ||
|
|
||
| ! CHECK: omp.private.copy15: ; preds = %omp.private.copy14 | ||
| ! [source length was non-zero: call assign runtime] | ||
| ! CHECK: br label %omp.private.copy16 | ||
|
|
||
| ! CHECK: omp.private.copy11: ; preds = %omp.private.copy10 | ||
| ! [source length was non-zero: call assign runtime] | ||
| ! CHECK: br label %omp.private.copy12 | ||
|
|
||
| ! CHECK: omp.private.alloc1: ; preds = %omp.private.alloc | ||
| ! [var extent was non-zero: malloc a private array] | ||
| ! CHECK: br label %omp.private.alloc3 | ||
|
|
||
| ! CHECK: omp.private.alloc6: ; preds = %omp.private.alloc5 | ||
| ! [var extent was non-zero: malloc a private array] | ||
| ! CHECK: br label %omp.private.alloc8 | ||
|
|
||
| ! CHECK: omp.par.outlined.exit.exitStub: ; preds = %omp.region.cont50 | ||
| ! CHECK-NEXT: ret void |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s | ||
|
|
||
| ! Regression test for https://github.com/llvm/llvm-project/issues/106297 | ||
|
|
||
| program bug | ||
| implicit none | ||
| integer :: table(10) | ||
| !$OMP PARALLEL PRIVATE(table) | ||
| table = 50 | ||
| if (any(table/=50)) then | ||
| stop 'fail 3' | ||
| end if | ||
| !$OMP END PARALLEL | ||
| print *,'ok' | ||
| End Program | ||
|
|
||
|
|
||
| ! CHECK-LABEL: define internal void {{.*}}..omp_par( | ||
| ! CHECK: omp.par.entry: | ||
| ! CHECK: %[[VAL_9:.*]] = alloca i32, align 4 | ||
| ! CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_11:.*]], align 4 | ||
| ! CHECK: store i32 %[[VAL_10]], ptr %[[VAL_9]], align 4 | ||
| ! CHECK: %[[VAL_12:.*]] = load i32, ptr %[[VAL_9]], align 4 | ||
| ! CHECK: %[[PRIV_TABLE:.*]] = alloca [10 x i32], i64 1, align 4 | ||
| ! ... | ||
| ! check that we use the private copy of table for the assignment | ||
| ! CHECK: omp.par.region1: | ||
| ! CHECK: %[[ELEMENTAL_TMP:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 | ||
| ! CHECK: %[[TABLE_BOX_ADDR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 | ||
| ! CHECK: %[[BOXED_FIFTY:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8 | ||
| ! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 | ||
| ! CHECK: %[[TABLE_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20240719, i8 1, i8 9, i8 0, i8 0, [1 x [3 x i64]] {{\[\[}}3 x i64] [i64 1, i64 10, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }, ptr %[[PRIV_TABLE]], 0 | ||
| ! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL]], ptr %[[TABLE_BOX_ADDR]], align 8 | ||
| ! CHECK: %[[TABLE_BOX_VAL2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[TABLE_BOX_ADDR]], align 8 | ||
| ! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL2]], ptr %[[TABLE_BOX_ADDR2]], align 8 | ||
| ! CHECK: %[[VAL_26:.*]] = call {} @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9) | ||
| ! ... | ||
| ! check that we use the private copy of table for table/=50 | ||
| ! CHECK: omp.par.region3: | ||
| ! CHECK: %[[VAL_44:.*]] = sub nsw i64 %{{.*}}, 1 | ||
| ! CHECK: %[[VAL_45:.*]] = mul nsw i64 %[[VAL_44]], 1 | ||
| ! CHECK: %[[VAL_46:.*]] = mul nsw i64 %[[VAL_45]], 1 | ||
| ! CHECK: %[[VAL_47:.*]] = add nsw i64 %[[VAL_46]], 0 | ||
| ! CHECK: %[[VAL_48:.*]] = getelementptr i32, ptr %[[PRIV_TABLE]], i64 %[[VAL_47]] | ||
| ! CHECK: %[[VAL_49:.*]] = load i32, ptr %[[VAL_48]], align 4 | ||
| ! CHECK: %[[VAL_50:.*]] = icmp ne i32 %[[VAL_49]], 50 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| //===-- Implementation header for exp10m1f16 --------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_MATH_EXP10M1F16_H | ||
| #define LLVM_LIBC_SRC_MATH_EXP10M1F16_H | ||
|
|
||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/properties/types.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| float16 exp10m1f16(float16 x); | ||
|
|
||
| } // namespace LIBC_NAMESPACE_DECL | ||
|
|
||
| #endif // LLVM_LIBC_SRC_MATH_EXP10M1F16_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| //===-- Half-precision 10^x - 1 function ----------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/exp10m1f16.h" | ||
| #include "expxf16.h" | ||
| #include "hdr/errno_macros.h" | ||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/FEnvImpl.h" | ||
| #include "src/__support/FPUtil/FPBits.h" | ||
| #include "src/__support/FPUtil/PolyEval.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/__support/FPUtil/except_value_utils.h" | ||
| #include "src/__support/FPUtil/multiply_add.h" | ||
| #include "src/__support/FPUtil/rounding_mode.h" | ||
| #include "src/__support/common.h" | ||
| #include "src/__support/macros/config.h" | ||
| #include "src/__support/macros/optimization.h" | ||
| #include "src/__support/macros/properties/cpu_features.h" | ||
|
|
||
| namespace LIBC_NAMESPACE_DECL { | ||
|
|
||
| static constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{ | ||
| // (input, RZ output, RU offset, RD offset, RN offset) | ||
| // x = 0x1.5c4p-4, exp10m1f16(x) = 0x1.bacp-3 (RZ) | ||
| {0x2d71U, 0x32ebU, 1U, 0U, 0U}, | ||
| // x = -0x1.5ep-13, exp10m1f16(x) = -0x1.92cp-12 (RZ) | ||
| {0x8978U, 0x8e4bU, 0U, 1U, 0U}, | ||
| // x = -0x1.e2p-10, exp10m1f16(x) = -0x1.14cp-8 (RZ) | ||
| {0x9788U, 0x9c53U, 0U, 1U, 0U}, | ||
| }}; | ||
|
|
||
| #ifdef LIBC_TARGET_CPU_HAS_FMA | ||
| static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3; | ||
| #else | ||
| static constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6; | ||
| #endif | ||
|
|
||
| static constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI> | ||
| EXP10M1F16_EXCEPTS_HI = {{ | ||
| // (input, RZ output, RU offset, RD offset, RN offset) | ||
| // x = 0x1.8f4p-2, exp10m1f16(x) = 0x1.744p+0 (RZ) | ||
| {0x363dU, 0x3dd1U, 1U, 0U, 0U}, | ||
| // x = 0x1.95cp-2, exp10m1f16(x) = 0x1.7d8p+0 (RZ) | ||
| {0x3657U, 0x3df6U, 1U, 0U, 0U}, | ||
| // x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ) | ||
| {0x3741U, 0x3f5cU, 1U, 0U, 1U}, | ||
| #ifndef LIBC_TARGET_CPU_HAS_FMA | ||
| // x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ) | ||
| {0x4030U, 0x57b1U, 1U, 0U, 1U}, | ||
| // x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ) | ||
| {0x406eU, 0x5917U, 1U, 0U, 1U}, | ||
| // x = 0x1.2f4p+2, exp10m1f16(x) = 0x1.ab8p+15 (RZ) | ||
| {0x44bdU, 0x7aaeU, 1U, 0U, 1U}, | ||
| #endif | ||
| }}; | ||
|
|
||
| LLVM_LIBC_FUNCTION(float16, exp10m1f16, (float16 x)) { | ||
| using FPBits = fputil::FPBits<float16>; | ||
| FPBits x_bits(x); | ||
|
|
||
| uint16_t x_u = x_bits.uintval(); | ||
| uint16_t x_abs = x_u & 0x7fffU; | ||
|
|
||
| // When |x| <= 2^(-3), or |x| >= 11 * log10(2), or x is NaN. | ||
| if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x429fU)) { | ||
| // exp10m1(NaN) = NaN | ||
| if (x_bits.is_nan()) { | ||
| if (x_bits.is_signaling_nan()) { | ||
| fputil::raise_except_if_required(FE_INVALID); | ||
| return FPBits::quiet_nan().get_val(); | ||
| } | ||
|
|
||
| return x; | ||
| } | ||
|
|
||
| // When x >= 16 * log10(2). | ||
| if (x_u >= 0x44d1U && x_bits.is_pos()) { | ||
| // exp10m1(+inf) = +inf | ||
| if (x_bits.is_inf()) | ||
| return FPBits::inf().get_val(); | ||
|
|
||
| switch (fputil::quick_get_round()) { | ||
| case FE_TONEAREST: | ||
| case FE_UPWARD: | ||
| fputil::set_errno_if_required(ERANGE); | ||
| fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); | ||
| return FPBits::inf().get_val(); | ||
| default: | ||
| return FPBits::max_normal().get_val(); | ||
| } | ||
| } | ||
|
|
||
| // When x < -11 * log10(2). | ||
| if (x_u > 0xc29fU) { | ||
| // exp10m1(-inf) = -1 | ||
| if (x_bits.is_inf()) | ||
| return FPBits::one(Sign::NEG).get_val(); | ||
|
|
||
| // When x >= -0x1.ce4p+1, round(10^x - 1, HP, RN) = -0x1.ffcp-1. | ||
| if (x_u <= 0xc339U) { | ||
| return fputil::round_result_slightly_down( | ||
| fputil::cast<float16>(-0x1.ffcp-1)); | ||
| } | ||
|
|
||
| // When x < -0x1.ce4p+1, round(10^x - 1, HP, RN) = -1. | ||
| switch (fputil::quick_get_round()) { | ||
| case FE_TONEAREST: | ||
| case FE_DOWNWARD: | ||
| return FPBits::one(Sign::NEG).get_val(); | ||
| default: | ||
| return fputil::cast<float16>(-0x1.ffcp-1); | ||
| } | ||
| } | ||
|
|
||
| // When |x| <= 2^(-3). | ||
| if (x_abs <= 0x3000U) { | ||
| if (auto r = EXP10M1F16_EXCEPTS_LO.lookup(x_u); | ||
| LIBC_UNLIKELY(r.has_value())) | ||
| return r.value(); | ||
|
|
||
| float xf = x; | ||
| // Degree-5 minimax polynomial generated by Sollya with the following | ||
| // commands: | ||
| // > display = hexadecimal; | ||
| // > P = fpminimax((10^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); | ||
| // > x * P; | ||
| return fputil::cast<float16>( | ||
| xf * fputil::polyeval(xf, 0x1.26bb1cp+1f, 0x1.5351c8p+1f, | ||
| 0x1.04704p+1f, 0x1.2ce084p+0f, 0x1.14a6bep-1f)); | ||
| } | ||
| } | ||
|
|
||
| // When x is 1, 2, or 3. These are hard-to-round cases with exact results. | ||
| // 10^4 - 1 = 9'999 is not exactly representable as a float16, but luckily the | ||
| // polynomial approximation gives the correct result for x = 4 in all | ||
| // rounding modes. | ||
| if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) { | ||
| switch (x_u) { | ||
| case 0x3c00U: // x = 1.0f16 | ||
| return fputil::cast<float16>(9.0); | ||
| case 0x4000U: // x = 2.0f16 | ||
| return fputil::cast<float16>(99.0); | ||
| case 0x4200U: // x = 3.0f16 | ||
| return fputil::cast<float16>(999.0); | ||
| } | ||
| } | ||
|
|
||
| if (auto r = EXP10M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) | ||
| return r.value(); | ||
|
|
||
| // exp10(x) = exp2((hi + mid) * log2(10)) * exp10(lo) | ||
| auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x); | ||
| // exp10m1(x) = exp2((hi + mid) * log2(10)) * exp10(lo) - 1 | ||
| return fputil::cast<float16>( | ||
| fputil::multiply_add(exp2_hi_mid, exp10_lo, -1.0f)); | ||
| } | ||
|
|
||
| } // namespace LIBC_NAMESPACE_DECL |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| //===-- Exhaustive test for exp10m1f16 ------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/math/exp10m1f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
| #include "utils/MPFRWrapper/MPFRUtils.h" | ||
|
|
||
| using LlvmLibcExp10m1f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| namespace mpfr = LIBC_NAMESPACE::testing::mpfr; | ||
|
|
||
| // Range: [0, Inf]; | ||
| static constexpr uint16_t POS_START = 0x0000U; | ||
| static constexpr uint16_t POS_STOP = 0x7c00U; | ||
|
|
||
| // Range: [-Inf, 0]; | ||
| static constexpr uint16_t NEG_START = 0x8000U; | ||
| static constexpr uint16_t NEG_STOP = 0xfc00U; | ||
|
|
||
| TEST_F(LlvmLibcExp10m1f16Test, PositiveRange) { | ||
| for (uint16_t v = POS_START; v <= POS_STOP; ++v) { | ||
| float16 x = FPBits(v).get_val(); | ||
| EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x, | ||
| LIBC_NAMESPACE::exp10m1f16(x), 0.5); | ||
| } | ||
| } | ||
|
|
||
| TEST_F(LlvmLibcExp10m1f16Test, NegativeRange) { | ||
| for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { | ||
| float16 x = FPBits(v).get_val(); | ||
| EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x, | ||
| LIBC_NAMESPACE::exp10m1f16(x), 0.5); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| //===-- Unittests for exp10m1f16 ------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "hdr/fenv_macros.h" | ||
| #include "src/__support/FPUtil/cast.h" | ||
| #include "src/errno/libc_errno.h" | ||
| #include "src/math/exp10m1f16.h" | ||
| #include "test/UnitTest/FPMatcher.h" | ||
| #include "test/UnitTest/Test.h" | ||
|
|
||
| using LlvmLibcExp10m1f16Test = LIBC_NAMESPACE::testing::FPTest<float16>; | ||
|
|
||
| TEST_F(LlvmLibcExp10m1f16Test, SpecialNumbers) { | ||
| LIBC_NAMESPACE::libc_errno = 0; | ||
|
|
||
| EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::exp10m1f16(aNaN)); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::exp10m1f16(sNaN), | ||
| FE_INVALID); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::exp10m1f16(inf)); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_ALL_ROUNDING(LIBC_NAMESPACE::fputil::cast<float16>(-1.0), | ||
| LIBC_NAMESPACE::exp10m1f16(neg_inf)); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp10m1f16(zero)); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::exp10m1f16(neg_zero)); | ||
| EXPECT_MATH_ERRNO(0); | ||
| } | ||
|
|
||
| TEST_F(LlvmLibcExp10m1f16Test, Overflow) { | ||
| LIBC_NAMESPACE::libc_errno = 0; | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp10m1f16(max_normal), | ||
| FE_OVERFLOW | FE_INEXACT); | ||
| EXPECT_MATH_ERRNO(ERANGE); | ||
|
|
||
| // round(16 * log10(2), HP, RN); | ||
| float16 x = LIBC_NAMESPACE::fputil::cast<float16>(0x1.344p+2); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( | ||
| inf, LIBC_NAMESPACE::exp10m1f16(x), FE_OVERFLOW | FE_INEXACT); | ||
| EXPECT_MATH_ERRNO(ERANGE); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD( | ||
| inf, LIBC_NAMESPACE::exp10m1f16(x), FE_OVERFLOW | FE_INEXACT); | ||
| EXPECT_MATH_ERRNO(ERANGE); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( | ||
| max_normal, LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
| EXPECT_MATH_ERRNO(0); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( | ||
| max_normal, LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
| EXPECT_MATH_ERRNO(0); | ||
| } | ||
|
|
||
| TEST_F(LlvmLibcExp10m1f16Test, ResultNearNegOne) { | ||
| LIBC_NAMESPACE::libc_errno = 0; | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION(LIBC_NAMESPACE::fputil::cast<float16>(-1.0), | ||
| LIBC_NAMESPACE::exp10m1f16(neg_max_normal), | ||
| FE_INEXACT); | ||
|
|
||
| // round(-11 * log10(2), HP, RD); | ||
| float16 x = LIBC_NAMESPACE::fputil::cast<float16>(-0x1.a8p+1); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ffcp-1), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ffcp-1), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-1.0), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ffcp-1), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| // Next float16 value below -0x1.ce4p+1. | ||
| x = LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ce8p+1); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-1.0), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ffcp-1), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-1.0), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
|
|
||
| EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( | ||
| LIBC_NAMESPACE::fputil::cast<float16>(-0x1.ffcp-1), | ||
| LIBC_NAMESPACE::exp10m1f16(x), FE_INEXACT); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,7 @@ enum class Operation : int { | |
| Exp2, | ||
| Exp2m1, | ||
| Exp10, | ||
| Exp10m1, | ||
| Expm1, | ||
| Floor, | ||
| Log, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,5 +5,5 @@ all: a.out simple | |
| include Makefile.rules | ||
|
|
||
| simple: | ||
| "$(MAKE)" -f $(MAKEFILE_RULES) \ | ||
| C_SOURCES=simple.c EXE=simple | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,23 +1,23 @@ | ||
| CXX_SOURCES := main.cpp | ||
| LD_EXTRAS := -L. -l_d -l_c -l_a -l_b | ||
|
|
||
| a.out: lib_b lib_a lib_c lib_d | ||
|
|
||
| include Makefile.rules | ||
|
|
||
| lib_a: lib_b | ||
| "$(MAKE)" -f $(MAKEFILE_RULES) \ | ||
| DYLIB_ONLY=YES DYLIB_CXX_SOURCES=a.cpp DYLIB_NAME=_a \ | ||
| LD_EXTRAS="-L. -l_b" | ||
|
|
||
| lib_b: | ||
| "$(MAKE)" -f $(MAKEFILE_RULES) \ | ||
| DYLIB_ONLY=YES DYLIB_CXX_SOURCES=b.cpp DYLIB_NAME=_b | ||
|
|
||
| lib_c: | ||
| "$(MAKE)" -f $(MAKEFILE_RULES) \ | ||
| DYLIB_ONLY=YES DYLIB_CXX_SOURCES=c.cpp DYLIB_NAME=_c | ||
|
|
||
| lib_d: | ||
| "$(MAKE)" -f $(MAKEFILE_RULES) \ | ||
| DYLIB_ONLY=YES DYLIB_CXX_SOURCES=d.cpp DYLIB_NAME=_d |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,4 +7,4 @@ include Makefile.rules | |
| a.out: lib_One lib_Two | ||
|
|
||
| lib_%: | ||
| "$(MAKE)" VPATH=$(SRCDIR)/$* -I $(SRCDIR) -f $(SRCDIR)/$*.mk | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| //===- llvm/CodeGen/EarlyIfConversion.h -------------------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_CODEGEN_EARLYIFCONVERSION_H | ||
| #define LLVM_CODEGEN_EARLYIFCONVERSION_H | ||
|
|
||
| #include "llvm/CodeGen/MachinePassManager.h" | ||
|
|
||
| namespace llvm { | ||
|
|
||
| class EarlyIfConverterPass : public PassInfoMixin<EarlyIfConverterPass> { | ||
| public: | ||
| PreservedAnalyses run(MachineFunction &MF, | ||
| MachineFunctionAnalysisManager &MFAM); | ||
| }; | ||
|
|
||
| } // namespace llvm | ||
|
|
||
| #endif // LLVM_CODEGEN_EARLYIFCONVERSION_H |