diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll
index 97f8e5f4ea16c..29120c8815aea 100644
--- a/llvm/test/CodeGen/X86/avx512-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX

 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
 ; ALL-LABEL: test_x86_fmadd_ps_z:
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fadd <16 x float> %x, %a2
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fadd contract <16 x float> %x, %a2
 ret <16 x float> %res
 }
@@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %x, %a2
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fsub contract <16 x float> %x, %a2
 ret <16 x float> %res
 }
@@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %a2, %x
+ %x = fmul contract <16 x float> %a0, %a1
+ %res = fsub contract <16 x float> %a2, %x
 ret <16 x float> %res
 }
@@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; ALL-NEXT: retq
- %x = fmul <16 x float> %a0, %a1
- %y = fsub <16 x float> , %x
- %res = fsub <16 x float> %y, %a2
+ %x = fmul contract <16 x float> %a0, %a1
+ %y = fsub contract <16 x float> , %x
+ %res = fsub contract <16 x float> %y, %a2
 ret <16 x float> %res
 }
@@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fadd <8 x double> %x, %a2
+ %x = fmul contract <8 x double> %a0, %a1
+ %res = fadd contract <8 x double> %x, %a2
 ret <8 x double> %res
 }
@@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT: retq
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %x, %a2
+ %x = fmul contract <8 x double> %a0, %a1
+ %res = fsub contract <8 x double> %x, %a2
 ret <8 x double> %res
 }
@@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
 ; ALL: ## %bb.0:
 ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; ALL-NEXT: retq
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
+ %x = fmul contract double %a0, %a1
+ %res = fsub contract double %x, %a2
 ret double %res
 }
@@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
 ; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
{{.*#+}} xmm0 = (xmm1 * xmm0) - mem ; ALL-NEXT: retq %a2 = load double , ptr%a2_ptr - %x = fmul double %a0, %a1 - %res = fsub double %x, %a2 + %x = fmul contract double %a0, %a1 + %res = fsub contract double %x, %a2 ret double %res } @@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) { ; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; ALL-NEXT: retq %a2 = load double , ptr%a2_ptr - %x = fmul double %a0, %a2 - %res = fsub double %x, %a1 + %x = fmul contract double %a0, %a2 + %res = fsub contract double %x, %a1 ret double %res } @@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; ALL-NEXT: retq - %b1 = fmul <16 x float> %a1, - %b2 = fadd <16 x float> %b1, %a2 + %b1 = fmul contract <16 x float> %a1, + %b2 = fadd contract <16 x float> %b1, %a2 ret <16 x float> %b2 } @@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ALL: ## %bb.0: ; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem ; ALL-NEXT: retq - %b1 = fmul <16 x float> %a1, %a2 - %b2 = fadd <16 x float> %b1, + %b1 = fmul contract <16 x float> %a1, %a2 + %b2 = fadd contract <16 x float> %b1, ret <16 x float> %b2 } @@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt ; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1 ; SKX-NEXT: retq %a2 = load <16 x float>,ptr%a2_ptrt,align 1 - %x = fmul <16 x float> %a0, %a2 - %y = fadd <16 x float> %x, %a1 + %x = fmul contract <16 x float> %a0, %a2 + %y = fadd contract <16 x float> %x, %a1 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 ret <16 x float> %res } @@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,ptr%a2_ptrt,align 1 - %x = fmul <16 x float> %a0, %a2 - %y = fadd <16 x float> %x, %a1 + %x = fmul contract <16 x float> %a0, %a2 + %y = fadd contract <16 x float> %x, %a1 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 ret <16 x float> %res } @@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt ; SKX-NEXT: vmovaps %zmm1, %zmm0 ; SKX-NEXT: retq %a2 = load <16 x float>,ptr%a2_ptrt,align 1 - %x = fmul <16 x float> %a1, %a0 - %y = fadd <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a1, %a0 + %y = fadd contract <16 x float> %x, %a2 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 ret <16 x float> %res } diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll index 36b95e744ba14..f1477b57375c4 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ ; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set. define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { @@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half entry: %0 = bitcast <32 x half> %a to <16 x float> %1 = bitcast <32 x half> %b to <16 x float> - %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) + %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) %3 = bitcast <16 x float> %2 to <32 x half> - %add.i = fadd <32 x half> %3, %acc + %add.i = fadd contract <32 x half> %3, %acc ret <32 x half> %add.i } @@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half entry: %0 = bitcast <32 x half> %a to <16 x float> %1 = bitcast <32 x half> %b to <16 x float> - %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) + %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) %3 = bitcast <16 x float> %2 to <32 x half> - %add.i = fadd <32 x half> %3, %acc + %add.i = fadd contract <32 x half> %3, %acc ret <32 x half> %add.i } @@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half entry: %0 = bitcast <16 x half> %a to <8 x float> %1 = bitcast <16 x half> %b to <8 x float> - %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1) + %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1) %3 = bitcast <8 x float> %2 to <16 x half> - %add.i = fadd <16 x half> %3, %acc + %add.i = fadd contract <16 x half> %3, %acc ret <16 x half> %add.i } @@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half entry: %0 = bitcast <16 x half> %a to <8 x float> %1 = bitcast <16 x half> %b to <8 x float> - %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1) + %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1) %3 = bitcast <8 x float> %2 to <16 x half> - %add.i = fadd <16 x half> %3, %acc + %add.i = fadd contract <16 x half> %3, %acc ret <16 x half> %add.i } @@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b entry: %0 = bitcast <8 x half> %a to <4 x float> %1 = bitcast <8 x half> %b to <4 x float> - %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1) + %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1) %3 = bitcast <4 x float> %2 to <8 x half> - %add.i = fadd <8 x half> %3, %acc + %add.i = fadd contract <8 x half> %3, %acc ret <8 x half> %add.i } @@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b entry: %0 = 
bitcast <8 x half> %a to <4 x float> %1 = bitcast <8 x half> %b to <4 x float> - %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1) + %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1) %3 = bitcast <4 x float> %2 to <8 x half> - %add.i = fadd <8 x half> %3, %acc + %add.i = fadd contract <8 x half> %3, %acc ret <8 x half> %add.i } @@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal entry: %0 = bitcast <32 x half> %a to <16 x float> %1 = bitcast <32 x half> %b to <16 x float> - %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> , i16 -1, i32 4) + %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> , i16 -1, i32 4) %3 = bitcast <16 x float> %2 to <32 x half> - %add.i = fadd <32 x half> %3, %acc + %add.i = fadd contract <32 x half> %3, %acc ret <32 x half> %add.i } @@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal entry: %0 = bitcast <32 x half> %a to <16 x float> %1 = bitcast <32 x half> %b to <16 x float> - %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> , i16 -1, i32 4) + %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> , i16 -1, i32 4) %3 = bitcast <16 x float> %2 to <32 x half> - %add.i = fadd <32 x half> %3, %acc + %add.i = fadd contract <32 x half> %3, %acc ret <32 x half> %add.i } @@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal entry: %0 = bitcast <16 x half> %a to <8 x float> %1 = bitcast <16 x half> %b to <8 x float> - %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> , i8 -1) + %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> , i8 -1) %3 = bitcast <8 x float> %2 to <16 x half> - %add.i = fadd <16 x half> %3, %acc + %add.i = fadd contract <16 x half> %3, %acc ret <16 x half> %add.i } @@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal entry: %0 = bitcast <16 x half> %a to <8 x float> %1 = bitcast <16 x half> %b to <8 x float> - %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> , i8 -1) + %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> , i8 -1) %3 = bitcast <8 x float> %2 to <16 x half> - %add.i = fadd <16 x half> %3, %acc + %add.i = fadd contract <16 x half> %3, %acc ret <16 x half> %add.i } @@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> % entry: %0 = bitcast <8 x half> %a to <4 x float> %1 = bitcast <8 x half> %b to <4 x float> - %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> , i8 -1) + %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> , i8 -1) %3 = bitcast <4 x float> %2 to <8 x half> - %add.i = fadd <8 x half> %3, %acc + %add.i = fadd contract <8 x half> %3, %acc ret <8 x 
half> %add.i } @@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> % entry: %0 = bitcast <8 x half> %a to <4 x float> %1 = bitcast <8 x half> %b to <4 x float> - %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> , i8 -1) + %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> , i8 -1) %3 = bitcast <4 x float> %2 to <8 x half> - %add.i = fadd <8 x half> %3, %acc + %add.i = fadd contract <8 x half> %3, %acc ret <8 x half> %add.i } diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll index a509503584649..5d9784aa5d2eb 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll index 43f30da15b20d..b58bae93ed660 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll index 7b142ea170c22..92bdebb34979a 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: @@ -94,13 +94,13 @@ define dso_local <32 x half> @test6(<16 x i32> %a, <16 x float> %b) local_unname entry: %0 = xor <16 x i32> %a, splat (i32 -2147483648) %1 = bitcast <16 x i32> %0 to <16 x float> - %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> splat (float 1.000000e+00), <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) + %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> splat (float 1.000000e+00), <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4) %3 = bitcast <16 x float> %2 to <32 x 
half> - %4 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %1, <16 x float> %b, <16 x float> zeroinitializer, i16 -1, i32 4) + %4 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.512(<16 x float> %1, <16 x float> %b, <16 x float> zeroinitializer, i16 -1, i32 4) %5 = bitcast <16 x float> %4 to <32 x half> - %6 = fadd <32 x half> %3, %5 + %6 = fadd contract <32 x half> %3, %5 %7 = bitcast <16 x float> %b to <32 x half> - %8 = fadd <32 x half> %6, %7 + %8 = fadd contract <32 x half> %6, %7 ret <32 x half> %8 } diff --git a/llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll b/llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll index 6291100f42c3d..3ebbf34dd8367 100644 --- a/llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll +++ b/llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=x86_64-- --start-before=x86-isel -mattr=+avx,+fma %s -o - | FileCheck %s -; RUN: llc -mtriple=x86_64-- --start-before=x86-isel -mattr=+avx,+fma %s -o - -fp-contract=fast | FileCheck %s define double @fma_folding(double %x) { ; CHECK-LABEL: fma_folding: diff --git a/llvm/test/CodeGen/X86/fma-do-not-commute.ll b/llvm/test/CodeGen/X86/fma-do-not-commute.ll index 0dc8e62c56d0c..1b60c15cf2be0 100644 --- a/llvm/test/CodeGen/X86/fma-do-not-commute.ll +++ b/llvm/test/CodeGen/X86/fma-do-not-commute.ll @@ -1,4 +1,4 @@ -; RUN: llc -fp-contract=fast -mattr=+fma -disable-cgp < %s -o - | FileCheck %s +; RUN: llc -mattr=+fma -disable-cgp < %s -o - | FileCheck %s ; Check that the 2nd and 3rd arguments of fmaXXX231 reg1, reg2, mem3 are not commuted. ; target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -20,8 +20,8 @@ loop: %sum0 = phi float [ %fma, %loop ], [ %arg, %entry ] %addrVal = load float, ptr %addr, align 4 %addr2Val = load float, ptr %addr2, align 4 - %fmul = fmul float %addrVal, %addr2Val - %fma = fadd float %sum0, %fmul + %fmul = fmul contract float %addrVal, %addr2Val + %fma = fadd contract float %sum0, %fmul br i1 true, label %exit, label %loop exit: diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index dc35c8f8dc657..be5e23cd4cce3 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA,FMA-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefixes=AVX512,AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefixes=FMA,FMA-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefixes=FMA4,FMA4-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefixes=FMA4,FMA4-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) @@ -27,8 +27,8 @@ define float @test_f32_fmadd(float %a0, float %a1, float %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fadd float %x, %a2 + %x = fmul contract float %a0, %a1 + %res = fadd contract float %x, %a2 ret float %res } @@ -47,8 +47,8 @@ define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fadd <4 x float> %x, %a2 + %x = fmul contract <4 x float> %a0, %a1 + %res = fadd contract <4 x float> %x, %a2 ret <4 x float> %res } @@ -67,8 +67,8 @@ define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fadd <8 x float> %x, %a2 + %x = fmul contract <8 x float> %a0, %a1 + %res = fadd contract <8 x float> %x, %a2 ret <8 x float> %res } @@ -87,8 +87,8 @@ define double @test_f64_fmadd(double %a0, double %a1, double %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fadd double %x, %a2 + %x = fmul contract double %a0, %a1 + %res = fadd contract double %x, %a2 ret double %res } @@ -107,8 +107,8 @@ define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fadd <2 x double> %x, %a2 + %x = fmul contract <2 x double> %a0, %a1 + %res = fadd contract <2 x double> %x, %a2 ret <2 x double> %res } @@ -127,8 +127,8 @@ define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fadd <4 x double> %x, %a2 + %x = fmul contract <4 x double> %a0, %a1 + %res = fadd contract <4 x double> %x, %a2 ret <4 x double> %res } @@ -151,8 +151,8 @@ define float @test_f32_fmsub(float %a0, float %a1, float %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm0 
= (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fsub float %x, %a2 + %x = fmul contract float %a0, %a1 + %res = fsub contract float %x, %a2 ret float %res } @@ -171,8 +171,8 @@ define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fsub <4 x float> %x, %a2 + %x = fmul contract <4 x float> %a0, %a1 + %res = fsub contract <4 x float> %x, %a2 ret <4 x float> %res } @@ -191,8 +191,8 @@ define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fsub <8 x float> %x, %a2 + %x = fmul contract <8 x float> %a0, %a1 + %res = fsub contract <8 x float> %x, %a2 ret <8 x float> %res } @@ -211,8 +211,8 @@ define double @test_f64_fmsub(double %a0, double %a1, double %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fsub double %x, %a2 + %x = fmul contract double %a0, %a1 + %res = fsub contract double %x, %a2 ret double %res } @@ -231,8 +231,8 @@ define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fsub <2 x double> %x, %a2 + %x = fmul contract <2 x double> %a0, %a1 + %res = fsub contract <2 x double> %x, %a2 ret <2 x double> %res } @@ -251,8 +251,8 @@ define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fsub <4 x double> %x, %a2 + %x = fmul contract <4 x double> %a0, %a1 + %res = fsub contract <4 x double> %x, %a2 ret <4 x double> %res } @@ -275,8 +275,8 @@ define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %res = fsub float %a2, %x + %x = fmul contract float %a0, %a1 + %res = fsub contract float %a2, %x ret float %res } @@ -295,8 +295,8 @@ define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x floa ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %res = fsub <4 x float> %a2, %x + %x = fmul contract <4 x float> %a0, %a1 + %res = fsub contract <4 x float> %a2, %x ret <4 x float> %res } @@ -315,8 +315,8 @@ define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x floa ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %res = fsub <8 x float> %a2, %x + %x = fmul contract <8 x float> %a0, %a1 + %res = fsub contract <8 x float> %a2, %x ret <8 x float> %res } @@ -335,8 +335,8 @@ define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %res = fsub double %a2, %x + %x = fmul contract double %a0, %a1 + %res = fsub contract double %a2, %x ret double %res } @@ -355,8 +355,8 @@ define <2 x double> 
@test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %res = fsub <2 x double> %a2, %x + %x = fmul contract <2 x double> %a0, %a1 + %res = fsub contract <2 x double> %a2, %x ret <2 x double> %res } @@ -375,8 +375,8 @@ define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %res = fsub <4 x double> %a2, %x + %x = fmul contract <4 x double> %a0, %a1 + %res = fsub contract <4 x double> %a2, %x ret <4 x double> %res } @@ -399,9 +399,9 @@ define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul float %a0, %a1 - %y = fsub float -0.000000e+00, %x - %res = fsub float %y, %a2 + %x = fmul contract float %a0, %a1 + %y = fsub contract float -0.000000e+00, %x + %res = fsub contract float %y, %a2 ret float %res } @@ -420,9 +420,9 @@ define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x floa ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <4 x float> %a0, %a1 - %y = fsub <4 x float> , %x - %res = fsub <4 x float> %y, %a2 + %x = fmul contract <4 x float> %a0, %a1 + %y = fsub contract <4 x float> , %x + %res = fsub contract <4 x float> %y, %a2 ret <4 x float> %res } @@ -441,9 +441,9 @@ define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x floa ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <8 x float> %a0, %a1 - %y = fsub <8 x float> , %x - %res = fsub <8 x float> %y, %a2 + %x = fmul contract <8 x float> %a0, %a1 + %y = fsub contract <8 x float> , %x + %res = fsub contract <8 x float> %y, %a2 ret <8 x float> %res } @@ -462,9 +462,9 @@ define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul double %a0, %a1 - %y = fsub double -0.000000e+00, %x - %res = fsub double %y, %a2 + %x = fmul contract double %a0, %a1 + %y = fsub contract double -0.000000e+00, %x + %res = fsub contract double %y, %a2 ret double %res } @@ -483,9 +483,9 @@ define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 ; AVX512-NEXT: retq - %x = fmul <2 x double> %a0, %a1 - %y = fsub <2 x double> , %x - %res = fsub <2 x double> %y, %a2 + %x = fmul contract <2 x double> %a0, %a1 + %y = fsub contract <2 x double> , %x + %res = fsub contract <2 x double> %y, %a2 ret <2 x double> %res } @@ -504,9 +504,9 @@ define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 ; AVX512-NEXT: retq - %x = fmul <4 x double> %a0, %a1 - %y = fsub <4 x double> , %x - %res = fsub <4 x double> %y, %a2 + %x = fmul contract <4 x double> %a0, %a1 + %y = fsub contract <4 x double> , %x + %res = fsub contract <4 x double> %y, %a2 ret <4 x double> %res } @@ -530,8 +530,8 @@ define <4 x float> @test_4f32_fmadd_load(ptr %a0, <4 x float> %a1, <4 x float> % ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 ; 
AVX512-NEXT: retq %x = load <4 x float>, ptr %a0 - %y = fmul <4 x float> %x, %a1 - %res = fadd <4 x float> %y, %a2 + %y = fmul contract <4 x float> %x, %a1 + %res = fadd contract <4 x float> %y, %a2 ret <4 x float> %res } @@ -551,8 +551,8 @@ define <2 x double> @test_2f64_fmsub_load(ptr %a0, <2 x double> %a1, <2 x double ; AVX512-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 ; AVX512-NEXT: retq %x = load <2 x double>, ptr %a0 - %y = fmul <2 x double> %x, %a1 - %res = fsub <2 x double> %y, %a2 + %y = fmul contract <2 x double> %x, %a1 + %res = fsub contract <2 x double> %y, %a2 ret <2 x double> %res } @@ -593,8 +593,8 @@ define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %a, %y + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %a, %y ret <4 x float> %m } @@ -631,8 +631,8 @@ define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } @@ -669,8 +669,8 @@ define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } @@ -707,8 +707,8 @@ define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %a, %y + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %a, %y ret <4 x float> %m } @@ -745,8 +745,8 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } @@ -783,8 +783,8 @@ define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x fl ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <4 x float> %x, - %m = fmul <4 x float> %y, %a + %a = fadd contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %a ret <4 x float> %m } @@ -824,8 +824,8 @@ define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %s, %y + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } @@ -865,8 +865,8 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m 
= fmul <4 x float> %y, %s + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -906,8 +906,8 @@ define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -947,8 +947,8 @@ define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %s, %y + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } @@ -988,8 +988,8 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -1029,8 +1029,8 @@ define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x fl ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> , %x - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> , %x + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -1067,8 +1067,8 @@ define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %s, %y + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } @@ -1105,8 +1105,8 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -1143,8 +1143,8 @@ define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -1181,8 +1181,8 @@ define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %s, %y + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 x float> %s, %y ret <4 x float> %m } @@ -1219,8 +1219,8 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 
x float> %y, %s ret <4 x float> %m } @@ -1257,8 +1257,8 @@ define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x fl ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <4 x float> %x, - %m = fmul <4 x float> %y, %s + %s = fsub contract <4 x float> %x, + %m = fmul contract <4 x float> %y, %s ret <4 x float> %m } @@ -1308,10 +1308,10 @@ define float @test_f32_interp(float %x, float %y, float %t) { ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz float 1.0, %t - %tx = fmul nsz float %x, %t - %ty = fmul nsz float %y, %t1 - %r = fadd nsz float %tx, %ty + %t1 = fsub contract nsz float 1.0, %t + %tx = fmul contract nsz float %x, %t + %ty = fmul contract nsz float %y, %t1 + %r = fadd contract nsz float %tx, %ty ret float %r } @@ -1357,10 +1357,10 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <4 x float> , %t - %tx = fmul nsz <4 x float> %x, %t - %ty = fmul nsz <4 x float> %y, %t1 - %r = fadd nsz <4 x float> %tx, %ty + %t1 = fsub contract nsz <4 x float> , %t + %tx = fmul contract nsz <4 x float> %x, %t + %ty = fmul contract nsz <4 x float> %y, %t1 + %r = fadd contract nsz <4 x float> %tx, %ty ret <4 x float> %r } @@ -1406,10 +1406,10 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <8 x float> , %t - %tx = fmul nsz <8 x float> %x, %t - %ty = fmul nsz <8 x float> %y, %t1 - %r = fadd nsz <8 x float> %tx, %ty + %t1 = fsub contract nsz <8 x float> , %t + %tx = fmul contract nsz <8 x float> %x, %t + %ty = fmul contract nsz <8 x float> %y, %t1 + %r = fadd contract nsz <8 x float> %tx, %ty ret <8 x float> %r } @@ -1455,10 +1455,10 @@ define double @test_f64_interp(double %x, double %y, double %t) { ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz double 1.0, %t - %tx = fmul nsz double %x, %t - %ty = fmul nsz double %y, %t1 - %r = fadd nsz double %tx, %ty + %t1 = fsub contract nsz double 1.0, %t + %tx = fmul contract nsz double %x, %t + %ty = fmul contract nsz double %y, %t1 + %r = fadd contract nsz double %tx, %ty ret double %r } @@ -1507,10 +1507,10 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <2 x double> , %t - %tx = fmul nsz <2 x double> %x, %t - %ty = fmul nsz <2 x double> %y, %t1 - %r = fadd nsz <2 x double> %tx, %ty + %t1 = fsub contract nsz <2 x double> , %t + %tx = fmul contract nsz <2 x double> %x, %t + %ty = fmul contract nsz <2 x double> %y, %t1 + %r = fadd contract nsz <2 x double> %tx, %ty ret <2 x double> %r } @@ -1556,10 +1556,10 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} 
ymm1 = (ymm2 * ymm1) - ymm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <4 x double> , %t - %tx = fmul nsz <4 x double> %x, %t - %ty = fmul nsz <4 x double> %y, %t1 - %r = fadd nsz <4 x double> %tx, %ty + %t1 = fsub contract nsz <4 x double> , %t + %tx = fmul contract nsz <4 x double> %x, %t + %ty = fmul contract nsz <4 x double> %y, %t1 + %r = fadd contract nsz <4 x double> %tx, %ty ret <4 x double> %r } @@ -1603,9 +1603,9 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, < ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 ; AVX512-NEXT: retq - %mul = fmul nsz <4 x double> %a0, %a1 - %sub = fsub nsz <4 x double> %mul, %a2 - %neg = fsub nsz <4 x double> , %sub + %mul = fmul contract nsz <4 x double> %a0, %a1 + %sub = fsub contract nsz <4 x double> %mul, %a2 + %neg = fsub contract nsz <4 x double> , %sub ret <4 x double> %neg } @@ -1817,10 +1817,10 @@ define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, doubl ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %m1 = fmul fast double %a, %b - %m2 = fmul fast double %c, %d - %a1 = fadd fast double %m1, %m2 - %a2 = fadd fast double %a1, %n1 + %m1 = fmul contract fast double %a, %b + %m2 = fmul contract fast double %c, %d + %a1 = fadd contract fast double %m1, %m2 + %a2 = fadd contract fast double %a1, %n1 ret double %a2 } @@ -1846,10 +1846,10 @@ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: retq - %m1 = fmul float %a, %b - %m2 = fmul float %c, %d + %m1 = fmul contract float %a, %b + %m2 = fmul contract float %c, %d %a1 = fadd contract float %m1, %m2 - %a2 = fadd reassoc float %n0, %a1 + %a2 = fadd contract reassoc float %n0, %a1 ret float %a2 } @@ -1876,8 +1876,8 @@ define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) ; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul float %a, %b - %m2 = fmul float %c, %d + %m1 = fmul contract float %a, %b + %m2 = fmul contract float %c, %d %a1 = fadd contract float %m1, %m2 %a2 = fadd contract float %n0, %a1 ret float %a2 @@ -1911,13 +1911,13 @@ define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x do ; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2 ; AVX512-NEXT: vmovapd %xmm2, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul fast <2 x double> %x1, %x2 - %m2 = fmul fast <2 x double> %x3, %x4 - %m3 = fmul fast <2 x double> %x5, %x6 - %m4 = fmul fast <2 x double> %x7, %x8 - %a1 = fadd fast <2 x double> %m1, %m2 - %a2 = fadd fast <2 x double> %m3, %m4 - %a3 = fadd fast <2 x double> %a1, %a2 + %m1 = fmul contract fast <2 x double> %x1, %x2 + %m2 = fmul contract fast <2 x double> %x3, %x4 + %m3 = fmul contract fast <2 x double> %x5, %x6 + %m4 = fmul contract fast <2 x double> %x7, %x8 + %a1 = fadd contract fast <2 x double> %m1, %m2 + %a2 = fadd contract fast <2 x double> %m3, %m4 + %a3 = fadd contract fast <2 x double> %a1, %a2 ret <2 x double> %a3 } @@ -1947,11 +1947,11 @@ define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0 ; AVX512-NEXT: vaddss 
%xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul fast float %a, %b + %m1 = fmul contract fast float %a, %b store float %m1, ptr %p - %m2 = fmul fast float %c, %d - %a1 = fadd fast float %m1, %m2 - %a2 = fadd fast float %n0, %a1 + %m2 = fmul contract fast float %c, %d + %a1 = fadd contract fast float %m1, %m2 + %a2 = fadd contract fast float %n0, %a1 ret float %a2 } @@ -1981,11 +1981,11 @@ define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512-NEXT: vaddss %xmm0, %xmm4, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul fast float %a, %b - %m2 = fmul fast float %c, %d + %m1 = fmul contract fast float %a, %b + %m2 = fmul contract fast float %c, %d store float %m2, ptr %p - %a1 = fadd fast float %m1, %m2 - %a2 = fadd fast float %n0, %a1 + %a1 = fadd contract fast float %m1, %m2 + %a2 = fadd contract fast float %n0, %a1 ret float %a2 } @@ -2015,10 +2015,10 @@ define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, ; AVX512-NEXT: vmovss %xmm2, (%rdi) ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq - %m1 = fmul fast float %a, %b - %m2 = fmul fast float %c, %d - %a1 = fadd fast float %m1, %m2 + %m1 = fmul contract fast float %a, %b + %m2 = fmul contract fast float %c, %d + %a1 = fadd contract fast float %m1, %m2 store float %a1, ptr %p - %a2 = fadd fast float %n0, %a1 + %a2 = fadd contract fast float %n0, %a1 ret float %a2 } diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll index d910110467ee0..f0af3945ae959 100644 --- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll +++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s 
--check-prefix=AVX512 --check-prefix=AVX512-INFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS ; ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) @@ -29,8 +29,8 @@ define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fadd <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a0, %a1 + %res = fadd contract <16 x float> %x, %a2 ret <16 x float> %res } @@ -51,8 +51,8 @@ define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %res = fadd <8 x double> %x, %a2 + %x = fmul contract <8 x double> %a0, %a1 + %res = fadd contract <8 x double> %x, %a2 ret <8 x double> %res } @@ -77,8 +77,8 @@ define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fsub <16 x float> %x, %a2 + %x = fmul contract <16 x float> %a0, %a1 + %res = fsub contract <16 x float> %x, %a2 ret <16 x float> %res } @@ -99,8 +99,8 @@ define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x do ; AVX512: # %bb.0: ; AVX512-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %res = fsub <8 x double> %x, %a2 + %x = fmul contract <8 x double> %a0, %a1 + %res = fsub contract <8 x double> %x, %a2 ret <8 x double> %res } @@ -125,8 +125,8 @@ define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %res = fsub <16 x float> %a2, %x + %x = fmul contract <16 x float> %a0, %a1 + %res = fsub contract <16 x float> %a2, %x ret <16 x float> %res } @@ -147,8 +147,8 @@ define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2 ; AVX512-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %res = fsub <8 x double> %a2, %x + %x = fmul contract <8 x double> %a0, %a1 + %res = fsub contract <8 x double> %a2, %x ret <8 x double> %res } @@ -173,9 +173,9 @@ define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %x = fmul <16 x float> %a0, %a1 - %y = fsub <16 x float> , %x - %res = fsub <16 x float> %y, %a2 + %x = fmul contract <16 x float> %a0, %a1 + %y = fsub contract <16 x float> , %x + %res = fsub contract <16 x float> %y, %a2 ret <16 x float> %res } @@ -196,9 +196,9 @@ define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x 
double> %a1, <8 x d ; AVX512: # %bb.0: ; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2 ; AVX512-NEXT: retq - %x = fmul <8 x double> %a0, %a1 - %y = fsub <8 x double> , %x - %res = fsub <8 x double> %y, %a2 + %x = fmul contract <8 x double> %a0, %a1 + %y = fsub contract <8 x double> , %x + %res = fsub contract <8 x double> %y, %a2 ret <8 x double> %res } @@ -224,8 +224,8 @@ define <16 x float> @test_16f32_fmadd_load(ptr %a0, <16 x float> %a1, <16 x floa ; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1 ; AVX512-NEXT: retq %x = load <16 x float>, ptr %a0 - %y = fmul <16 x float> %x, %a1 - %res = fadd <16 x float> %y, %a2 + %y = fmul contract <16 x float> %x, %a1 + %res = fadd contract <16 x float> %y, %a2 ret <16 x float> %res } @@ -247,8 +247,8 @@ define <8 x double> @test_8f64_fmsub_load(ptr %a0, <8 x double> %a1, <8 x double ; AVX512-NEXT: vfmsub132pd {{.*#+}} zmm0 = (zmm0 * mem) - zmm1 ; AVX512-NEXT: retq %x = load <8 x double>, ptr %a0 - %y = fmul <8 x double> %x, %a1 - %res = fsub <8 x double> %y, %a2 + %y = fmul contract <8 x double> %x, %a1 + %res = fsub contract <8 x double> %y, %a2 ret <8 x double> %res } @@ -297,8 +297,8 @@ define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> % ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <16 x float> %x, - %m = fmul <16 x float> %a, %y + %a = fadd contract <16 x float> %x, + %m = fmul contract <16 x float> %a, %y ret <16 x float> %m } @@ -343,8 +343,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <8 x double> %x, - %m = fmul <8 x double> %y, %a + %a = fadd contract <8 x double> %x, + %m = fmul contract <8 x double> %y, %a ret <8 x double> %m } @@ -389,8 +389,8 @@ define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <16 x float> %x, - %m = fmul <16 x float> %a, %y + %a = fadd contract <16 x float> %x, + %m = fmul contract <16 x float> %a, %y ret <16 x float> %m } @@ -435,8 +435,8 @@ define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %a = fadd <8 x double> %x, - %m = fmul <8 x double> %y, %a + %a = fadd contract <8 x double> %x, + %m = fmul contract <8 x double> %y, %a ret <8 x double> %m } @@ -482,8 +482,8 @@ define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> % ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <16 x float> , %x - %m = fmul <16 x float> %s, %y + %s = fsub contract <16 x float> , %x + %m = fmul contract <16 x float> %s, %y ret <16 x float> %m } @@ -529,8 +529,8 @@ define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <8 x double> , %x - %m = fmul <8 x double> %y, %s + %s = fsub contract <8 x double> , %x + %m = fmul contract <8 x double> %y, %s ret <8 x double> %m } @@ -576,8 +576,8 @@ define <16 x float> 
@test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <16 x float> , %x - %m = fmul <16 x float> %s, %y + %s = fsub contract <16 x float> , %x + %m = fmul contract <16 x float> %s, %y ret <16 x float> %m } @@ -623,8 +623,8 @@ define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <8 x double> , %x - %m = fmul <8 x double> %y, %s + %s = fsub contract <8 x double> , %x + %m = fmul contract <8 x double> %y, %s ret <8 x double> %m } @@ -669,8 +669,8 @@ define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> % ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <16 x float> %x, - %m = fmul <16 x float> %s, %y + %s = fsub contract <16 x float> %x, + %m = fmul contract <16 x float> %s, %y ret <16 x float> %m } @@ -715,8 +715,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <8 x double> %x, - %m = fmul <8 x double> %y, %s + %s = fsub contract <8 x double> %x, + %m = fmul contract <8 x double> %y, %s ret <8 x double> %m } @@ -761,8 +761,8 @@ define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <16 x float> %x, - %m = fmul <16 x float> %s, %y + %s = fsub contract <16 x float> %x, + %m = fmul contract <16 x float> %s, %y ret <16 x float> %m } @@ -807,8 +807,8 @@ define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> ; AVX512-NOINFS: # %bb.0: ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 ; AVX512-NOINFS-NEXT: retq - %s = fsub <8 x double> %x, - %m = fmul <8 x double> %y, %s + %s = fsub contract <8 x double> %x, + %m = fmul contract <8 x double> %y, %s ret <8 x double> %m } @@ -868,10 +868,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <16 x float> , %t - %tx = fmul nsz <16 x float> %x, %t - %ty = fmul nsz <16 x float> %y, %t1 - %r = fadd nsz <16 x float> %tx, %ty + %t1 = fsub contract nsz <16 x float> , %t + %tx = fmul contract nsz <16 x float> %x, %t + %ty = fmul contract nsz <16 x float> %y, %t1 + %r = fadd contract nsz <16 x float> %tx, %ty ret <16 x float> %r } @@ -927,10 +927,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1 ; AVX512-NOINFS-NEXT: retq - %t1 = fsub nsz <8 x double> , %t - %tx = fmul nsz <8 x double> %x, %t - %ty = fmul nsz <8 x double> %y, %t1 - %r = fadd nsz <8 x double> %tx, %ty + %t1 = fsub contract nsz <8 x double> , %t + %tx = fmul contract nsz <8 x double> %x, %t + %ty = fmul contract nsz <8 x double> %y, %t1 + %r = fadd contract nsz <8 x double> %tx, %ty ret <8 x double> %r } @@ -955,9 
@@ -955,9 +955,9 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
- %mul = fmul nsz <16 x float> %a0, %a1
- %add = fadd nsz <16 x float> %mul, %a2
- %neg = fsub nsz <16 x float> splat (float -0.000000e+00), %add
+ %mul = fmul contract nsz <16 x float> %a0, %a1
+ %add = fadd contract nsz <16 x float> %mul, %a2
+ %neg = fsub contract nsz <16 x float> splat (float -0.000000e+00), %add
ret <16 x float> %neg
}
@@ -978,9 +978,9 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
; AVX512: # %bb.0:
; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
- %mul = fmul nsz <8 x double> %a0, %a1
- %sub = fsub nsz <8 x double> %mul, %a2
- %neg = fsub nsz <8 x double> splat (double -0.000000e+00), %sub
+ %mul = fmul contract nsz <8 x double> %a0, %a1
+ %sub = fsub contract nsz <8 x double> %mul, %a2
+ %neg = fsub contract nsz <8 x double> splat (double -0.000000e+00), %sub
ret <8 x double> %neg
}
@@ -1001,10 +1001,10 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
; AVX512: # %bb.0:
; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
- %mul = fmul nsz <16 x float> %a0, %a1
- %neg0 = fsub nsz <16 x float> splat (float -0.000000e+00), %mul
- %add = fadd nsz <16 x float> %neg0, %a2
- %neg1 = fsub nsz <16 x float> splat (float -0.000000e+00), %add
+ %mul = fmul contract nsz <16 x float> %a0, %a1
+ %neg0 = fsub contract nsz <16 x float> splat (float -0.000000e+00), %mul
+ %add = fadd contract nsz <16 x float> %neg0, %a2
+ %neg1 = fsub contract nsz <16 x float> splat (float -0.000000e+00), %add
ret <16 x float> %neg1
}
@@ -1025,10 +1025,10 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: retq
- %mul = fmul nsz <8 x double> %a0, %a1
- %neg0 = fsub nsz <8 x double> splat (double -0.000000e+00), %mul
- %sub = fsub nsz <8 x double> %neg0, %a2
- %neg1 = fsub nsz <8 x double> splat (double -0.000000e+00), %sub
+ %mul = fmul contract nsz <8 x double> %a0, %a1
+ %neg0 = fsub contract nsz <8 x double> splat (double -0.000000e+00), %mul
+ %sub = fsub contract nsz <8 x double> %neg0, %a2
+ %neg1 = fsub contract nsz <8 x double> splat (double -0.000000e+00), %sub
ret <8 x double> %neg1
}
@@ -1108,8 +1108,8 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
- %m = fmul nsz <16 x float> %x, %y
- %n = fsub <16 x float> splat (float -0.000000e+00), %m
+ %m = fmul contract nsz <16 x float> %x, %y
+ %n = fsub contract <16 x float> splat (float -0.000000e+00), %m
ret <16 x float> %n
}
@@ -1133,8 +1133,8 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT: retq
- %m = fmul nsz <8 x double> %x, %y
- %n = fsub <8 x double> splat (double -0.000000e+00), %m
+ %m = fmul contract nsz <8 x double> %x, %y
+ %n = fsub contract <8 x double> splat (double -0.000000e+00), %m
ret <8 x double> %n
}
@@ -1162,8 +1162,8 @@ define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT: retq
- %m = fmul <8 x double> %x, %y
- %n = fsub <8 x double> splat (double -0.000000e+00), %m
+ %m = fmul contract <8 x double> %x, %y
+ %n = fsub contract <8 x double> splat (double -0.000000e+00), %m
ret <8 x double> %n
}
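Note on the pattern above (a reviewer sketch, not a hunk of this patch): with -fp-contract=fast no longer passed to llc, contraction is requested per instruction through the contract fast-math flag, and a mul/add pair only folds into an FMA when both operations carry it:

  %y = fmul contract <16 x float> %x, %a1    ; contractable multiply
  %res = fadd contract <16 x float> %y, %a2  ; contractable add; together they may form vfmadd

Without contract on both instructions the backend must keep the separate multiply and add, which is why every fused test above gains the flag on each operation in the chain.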
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index d59b12c6d1231..81529aff39ff1 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-NO-FASTFMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-ONLY-AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512F,CHECK-SKX
declare i16 @llvm.umax.i16(i16, i16)
declare i64 @llvm.umin.i64(i64, i64)
@@ -23,18 +23,18 @@ define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow2_4xfloat:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpslld $23, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
-; CHECK-NO-FASTFMA-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow2_4xfloat:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpslld $23, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_4xfloat:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1091567616,1091567616,1091567616,1091567616]
+; CHECK-ONLY-AVX512F-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow2_4xfloat:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%p2 = shl <4 x i32> splat (i32 1), %i
%p2_f = uitofp <4 x i32> %p2 to <4 x float>
%r = fmul <4 x float> splat (float 9.000000e+00), %p2_f
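For the checks just above (an explanatory aside, not part of the diff): multiplying 9.0 by an integer power of two only moves the IEEE-754 exponent field, so the fold rewrites the whole computation on the float's bit pattern, assuming the shift amount stays in range:

  bits(9.0f)       = 0x41100000 = 1091567616
  bits(9.0f * 2^i) = 0x41100000 + (i << 23)

which is exactly the vpslld $23 / vpaddd 1091567616 sequence that both renamed prefixes expect.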
@@ -371,34 +371,34 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow2_8xhalf:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NO-FASTFMA-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; CHECK-NO-FASTFMA-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NO-FASTFMA-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
-; CHECK-NO-FASTFMA-NEXT: vcvtdq2ps %ymm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
-; CHECK-NO-FASTFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vzeroupper
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow2_8xhalf:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
-; CHECK-FMA-NEXT: vpsllvw %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-FMA-NEXT: vcvtdq2ps %ymm0, %ymm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %ymm0
-; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-FMA-NEXT: vzeroupper
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_8xhalf:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
+; CHECK-ONLY-AVX512F-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-ONLY-AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
+; CHECK-ONLY-AVX512F-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vzeroupper
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow2_8xhalf:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
+; CHECK-SKX-NEXT: vpsllvw %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-SKX-NEXT: vcvtph2ps %xmm0, %ymm0
+; CHECK-SKX-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-SKX-NEXT: vzeroupper
+; CHECK-SKX-NEXT: retq
%p2 = shl <8 x i16> splat (i16 1), %i
%p2_f = uitofp <8 x i16> %p2 to <8 x half>
%r = fmul <8 x half> splat (half 0xH7000), %p2_f
@@ -656,19 +656,19 @@ define <8 x half> @fdiv_pow2_8xhalf(<8 x i16> %i) {
; CHECK-AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow2_8xhalf:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpsllw $10, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpbroadcastw {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672]
-; CHECK-NO-FASTFMA-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow2_8xhalf:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpsllw $10, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672]
-; CHECK-FMA-NEXT: vpsubw %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow2_8xhalf:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpsllw $10, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672]
+; CHECK-ONLY-AVX512F-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow2_8xhalf:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpsllw $10, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28672,28672,28672,28672,28672,28672,28672,28672]
+; CHECK-SKX-NEXT: vpsubw %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: retq
%p2 = shl <8 x i16> splat (i16 1), %i
%p2_f = uitofp <8 x i16> %p2 to <8 x half>
%r = fdiv <8 x half> splat (half 0xH7000), %p2_f
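The fdiv_pow2_8xhalf checks above rely on the same idea in the other direction (again an aside, not part of the diff): dividing 8192.0 by 2^i only decrements the half-precision exponent field, so

  bits(8192.0, f16)       = 0x7000 = 28672
  bits(8192.0 / 2^i, f16) = 28672 - (i << 10)

matching the vpsllw $10 / vpsubw 28672 sequence checked under each AVX-512 prefix.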
@@ -882,21 +882,21 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movq %rsi, %rcx
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rdi
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: shlxq %rsi, %rdi, %rax
-; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movq %rsi, %rcx
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-ONLY-AVX512F-NEXT: shlq %cl, %rdi
+; CHECK-ONLY-AVX512F-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: shlxq %rsi, %rdi, %rax
+; CHECK-SKX-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nuw i64 %v, %cnt
%conv = uitofp i64 %shl to double
%mul = fmul double 9.000000e+00, %conv
@@ -935,26 +935,26 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
-; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
-; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
-; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
-; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
-; CHECK-FMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vcvtqq2ps %xmm0, %xmm0
-; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
+; CHECK-ONLY-AVX512F-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
+; CHECK-ONLY-AVX512F-NEXT: vmovq %xmm0, %rax
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
+; CHECK-ONLY-AVX512F-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; CHECK-SKX-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vcvtqq2ps %xmm0, %xmm0
+; CHECK-SKX-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nsw nuw <2 x i64> splat (i64 2), %cnt
%conv = uitofp <2 x i64> %shl to <2 x float>
%mul = fmul <2 x float> splat (float 1.500000e+01), %conv
@@ -974,17 +974,17 @@ define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
; CHECK-AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpsllq $52, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpsllq $52, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpsllq $52, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpsllq $52, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nsw nuw <2 x i64> splat (i64 2), %cnt
%conv = uitofp <2 x i64> %shl to <2 x double>
%mul = fmul <2 x double> splat (double 1.500000e+01), %conv
@@ -1007,21 +1007,59 @@ define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float
; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpslld $23, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1092616192,1092616192,1092616192,1092616192]
-; CHECK-NO-FASTFMA-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
-; CHECK-FMA-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
-; CHECK-FMA-NEXT: vcvtdq2ps %xmm0, %xmm0
-; CHECK-FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
+; CHECK-ONLY-AVX512F-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm2
+; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [5.0E+0,5.0E+0,5.0E+0,5.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
+; CHECK-SKX-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
+; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; CHECK-SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
+; CHECK-SKX-NEXT: retq
+ %shl = shl nsw nuw <4 x i32> splat (i32 2), %cnt
+ %conv = uitofp <4 x i32> %shl to <4 x float>
+ %mul = fmul contract <4 x float> splat (float 5.000000e+00), %conv
+ %res = fadd contract <4 x float> %mul, %add
+ ret <4 x float> %res
+}
+
+define <4 x float> @fmul_pow_shl_cnt_vec_no_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_no_fma:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: pslld $23, %xmm0
+; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT: addps %xmm1, %xmm0
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_no_fma:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1092616192,1092616192,1092616192,1092616192]
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_no_fma:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1092616192,1092616192,1092616192,1092616192]
+; CHECK-ONLY-AVX512F-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_vec_no_fma:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nsw nuw <4 x i32> splat (i32 2), %cnt
%conv = uitofp <4 x i32> %shl to <4 x float>
%mul = fmul <4 x float> splat (float 5.000000e+00), %conv
@@ -1131,34 +1169,34 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-AVX2-NEXT: addq $56, %rsp
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NO-FASTFMA-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
-; CHECK-NO-FASTFMA-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vpmovdw %zmm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NO-FASTFMA-NEXT: vcvtdq2ps %ymm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1]
-; CHECK-NO-FASTFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vzeroupper
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
-; CHECK-FMA-NEXT: vpsllvw %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-FMA-NEXT: vcvtdq2ps %ymm0, %ymm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %ymm0
-; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; CHECK-FMA-NEXT: vzeroupper
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-ONLY-AVX512F-NEXT: vpmovsxbd {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
+; CHECK-ONLY-AVX512F-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1]
+; CHECK-ONLY-AVX512F-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vzeroupper
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
+; CHECK-SKX-NEXT: vpsllvw %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-SKX-NEXT: vcvtph2ps %xmm0, %ymm0
+; CHECK-SKX-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-SKX-NEXT: vzeroupper
+; CHECK-SKX-NEXT: retq
%shl = shl nsw nuw <2 x i16> splat (i16 2), %cnt
%conv = uitofp <2 x i16> %shl to <2 x half>
%mul = fmul <2 x half> splat (half 0xH4B80), %conv
@@ -1195,23 +1233,23 @@ define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
-; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $1, %eax
-; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
-; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movq %rdi, %rcx
+; CHECK-ONLY-AVX512F-NEXT: movl $1, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-ONLY-AVX512F-NEXT: shlq %cl, %rax
+; CHECK-ONLY-AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $1, %eax
+; CHECK-SKX-NEXT: shlxq %rdi, %rax, %rax
+; CHECK-SKX-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nuw i64 1, %cnt
%conv = uitofp i64 %shl to double
%mul = fmul double 9.745314e+288, %conv
@@ -1295,15 +1333,15 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-SSE-NEXT: shlq %cl, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
-; CHECK-SSE-NEXT: js .LBB23_1
+; CHECK-SSE-NEXT: js .LBB24_1
; CHECK-SSE-NEXT: # %bb.2:
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-SSE-NEXT: jmp .LBB23_3
-; CHECK-SSE-NEXT: .LBB23_1:
+; CHECK-SSE-NEXT: jmp .LBB24_3
+; CHECK-SSE-NEXT: .LBB24_1:
; CHECK-SSE-NEXT: shrq %rax
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: addss %xmm1, %xmm1
-; CHECK-SSE-NEXT: .LBB23_3:
+; CHECK-SSE-NEXT: .LBB24_3:
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
; CHECK-SSE-NEXT: retq
@@ -1315,38 +1353,38 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT: shlq %cl, %rax
; CHECK-AVX2-NEXT: testq %rax, %rax
-; CHECK-AVX2-NEXT: js .LBB23_1
+; CHECK-AVX2-NEXT: js .LBB24_1
; CHECK-AVX2-NEXT: # %bb.2:
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; CHECK-AVX2-NEXT: jmp .LBB23_3
-; CHECK-AVX2-NEXT: .LBB23_1:
+; CHECK-AVX2-NEXT: jmp .LBB24_3
+; CHECK-AVX2-NEXT: .LBB24_1:
; CHECK-AVX2-NEXT: shrq %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; CHECK-AVX2-NEXT: .LBB23_3:
+; CHECK-AVX2-NEXT: .LBB24_3:
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
-; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $8, %eax
-; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
-; CHECK-FMA-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movq %rdi, %rcx
+; CHECK-ONLY-AVX512F-NEXT: movl $8, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-ONLY-AVX512F-NEXT: shlq %cl, %rax
+; CHECK-ONLY-AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $8, %eax
+; CHECK-SKX-NEXT: shlxq %rdi, %rax, %rax
+; CHECK-SKX-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl i64 8, %cnt
%conv = uitofp i64 %shl to float
%mul = fdiv float -9.000000e+00, %conv
@@ -1376,25 +1414,25 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
-; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $8, %eax
-; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
-; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movq %rdi, %rcx
+; CHECK-ONLY-AVX512F-NEXT: movl $8, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-ONLY-AVX512F-NEXT: shlq %cl, %rax
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $8, %eax
+; CHECK-SKX-NEXT: shlxq %rdi, %rax, %rax
+; CHECK-SKX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl i64 8, %cnt
%conv = sitofp i64 %shl to float
%mul = fdiv float -9.000000e+00, %conv
@@ -1460,31 +1498,31 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
; CHECK-AVX2-NEXT: popq %rax
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
-; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $1, %eax
-; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
-; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
-; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movl %edi, %ecx
+; CHECK-ONLY-AVX512F-NEXT: movl $1, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-ONLY-AVX512F-NEXT: shll %cl, %eax
+; CHECK-ONLY-AVX512F-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $1, %eax
+; CHECK-SKX-NEXT: shlxl %edi, %eax, %eax
+; CHECK-SKX-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nuw i32 1, %cnt
%conv = uitofp i32 %shl to half
%mul = fdiv half 0xH7000, %conv
@@ -1571,33 +1609,33 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
; CHECK-AVX2-NEXT: popq %rax
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
-; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
-; CHECK-NO-FASTFMA-NEXT: movzwl %ax, %eax
-; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $1, %eax
-; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
-; CHECK-FMA-NEXT: movzwl %ax, %eax
-; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
-; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movl %edi, %ecx
+; CHECK-ONLY-AVX512F-NEXT: movl $1, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-ONLY-AVX512F-NEXT: shll %cl, %eax
+; CHECK-ONLY-AVX512F-NEXT: movzwl %ax, %eax
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $1, %eax
+; CHECK-SKX-NEXT: shlxl %edi, %eax, %eax
+; CHECK-SKX-NEXT: movzwl %ax, %eax
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nuw i16 1, %cnt
%conv = uitofp i16 %shl to half
%mul = fdiv half 0xH4000, %conv
@@ -1653,25 +1691,25 @@ define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
-; CHECK-NO-FASTFMA: # %bb.0:
-; CHECK-NO-FASTFMA-NEXT: movl %edi, %ecx
-; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
-; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-NO-FASTFMA-NEXT: retq
-;
-; CHECK-FMA-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
-; CHECK-FMA: # %bb.0:
-; CHECK-FMA-NEXT: movl $1, %eax
-; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
-; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
-; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: movl %edi, %ecx
+; CHECK-ONLY-AVX512F-NEXT: movl $1, %eax
+; CHECK-ONLY-AVX512F-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-ONLY-AVX512F-NEXT: shll %cl, %eax
+; CHECK-ONLY-AVX512F-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: movl $1, %eax
+; CHECK-SKX-NEXT: shlxl %edi, %eax, %eax
+; CHECK-SKX-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: retq
%shl = shl nuw i32 1, %cnt
%conv = uitofp i32 %shl to float
%mul = fdiv float 0x3a1fffff00000000, %conv