Skip to content

Commit

Permalink
AVX-512: Fixed a bug in FMA instruction selection on KNL
Browse files Browse the repository at this point in the history
The FMA instruction was selected from AVX2 set instead of AVX-512

Differential Revision: http://reviews.llvm.org/D16884

llvm-svn: 259792
  • Loading branch information
Elena Demikhovsky committed Feb 4, 2016
1 parent 23e44f5 commit 8652827
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 15 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,7 @@ class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA]>;
VEX_4V, FMASC, Requires<[HasFMA, NoVLX]>;

// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/CodeGen/X86/avx-isa-check.ll
Original file line number Diff line number Diff line change
Expand Up @@ -575,3 +575,9 @@ entry:
%C = zext <8 x i8> %B to <8 x i16>
ret <8 x i16> %C
}

define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
%x = fmul <4 x float> %a0, %a1
%res = fsub <4 x float> %x, %a2
ret <4 x float> %res
}
15 changes: 5 additions & 10 deletions llvm/test/CodeGen/X86/avx512-fma.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,11 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
}

define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
; KNL-LABEL: test_x86_fmsub_213_m:
; KNL: ## BB#0:
; KNL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmsub_213_m:
; SKX: ## BB#0:
; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0
; SKX-NEXT: retq
; ALL-LABEL: test_x86_fmsub_213_m:
; ALL: ## BB#0:
; ALL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
; ALL-NEXT: vmovaps %zmm1, %zmm0
; ALL-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1508,15 +1508,15 @@ define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1

define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
%a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
%res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
Expand Down Expand Up @@ -1594,7 +1594,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %

define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0x07]
%a2 = load <2 x double>, <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
Expand Down Expand Up @@ -1624,7 +1624,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %

define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0x07]
%a2 = load <4 x double>, <4 x double>* %ptr_a2
%res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
Expand Down

0 comments on commit 8652827

Please sign in to comment.