Skip to content

Commit

Permalink
[X86] Add patterns to fold EVEX store with EVEX encoded vcvtps2ph ins…
Browse files Browse the repository at this point in the history
…tructions. Remove bad pattern that had vf432 vcvtps2ph storing 128-bits.

llvm-svn: 317662
  • Loading branch information
topperc committed Nov 8, 2017
1 parent b832ee6 commit 65e6d0b
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 19 deletions.
34 changes: 23 additions & 11 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -7225,17 +7225,16 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2)),
NoItinerary, 0, 0>, AVX512AIi8Base;
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2))),
addr:$dst)]>;
let hasSideEffects = 0, mayStore = 1 in
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K;
let hasSideEffects = 0, mayStore = 1 in {
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>;
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K;
}
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
let hasSideEffects = 0 in
Expand All @@ -7255,6 +7254,19 @@ let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}

def : Pat<(store (f64 (extractelt
(bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
def : Pat<(store (i64 (extractelt
(bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
(iPTR 0))), addr:$dst),
(VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
(VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
(VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/X86/f16c-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,12 @@ define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
; X64-AVX512VL: # BB#0: # %entry
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
Expand All @@ -335,14 +333,12 @@ define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x3
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X32-AVX512VL-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
; X64-AVX512VL: # BB#0: # %entry
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x03]
; X64-AVX512VL-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
Expand Down

0 comments on commit 65e6d0b

Please sign in to comment.