Skip to content

Commit

Permalink
[AVX-512] Port 128 and 256-bit memory->register sign/zero extend patt…
Browse files Browse the repository at this point in the history
…erns from SSE file. Also add a minimal set for 512-bit.

llvm-svn: 283704
  • Loading branch information
topperc committed Oct 9, 2016
1 parent 29558b8 commit 64378f4
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 13 deletions.
142 changes: 142 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -7227,6 +7227,148 @@ let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
SDNode ExtOp, PatFrag ExtLoad16> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit patterns
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasBWI] in {
def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
}
}

defm : AVX512_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
defm : AVX512_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

Expand Down
16 changes: 7 additions & 9 deletions llvm/test/CodeGen/X86/pmul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -476,9 +476,8 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind {
;
; AVX512BW-LABEL: mul_v32i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
Expand Down Expand Up @@ -932,16 +931,15 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
;
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2
; AVX512BW-NEXT: vpmullw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm2
; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmullw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
entry:
%A = mul <64 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1057,8 +1057,7 @@ define <4 x float> @load_cvt_8i16_to_4f32(<8 x i16>* %a0) nounwind {
;
; AVX512VL-LABEL: load_cvt_8i16_to_4f32:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT: movq %rax, %rcx
Expand Down Expand Up @@ -2449,8 +2448,7 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind {
;
; AVX512VL-LABEL: load_cvt_8i16_to_4f64:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT: movq %rax, %rcx
Expand Down

0 comments on commit 64378f4

Please sign in to comment.