Skip to content

Commit

Permalink
[X86][AVX] Fold extract_subvector(broadcast(x),c) -> extract_subvector(broadcast(x),0) iff c != 0
Browse files Browse the repository at this point in the history

If we're extracting an upper subvector from a broadcast, we're better off extracting the lowest subvector instead, as it avoids an actual extract instruction and might help SimplifyDemandedVectorElts further simplify the code.
  • Loading branch information
RKSimon committed May 24, 2020
1 parent 57bb478 commit e508d64
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 273 deletions.
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47290,6 +47290,13 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
}
}

// If we're extracting an upper subvector from a broadcast, we should just
// extract the lowest subvector instead, which should allow
// SimplifyDemandedVectorElts to do more simplifications.
if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST ||
InVec.getOpcode() == X86ISD::VBROADCAST_LOAD))
return extractSubVector(InVec, 0, DAG, SDLoc(N), VT.getSizeInBits());

// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
Expand Down
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/X86/pr45443.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ define <16 x float> @PR45443() {
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080]
; X86-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
; X86-NEXT: vpcmpltud {{\.LCPI.*}}{1to16}, %zmm1, %k1
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; X86-NEXT: vpbroadcastd {{.*#+}} ymm3 = [16777215,16777215,16777215,16777215,16777215,16777215,16777215,16777215]
; X86-NEXT: vpand %ymm3, %ymm2, %ymm2
; X86-NEXT: vpand %ymm3, %ymm1, %ymm1
; X86-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; X86-NEXT: vpbroadcastd {{.*#+}} ymm2 = [16777215,16777215,16777215,16777215,16777215,16777215,16777215,16777215]
; X86-NEXT: vpand %ymm2, %ymm1, %ymm1
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; X86-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
; X86-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm0 {%k1}
; X86-NEXT: retl
Expand All @@ -22,11 +20,9 @@ define <16 x float> @PR45443() {
; X64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080,2181038080]
; X64-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm0
; X64-NEXT: vpcmpltud {{.*}}(%rip){1to16}, %zmm1, %k1
; X64-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; X64-NEXT: vpbroadcastd {{.*#+}} ymm3 = [16777215,16777215,16777215,16777215,16777215,16777215,16777215,16777215]
; X64-NEXT: vpand %ymm3, %ymm2, %ymm2
; X64-NEXT: vpand %ymm3, %ymm1, %ymm1
; X64-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; X64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [16777215,16777215,16777215,16777215,16777215,16777215,16777215,16777215]
; X64-NEXT: vpand %ymm2, %ymm1, %ymm1
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; X64-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
; X64-NEXT: vbroadcastss {{.*}}(%rip), %zmm0 {%k1}
; X64-NEXT: retq
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -587,10 +587,9 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
;
; XOPAVX2-LABEL: splatvar_funnnel_v4i64:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotq %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; XOPAVX2-NEXT: vprotq %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
Expand Down Expand Up @@ -671,10 +670,9 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
;
; XOPAVX2-LABEL: splatvar_funnnel_v8i32:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotd %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT: vprotd %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
Expand Down Expand Up @@ -741,10 +739,9 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
;
; XOPAVX2-LABEL: splatvar_funnnel_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastw %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotw %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpbroadcastw %xmm1, %xmm1
; XOPAVX2-NEXT: vprotw %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
Expand Down Expand Up @@ -891,10 +888,9 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
;
; XOPAVX2-LABEL: splatvar_funnnel_v32i8:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; XOPAVX2-NEXT: vprotb %xmm3, %xmm2, %xmm2
; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; XOPAVX2-NEXT: vprotb %xmm1, %xmm2, %xmm2
; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; XOPAVX2-NEXT: retq
Expand Down
Loading

0 comments on commit e508d64

Please sign in to comment.