Skip to content

Commit

Permalink
[X86] combineLoad - allow constant loads to share matching 'lower constant bits' with larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD nodes
Browse files Browse the repository at this point in the history

We already had separate support for VBROADCAST_LOAD - merge this with the generic load handling and add SUBV_BROADCAST_LOAD support as well.
  • Loading branch information
RKSimon committed Dec 14, 2023
1 parent 5e38ba2 commit 88f1a2c
Show file tree
Hide file tree
Showing 10 changed files with 25,535 additions and 25,876 deletions.
36 changes: 9 additions & 27 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49963,33 +49963,15 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
}
return true;
};
if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
getTargetConstantFromBasePtr(Ptr)) {
// See if we are loading a constant that has also been broadcast.
APInt Undefs, UserUndefs;
SmallVector<APInt> Bits, UserBits;
if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
UserBits)) {
UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
UserBits.truncate(Bits.size());
if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
SDValue Extract = extractSubVector(
SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
Extract = DAG.getBitcast(RegVT, Extract);
return DCI.CombineTo(N, Extract, SDValue(User, 1));
}
}
}
if (ISD::isNormalLoad(User)) {
// See if we are loading a constant that matches in the lower
// bits of a longer constant (but from a different constant pool ptr).
SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
if (LdC && UserC && UserPtr != Ptr &&
LdC->getType()->getPrimitiveSizeInBits() <
UserC->getType()->getPrimitiveSizeInBits()) {
// See if we are loading a constant that matches in the lower
// bits of a longer constant (but from a different constant pool ptr).
SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
if (LdC && UserC && UserPtr != Ptr) {
unsigned LdSize = LdC->getType()->getPrimitiveSizeInBits();
unsigned UserSize = UserC->getType()->getPrimitiveSizeInBits();
if (LdSize < UserSize || !ISD::isNormalLoad(User)) {
APInt Undefs, UserUndefs;
SmallVector<APInt> Bits, UserBits;
if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
Expand Down
19 changes: 9 additions & 10 deletions llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1459,17 +1459,16 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
;
; AVX-64-LABEL: f8xi64_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1]
; AVX-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddq %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-64-NEXT: vpaddq %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
Expand Down
565 changes: 247 additions & 318 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll

Large diffs are not rendered by default.

3,336 changes: 1,581 additions & 1,755 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll

Large diffs are not rendered by default.

2,576 changes: 1,290 additions & 1,286 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-5.ll

Large diffs are not rendered by default.

6,071 changes: 3,094 additions & 2,977 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll

Large diffs are not rendered by default.

6,206 changes: 3,102 additions & 3,104 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-8.ll

Large diffs are not rendered by default.

4,094 changes: 2,062 additions & 2,032 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll

Large diffs are not rendered by default.

22,626 changes: 11,267 additions & 11,359 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll

Large diffs are not rendered by default.

5,882 changes: 2,874 additions & 3,008 deletions llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-8.ll

Large diffs are not rendered by default.

0 comments on commit 88f1a2c

Please sign in to comment.