diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2a3425a42607e..69e3148b8c864 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21006,20 +21006,18 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { &IsFast) || !IsFast) return SDValue(); - EVT PtrVT = Ptr.getValueType(); - SDValue Offset = - DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT), - DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT)); - SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset); MachinePointerInfo PointerInfo(ST->getAddressSpace()); // If the offset is a known constant then try to recover the pointer // info + SDValue NewPtr; if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) { unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8; NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL); PointerInfo = ST->getPointerInfo().getWithOffset(COffset); + } else { + NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx); } return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(), diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll index fbae1bda7c665..1f6c430a932e0 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1879,6 +1879,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind { ; O32-NEXT: addu $1, $2, $25 ; O32-NEXT: lw $2, %got(i32)($1) ; O32-NEXT: lw $2, 0($2) +; O32-NEXT: andi $2, $2, 15 ; O32-NEXT: lw $1, %got(v16i8)($1) ; O32-NEXT: addu $1, $1, $2 ; O32-NEXT: jr $ra @@ -1891,6 +1892,7 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind { ; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) ; N32-NEXT: lw $2, %got_disp(i32)($1) ; N32-NEXT: lw $2, 0($2) +; N32-NEXT: andi $2, $2, 15 ; N32-NEXT: lw $1, %got_disp(v16i8)($1) ; 
N32-NEXT: addu $1, $1, $2 ; N32-NEXT: jr $ra @@ -1902,7 +1904,8 @@ define void @insert_v16i8_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v16i8_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lwu $2, 0($2) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: andi $2, $2, 15 ; N64-NEXT: ld $1, %got_disp(v16i8)($1) ; N64-NEXT: daddu $1, $1, $2 ; N64-NEXT: jr $ra @@ -1925,6 +1928,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind { ; O32-NEXT: addu $1, $2, $25 ; O32-NEXT: lw $2, %got(i32)($1) ; O32-NEXT: lw $2, 0($2) +; O32-NEXT: andi $2, $2, 7 ; O32-NEXT: lw $1, %got(v8i16)($1) ; O32-NEXT: lsa $1, $2, $1, 1 ; O32-NEXT: jr $ra @@ -1937,6 +1941,7 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind { ; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) ; N32-NEXT: lw $2, %got_disp(i32)($1) ; N32-NEXT: lw $2, 0($2) +; N32-NEXT: andi $2, $2, 7 ; N32-NEXT: lw $1, %got_disp(v8i16)($1) ; N32-NEXT: lsa $1, $2, $1, 1 ; N32-NEXT: jr $ra @@ -1948,7 +1953,8 @@ define void @insert_v8i16_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v8i16_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lwu $2, 0($2) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: andi $2, $2, 7 ; N64-NEXT: ld $1, %got_disp(v8i16)($1) ; N64-NEXT: dlsa $1, $2, $1, 1 ; N64-NEXT: jr $ra @@ -1971,6 +1977,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind { ; O32-NEXT: addu $1, $2, $25 ; O32-NEXT: lw $2, %got(i32)($1) ; O32-NEXT: lw $2, 0($2) +; O32-NEXT: andi $2, $2, 3 ; O32-NEXT: lw $1, %got(v4i32)($1) ; O32-NEXT: lsa $1, $2, $1, 2 ; O32-NEXT: jr $ra @@ -1983,6 +1990,7 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind { ; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) ; N32-NEXT: lw $2, %got_disp(i32)($1) ; N32-NEXT: lw $2, 0($2) +; N32-NEXT: andi $2, $2, 3 ; N32-NEXT: lw $1, %got_disp(v4i32)($1) ; N32-NEXT: lsa $1, $2, $1, 2 
; N32-NEXT: jr $ra @@ -1994,7 +2002,8 @@ define void @insert_v4i32_vidx(i32 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v4i32_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lwu $2, 0($2) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: andi $2, $2, 3 ; N64-NEXT: ld $1, %got_disp(v4i32)($1) ; N64-NEXT: dlsa $1, $2, $1, 2 ; N64-NEXT: jr $ra @@ -2018,6 +2027,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind { ; O32-NEXT: addu $1, $2, $25 ; O32-NEXT: lw $2, %got(i32)($1) ; O32-NEXT: lw $2, 0($2) +; O32-NEXT: andi $2, $2, 1 ; O32-NEXT: lw $1, %got(v2i64)($1) ; O32-NEXT: lsa $1, $2, $1, 3 ; O32-NEXT: sw $5, 4($1) @@ -2031,6 +2041,7 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind { ; N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) ; N32-NEXT: lw $2, %got_disp(i32)($1) ; N32-NEXT: lw $2, 0($2) +; N32-NEXT: andi $2, $2, 1 ; N32-NEXT: lw $1, %got_disp(v2i64)($1) ; N32-NEXT: lsa $1, $2, $1, 3 ; N32-NEXT: jr $ra @@ -2042,7 +2053,8 @@ define void @insert_v2i64_vidx(i64 signext %a) nounwind { ; N64-NEXT: daddu $1, $1, $25 ; N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(insert_v2i64_vidx))) ; N64-NEXT: ld $2, %got_disp(i32)($1) -; N64-NEXT: lwu $2, 0($2) +; N64-NEXT: lw $2, 0($2) +; N64-NEXT: andi $2, $2, 1 ; N64-NEXT: ld $1, %got_disp(v2i64)($1) ; N64-NEXT: dlsa $1, $2, $1, 3 ; N64-NEXT: jr $ra diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index a3f41fd842222..7e7b7403dbdfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -321,20 +321,13 @@ define <32 x i16> @insertelt_v32i16(<32 x i16> %a, i16 %y, i32 %idx) { } define void @insertelt_v32i16_store(ptr %x, i16 %y, i32 %idx) { -; RV32-LABEL: insertelt_v32i16_store: -; RV32: # %bb.0: -; RV32-NEXT: slli a2, a2, 1 -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: sh a1, 0(a0) -; RV32-NEXT: ret -; -; 
RV64-LABEL: insertelt_v32i16_store: -; RV64: # %bb.0: -; RV64-NEXT: slli a2, a2, 32 -; RV64-NEXT: srli a2, a2, 31 -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: sh a1, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_v32i16_store: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a2, a2, 31 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: sh a1, 0(a0) +; CHECK-NEXT: ret %a = load <32 x i16>, ptr %x %b = insertelement <32 x i16> %a, i16 %y, i32 %idx store <32 x i16> %b, ptr %x @@ -366,20 +359,13 @@ define <8 x float> @insertelt_v8f32(<8 x float> %a, float %y, i32 %idx) { } define void @insertelt_v8f32_store(ptr %x, float %y, i32 %idx) { -; RV32-LABEL: insertelt_v8f32_store: -; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: fsw fa0, 0(a0) -; RV32-NEXT: ret -; -; RV64-LABEL: insertelt_v8f32_store: -; RV64: # %bb.0: -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: srli a1, a1, 30 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: fsw fa0, 0(a0) -; RV64-NEXT: ret +; CHECK-LABEL: insertelt_v8f32_store: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a1, a1, 7 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: fsw fa0, 0(a0) +; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> %a, float %y, i32 %idx store <8 x float> %b, ptr %x @@ -443,6 +429,7 @@ define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) { define void @insertelt_v8i64_store(ptr %x, i32 %idx) { ; RV32-LABEL: insertelt_v8i64_store: ; RV32: # %bb.0: +; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: li a1, -1 @@ -452,8 +439,8 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) { ; ; RV64-LABEL: insertelt_v8i64_store: ; RV64: # %bb.0: -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: srli a1, a1, 29 +; RV64-NEXT: andi a1, a1, 7 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: sd a1, 0(a0) @@ -521,6 +508,7 @@ define <8 x i64> 
@insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) { define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) { ; RV32-LABEL: insertelt_c6_v8i64_store: ; RV32: # %bb.0: +; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: sw zero, 4(a0) @@ -530,8 +518,8 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) { ; ; RV64-LABEL: insertelt_c6_v8i64_store: ; RV64: # %bb.0: -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: srli a1, a1, 29 +; RV64-NEXT: andi a1, a1, 7 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a0, a0, a1 ; RV64-NEXT: li a1, 6 ; RV64-NEXT: sd a1, 0(a0) diff --git a/llvm/test/CodeGen/X86/pr59980.ll b/llvm/test/CodeGen/X86/pr59980.ll index 0823f960724e2..a6d22c2244c89 100644 --- a/llvm/test/CodeGen/X86/pr59980.ll +++ b/llvm/test/CodeGen/X86/pr59980.ll @@ -9,6 +9,7 @@ define void @foo(ptr %0, ptr %1, ptr %2) #0 { ; CHECK: ## %bb.0: ; CHECK-NEXT: movl (%rdx), %eax ; CHECK-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: andl $15, %eax ; CHECK-NEXT: vpextrw $0, %xmm0, (%rsi,%rax,2) ; CHECK-NEXT: retq %4 = bitcast ptr %2 to ptr