diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 54bdb8750f709..b2483043136a4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -60,6 +60,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectionDAGISel::runOnMachineFunction(MF);
   }
 
+  void PreprocessISelDAG() override;
+
   void Select(SDNode *Node) override;
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -529,6 +531,91 @@ char AArch64DAGToDAGISelLegacy::ID = 0;
 
 INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
 
+void AArch64DAGToDAGISel::PreprocessISelDAG() {
+  bool MadeChange = false;
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+                                       E = CurDAG->allnodes_end();
+       I != E;) {
+    // Step past N before it is examined so that I already refers to the
+    // following node.
+    SDNode *N = &*I++;
+
+    switch (N->getOpcode()) {
+    case ISD::BITCAST: {
+      // Canonicalize bitcast(extload) or bitcast(zextload) into
+      // scalar_to_vector(load) or insert(zero, load), to help generate the
+      // canonical patterns that tablegen expects. This helps generate extending
+      // loads that zero the top data implicitly.
+      EVT VT = N->getValueType(0);
+      // Only 64-bit and 128-bit vector results are handled; anything else is
+      // skipped instead of being asserted on.
+      if (!Subtarget->isLittleEndian() ||
+          !(VT.is64BitVector() || VT.is128BitVector()))
+        break;
+
+      SDValue Src = N->getOperand(0);
+      auto *Ld = dyn_cast<LoadSDNode>(Src);
+      if (!Ld || !Ld->isSimple() || Ld->isIndexed() ||
+          (Ld->getExtensionType() != ISD::EXTLOAD &&
+           Ld->getExtensionType() != ISD::ZEXTLOAD))
+        break;
+
+      // The chain users of Ld are moved onto the new load below, so the loaded
+      // value must not have any user other than this bitcast; otherwise the
+      // old load would stay alive with nothing ordered after it.
+      if (!Src.hasOneUse())
+        break;
+
+      // Only scalar i8/i16/i32 memory types are handled; this also rejects
+      // loads whose memory type is a vector.
+      EVT MemVT = Ld->getMemoryVT();
+      if (MemVT != MVT::i8 && MemVT != MVT::i16 && MemVT != MVT::i32)
+        break;
+
+      LLVM_DEBUG({
+        dbgs() << "Found an extending load ";
+        Ld->dump();
+      });
+
+      assert(
+          VT.getScalarSizeInBits() == 8 || VT.getScalarSizeInBits() == 16 ||
+          VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64);
+      // Loads narrower than 32 bits are widened to an i32 result.
+      EVT ScalarVT = MemVT.getSizeInBits() < 32 ? MVT::i32 : MemVT;
+      // Vector of MemVT elements with the same total width as VT.
+      EVT ExtVT =
+          EVT::getVectorVT(*CurDAG->getContext(), MemVT,
+                           VT.getSizeInBits() / MemVT.getSizeInBits());
+
+      SDLoc DL(N);
+      SDValue NewLd =
+          CurDAG->getExtLoad(ISD::EXTLOAD, DL, ScalarVT, Ld->getChain(),
+                             Ld->getBasePtr(), MemVT, Ld->getMemOperand());
+      SDValue Ext;
+      if (Ld->getExtensionType() == ISD::EXTLOAD)
+        Ext = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, NewLd);
+      else
+        Ext = CurDAG->getNode(ISD::INSERT_VECTOR_ELT, DL, ExtVT,
+                              CurDAG->getConstant(0, DL, ExtVT), NewLd,
+                              CurDAG->getConstant(0, DL, MVT::i64));
+      Ext = CurDAG->getBitcast(VT, Ext);
+
+      // Park the iterator on N while the uses are rewritten, then advance it
+      // again.
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Ext);
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewLd.getValue(1));
+      ++I;
+      MadeChange = true;
+      break;
+    }
+    }
+  }
+
+  if (MadeChange)
+    CurDAG->RemoveDeadNodes();
+}
+
 /// isIntImmediate - This method tests to see if the node is a constant
 /// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll index 741dcf3ad4c2f..b6b1d75207123 100644 --- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -289,11 +289,16 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) { } define <8 x i8> @load_zext_i8_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i8_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -301,11 +306,16 @@ define <8 x i8> @load_zext_i8_v8i8(ptr %p) { } define <8 x i8> @load_zext_i16_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i16_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -313,11 +323,16 @@ define <8 x i8> @load_zext_i16_v8i8(ptr %p) { } define <8 x i8> @load_zext_i32_v8i8(ptr %p) { -; CHECK-LABEL: load_zext_i32_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret 
%l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <8 x i8> @@ -358,11 +373,16 @@ define <16 x i8> @load_zext_v16i8(ptr %p) { define <4 x i16> @load_zext_i8_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i8_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -370,11 +390,16 @@ define <4 x i16> @load_zext_i8_v4i16(ptr %p) { } define <4 x i16> @load_zext_i16_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i16_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -382,11 +407,16 @@ define <4 x i16> @load_zext_i16_v4i16(ptr %p) { } define <4 x i16> @load_zext_i32_v4i16(ptr %p) { -; CHECK-LABEL: load_zext_i32_v4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <4 x i16> @@ -394,11 +424,16 @@ define <4 x i16> @load_zext_i32_v4i16(ptr %p) { } define <2 x i32> @load_zext_i8_v2i32(ptr %p) { -; CHECK-LABEL: 
load_zext_i8_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -406,11 +441,16 @@ define <2 x i32> @load_zext_i8_v2i32(ptr %p) { } define <2 x i32> @load_zext_i16_v2i32(ptr %p) { -; CHECK-LABEL: load_zext_i16_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -418,11 +458,16 @@ define <2 x i32> @load_zext_i16_v2i32(ptr %p) { } define <2 x i32> @load_zext_i32_v2i32(ptr %p) { -; CHECK-LABEL: load_zext_i32_v2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <2 x i32> @@ -430,11 +475,16 @@ define <2 x i32> @load_zext_i32_v2i32(ptr %p) { } define <1 x i64> @load_zext_i8_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i8_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -442,11 +492,16 @@ define <1 x i64> @load_zext_i8_v1i64(ptr %p) { } define <1 x i64> @load_zext_i16_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i16_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -454,11 +509,16 @@ define <1 x i64> @load_zext_i16_v1i64(ptr %p) { } define <1 x i64> @load_zext_i32_v1i64(ptr %p) { -; CHECK-LABEL: load_zext_i32_v1i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v1i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v1i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <1 x i64> @@ -467,11 +527,16 @@ define <1 x i64> @load_zext_i32_v1i64(ptr %p) { define <4 x half> @load_zext_i8_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i8_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = 
bitcast i64 %z to <4 x half> @@ -479,11 +544,16 @@ define <4 x half> @load_zext_i8_v4f16(ptr %p) { } define <4 x half> @load_zext_i16_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i16_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <4 x half> @@ -491,11 +561,16 @@ define <4 x half> @load_zext_i16_v4f16(ptr %p) { } define <4 x half> @load_zext_i32_v4f16(ptr %p) { -; CHECK-LABEL: load_zext_i32_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <4 x half> @@ -505,8 +580,7 @@ define <4 x half> @load_zext_i32_v4f16(ptr %p) { define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) { ; CHECK-LABEL: load_zext_i8_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 @@ -517,8 +591,7 @@ define <4 x bfloat> @load_zext_i8_v4bf16(ptr %p) { define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) { ; CHECK-LABEL: load_zext_i16_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 @@ -529,8 +602,7 @@ define <4 x bfloat> @load_zext_i16_v4bf16(ptr %p) { define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) { ; 
CHECK-LABEL: load_zext_i32_v4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 @@ -539,11 +611,16 @@ define <4 x bfloat> @load_zext_i32_v4bf16(ptr %p) { } define <2 x float> @load_zext_i8_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i8_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <2 x float> @@ -551,11 +628,16 @@ define <2 x float> @load_zext_i8_v2f32(ptr %p) { } define <2 x float> @load_zext_i16_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i16_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <2 x float> @@ -563,11 +645,16 @@ define <2 x float> @load_zext_i16_v2f32(ptr %p) { } define <2 x float> @load_zext_i32_v2f32(ptr %p) { -; CHECK-LABEL: load_zext_i32_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <2 x 
float> @@ -575,11 +662,16 @@ define <2 x float> @load_zext_i32_v2f32(ptr %p) { } define <1 x double> @load_zext_i8_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i8_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i8_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i8_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrb w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %z = zext i8 %l to i64 %b = bitcast i64 %z to <1 x double> @@ -587,11 +679,16 @@ define <1 x double> @load_zext_i8_v1f64(ptr %p) { } define <1 x double> @load_zext_i16_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i16_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i16_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i16_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldrh w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %z = zext i16 %l to i64 %b = bitcast i64 %z to <1 x double> @@ -599,11 +696,16 @@ define <1 x double> @load_zext_i16_v1f64(ptr %p) { } define <1 x double> @load_zext_i32_v1f64(ptr %p) { -; CHECK-LABEL: load_zext_i32_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: load_zext_i32_v1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: load_zext_i32_v1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %z = zext i32 %l to i64 %b = bitcast i64 %z to <1 x double> diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll index e904f4b6d247a..e52fec96d51e3 100644 --- a/llvm/test/CodeGen/AArch64/dp1.ll +++ 
b/llvm/test/CodeGen/AArch64/dp1.ll @@ -201,8 +201,7 @@ define void @ctpop_i32() { ; CHECK-SDAG: // %bb.0: ; CHECK-SDAG-NEXT: adrp x8, :got:var32 ; CHECK-SDAG-NEXT: ldr x8, [x8, :got_lo12:var32] -; CHECK-SDAG-NEXT: ldr w9, [x8] -; CHECK-SDAG-NEXT: fmov d0, x9 +; CHECK-SDAG-NEXT: ldr s0, [x8] ; CHECK-SDAG-NEXT: cnt v0.8b, v0.8b ; CHECK-SDAG-NEXT: addv b0, v0.8b ; CHECK-SDAG-NEXT: str s0, [x8]