From 2857b8e600b7a509ab96afb291222b1e22f5fcd4 Mon Sep 17 00:00:00 2001 From: clingfei <1599101385@qq.com> Date: Thu, 20 Nov 2025 23:18:08 +0800 Subject: [PATCH 1/2] [AArch64] Optimize memcpy for non-power-of-two sizes The previous getMemcpyLoadsAndStores implementation chained load/store instructions in groups running from "NumLdStInMemcpy - GlueIter - GluedLdStLimit" to "NumLdStInMemcpy - GlueIter", i.e. it formed the groups starting from the end of the copy and chained the residual load/stores at the front. This caused issues when copying non-power-of-two sizes, because the leading load/stores were chained with subsequent instructions at offsets that are not power-of-two aligned. This chaining pattern prevented the aarch64-ldst-opt pass from optimizing these load/store instructions effectively. This commit changes the chaining range to run from "GlueIter" to "GlueIter + GluedLdStLimit", so that the groups are formed from the start of the copy and the residual load/stores are chained last, enabling proper optimization of these load/store instructions in aarch64-ldst-opt. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 +- llvm/test/CodeGen/AArch64/aarch64-mops.ll | 126 +++++++++--------- 2 files changed, 67 insertions(+), 69 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1b15a207a2d37..20e2b744acc31 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8746,8 +8746,8 @@ static SDValue getMemcpyLoadsAndStores( unsigned GlueIter = 0; for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { - unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; - unsigned IndexTo = NumLdStInMemcpy - GlueIter; + unsigned IndexFrom = GlueIter; + unsigned IndexTo = GlueIter + GluedLdStLimit; chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, OutLoadChains, OutStoreChains); @@ -8756,9 +8756,9 @@ static SDValue getMemcpyLoadsAndStores( // Residual ld/st. 
if (RemainingLdStInMemcpy) { - chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, - RemainingLdStInMemcpy, OutLoadChains, - OutStoreChains); + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, GlueIter, + NumLdStInMemcpy, OutLoadChains, + OutStoreChains); } } } diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll index 1710fad9f2539..fc64ce7d26d0e 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -1407,30 +1407,28 @@ define void @memcpy_inline_300(ptr %dst, ptr %src, i32 %value) { ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #16] -; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x1, #284 -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #16] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #80] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #48] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #80] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #48] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #112] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #112] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #176] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #176] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q1, [x1, #256] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #240] -; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284 -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q1, [x0, #256] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x1, #284 +; 
SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q1, [x1, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8] +; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284 +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q1, [x0, #256] ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memcpy_inline_300: @@ -1536,46 +1534,46 @@ define void @memcpy_inline_300_volatile(ptr %dst, ptr %src, i32 %value) { ; ; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300_volatile: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #16] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #32] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #48] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #48] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #32] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #16] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #64] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #80] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #96] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #112] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #112] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #96] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, 
#80] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #64] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #128] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #160] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #176] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #176] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #160] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #128] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #192] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #224] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #240] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #240] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #224] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #192] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #256] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #272] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1, #288] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr w9, [x1, #296] -; SDAG-WITHOUT-MOPS-O2-NEXT: str w9, [x0, #296] -; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0, #288] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #272] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #272] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x8, [x1, #288] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr w9, [x1, #296] +; SDAG-WITHOUT-MOPS-O2-NEXT: str w9, [x0, #296] +; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0, #288] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #272] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #224] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #208] +; SDAG-WITHOUT-MOPS-O2-NEXT: str 
q0, [x0, #192] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #160] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #144] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #128] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #96] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #80] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q1, [x1, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q1, [x0, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x0] ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memcpy_inline_300_volatile: From 44814f8f09c56bfb7b1b1f86eca90d63ae26a263 Mon Sep 17 00:00:00 2001 From: clingfei <1599101385@qq.com> Date: Sat, 22 Nov 2025 10:04:23 +0800 Subject: [PATCH 2/2] update test cases --- llvm/test/CodeGen/AArch64/aarch64-mops.ll | 223 ++++++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll index fc64ce7d26d0e..ea33e98ec6447 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -1588,6 +1588,229 @@ entry: ret void } +define void @memcpy_inline_65(ptr %dst, ptr %src, i32 %value) { +; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_65: +; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry +; 
GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: ldrb w8, [x1, #64] +; GISel-WITHOUT-MOPS-O0-NEXT: strb w8, [x0, #64] +; GISel-WITHOUT-MOPS-O0-NEXT: ret +; +; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_65: +; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: ldrb w8, [x1, #64] +; GISel-WITHOUT-MOPS-O3-NEXT: strb w8, [x0, #64] +; GISel-WITHOUT-MOPS-O3-NEXT: ret +; +; GISel-MOPS-O0-LABEL: memcpy_inline_65: +; GISel-MOPS-O0: // %bb.0: // %entry +; GISel-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-MOPS-O0-NEXT: str q0, [x0] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-MOPS-O0-NEXT: ldrb w8, [x1, #64] +; GISel-MOPS-O0-NEXT: strb w8, [x0, #64] +; GISel-MOPS-O0-NEXT: ret +; +; GISel-MOPS-O3-LABEL: memcpy_inline_65: +; GISel-MOPS-O3: // %bb.0: // %entry +; GISel-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-MOPS-O3-NEXT: str q0, [x0] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, 
#48] +; GISel-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-MOPS-O3-NEXT: ldrb w8, [x1, #64] +; GISel-MOPS-O3-NEXT: strb w8, [x0, #64] +; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_65: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldrb w8, [x1, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: strb w8, [x0, #64] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_65: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldrb w8, [x1, #64] +; SDAG-MOPS-O2-NEXT: strb w8, [x0, #64] +; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32] +; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1] +; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32] +; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0] +; SDAG-MOPS-O2-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 65, i1 false) + ret void +} + +define void @memcpy_inline_64(ptr %dst, ptr %src, i32 %value) { +; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_64: +; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O0-NEXT: ret +; +; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_64: +; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32] +; 
GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-WITHOUT-MOPS-O3-NEXT: ret +; +; GISel-MOPS-O0-LABEL: memcpy_inline_64: +; GISel-MOPS-O0: // %bb.0: // %entry +; GISel-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-MOPS-O0-NEXT: str q0, [x0] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-O0-NEXT: str q0, [x0, #48] +; GISel-MOPS-O0-NEXT: ret +; +; GISel-MOPS-O3-LABEL: memcpy_inline_64: +; GISel-MOPS-O3: // %bb.0: // %entry +; GISel-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-MOPS-O3-NEXT: str q0, [x0] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #48] +; GISel-MOPS-O3-NEXT: str q0, [x0, #48] +; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_64: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_64: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32] +; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1] +; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32] +; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0] +; SDAG-MOPS-O2-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 64, i1 false) + ret void +} + +define void @memcpy_inline_63(ptr %dst, ptr %src, i32 %value) { +; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_63: +; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O0-NEXT: str 
q0, [x0] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O0-NEXT: ldur q0, [x1, #47] +; GISel-WITHOUT-MOPS-O0-NEXT: stur q0, [x0, #47] +; GISel-WITHOUT-MOPS-O0-NEXT: ret +; +; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_63: +; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-WITHOUT-MOPS-O3-NEXT: ldur q0, [x1, #47] +; GISel-WITHOUT-MOPS-O3-NEXT: stur q0, [x0, #47] +; GISel-WITHOUT-MOPS-O3-NEXT: ret +; +; GISel-MOPS-O0-LABEL: memcpy_inline_63: +; GISel-MOPS-O0: // %bb.0: // %entry +; GISel-MOPS-O0-NEXT: ldr q0, [x1] +; GISel-MOPS-O0-NEXT: str q0, [x0] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O0-NEXT: str q0, [x0, #16] +; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O0-NEXT: str q0, [x0, #32] +; GISel-MOPS-O0-NEXT: ldur q0, [x1, #47] +; GISel-MOPS-O0-NEXT: stur q0, [x0, #47] +; GISel-MOPS-O0-NEXT: ret +; +; GISel-MOPS-O3-LABEL: memcpy_inline_63: +; GISel-MOPS-O3: // %bb.0: // %entry +; GISel-MOPS-O3-NEXT: ldr q0, [x1] +; GISel-MOPS-O3-NEXT: str q0, [x0] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16] +; GISel-MOPS-O3-NEXT: str q0, [x0, #16] +; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32] +; GISel-MOPS-O3-NEXT: str q0, [x0, #32] +; GISel-MOPS-O3-NEXT: ldur q0, [x1, #47] +; GISel-MOPS-O3-NEXT: stur q0, [x0, #47] +; GISel-MOPS-O3-NEXT: ret +; +; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_63: +; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q1, [x1, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldur q0, [x1, #47] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1] +; SDAG-WITHOUT-MOPS-O2-NEXT: stur q0, 
[x0, #47] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q1, [x0, #16] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0] +; SDAG-WITHOUT-MOPS-O2-NEXT: ret +; +; SDAG-MOPS-O2-LABEL: memcpy_inline_63: +; SDAG-MOPS-O2: // %bb.0: // %entry +; SDAG-MOPS-O2-NEXT: ldp q3, q1, [x1, #16] +; SDAG-MOPS-O2-NEXT: ldur q0, [x1, #47] +; SDAG-MOPS-O2-NEXT: ldr q2, [x1] +; SDAG-MOPS-O2-NEXT: stur q0, [x0, #47] +; SDAG-MOPS-O2-NEXT: stp q3, q1, [x0, #16] +; SDAG-MOPS-O2-NEXT: str q2, [x0] +; SDAG-MOPS-O2-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 63, i1 false) + ret void +} + define void @memmove_0(ptr %dst, ptr %src, i32 %value) { ; GISel-WITHOUT-MOPS-LABEL: memmove_0: ; GISel-WITHOUT-MOPS: // %bb.0: // %entry