diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll index 99473369d3e7d..fc71ed0ab508f 100644 --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -1,22 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s ; Function Attrs: nobuiltin nounwind readonly define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount128: ; CHECK: // %bb.0: // %Entry -; CHECK-NEXT: ldr d0, [x0, #8] +; CHECK-NEXT: ldp d1, d0, [x0] ; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: cnt v1.8b, v1.8b +; CHECK-NEXT: uaddlv h0, v0.8b +; CHECK-NEXT: uaddlv h1, v1.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: add w0, w9, w8 ; CHECK-NEXT: ret Entry: @@ -33,32 +28,22 @@ declare i128 @llvm.ctpop.i128(i128) define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount256: ; CHECK: // %bb.0: // %Entry -; CHECK-NEXT: ldr d0, [x0, #24] -; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: ldr d0, [x0, #16] -; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: ldr d0, [x0, #8] -; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldp d1, d0, [x0, #16] +; CHECK-NEXT: ldp d3, d2, [x0] ; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: cnt v1.8b, v1.8b +; CHECK-NEXT: uaddlv h0, v0.8b +; CHECK-NEXT: cnt v2.8b, v2.8b +; CHECK-NEXT: uaddlv h1, v1.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: cnt v0.8b, v3.8b +; CHECK-NEXT: uaddlv h2, v2.8b +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: uaddlv h0, v0.8b ; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: add w9, w9, w10 ; CHECK-NEXT: add w0, w9, w8 ; CHECK-NEXT: ret Entry: @@ -75,21 +60,15 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-LABEL: popcount1x128: ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: fmov d0, x1 +; CHECK-NEXT: fmov d1, x0 ; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w2, s2 -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: fmov d0, x0 -; CHECK-NEXT: cnt v0.8b, v0.8b -; CHECK-NEXT: uaddlv h1, v0.8b -; CHECK-NEXT: // implicit-def: $q2 -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: fmov w2, s2 -; CHECK-NEXT: mov w9, w2 -; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: cnt v1.8b, v1.8b +; CHECK-NEXT: uaddlv h0, v0.8b +; CHECK-NEXT: uaddlv h1, v1.8b ; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: add x0, x9, x8 ; CHECK-NEXT: mov x1, v2.d[1] ; CHECK-NEXT: ret Entry: