From 891ba1526a02771bb60477ca495a724ec2a7937b Mon Sep 17 00:00:00 2001 From: AZero13 Date: Thu, 11 Sep 2025 09:15:23 -0400 Subject: [PATCH 1/2] Pre-commit test (NFC) --- .../test/CodeGen/AArch64/and-mask-variable.ll | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/and-mask-variable.ll diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll new file mode 100644 index 0000000000000..a92f3cf5ec092 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +define i32 @mask_pair(i32 %x, i32 %y) { +; CHECK-LABEL: mask_pair: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %shl = shl nsw i32 -1, %y + %and = and i32 %shl, %x + ret i32 %and +} + +define i64 @mask_pair_64(i64 %x, i64 %y) { +; CHECK-LABEL: mask_pair_64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %shl = shl nsw i64 -1, %y + %and = and i64 %shl, %x + ret i64 %and +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} From 90723996e8df05e9399e6f3b62f3be7293ff622a Mon Sep 17 00:00:00 2001 From: AZero13 Date: Thu, 11 Sep 2025 09:16:29 -0400 Subject: [PATCH 2/2] [AArch64] shouldFoldMaskToVariableShiftPair should be true for scalars up to the biggest legal type For AArch64, we want to do this for scalars up to the biggest legal type. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10 ++ .../test/CodeGen/AArch64/and-mask-variable.ll | 78 ++++++++++++--- llvm/test/CodeGen/AArch64/extract-bits.ll | 98 ++++++++----------- llvm/test/CodeGen/AArch64/extract-lowbits.ll | 66 ++++++------- 4 files changed, 145 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 46738365080f9..9665bc871b6b9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -300,6 +300,16 @@ class AArch64TargetLowering : public TargetLowering { bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; + /// Return true if a mask should be folded into a pair of variable shifts. + bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override { + EVT VT = Y.getValueType(); + + if (VT.isVector()) + return false; + + return VT.getScalarSizeInBits() <= 64; + } + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override; diff --git a/llvm/test/CodeGen/AArch64/and-mask-variable.ll b/llvm/test/CodeGen/AArch64/and-mask-variable.ll index a92f3cf5ec092..f41cdc6dd241b 100644 --- a/llvm/test/CodeGen/AArch64/and-mask-variable.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-variable.ll @@ -3,28 +3,78 @@ ; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i32 @mask_pair(i32 %x, i32 %y) { -; CHECK-LABEL: mask_pair: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w0, w8, w0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mask_pair: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr w8, w0, w1 +; CHECK-SD-NEXT: lsl w0, w8, w1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-GI-NEXT: lsl w8,
w8, w1 +; CHECK-GI-NEXT: and w0, w8, w0 +; CHECK-GI-NEXT: ret %shl = shl nsw i32 -1, %y %and = and i32 %shl, %x ret i32 %and } define i64 @mask_pair_64(i64 %x, i64 %y) { -; CHECK-LABEL: mask_pair_64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: and x0, x8, x0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: mask_pair_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr x8, x0, x1 +; CHECK-SD-NEXT: lsl x0, x8, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-GI-NEXT: lsl x8, x8, x1 +; CHECK-GI-NEXT: and x0, x8, x0 +; CHECK-GI-NEXT: ret %shl = shl nsw i64 -1, %y %and = and i64 %shl, %x ret i64 %and } + +define i128 @mask_pair_128(i128 %x, i128 %y) { +; CHECK-SD-LABEL: mask_pair_128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-SD-NEXT: lsl x8, x8, x2 +; CHECK-SD-NEXT: lsr x9, x10, x9 +; CHECK-SD-NEXT: tst x2, #0x40 +; CHECK-SD-NEXT: orr x9, x8, x9 +; CHECK-SD-NEXT: csel x9, x8, x9, ne +; CHECK-SD-NEXT: csel x8, xzr, x8, ne +; CHECK-SD-NEXT: and x0, x8, x0 +; CHECK-SD-NEXT: and x1, x9, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: mask_pair_128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: mov x9, #-1 // =0xffffffffffffffff +; CHECK-GI-NEXT: sub x10, x2, #64 +; CHECK-GI-NEXT: sub x8, x8, x2 +; CHECK-GI-NEXT: lsl x11, x9, x2 +; CHECK-GI-NEXT: cmp x2, #64 +; CHECK-GI-NEXT: lsr x8, x9, x8 +; CHECK-GI-NEXT: lsl x9, x9, x10 +; CHECK-GI-NEXT: csel x10, x11, xzr, lo +; CHECK-GI-NEXT: orr x8, x8, x11 +; CHECK-GI-NEXT: and x0, x10, x0 +; CHECK-GI-NEXT: csel x8, x8, x9, lo +; CHECK-GI-NEXT: cmp x2, #0 +; CHECK-GI-NEXT: csinv x8, x8, xzr, ne +; CHECK-GI-NEXT: and x1, x8, x1 +; CHECK-GI-NEXT: ret + %shl = shl nsw i128 -1, %y + %and = and i128 %shl, %x + ret 
i128 %and +} ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-GI: {{.*}} -; CHECK-SD: {{.*}} +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index 8e822d19a19b9..5a96116142b51 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -532,11 +532,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -548,12 +547,11 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: sub w8, w8, w2 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: mov w9, #32 // =0x20 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %skip = zext i8 %numskipbits to i32 %shifted = lshr i32 %val, %skip @@ -569,10 +567,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: neg w9, w2 -; CHECK-NEXT: mov w10, #-1 // =0xffffffff -; CHECK-NEXT: lsr w9, w10, w9 ; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w0, w9, w8 
+; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %shifted = lshr i32 %val, %numskipbits @@ -587,11 +584,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: mov w9, #32 // =0x20 -; CHECK-NEXT: mov w10, #-1 // =0xffffffff ; CHECK-NEXT: sub w9, w9, w2 ; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %skip = zext i8 %numskipbits to i32 @@ -606,11 +602,10 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr w10, w0, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w10, w8 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -624,11 +619,10 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -640,13 +634,12 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { define i64 @bextr64_c1_indexzext(i64 %val, 
i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: sub w8, w8, w2 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: mov w9, #64 // =0x40 +; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %skip = zext i8 %numskipbits to i64 %shifted = lshr i64 %val, %skip @@ -662,10 +655,9 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: neg x9, x2 -; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff -; CHECK-NEXT: lsr x9, x10, x9 ; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %shifted = lshr i64 %val, %numskipbits @@ -679,13 +671,12 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; CHECK-LABEL: bextr64_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: mov w9, #64 // =0x40 -; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff ; CHECK-NEXT: sub w9, w9, w2 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: lsr x9, x10, x9 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %skip = zext i8 %numskipbits to i64 @@ -700,11 +691,10 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: mov x9, #-1 // 
=0xffffffffffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x10, x8 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -737,11 +727,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_c1: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 @@ -756,11 +745,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_c2: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: lsr x10, x0, x1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll index 4b8f3e86b5fef..368440c65df84 100644 --- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll +++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll @@ -347,10 +347,9 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: 
mov w8, #-1 // =0xffffffff -; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -362,10 +361,9 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c1_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -377,11 +375,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: ldr w10, [x0] -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %numhighbits = sub i32 32, %numlowbits @@ -394,11 +391,10 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #32 // =0x20 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: ldr w10, [x0] +; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: lsl w9, w9, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %numhighbits = sub i8 32, %numlowbits @@ -411,10 +407,9 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: 
bzhi32_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: neg w9, w1 -; CHECK-NEXT: lsr w8, w8, w9 -; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits @@ -427,10 +422,9 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits @@ -442,10 +436,9 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c1_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -457,11 +450,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: ldr x10, [x0] -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %numhighbits = sub i64 64, %numlowbits @@ -474,11 +466,10 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: 
bzhi64_c3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64 // =0x40 -; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldr x10, [x0] +; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: and x0, x8, x10 +; CHECK-NEXT: lsl x9, x9, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, ptr %w %numhighbits = sub i8 64, %numlowbits @@ -491,10 +482,9 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: neg x9, x1 -; CHECK-NEXT: lsr x8, x8, x9 -; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits %mask = lshr i64 -1, %numhighbits