
Conversation

@AZero13 (Contributor) commented Sep 11, 2025

For X86, we want to do this only for scalars up to the biggest legal type; applying it to wider types results in bloated code.
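
For reference, shouldFoldMaskToVariableShiftPair chooses between masking with a shifted all-ones value and clearing the same low bits with two variable shifts. A minimal C++ sketch of the equivalence (the maskForm/shiftPair helper names are illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Illustration only: the two equivalent ways to clear the low y bits
    // of x that shouldFoldMaskToVariableShiftPair() arbitrates between.
    uint32_t maskForm(uint32_t x, unsigned y) {
      return x & (UINT32_MAX << y); // mask form: materializes -1 << y
    }
    uint32_t shiftPair(uint32_t x, unsigned y) {
      return (x >> y) << y;         // shift-pair form: two variable shifts
    }

    int main() {
      for (unsigned y = 0; y < 32; ++y)
        assert(maskForm(0xDEADBEEFu, y) == shiftPair(0xDEADBEEFu, y));
      return 0;
    }

The shift-pair form avoids materializing the -1 << y mask, but once the scalar no longer fits in a single general-purpose register, legalization expands each variable shift into the multi-instruction sequences visible in the tests below, hence the size cap.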

… to the biggest legal type

For X86, we want to do this for scalars up to the biggest legal type.
@llvmbot (Member) commented Sep 11, 2025

@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

Changes

For X86, we want to do this only for scalars up to the biggest legal type; applying it to wider types results in bloated code.


Full diff: https://github.com/llvm/llvm-project/pull/158068.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+2-5)
  • (added) llvm/test/CodeGen/X86/and-mask-variable.ll (+356)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 08ae0d52d795e..a5dc7fae4a12a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3663,11 +3663,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  // 64-bit shifts on 32-bit targets produce really bad bloated code.
-  if (VT == MVT::i64 && !Subtarget.is64Bit())
-    return false;
-
-  return true;
+  unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32;
+  return VT.getScalarSizeInBits() <= MaxWidth;
 }
 
 TargetLowering::ShiftLegalizationStrategy
diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll
new file mode 100644
index 0000000000000..844a413391d75
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-mask-variable.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2
+
+define i32 @mask_pair(i32 %x, i32 %y) {
+; X86-NOBMI-LABEL: mask_pair:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: mask_pair:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %eax
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i32 -1, %y
+  %and = and i32 %shl, %x
+  ret i32 %and
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) {
+; X86-NOBMI-LABEL: mask_pair_64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB1_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:  .LBB1_2:
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: mask_pair_64:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB1_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB1_2:
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair_64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB1_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB1_2:
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair_64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair_64:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %rax
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shlq %cl, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i64 -1, %y
+  %and = and i64 %shl, %x
+  ret i64 %and
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) {
+; X86-NOBMI-LABEL: mask_pair_128:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOBMI-NEXT:    subl $32, %esp
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 48
+; X86-NOBMI-NEXT:    .cfi_offset %esi, -16
+; X86-NOBMI-NEXT:    .cfi_offset %edi, -12
+; X86-NOBMI-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, (%esp)
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    shrb $3, %dl
+; X86-NOBMI-NEXT:    andb $12, %dl
+; X86-NOBMI-NEXT:    negb %dl
+; X86-NOBMI-NEXT:    movsbl %dl, %ebx
+; X86-NOBMI-NEXT:    movl 24(%esp,%ebx), %edx
+; X86-NOBMI-NEXT:    movl 28(%esp,%ebx), %esi
+; X86-NOBMI-NEXT:    shldl %cl, %edx, %esi
+; X86-NOBMI-NEXT:    movl 16(%esp,%ebx), %edi
+; X86-NOBMI-NEXT:    movl 20(%esp,%ebx), %ebx
+; X86-NOBMI-NEXT:    shldl %cl, %ebx, %edx
+; X86-NOBMI-NEXT:    shldl %cl, %edi, %ebx
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shll %cl, %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT:    movl %esi, 12(%eax)
+; X86-NOBMI-NEXT:    movl %edx, 8(%eax)
+; X86-NOBMI-NEXT:    movl %ebx, 4(%eax)
+; X86-NOBMI-NEXT:    movl %edi, (%eax)
+; X86-NOBMI-NEXT:    addl $32, %esp
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 16
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI-NEXT:    retl $4
+;
+; X86-BMI1-LABEL: mask_pair_128:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI1-NEXT:    subl $32, %esp
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 48
+; X86-BMI1-NEXT:    .cfi_offset %esi, -16
+; X86-BMI1-NEXT:    .cfi_offset %edi, -12
+; X86-BMI1-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    movl $0, (%esp)
+; X86-BMI1-NEXT:    movl %ecx, %edx
+; X86-BMI1-NEXT:    shrb $3, %dl
+; X86-BMI1-NEXT:    andb $12, %dl
+; X86-BMI1-NEXT:    negb %dl
+; X86-BMI1-NEXT:    movsbl %dl, %ebx
+; X86-BMI1-NEXT:    movl 24(%esp,%ebx), %edx
+; X86-BMI1-NEXT:    movl 28(%esp,%ebx), %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI1-NEXT:    movl 16(%esp,%ebx), %edi
+; X86-BMI1-NEXT:    movl 20(%esp,%ebx), %ebx
+; X86-BMI1-NEXT:    shldl %cl, %ebx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl %esi, 12(%eax)
+; X86-BMI1-NEXT:    movl %edx, 8(%eax)
+; X86-BMI1-NEXT:    movl %ebx, 4(%eax)
+; X86-BMI1-NEXT:    movl %edi, (%eax)
+; X86-BMI1-NEXT:    addl $32, %esp
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI1-NEXT:    retl $4
+;
+; X86-BMI2-LABEL: mask_pair_128:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT:    subl $32, %esp
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 48
+; X86-BMI2-NEXT:    .cfi_offset %esi, -16
+; X86-BMI2-NEXT:    .cfi_offset %edi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, (%esp)
+; X86-BMI2-NEXT:    movl %ecx, %edx
+; X86-BMI2-NEXT:    shrb $3, %dl
+; X86-BMI2-NEXT:    andb $12, %dl
+; X86-BMI2-NEXT:    negb %dl
+; X86-BMI2-NEXT:    movsbl %dl, %edi
+; X86-BMI2-NEXT:    movl 24(%esp,%edi), %edx
+; X86-BMI2-NEXT:    movl 28(%esp,%edi), %esi
+; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI2-NEXT:    movl 16(%esp,%edi), %ebx
+; X86-BMI2-NEXT:    movl 20(%esp,%edi), %edi
+; X86-BMI2-NEXT:    shldl %cl, %edi, %edx
+; X86-BMI2-NEXT:    shldl %cl, %ebx, %edi
+; X86-BMI2-NEXT:    shlxl %ecx, %ebx, %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    movl %esi, 12(%eax)
+; X86-BMI2-NEXT:    movl %edx, 8(%eax)
+; X86-BMI2-NEXT:    movl %edi, 4(%eax)
+; X86-BMI2-NEXT:    movl %ecx, (%eax)
+; X86-BMI2-NEXT:    addl $32, %esp
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl $4
+;
+; X64-NOBMI-LABEL: mask_pair_128:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdx, %rcx
+; X64-NOBMI-NEXT:    movq $-1, %rdx
+; X64-NOBMI-NEXT:    movq $-1, %r8
+; X64-NOBMI-NEXT:    shlq %cl, %r8
+; X64-NOBMI-NEXT:    xorl %eax, %eax
+; X64-NOBMI-NEXT:    testb $64, %cl
+; X64-NOBMI-NEXT:    cmovneq %r8, %rdx
+; X64-NOBMI-NEXT:    cmoveq %r8, %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    andq %rsi, %rdx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: mask_pair_128:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rdx, %rcx
+; X64-BMI1-NEXT:    movq $-1, %rdx
+; X64-BMI1-NEXT:    movq $-1, %r8
+; X64-BMI1-NEXT:    shlq %cl, %r8
+; X64-BMI1-NEXT:    xorl %eax, %eax
+; X64-BMI1-NEXT:    testb $64, %cl
+; X64-BMI1-NEXT:    cmovneq %r8, %rdx
+; X64-BMI1-NEXT:    cmoveq %r8, %rax
+; X64-BMI1-NEXT:    andq %rdi, %rax
+; X64-BMI1-NEXT:    andq %rsi, %rdx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_128:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shlxq %rdx, %rcx, %r8
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    testb $64, %dl
+; X64-BMI2-NEXT:    cmovneq %r8, %rcx
+; X64-BMI2-NEXT:    cmoveq %r8, %rax
+; X64-BMI2-NEXT:    andq %rdi, %rax
+; X64-BMI2-NEXT:    andq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rcx, %rdx
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i128 -1, %y
+  %and = and i128 %shl, %x
+  ret i128 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64: {{.*}}
+; X64-BMINOTBM: {{.*}}
+; X64-BMITBM: {{.*}}
+; X86: {{.*}}
+; X86-BMINOTBM: {{.*}}
+; X86-BMITBM: {{.*}}
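
Reading the hunk back (a paraphrase, with the surrounding function assumed): the old guard rejected only the i64-on-32-bit case, while the new one caps the fold at the widest legal GPR, which additionally covers i128 on 64-bit targets, as the mask_pair_128 checks above show.

    // Before: only i64 on 32-bit targets was excluded.
    if (VT == MVT::i64 && !Subtarget.is64Bit())
      return false;
    return true;

    // After: exclude any scalar wider than one general-purpose register,
    // e.g. i128 on x86-64, which the mask_pair_128 tests keep in mask form.
    unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32;
    return VT.getScalarSizeInBits() <= MaxWidth;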

@AZero13 (Contributor, Author) commented Sep 11, 2025

@topperc
