Skip to content

Commit

Permalink
[CodeGen] ExpandReductions - add reduce_and/or(<X x i1> V) -> icmp(iX…
Browse files Browse the repository at this point in the history
… bitcast(<X x i1> V)) canonicalization

This canonicalization already exists in InstCombine but was missing from the late-stage ExpandReductions pass.

Fixes #53419
Fixes #61923

Differential Revision: https://reviews.llvm.org/D147452
  • Loading branch information
RKSimon committed Apr 4, 2023
1 parent 1b16c70 commit 00e3ae4
Show file tree
Hide file tree
Showing 5 changed files with 1,039 additions and 2,331 deletions.
32 changes: 30 additions & 2 deletions llvm/lib/CodeGen/ExpandReductions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,38 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
break;
}
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or: {
// Canonicalize logical or/and reductions of i1 vectors:
// An or reduction of <ReduxWidth x i1> is represented as:
// %val = bitcast <ReduxWidth x i1> %vec to iReduxWidth
// %res = icmp ne iReduxWidth %val, 0
// An and reduction of <ReduxWidth x i1> is represented as:
// %val = bitcast <ReduxWidth x i1> %vec to iReduxWidth
// %res = icmp eq iReduxWidth %val, -1 (i.e. all bits set)
Value *Vec = II->getArgOperand(0);
auto *FTy = cast<FixedVectorType>(Vec->getType());
unsigned NumElts = FTy->getNumElements();
if (!isPowerOf2_32(NumElts))
continue;

if (FTy->getElementType() == Builder.getInt1Ty()) {
Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
if (ID == Intrinsic::vector_reduce_and) {
Rdx = Builder.CreateICmpEQ(
Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
} else {
assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
Rdx = Builder.CreateIsNotNull(Rdx);
}
break;
}

Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
break;
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/pr53419.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@ declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
; All four versions are semantically equivalent and should produce same asm as scalar version.

define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v2i8:
; X64: # %bb.0: # %bb
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw %ax, (%rsi)
; X64-NEXT: movzwl (%rsi), %eax
; X64-NEXT: cmpw (%rdi), %ax
; X64-NEXT: sete %al
; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v2i8:
; X86: # %bb.0: # %bb
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: cmpw %ax, (%ecx)
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: cmpw (%eax), %cx
; X86-NEXT: sete %al
; X86-NEXT: retl
bb:
Expand All @@ -39,17 +39,17 @@ bb:
define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v4i8:
; X64: # %bb.0: # %bb
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl %eax, (%rsi)
; X64-NEXT: movl (%rsi), %eax
; X64-NEXT: cmpl (%rdi), %eax
; X64-NEXT: sete %al
; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v4i8:
; X86: # %bb.0: # %bb
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: cmpl %eax, (%ecx)
; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: cmpl (%eax), %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
bb:
Expand All @@ -63,20 +63,20 @@ bb:
define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
; X64-LABEL: intrinsic_v8i8:
; X64: # %bb.0: # %bb
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq %rax, (%rsi)
; X64-NEXT: movq (%rsi), %rax
; X64-NEXT: cmpq (%rdi), %rax
; X64-NEXT: sete %al
; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v8i8:
; X86: # %bb.0: # %bb
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; X86-NEXT: vpmovmskb %xmm0, %eax
; X86-NEXT: cmpb $-1, %al
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: movl 4(%ecx), %ecx
; X86-NEXT: xorl 4(%eax), %ecx
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: sete %al
; X86-NEXT: retl
bb:
Expand Down
15 changes: 5 additions & 10 deletions llvm/test/CodeGen/X86/pr61923.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; FIXME: Regressed test from https://github.com/llvm/llvm-project/issues/61923
; Regressed test from https://github.com/llvm/llvm-project/issues/61923
define void @test_loop(ptr align 1 %src, ptr align 1 %dest, i32 %len) {
; CHECK-LABEL: test_loop:
; CHECK: # %bb.0: # %entry
Expand All @@ -17,22 +17,17 @@ define void @test_loop(ptr align 1 %src, ptr align 1 %dest, i32 %len) {
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %memcmp.loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovdqu (%rsi,%rcx), %xmm0
; CHECK-NEXT: vmovdqu 16(%rsi,%rcx), %xmm1
; CHECK-NEXT: vpcmpeqb (%rdi,%rcx), %xmm0, %xmm0
; CHECK-NEXT: vpmovmskb %xmm0, %edx
; CHECK-NEXT: vpcmpeqb 16(%rdi,%rcx), %xmm1, %xmm0
; CHECK-NEXT: vpmovmskb %xmm0, %r8d
; CHECK-NEXT: shll $16, %r8d
; CHECK-NEXT: orl %edx, %r8d
; CHECK-NEXT: cmpl $-1, %r8d
; CHECK-NEXT: vmovups (%rsi,%rcx), %ymm0
; CHECK-NEXT: vxorps (%rdi,%rcx), %ymm0, %ymm0
; CHECK-NEXT: vptest %ymm0, %ymm0
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: # %bb.3: # %memcmp.loop.latch
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: addq $32, %rcx
; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: jb .LBB0_2
; CHECK-NEXT: .LBB0_4: # %done
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%len.wide = zext i32 %len to i64
Expand Down
Loading

0 comments on commit 00e3ae4

Please sign in to comment.