Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions llvm/lib/Analysis/InstructionSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1933,13 +1933,18 @@ static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1,
// In the final case (Res == Absorber with inverted predicate), it is safe to
// refine poison during simplification, but not undef. For simplicity always
// disable undef-based folds here.
if (Value *Res = simplifyWithOpReplaced(Op1, A, B, Q.getWithoutUndef(),
/* AllowRefinement */ true,
/* DropFlags */ nullptr, MaxRecurse))
// Allow one extra recursion level for this speculative replace+simplify,
// because some folds require more than MaxRecurse replacements to appear.
unsigned LocalMaxRecurse = MaxRecurse ? MaxRecurse + 1 : 1;
if (Value *Res =
simplifyWithOpReplaced(Op1, A, B, Q.getWithoutUndef(),
/* AllowRefinement */ true,
/* DropFlags */ nullptr, LocalMaxRecurse))
return Simplify(Res);
if (Value *Res = simplifyWithOpReplaced(Op1, B, A, Q.getWithoutUndef(),
/* AllowRefinement */ true,
/* DropFlags */ nullptr, MaxRecurse))
if (Value *Res =
simplifyWithOpReplaced(Op1, B, A, Q.getWithoutUndef(),
/* AllowRefinement */ true,
/* DropFlags */ nullptr, LocalMaxRecurse))
return Simplify(Res);

return nullptr;
Expand Down
22 changes: 4 additions & 18 deletions llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,11 @@ declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; GCN-LABEL: select_constant_cttz:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dword s2, s[2:3], 0x0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshr_b32 s4, 1, s2
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_ff1_i32_b32 s2, s4
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT: s_and_b64 s[6:7], s[4:5], exec
; GCN-NEXT: s_cselect_b32 s2, -1, s2
; GCN-NEXT: s_flbit_i32 s6, s2
; GCN-NEXT: s_sub_i32 s8, 31, s6
; GCN-NEXT: s_cmp_eq_u32 s2, 0
; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
; GCN-NEXT: s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_cselect_b32 s4, -1, s8
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v0, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
%v = load i32, ptr addrspace(1) %arrayidx, align 4
Expand All @@ -43,3 +28,4 @@ define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, p
}

!0 = !{i32 0, i32 33}

104 changes: 104 additions & 0 deletions llvm/test/Transforms/InstSimplify/and-or-implied-cond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,107 @@ define i1 @pr98753(i32 noundef %x, i32 %y) {
}

declare i1 @llvm.is.constant.i1(i1)


; Positive test: the final `or` of the eq compare (%6) and the ult range
; check (%5) simplifies to just %5. When %1 == 1114112, the trunc/xor/add
; chain produces 55296, which already satisfies `ult -1112064`, so %6
; implies %5 and the `or` folds away (the CHECK lines below return only %5).
define i1 @or_icmp_fold(i64 %arg0) {
; CHECK-LABEL: @or_icmp_fold(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
; CHECK-NEXT: ret i1 [[TMP5]]
;
%1 = lshr i64 %arg0, 32
%2 = trunc nuw i64 %1 to i32
%3 = xor i32 %2, 55296
%4 = add i32 %3, -1114112
%5 = icmp ult i32 %4, -1112064
%6 = icmp eq i64 %1, 1114112
%7 = or i1 %6, %5
ret i1 %7
}


; Negative test: with the tighter range constant 1000, the eq compare is
; NOT implied by the ult check (when %1 == 1114112 the chain yields 55296,
; which is not ult 1000), so the `or` must be kept — the CHECK lines below
; retain both compares and the `or`.
define i1 @or_icmp_fold_negative(i64 %arg0) {
; CHECK-LABEL: @or_icmp_fold_negative(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1000
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP1]], 1114112
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]]
; CHECK-NEXT: ret i1 [[TMP7]]
;
%1 = lshr i64 %arg0, 32
%2 = trunc nuw i64 %1 to i32
%3 = xor i32 %2, 55296
%4 = add i32 %3, -1114112
%5 = icmp ult i32 %4, 1000
%6 = icmp eq i64 %1, 1114112
%7 = or i1 %6, %5
ret i1 %7
}

declare void @use(i32)

; Same fold as @or_icmp_fold, but %4 has an extra use via @use. The fold
; only replaces the final `or` (it does not need to erase %4), so the
; extra use does not block it — the CHECK lines below still return %5.
define i1 @or_icmp_fold_multi_use(i64 %arg0) {
; CHECK-LABEL: @or_icmp_fold_multi_use(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
; CHECK-NEXT: call void @use(i32 [[TMP4]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
; CHECK-NEXT: ret i1 [[TMP5]]
;
%1 = lshr i64 %arg0, 32
%2 = trunc nuw i64 %1 to i32
%3 = xor i32 %2, 55296
%4 = add i32 %3, -1114112
call void @use(i32 %4)
%5 = icmp ult i32 %4, -1112064
%6 = icmp eq i64 %1, 1114112
%7 = or i1 %6, %5
ret i1 %7
}

; Same as @or_icmp_fold with the `or` operands commuted (%5 first, %6
; second); the fold still fires and only %5 is returned per the CHECK
; lines below.
define i1 @or_icmp_fold_commuted(i64 %arg0) {
; CHECK-LABEL: @or_icmp_fold_commuted(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[ARG0:%.*]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 55296
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -1114112
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], -1112064
; CHECK-NEXT: ret i1 [[TMP5]]
;
%1 = lshr i64 %arg0, 32
%2 = trunc nuw i64 %1 to i32
%3 = xor i32 %2, 55296
%4 = add i32 %3, -1114112
%5 = icmp ult i32 %4, -1112064
%6 = icmp eq i64 %1, 1114112
%7 = or i1 %5, %6
ret i1 %7
}


; Splat-vector version of @or_icmp_fold; the fold applies elementwise and
; the `or` still simplifies to the ult compare per the CHECK lines below.
define <2 x i1> @or_icmp_fold_vec(<2 x i64> %arg0) {
; CHECK-LABEL: @or_icmp_fold_vec(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[ARG0:%.*]], splat (i64 32)
; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 55296)
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], splat (i32 -1114112)
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <2 x i32> [[TMP4]], splat (i32 -1112064)
; CHECK-NEXT: ret <2 x i1> [[TMP5]]
;
%1 = lshr <2 x i64> %arg0, <i64 32, i64 32>
%2 = trunc <2 x i64> %1 to <2 x i32>
%3 = xor <2 x i32> %2, <i32 55296, i32 55296>
%4 = add <2 x i32> %3, <i32 -1114112, i32 -1114112>
%5 = icmp ult <2 x i32> %4, <i32 -1112064, i32 -1112064>
%6 = icmp eq <2 x i64> %1, <i64 1114112, i64 1114112>
%7 = or <2 x i1> %6, %5
ret <2 x i1> %7
}