From db89135434f7f92d1eba51aebfc43293d292a48b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 16 Nov 2025 16:22:49 +0000
Subject: [PATCH] [DAG] Add baseline test coverage for #161036

Baseline tests from #161651 that were reverted in #167854

Still missing test coverage for the ffmpeg regression failures
---
 .../umin-sub-to-usubo-select-combine.ll     | 158 +++++++++++++++++
 .../X86/umin-sub-to-usubo-select-combine.ll | 166 ++++++++++++++++++
 2 files changed, 324 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/umin-sub-to-usubo-select-combine.ll
 create mode 100644 llvm/test/CodeGen/X86/umin-sub-to-usubo-select-combine.ll

diff --git a/llvm/test/CodeGen/AArch64/umin-sub-to-usubo-select-combine.ll b/llvm/test/CodeGen/AArch64/umin-sub-to-usubo-select-combine.ll
new file mode 100644
index 0000000000000..d5f516fb3aa27
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/umin-sub-to-usubo-select-combine.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+; GitHub issue #161036
+
+; Positive test : umin(sub(a,b),a) with scalar types should be folded
+define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: underflow_compare_fold_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, x1
+; CHECK-NEXT:    cmp x8, x0
+; CHECK-NEXT:    csel x0, x8, x0, lo
+; CHECK-NEXT:    ret
+  %sub = sub i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+  ret i64 %cond
+}
+
+; Positive test : umin(a,sub(a,b)) with scalar types should be folded
+define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) {
+; CHECK-LABEL: underflow_compare_fold_i64_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, x1
+; CHECK-NEXT:    cmp x0, x8
+; CHECK-NEXT:    csel x0, x0, x8, lo
+; CHECK-NEXT:    ret
+  %sub = sub i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
+  ret i64 %cond
+}
+
+; Positive test : multi-use is OK since the sub instruction still runs once
+define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i64_multi_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x0, x1
+; CHECK-NEXT:    cmp x8, x0
+; CHECK-NEXT:    str x8, [x2]
+; CHECK-NEXT:    csel x0, x8, x0, lo
+; CHECK-NEXT:    ret
+  %sub = sub i64 %a, %b
+  store i64 %sub, ptr addrspace(1) %ptr
+  %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+  ret i64 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: underflow_compare_fold_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    csel w0, w8, w0, lo
+; CHECK-NEXT:    ret
+  %sub = sub i32 %a, %b
+  %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+  ret i32 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) {
+; CHECK-LABEL: underflow_compare_fold_i32_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    csel w0, w0, w8, lo
+; CHECK-NEXT:    ret
+  %sub = sub i32 %a, %b
+  %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub)
+  ret i32 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i32_multi_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    cmp w8, w0
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    csel w0, w8, w0, lo
+; CHECK-NEXT:    ret
+  %sub = sub i32 %a, %b
+  store i32 %sub, ptr addrspace(1) %ptr
+  %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+  ret i32 %cond
+}
+
+; Negative test : i16
+define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: underflow_compare_fold_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    and w8, w8, #0xffff
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    csel w0, w8, w9, lo
+; CHECK-NEXT:    ret
+  %sub = sub i16 %a, %b
+  %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+  ret i16 %cond
+}
+
+; Negative test : i16
+define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) {
+; CHECK-LABEL: underflow_compare_fold_i16_commute:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    and w8, w8, #0xffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w0, w9, w8, lo
+; CHECK-NEXT:    ret
+  %sub = sub i16 %a, %b
+  %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub)
+  ret i16 %cond
+}
+
+; Negative test : i16
+define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i16_multi_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    and w9, w0, #0xffff
+; CHECK-NEXT:    and w10, w8, #0xffff
+; CHECK-NEXT:    strh w8, [x2]
+; CHECK-NEXT:    cmp w10, w9
+; CHECK-NEXT:    csel w0, w10, w9, lo
+; CHECK-NEXT:    ret
+  %sub = sub i16 %a, %b
+  store i16 %sub, ptr addrspace(1) %ptr
+  %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+  ret i16 %cond
+}
+
+; Negative test, vector types : umin(sub(a,b),a) but with vectors
+define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: underflow_compare_dontfold_vectors:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub v1.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umin v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    ret
+  %sub = sub <16 x i8> %a, %b
+  %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
+  ret <16 x i8> %cond
+}
+
+; Negative test, pattern mismatch : umin(add(a,b),a)
+define i64 @umin_add(i64 %a, i64 %b) {
+; CHECK-LABEL: umin_add:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x1
+; CHECK-NEXT:    cmp x8, x0
+; CHECK-NEXT:    csel x0, x8, x0, lo
+; CHECK-NEXT:    ret
+  %add = add i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
+  ret i64 %cond
+}
diff --git a/llvm/test/CodeGen/X86/umin-sub-to-usubo-select-combine.ll b/llvm/test/CodeGen/X86/umin-sub-to-usubo-select-combine.ll
new file mode 100644
index 0000000000000..6739be52d47f6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/umin-sub-to-usubo-select-combine.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+
+; GitHub issue #161036
+
+; Positive test : umin(sub(a,b),a) with scalar types should be folded
+define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: underflow_compare_fold_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    cmpq %rdi, %rax
+; CHECK-NEXT:    cmovaeq %rdi, %rax
+; CHECK-NEXT:    retq
+  %sub = sub i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+  ret i64 %cond
+}
+
+; Positive test : umin(a,sub(a,b)) with scalar types should be folded
+define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) {
+; CHECK-LABEL: underflow_compare_fold_i64_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    cmpq %rax, %rdi
+; CHECK-NEXT:    cmovbq %rdi, %rax
+; CHECK-NEXT:    retq
+  %sub = sub i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
+  ret i64 %cond
+}
+
+; Positive test : multi-use is OK since the sub instruction still runs once
+define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i64_multi_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    movq %rax, (%rdx)
+; CHECK-NEXT:    cmpq %rdi, %rax
+; CHECK-NEXT:    cmovaeq %rdi, %rax
+; CHECK-NEXT:    retq
+  %sub = sub i64 %a, %b
+  store i64 %sub, ptr addrspace(1) %ptr
+  %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
+  ret i64 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: underflow_compare_fold_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    cmpl %edi, %eax
+; CHECK-NEXT:    cmovael %edi, %eax
+; CHECK-NEXT:    retq
+  %sub = sub i32 %a, %b
+  %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+  ret i32 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) {
+; CHECK-LABEL: underflow_compare_fold_i32_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    cmovbl %edi, %eax
+; CHECK-NEXT:    retq
+  %sub = sub i32 %a, %b
+  %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub)
+  ret i32 %cond
+}
+
+; Positive test : i32
+define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i32_multi_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    movl %eax, (%rdx)
+; CHECK-NEXT:    cmpl %edi, %eax
+; CHECK-NEXT:    cmovael %edi, %eax
+; CHECK-NEXT:    retq
+  %sub = sub i32 %a, %b
+  store i32 %sub, ptr addrspace(1) %ptr
+  %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
+  ret i32 %cond
+}
+
+; Positive test : i16
+define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: underflow_compare_fold_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    cmpw %di, %ax
+; CHECK-NEXT:    cmovael %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %sub = sub i16 %a, %b
+  %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+  ret i16 %cond
+}
+
+; Positive test : i16
+define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) {
+; CHECK-LABEL: underflow_compare_fold_i16_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    cmpw %ax, %di
+; CHECK-NEXT:    cmovbl %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %sub = sub i16 %a, %b
+  %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub)
+  ret i16 %cond
+}
+
+; Positive test : i16
+define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: underflow_compare_fold_i16_multi_use:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    subl %esi, %eax
+; CHECK-NEXT:    movw %ax, (%rdx)
+; CHECK-NEXT:    cmpw %di, %ax
+; CHECK-NEXT:    cmovael %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %sub = sub i16 %a, %b
+  store i16 %sub, ptr addrspace(1) %ptr
+  %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
+  ret i16 %cond
+}
+
+
+; Negative test, vector types : umin(sub(a,b),a) but with vectors
+define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: underflow_compare_dontfold_vectors:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-NEXT:    psubb %xmm1, %xmm2
+; CHECK-NEXT:    pminub %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %sub = sub <16 x i8> %a, %b
+  %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
+  ret <16 x i8> %cond
+}
+
+; Negative test, pattern mismatch : umin(add(a,b),a)
+define i64 @umin_add(i64 %a, i64 %b) {
+; CHECK-LABEL: umin_add:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
+; CHECK-NEXT:    cmpq %rdi, %rax
+; CHECK-NEXT:    cmovaeq %rdi, %rax
+; CHECK-NEXT:    retq
+  %add = add i64 %a, %b
+  %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
+  ret i64 %cond
+}
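
Editorial note, not part of the patch: as the file names suggest, the combine these baselines target rewrites umin(sub(a,b),a) into an unsigned-overflow subtract plus a select. Below is a minimal IR sketch of the assumed folded form; the function name @underflow_compare_folded_sketch is hypothetical and only illustrates the transform, it does not claim to match the reverted #161651 output exactly.

; Illustrative sketch of the assumed post-fold form. When %a < %b the
; subtraction wraps to a value above %a, so umin(sub(a,b),a) yields %a
; exactly when usub.with.overflow reports overflow.
define i64 @underflow_compare_folded_sketch(i64 %a, i64 %b) {
  %agg = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %sub = extractvalue { i64, i1 } %agg, 0
  %ov = extractvalue { i64, i1 } %agg, 1
  %res = select i1 %ov, i64 %a, i64 %sub
  ret i64 %res
}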