diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1b15a207a2d37..0f0174c8aea35 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5664,6 +5664,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FP_EXTEND: case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: + case ISD::TRUNCATE_SSAT_U: + case ISD::TRUNCATE_SSAT_S: + case ISD::TRUNCATE_USAT_U: // No poison except from flags (which is handled above) return false; diff --git a/llvm/test/CodeGen/AArch64/truncate-sat-freeze.ll b/llvm/test/CodeGen/AArch64/truncate-sat-freeze.ll new file mode 100644 index 0000000000000..97bf1bac2a7db --- /dev/null +++ b/llvm/test/CodeGen/AArch64/truncate-sat-freeze.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s + +; Test that saturating truncate operations work correctly with freeze. +; These intrinsics map to TRUNCATE_SSAT_S, TRUNCATE_SSAT_U, and TRUNCATE_USAT_U, +; which are marked in canCreateUndefOrPoison() as not creating poison. +; This allows freeze to be eliminated, enabling optimizations like select simplification. 
+
+define <4 x i16> @sqxtn_with_freeze(<4 x i32> %a) {
+; CHECK-LABEL: sqxtn_with_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %trunc = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %a)
+  %freeze = freeze <4 x i16> %trunc
+  ret <4 x i16> %freeze
+}
+
+define <4 x i16> @sqxtun_with_freeze(<4 x i32> %a) {
+; CHECK-LABEL: sqxtun_with_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %trunc = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %a)
+  %freeze = freeze <4 x i16> %trunc
+  ret <4 x i16> %freeze
+}
+
+define <8 x i8> @uqxtn_with_freeze(<8 x i16> %a) {
+; CHECK-LABEL: uqxtn_with_freeze:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtn v0.8b, v0.8h
+; CHECK-NEXT:    ret
+  %trunc = tail call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %a)
+  %freeze = freeze <8 x i8> %trunc
+  ret <8 x i8> %freeze
+}
+
+; Once freeze(%val) folds to %val, both select arms match and only the sqxtn remains.
+define <4 x i16> @test_sqxtn_freeze_removal_select(<4 x i32> %a, i1 %cond) {
+; CHECK-LABEL: test_sqxtn_freeze_removal_select:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %safe_a = freeze <4 x i32> %a
+  %val = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %safe_a)
+  %frozen_val = freeze <4 x i16> %val
+  %res = select i1 %cond, <4 x i16> %frozen_val, <4 x i16> %val
+  ret <4 x i16> %res
+}
+
+; Once freeze(%val) folds to %val, both select arms match and only the sqxtun remains.
+define <4 x i16> @test_sqxtun_freeze_removal_select(<4 x i32> %a, i1 %cond) {
+; CHECK-LABEL: test_sqxtun_freeze_removal_select:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %safe_a = freeze <4 x i32> %a
+  %val = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %safe_a)
+  %frozen_val = freeze <4 x i16> %val
+  %res = select i1 %cond, <4 x i16> %frozen_val, <4 x i16> %val
+  ret <4 x i16> %res
+}
+
+; Once freeze(%val) folds to %val, both select arms match and only the uqxtn remains.
+define <8 x i8> @test_uqxtn_freeze_removal_select(<8 x i16> %a, i1 %cond) {
+; CHECK-LABEL: test_uqxtn_freeze_removal_select:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uqxtn v0.8b, v0.8h
+; CHECK-NEXT:    ret
+  %safe_a = freeze <8 x i16> %a
+  %val = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %safe_a)
+  %frozen_val = freeze <8 x i8> %val
+  %res = select i1 %cond, <8 x i8> %frozen_val, <8 x i8> %val
+  ret <8 x i8> %res
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>)
+declare <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32>)
+declare <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16>)