From ea8f9144d8b8609effd9986e64e443afe7a39981 Mon Sep 17 00:00:00 2001 From: Alexey Karyakin Date: Thu, 11 Sep 2025 12:25:21 -0700 Subject: [PATCH] [Hexagon] Lowering saturating subtraction Saturating arithmetic can be expressed by the generic llvm.uadd.sat/llvm.usub.sat intrinsics and their signed counterparts. Mark ISD::USUBSAT and ISD::SSUBSAT as Legal in the HVX lowering and add selection patterns so that llvm.usub.sat and llvm.ssub.sat are selected to saturating HVX vsub instructions. Change-Id: Iead2ed8c6cc1378dc2a08eb33a6950aec5b8d9be JIRA: https://jira-dc.qualcomm.com/jira/browse/Hexagon Change-Id: I86c4734d81a5c581777763a4b8d224d3727e2bba --- .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 4 + llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 15 +++ llvm/test/CodeGen/Hexagon/vsubsat.ll | 99 +++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/vsubsat.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index f1fa40c1b9036..ff02a67d54363 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -208,6 +208,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::SPLAT_VECTOR, T, Legal); setOperationAction(ISD::UADDSAT, T, Legal); setOperationAction(ISD::SADDSAT, T, Legal); + setOperationAction(ISD::USUBSAT, T, Legal); + setOperationAction(ISD::SSUBSAT, T, Legal); if (T != ByteV) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); @@ -302,6 +304,8 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::UADDSAT, T, Legal); setOperationAction(ISD::SADDSAT, T, Legal); setOperationAction(ISD::SUB, T, Legal); + setOperationAction(ISD::USUBSAT, T, Legal); + setOperationAction(ISD::SSUBSAT, T, Legal); setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index fb2ef59d99ef1..1637b91f1fa12 100644 --- 
a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -441,6 +441,21 @@ let Predicates = [UseHVX] in { def: OpR_RR_pat_sat; } +let Predicates = [UseHVX] in { + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; + def: OpR_RR_pat_sat; +} + // For now, we always deal with vector floating point in SF mode. class OpR_RR_pat_conv diff --git a/llvm/test/CodeGen/Hexagon/vsubsat.ll b/llvm/test/CodeGen/Hexagon/vsubsat.ll new file mode 100644 index 0000000000000..bb65aff166e23 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/vsubsat.ll @@ -0,0 +1,99 @@ +; RUN: llc -march=hexagon -mattr=+hvxv73,+hvx-length128b < %s | FileCheck %s + +;; Saturating subtraction. + +; CHECK-LABEL: vsububsat +; CHECK: v[[#]].ub = vsub(v[[#]].ub,v[[#]].ub):sat +define dso_local <128 x i8> @vsububsat(<128 x i8> %x, <128 x i8> %y) { +entry: + %0 = tail call <128 x i8> @llvm.usub.sat.v128i8(<128 x i8> %x, <128 x i8> %y) + ret <128 x i8> %0 +} + +; CHECK-LABEL: vsubuhsat +; CHECK: v[[#]].uh = vsub(v[[#]].uh,v[[#]].uh):sat +define dso_local <64 x i16> @vsubuhsat(<64 x i16> %x, <64 x i16> %y) { +entry: + %0 = tail call <64 x i16> @llvm.usub.sat.v64i16(<64 x i16> %x, <64 x i16> %y) + ret <64 x i16> %0 +} + +; CHECK-LABEL: vsubuwsat +; CHECK: v[[#]].uw = vsub(v[[#]].uw,v[[#]].uw):sat +define dso_local <32 x i32> @vsubuwsat(<32 x i32> %x, <32 x i32> %y) { +entry: + %0 = tail call <32 x i32> @llvm.usub.sat.v32i32(<32 x i32> %x, <32 x i32> %y) + ret <32 x i32> %0 +} + +; CHECK-LABEL: vsubbsat +; CHECK: v[[#]].b = vsub(v[[#]].b,v[[#]].b):sat +define dso_local <128 x i8> @vsubbsat(<128 x i8> %x, <128 x i8> %y) { +entry: + %0 = tail call <128 x i8> @llvm.ssub.sat.v128i8(<128 x i8> %x, <128 x i8> %y) + ret <128 x i8> %0 +} + +; CHECK-LABEL: vsubhsat +; CHECK: v[[#]].h = 
vsub(v[[#]].h,v[[#]].h):sat +define dso_local <64 x i16> @vsubhsat(<64 x i16> %x, <64 x i16> %y) { +entry: + %0 = tail call <64 x i16> @llvm.ssub.sat.v64i16(<64 x i16> %x, <64 x i16> %y) + ret <64 x i16> %0 +} + +; CHECK-LABEL: vsubwsat +; CHECK: v[[#]].w = vsub(v[[#]].w,v[[#]].w):sat +define dso_local <32 x i32> @vsubwsat(<32 x i32> %x, <32 x i32> %y) { +entry: + %0 = tail call <32 x i32> @llvm.ssub.sat.v32i32(<32 x i32> %x, <32 x i32> %y) + ret <32 x i32> %0 +} + +; CHECK-LABEL: vsububsat_dv +; CHECK: v[[#]]:[[#]].ub = vsub(v[[#]]:[[#]].ub,v[[#]]:[[#]].ub):sat +define dso_local <256 x i8> @vsububsat_dv(<256 x i8> %x, <256 x i8> %y) { +entry: + %0 = tail call <256 x i8> @llvm.usub.sat.v256i8(<256 x i8> %x, <256 x i8> %y) + ret <256 x i8> %0 +} + +; CHECK-LABEL: vsubuhsat_dv +; CHECK: v[[#]]:[[#]].uh = vsub(v[[#]]:[[#]].uh,v[[#]]:[[#]].uh):sat +define dso_local <128 x i16> @vsubuhsat_dv(<128 x i16> %x, <128 x i16> %y) { +entry: + %0 = tail call <128 x i16> @llvm.usub.sat.v128i16(<128 x i16> %x, <128 x i16> %y) + ret <128 x i16> %0 +} + +; CHECK-LABEL: vsubuwsat_dv +; CHECK: v[[#]]:[[#]].uw = vsub(v[[#]]:[[#]].uw,v[[#]]:[[#]].uw):sat +define dso_local <64 x i32> @vsubuwsat_dv(<64 x i32> %x, <64 x i32> %y) { +entry: + %0 = tail call <64 x i32> @llvm.usub.sat.v64i32(<64 x i32> %x, <64 x i32> %y) + ret <64 x i32> %0 +} + +; CHECK-LABEL: vsubbsat_dv +; CHECK: v[[#]]:[[#]].b = vsub(v[[#]]:[[#]].b,v[[#]]:[[#]].b):sat +define dso_local <256 x i8> @vsubbsat_dv(<256 x i8> %x, <256 x i8> %y) { +entry: + %0 = tail call <256 x i8> @llvm.ssub.sat.v256i8(<256 x i8> %x, <256 x i8> %y) + ret <256 x i8> %0 +} + +; CHECK-LABEL: vsubhsat_dv +; CHECK: v[[#]]:[[#]].h = vsub(v[[#]]:[[#]].h,v[[#]]:[[#]].h):sat +define dso_local <128 x i16> @vsubhsat_dv(<128 x i16> %x, <128 x i16> %y) { +entry: + %0 = tail call <128 x i16> @llvm.ssub.sat.v128i16(<128 x i16> %x, <128 x i16> %y) + ret <128 x i16> %0 +} + +; CHECK-LABEL: vsubwsat_dv +; CHECK: v[[#]]:[[#]].w = 
vsub(v[[#]]:[[#]].w,v[[#]]:[[#]].w):sat +define dso_local <64 x i32> @vsubwsat_dv(<64 x i32> %x, <64 x i32> %y) { +entry: + %0 = tail call <64 x i32> @llvm.ssub.sat.v64i32(<64 x i32> %x, <64 x i32> %y) + ret <64 x i32> %0 +}