Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -976,10 +976,10 @@ def : GCNPat <
} // End SubtargetPredicate = HasLshlAddU64Inst

let SubtargetPredicate = HasAddMinMaxInsts in {
def : ThreeOp_i32_Pats<add, smax, V_ADD_MAX_I32_e64>;
def : ThreeOp_i32_Pats<add, umax, V_ADD_MAX_U32_e64>;
def : ThreeOp_i32_Pats<add, smin, V_ADD_MIN_I32_e64>;
def : ThreeOp_i32_Pats<add, umin, V_ADD_MIN_U32_e64>;
def : ThreeOp_i32_Pats<saddsat, smax, V_ADD_MAX_I32_e64>;
def : ThreeOp_i32_Pats<uaddsat, umax, V_ADD_MAX_U32_e64>;
def : ThreeOp_i32_Pats<saddsat, smin, V_ADD_MIN_I32_e64>;
def : ThreeOp_i32_Pats<uaddsat, umin, V_ADD_MIN_U32_e64>;
}

def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -464,10 +464,10 @@ class ThreeOp_OpSelClampPats <SDPatternOperator op1, SDPatternOperator op2,
>;

let SubtargetPredicate = HasPkAddMinMaxInsts in {
def : ThreeOp_OpSelClampPats<add, smax, V_PK_ADD_MAX_I16>;
def : ThreeOp_OpSelClampPats<add, umax, V_PK_ADD_MAX_U16>;
def : ThreeOp_OpSelClampPats<add, smin, V_PK_ADD_MIN_I16>;
def : ThreeOp_OpSelClampPats<add, umin, V_PK_ADD_MIN_U16>;
def : ThreeOp_OpSelClampPats<saddsat, smax, V_PK_ADD_MAX_I16>;
def : ThreeOp_OpSelClampPats<uaddsat, umax, V_PK_ADD_MAX_U16>;
def : ThreeOp_OpSelClampPats<saddsat, smin, V_PK_ADD_MIN_I16>;
def : ThreeOp_OpSelClampPats<uaddsat, umin, V_PK_ADD_MIN_U16>;
}

let SubtargetPredicate = HasPkMinMax3Insts in {
Expand Down
151 changes: 55 additions & 96 deletions llvm/test/CodeGen/AMDGPU/add-max.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -18,39 +18,38 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
}

define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; SDAG-LABEL: add_max_u32_ssv:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_ssv:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_add_co_i32 s0, s0, s1
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
; GCN-LABEL: add_max_u32_ssv:
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, s0, s1, v0
; GCN-NEXT: ; return to shader part epilog
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
}

define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
; GCN-LABEL: add_max_u32_sss:
; GCN: ; %bb.0:
; GCN-NEXT: s_add_co_i32 s0, s0, s1
; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GCN-NEXT: s_max_u32 s0, s0, s2
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
; SDAG-LABEL: add_max_u32_sss:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_add_nc_u32_e64 v0, s0, s1 clamp
; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-NEXT: v_max_u32_e32 v0, s2, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_sss:
; GISEL: ; %bb.0:
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_add_max_u32 v0, s0, s1, v0
; GISEL-NEXT: ; return to shader part epilog
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -61,7 +60,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -72,26 +71,19 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
%ret = bitcast i32 %max to float
ret float %ret
}

define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
; SDAG-LABEL: add_max_u32_slv:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_u32_slv:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_addk_co_i32 s0, 0x64
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-NEXT: v_max_u32_e32 v0, s0, v0
; GISEL-NEXT: ; return to shader part epilog
%add = add i32 %a, 100
%max = call i32 @llvm.umax.i32(i32 %add, i32 %b)
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b, i32 %c) {
; GCN-LABEL: add_max_u32_slv:
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
}
Expand All @@ -101,7 +93,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -112,7 +104,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -123,7 +115,7 @@ define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add i32 %a, %b
%add = call i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)
%ret = bitcast i32 %max to float
ret float %ret
Expand All @@ -134,7 +126,7 @@ define amdgpu_ps float @add_max_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -145,29 +137,18 @@ define amdgpu_ps float @add_max_v2u16_svv(<2 x i16> inreg %a, <2 x i16> %b, <2 x
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
}

define amdgpu_ps float @add_max_v2u16_ssv(<2 x i16> inreg %a, <2 x i16> inreg %b, <2 x i16> %c) {
; SDAG-LABEL: add_max_v2u16_ssv:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_v2u16_ssv:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_lshr_b32 s2, s0, 16
; GISEL-NEXT: s_lshr_b32 s3, s1, 16
; GISEL-NEXT: s_add_co_i32 s0, s0, s1
; GISEL-NEXT: s_add_co_i32 s2, s2, s3
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s2
; GISEL-NEXT: v_pk_max_u16 v0, s0, v0
; GISEL-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
; GCN-LABEL: add_max_v2u16_ssv:
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
; GCN-NEXT: ; return to shader part epilog
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -176,30 +157,18 @@ define amdgpu_ps float @add_max_v2u16_ssv(<2 x i16> inreg %a, <2 x i16> inreg %b
define amdgpu_ps float @add_max_v2u16_sss(<2 x i16> inreg %a, <2 x i16> inreg %b, <2 x i16> inreg %c) {
; SDAG-LABEL: add_max_v2u16_sss:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_pk_add_u16 v0, s0, s1
; SDAG-NEXT: v_pk_add_u16 v0, s0, s1 clamp
; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; SDAG-NEXT: v_pk_max_u16 v0, v0, s2
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_v2u16_sss:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_lshr_b32 s3, s0, 16
; GISEL-NEXT: s_lshr_b32 s4, s1, 16
; GISEL-NEXT: s_add_co_i32 s0, s0, s1
; GISEL-NEXT: s_add_co_i32 s3, s3, s4
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s3
; GISEL-NEXT: s_and_b32 s3, s2, 0xffff
; GISEL-NEXT: s_lshr_b32 s1, s0, 16
; GISEL-NEXT: s_and_b32 s0, s0, 0xffff
; GISEL-NEXT: s_lshr_b32 s2, s2, 16
; GISEL-NEXT: s_max_u32 s0, s0, s3
; GISEL-NEXT: s_max_u32 s1, s1, s2
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL-NEXT: v_pk_add_max_u16 v0, s0, s1, v0
; GISEL-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -210,7 +179,7 @@ define amdgpu_ps float @add_max_v2u16_vsi(<2 x i16> %a, <2 x i16> inreg %b) {
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, v0, s0, 4
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> <i16 4, i16 0>)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -221,28 +190,18 @@ define amdgpu_ps float @add_max_v2u16_svl(<2 x i16> inreg %a, <2 x i16> %b) {
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, s0, v0, 0x650064
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> <i16 100, i16 101>)
%ret = bitcast <2 x i16> %max to float
ret float %ret
}

define amdgpu_ps float @add_max_v2u16_slv(<2 x i16> inreg %a, <2 x i16> %b) {
; SDAG-LABEL: add_max_v2u16_slv:
; SDAG: ; %bb.0:
; SDAG-NEXT: v_pk_add_max_u16 v0, 0x640064, s0, v0
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: add_max_v2u16_slv:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_lshr_b32 s1, s0, 16
; GISEL-NEXT: s_add_co_i32 s0, s0, 0x640064
; GISEL-NEXT: s_addk_co_i32 s1, 0x64
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1
; GISEL-NEXT: v_pk_max_u16 v0, s0, v0
; GISEL-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, <i16 100, i16 100>
; GCN-LABEL: add_max_v2u16_slv:
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_u16 v0, 0x640064, s0, v0
; GCN-NEXT: ; return to shader part epilog
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> <i16 100, i16 100>)
%max = call <2 x i16> @llvm.umax.v216(<2 x i16> %add, <2 x i16> %b)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -253,7 +212,7 @@ define amdgpu_ps float @add_max_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_max_i16 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.sadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.smax.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -264,7 +223,7 @@ define amdgpu_ps float @add_min_v2u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_min_u16 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.uadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.umin.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand All @@ -275,7 +234,7 @@ define amdgpu_ps float @add_min_v2s16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16>
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_min_i16 v0, v0, v1, v2
; GCN-NEXT: ; return to shader part epilog
%add = add <2 x i16> %a, %b
%add = call <2 x i16> @llvm.sadd.sat.i32(<2 x i16> %a, <2 x i16> %b)
%max = call <2 x i16> @llvm.smin.v216(<2 x i16> %add, <2 x i16> %c)
%ret = bitcast <2 x i16> %max to float
ret float %ret
Expand Down
Loading