Expand Up
@@ -199,10 +199,7 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX6-NEXT: v_min_u32_e32 v2, v1, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_mov_b32_e32 v2, 0xff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
Expand All
@@ -217,10 +214,9 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT: v_sub_u16_e64 v1, v3, v2 clamp
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i8:
Expand Down
Expand Up
@@ -283,10 +279,7 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_movk_i32 s2, 0xff
; GFX6-NEXT: s_lshr_b32 s1, s1, 24
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
Expand All
@@ -303,11 +296,10 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: s_lshl_b32 s0, s2, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, 0xff
; GFX8-NEXT: v_sub_u16_e64 v1, s0, v1 clamp
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
Expand Down
Expand Up
@@ -386,25 +378,20 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6
; GFX6-NEXT: v_min_u32_e32 v3, v2, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_min_u32_e32 v4, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
Expand Down
Expand Up
@@ -549,17 +536,12 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_movk_i32 s4, 0xff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s1, s2, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s3, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 24
; GFX6-NEXT: s_lshl_b32 s1, s3, 24
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
Expand Down
Expand Up
@@ -1802,10 +1784,7 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX6-NEXT: s_cmp_lt_u32 s1, s2
; GFX6-NEXT: s_cselect_b32 s2, s1, s2
; GFX6-NEXT: s_sub_i32 s1, s1, s2
; GFX6-NEXT: s_mov_b32 s2, 0xffff
; GFX6-NEXT: s_lshr_b32 s1, s1, 16
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
Expand Down
Expand Up
@@ -1854,10 +1833,7 @@ define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_min_u32_e32 v1, s0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
Expand Down
Expand Up
@@ -1900,10 +1876,7 @@ define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
; GFX6-NEXT: v_min_u32_e32 v2, s0, v1
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s0, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v0, s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
Expand Down
Expand Up
@@ -1965,19 +1938,14 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7
; GFX6-NEXT: v_min_u32_e32 v4, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v4i16:
Expand Down
Expand Up
@@ -2040,16 +2008,11 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s3, s4
; GFX6-NEXT: s_cselect_b32 s4, s3, s4
; GFX6-NEXT: s_sub_i32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s4
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshr_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s4
; GFX6-NEXT: s_and_b32 s2, s3, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v4i16:
Expand Down
Expand Up
@@ -2133,29 +2096,22 @@ define <3 x float> @v_usubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10
; GFX6-NEXT: v_min_u32_e32 v6, v4, v6
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v6, v5, v6
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v6i16:
Expand Down
Expand Up
@@ -2237,20 +2193,13 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s5, s6
; GFX6-NEXT: s_cselect_b32 s6, s5, s6
; GFX6-NEXT: s_sub_i32 s5, s5, s6
; GFX6-NEXT: s_mov_b32 s6, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s6
; GFX6-NEXT: s_and_b32 s0, s0, s6
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s6
; GFX6-NEXT: s_and_b32 s2, s3, s6
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s3, s5, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v6i16:
Expand Down
Expand Up
@@ -2344,36 +2293,27 @@ define <4 x float> @v_usubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14
; GFX6-NEXT: v_min_u32_e32 v8, v6, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_mov_b32 s4, 0xffff
; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v8, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, s4, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, s4, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_and_b32_e32 v2, s4, v4
; GFX6-NEXT: v_and_b32_e32 v4, s4, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, s4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v3, v6, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v8i16:
Expand Down
Expand Up
@@ -2473,24 +2413,15 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX6-NEXT: s_cmp_lt_u32 s7, s8
; GFX6-NEXT: s_cselect_b32 s8, s7, s8
; GFX6-NEXT: s_sub_i32 s7, s7, s8
; GFX6-NEXT: s_mov_b32 s8, 0xffff
; GFX6-NEXT: s_and_b32 s1, s1, s8
; GFX6-NEXT: s_and_b32 s0, s0, s8
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, s8
; GFX6-NEXT: s_and_b32 s2, s3, s8
; GFX6-NEXT: s_and_b32 s3, s5, s8
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_lshr_b32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_and_b32 s2, s4, s8
; GFX6-NEXT: s_and_b32 s4, s7, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s6, s8
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_lshl_b32 s3, s7, 16
; GFX6-NEXT: s_or_b32 s1, s2, s1
; GFX6-NEXT: s_lshl_b32 s2, s5, 16
; GFX6-NEXT: s_or_b32 s2, s4, s2
; GFX6-NEXT: s_or_b32 s3, s6, s3
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_usubsat_v8i16:
Expand Down