Expand Up
@@ -197,24 +197,24 @@ define amdgpu_kernel void @v_rcp_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #
;
; GFX9-LABEL: v_rcp_f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rcp_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rcp_f16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rcp_f16:
Expand Down
Expand Up
@@ -293,24 +293,24 @@ define amdgpu_kernel void @v_rcp_f16_abs(ptr addrspace(1) %r, ptr addrspace(1) %
;
; GFX9-LABEL: v_rcp_f16_abs:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rcp_f16_e64 v1, |v1|
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rcp_f16_abs:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rcp_f16_e64 v1, |v1|
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rcp_f16_abs:
Expand Down
Expand Up
@@ -392,24 +392,24 @@ define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrs
;
; GFX9-LABEL: reciprocal_f16_rounded:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rcp_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: reciprocal_f16_rounded:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: reciprocal_f16_rounded:
Expand Down
Expand Up
@@ -475,24 +475,24 @@ define amdgpu_kernel void @v_rcp_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %
;
; GFX9-LABEL: v_rcp_f16_afn:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rcp_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rcp_f16_afn:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rcp_f16_afn:
Expand Down
Expand Up
@@ -571,24 +571,24 @@ define amdgpu_kernel void @v_rcp_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %
;
; GFX9-LABEL: v_rcp_f16_neg:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rcp_f16_e64 v1, -v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rcp_f16_neg:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rcp_f16_e64 v1, -v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rcp_f16_neg:
Expand Down
Expand Up
@@ -670,24 +670,24 @@ define amdgpu_kernel void @v_rsq_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #
;
; GFX9-LABEL: v_rsq_f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rsq_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rsq_f16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rsq_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rsq_f16:
Expand Down
Expand Up
@@ -771,26 +771,26 @@ define amdgpu_kernel void @v_rsq_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %
;
; GFX9-LABEL: v_rsq_f16_neg:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rsq_f16_e32 v1, v1
; GFX9-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rsq_f16_neg:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rsq_f16_e32 v1, v1
; GFX10-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rsq_f16_neg:
Expand Down
Expand Up
@@ -879,28 +879,28 @@ define amdgpu_kernel void @v_rsq_f16_multi_use(ptr addrspace(1) %r, ptr addrspac
;
; GFX9-LABEL: v_rsq_f16_multi_use:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_rsq_f16_e32 v2, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_short v0, v2, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v2, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rsq_f16_multi_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_rsq_f16_e32 v2, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_short v0, v2, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v2, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rsq_f16_multi_use:
Expand Down
Expand Up
@@ -987,26 +987,26 @@ define amdgpu_kernel void @v_rsq_f16_missing_contract0(ptr addrspace(1) %r, ptr
;
; GFX9-LABEL: v_rsq_f16_missing_contract0:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_sqrt_f16_e32 v1, v1
; GFX9-NEXT: v_rcp_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rsq_f16_missing_contract0:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_sqrt_f16_e32 v1, v1
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rsq_f16_missing_contract0:
Expand Down
Expand Up
@@ -1092,26 +1092,26 @@ define amdgpu_kernel void @v_rsq_f16_missing_contract1(ptr addrspace(1) %r, ptr
;
; GFX9-LABEL: v_rsq_f16_missing_contract1:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_sqrt_f16_e32 v1, v1
; GFX9-NEXT: v_rcp_f16_e32 v1, v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_rsq_f16_missing_contract1:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_sqrt_f16_e32 v1, v1
; GFX10-NEXT: v_rcp_f16_e32 v1, v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_rsq_f16_missing_contract1:
Expand Down
Expand Up
@@ -1197,26 +1197,26 @@ define amdgpu_kernel void @v_neg_rsq_f16_missing_contract1(ptr addrspace(1) %r,
;
; GFX9-LABEL: v_neg_rsq_f16_missing_contract1:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3 ] glc
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7 ] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_sqrt_f16_e32 v1, v1
; GFX9-NEXT: v_rcp_f16_e64 v1, -v1
; GFX9-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX9-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: v_neg_rsq_f16_missing_contract1:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3 ] glc dlc
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7 ] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_sqrt_f16_e32 v1, v1
; GFX10-NEXT: v_rcp_f16_e64 v1, -v1
; GFX10-NEXT: global_store_short v0, v1, s[0:1 ]
; GFX10-NEXT: global_store_short v0, v1, s[4:5 ]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_neg_rsq_f16_missing_contract1:
Expand Down