176 changes: 79 additions & 97 deletions llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fabs.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(<2 x half> addrspace(1)*
; CI-NEXT: flat_load_dword v0, v[0:1]
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_bfe_u32 v1, v0, 16, 15
; CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
; CI-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; CI-NEXT: flat_store_short v[0:1], v0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: flat_store_short v[0:1], v1
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/fshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -802,14 +802,14 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_or_b32_e32 v5, 16, v5
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT: v_alignbit_b32 v1, v1, v3, v5
; SI-NEXT: v_or_b32_e32 v3, 16, v4
; SI-NEXT: v_or_b32_e32 v4, 16, v4
; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v3
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_alignbit_b32 v1, v1, v3, v5
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v3
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v2i16:
Expand Down Expand Up @@ -1021,17 +1021,17 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2
; SI-NEXT: v_or_b32_e32 v4, 16, v11
; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v7
; SI-NEXT: v_alignbit_b32 v3, v3, v5, v4
; SI-NEXT: v_or_b32_e32 v4, 16, v10
; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; SI-NEXT: v_alignbit_b32 v2, v2, v5, v4
; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; SI-NEXT: v_or_b32_e32 v5, 16, v10
; SI-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; SI-NEXT: v_alignbit_b32 v2, v2, v6, v5
; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT: v_or_b32_e32 v2, v2, v3
; SI-NEXT: v_or_b32_e32 v2, v2, v4
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_alignbit_b32 v1, v2, v1, 16
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fshr_v4i16:
Expand Down
40 changes: 18 additions & 22 deletions llvm/test/CodeGen/AMDGPU/idot4s.ll
Original file line number Diff line number Diff line change
Expand Up @@ -963,34 +963,30 @@ define amdgpu_kernel void @idot4_acc16_vecMul(<4 x i8> addrspace(1)* %src1,
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
; GFX7-NEXT: v_bfe_i32 v3, v2, 8, 8
; GFX7-NEXT: v_bfe_i32 v3, v2, 16, 8
; GFX7-NEXT: v_bfe_i32 v4, v2, 0, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_ashrrev_i32_e32 v5, 24, v2
; GFX7-NEXT: v_bfe_i32 v2, v2, 8, 8
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_bfe_i32 v6, v0, 8, 8
; GFX7-NEXT: v_bfe_i32 v6, v0, 16, 8
; GFX7-NEXT: v_bfe_i32 v7, v0, 0, 8
; GFX7-NEXT: v_or_b32_e32 v3, v4, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff, v7
; GFX7-NEXT: v_bfe_i32 v8, v0, 16, 8
; GFX7-NEXT: v_or_b32_e32 v4, v6, v4
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v8
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX7-NEXT: v_ashrrev_i32_e32 v8, 24, v0
; GFX7-NEXT: v_bfe_i32 v0, v0, 8, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_bfe_i32 v5, v2, 16, 8
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-NEXT: v_alignbit_b32 v2, 0, v2, 16
; GFX7-NEXT: v_alignbit_b32 v0, 0, v0, 16
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v3, v4, v1
; GFX7-NEXT: v_ashrrev_i32_e32 v2, 24, v2
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-NEXT: v_ashrrev_i32_e32 v0, 24, v0
; GFX7-NEXT: v_mad_u32_u24 v1, v6, v8, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_mad_u32_u24 v1, v5, v7, v1
; GFX7-NEXT: v_mad_u32_u24 v1, v4, v7, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v6, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v5, v8, v0
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
Expand Down
81 changes: 36 additions & 45 deletions llvm/test/CodeGen/AMDGPU/idot4u.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1851,27 +1851,23 @@ define amdgpu_kernel void @udot4_acc16_vecMul(<4 x i8> addrspace(1)* %src1,
; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
; GFX7-NEXT: v_and_b32_e32 v3, 0xff00, v2
; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v2
; GFX7-NEXT: v_bfe_u32 v4, v2, 16, 8
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_and_b32_e32 v6, 0xff00, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v2
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v0
; GFX7-NEXT: v_or_b32_e32 v3, v4, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v6
; GFX7-NEXT: v_or_b32_e32 v4, v7, v4
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-NEXT: v_bfe_u32 v7, v0, 16, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6
; GFX7-NEXT: v_alignbit_b32 v3, s10, v3, 16
; GFX7-NEXT: v_alignbit_b32 v6, 0, v6, 16
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v3, v4, v1
; GFX7-NEXT: v_bfe_u32 v5, v2, 16, 8
; GFX7-NEXT: v_bfe_u32 v8, v0, 16, 8
; GFX7-NEXT: v_mad_u32_u24 v1, v6, v7, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0
; GFX7-NEXT: v_mad_u32_u24 v1, v5, v8, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v6, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v4, v7, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v5, v8, v0
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
Expand Down Expand Up @@ -2143,18 +2139,16 @@ define amdgpu_kernel void @udot4_acc8_vecMul(<4 x i8> addrspace(1)* %src1,
; GFX9-NODL-NEXT: s_waitcnt vmcnt(1)
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-NODL-NEXT: v_mul_lo_u16_sdwa v6, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3
; GFX9-NODL-NEXT: v_mul_lo_u16_e32 v8, v4, v5
; GFX9-NODL-NEXT: v_or_b32_sdwa v6, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NODL-NEXT: v_mul_lo_u16_sdwa v7, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX9-NODL-NEXT: v_or_b32_e32 v7, v7, v8
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v7, 8, v7
; GFX9-NODL-NEXT: v_mul_lo_u16_e32 v7, v4, v5
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v8, 8, v6
; GFX9-NODL-NEXT: v_or_b32_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NODL-NEXT: v_mul_lo_u16_sdwa v6, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:BYTE_1 src1_sel:BYTE_1
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v6, 8, v6
; GFX9-NODL-NEXT: s_waitcnt vmcnt(0)
; GFX9-NODL-NEXT: v_mad_legacy_u16 v1, v1, v2, v3
; GFX9-NODL-NEXT: v_add_u16_e32 v1, v1, v7
; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v6, 8, v6
; GFX9-NODL-NEXT: v_mad_legacy_u16 v1, v4, v5, v1
; GFX9-NODL-NEXT: v_add_u16_e32 v1, v1, v6
; GFX9-NODL-NEXT: v_mad_legacy_u16 v1, v4, v5, v1
; GFX9-NODL-NEXT: v_add_u16_e32 v1, v1, v8
; GFX9-NODL-NEXT: global_store_byte v0, v1, s[2:3]
; GFX9-NODL-NEXT: s_endpgm
;
Expand All @@ -2173,18 +2167,16 @@ define amdgpu_kernel void @udot4_acc8_vecMul(<4 x i8> addrspace(1)* %src1,
; GFX9-DL-NEXT: s_waitcnt vmcnt(1)
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v6, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3
; GFX9-DL-NEXT: v_mul_lo_u16_e32 v8, v4, v5
; GFX9-DL-NEXT: v_or_b32_sdwa v6, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v7, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1
; GFX9-DL-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX9-DL-NEXT: v_or_b32_e32 v7, v7, v8
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v7, 8, v7
; GFX9-DL-NEXT: v_mul_lo_u16_e32 v7, v4, v5
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v8, 8, v6
; GFX9-DL-NEXT: v_or_b32_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-DL-NEXT: v_mul_lo_u16_sdwa v6, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:BYTE_1 src1_sel:BYTE_1
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v6, 8, v6
; GFX9-DL-NEXT: s_waitcnt vmcnt(0)
; GFX9-DL-NEXT: v_mad_legacy_u16 v1, v1, v2, v3
; GFX9-DL-NEXT: v_add_u16_e32 v1, v1, v7
; GFX9-DL-NEXT: v_lshrrev_b32_e32 v6, 8, v6
; GFX9-DL-NEXT: v_mad_legacy_u16 v1, v4, v5, v1
; GFX9-DL-NEXT: v_add_u16_e32 v1, v1, v6
; GFX9-DL-NEXT: v_mad_legacy_u16 v1, v4, v5, v1
; GFX9-DL-NEXT: v_add_u16_e32 v1, v1, v8
; GFX9-DL-NEXT: global_store_byte v0, v1, s[2:3]
; GFX9-DL-NEXT: s_endpgm
;
Expand All @@ -2203,24 +2195,23 @@ define amdgpu_kernel void @udot4_acc8_vecMul(<4 x i8> addrspace(1)* %src1,
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v4, 24, v1
; GFX10-DL-NEXT: s_waitcnt vmcnt(1)
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 24, v2
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX10-DL-NEXT: v_lshrrev_b16 v8, 8, v2
; GFX10-DL-NEXT: v_lshrrev_b16 v6, 8, v1
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX10-DL-NEXT: v_lshrrev_b16 v9, 8, v2
; GFX10-DL-NEXT: v_mul_lo_u16 v4, v4, v5
; GFX10-DL-NEXT: v_lshrrev_b16 v5, 8, v1
; GFX10-DL-NEXT: v_mul_lo_u16 v9, v6, v7
; GFX10-DL-NEXT: s_waitcnt vmcnt(0)
; GFX10-DL-NEXT: v_mad_u16 v1, v1, v2, v3
; GFX10-DL-NEXT: v_mul_lo_u16 v5, v7, v8
; GFX10-DL-NEXT: v_mul_lo_u16 v6, v6, v9
; GFX10-DL-NEXT: v_lshlrev_b16 v4, 8, v4
; GFX10-DL-NEXT: v_mul_lo_u16 v5, v5, v8
; GFX10-DL-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-DL-NEXT: v_lshlrev_b16 v5, 8, v5
; GFX10-DL-NEXT: v_lshlrev_b32_e32 v8, 16, v4
; GFX10-DL-NEXT: v_lshlrev_b16 v6, 8, v6
; GFX10-DL-NEXT: v_or_b32_sdwa v5, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v2, 8, v4
; GFX10-DL-NEXT: v_or_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-DL-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 8, v5
; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v5
; GFX10-DL-NEXT: v_mad_u16 v1, v6, v7, v1
; GFX10-DL-NEXT: v_mad_u16 v1, v7, v8, v1
; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v2
; GFX10-DL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-DL-NEXT: s_endpgm
Expand Down
426 changes: 188 additions & 238 deletions llvm/test/CodeGen/AMDGPU/idot8s.ll

Large diffs are not rendered by default.

283 changes: 119 additions & 164 deletions llvm/test/CodeGen/AMDGPU/idot8u.ll

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/saddsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v2i16:
Expand Down
42 changes: 14 additions & 28 deletions llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,11 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_readfirstlane_b32 s0, v0
; SI-NEXT: s_lshl_b32 s1, s0, 8
; SI-NEXT: s_or_b32 s0, s1, s0
; SI-NEXT: s_and_b32 s1, s0, 0xff00
; SI-NEXT: s_lshr_b32 s4, s0, 8
; SI-NEXT: s_or_b32 s1, s4, s1
; SI-NEXT: s_lshl_b32 s4, s1, 16
; SI-NEXT: s_or_b32 s1, s1, s4
; SI-NEXT: s_or_b32 s0, s0, s4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: v_mov_b32_e32 v1, s1
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_mov_b32_e32 v1, v0
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
Expand All @@ -125,12 +119,10 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_and_b32_e32 v1, 0xffffff00, v0
; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_mov_b32_e32 v1, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
bb:
Expand All @@ -148,17 +140,11 @@ define amdgpu_kernel void @scalar_to_vector_v4f16() {
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_readfirstlane_b32 s0, v0
; SI-NEXT: s_lshl_b32 s1, s0, 8
; SI-NEXT: s_or_b32 s0, s1, s0
; SI-NEXT: s_and_b32 s1, s0, 0xff00
; SI-NEXT: s_lshr_b32 s4, s0, 8
; SI-NEXT: s_or_b32 s1, s4, s1
; SI-NEXT: s_lshl_b32 s4, s1, 16
; SI-NEXT: s_or_b32 s1, s1, s4
; SI-NEXT: s_or_b32 s0, s0, s4
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: v_mov_b32_e32 v1, s1
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_mov_b32_e32 v1, v0
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/shift-i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ define i128 @v_shl_i128_vk(i128 %lhs) {
; GCN-LABEL: v_shl_i128_vk:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_alignbit_b32 v4, v2, v1, 15
; GCN-NEXT: v_lshl_b64 v[2:3], v[2:3], 17
; GCN-NEXT: v_lshrrev_b32_e32 v4, 15, v1
; GCN-NEXT: v_or_b32_e32 v2, v2, v4
; GCN-NEXT: v_alignbit_b32 v1, v1, v0, 15
; GCN-NEXT: v_alignbit_b32 v3, v3, v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v0, 17, v0
; GCN-NEXT: v_mov_b32_e32 v2, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
%shl = shl i128 %lhs, 17
ret i128 %shl
Expand All @@ -110,11 +110,11 @@ define i128 @v_ashr_i128_vk(i128 %lhs) {
; GCN-LABEL: v_ashr_i128_vk:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_ashr_i64 v[4:5], v[2:3], 33
; GCN-NEXT: v_alignbit_b32 v0, v2, v1, 1
; GCN-NEXT: v_alignbit_b32 v1, v3, v2, 1
; GCN-NEXT: v_mov_b32_e32 v2, v4
; GCN-NEXT: v_mov_b32_e32 v3, v5
; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: v_lshl_b64 v[0:1], v[2:3], 31
; GCN-NEXT: v_lshrrev_b32_e32 v4, 1, v4
; GCN-NEXT: v_ashr_i64 v[2:3], v[2:3], 33
; GCN-NEXT: v_or_b32_e32 v0, v4, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%shl = ashr i128 %lhs, 33
ret i128 %shl
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/ssubsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1
; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubsat_v2i16:
Expand Down
19 changes: 8 additions & 11 deletions llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,19 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0
; HAWAII-NEXT: v_mov_b32_e32 v0, s0
; HAWAII-NEXT: v_mov_b32_e32 v1, s5
; HAWAII-NEXT: flat_load_ubyte v0, v[0:1]
; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x3
; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x0
; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x2
; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2
; HAWAII-NEXT: s_load_dword s2, s[4:5], 0x3
; HAWAII-NEXT: s_mov_b32 m0, -1
; HAWAII-NEXT: s_waitcnt lgkmcnt(0)
; HAWAII-NEXT: s_and_b32 s3, s0, 0xffff
; HAWAII-NEXT: v_mov_b32_e32 v1, s1
; HAWAII-NEXT: v_mov_b32_e32 v2, s0
; HAWAII-NEXT: v_mov_b32_e32 v1, s0
; HAWAII-NEXT: v_mov_b32_e32 v2, s1
; HAWAII-NEXT: v_mov_b32_e32 v3, s2
; HAWAII-NEXT: ds_write_b16 v1, v2 offset:4
; HAWAII-NEXT: ds_write_b16 v1, v3 offset:4
; HAWAII-NEXT: s_waitcnt vmcnt(0)
; HAWAII-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; HAWAII-NEXT: v_or_b32_e32 v0, s3, v0
; HAWAII-NEXT: v_bfe_u32 v0, v0, 16, 7
; HAWAII-NEXT: v_and_b32_e32 v0, 0x7f, v0
; HAWAII-NEXT: ds_write_b8 v1, v0 offset:6
; HAWAII-NEXT: ds_write_b32 v1, v3
; HAWAII-NEXT: ds_write_b32 v1, v2
; HAWAII-NEXT: s_endpgm
;
; FIJI-LABEL: local_store_i55:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/trunc-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 {
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v2
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: trunc_v2i64_arg_to_v2i16:
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/uaddsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,8 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i16:
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/usubsat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ define <2 x i16> @v_usubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-NEXT: v_max_u32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v2i16:
Expand Down
39 changes: 15 additions & 24 deletions llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,16 @@ define void @i56_or(i56* %a) {
;
; BE-LABEL: i56_or:
; BE: @ %bb.0:
; BE-NEXT: ldr r1, [r0]
; BE-NEXT: strb r1, [r0, #3]
; BE-NEXT: ldrh r2, [r0, #4]!
; BE-NEXT: ldrb r3, [r0, #2]
; BE-NEXT: mov r1, r0
; BE-NEXT: ldr r0, [r0]
; BE-NEXT: ldrh r2, [r1, #4]!
; BE-NEXT: ldrb r3, [r1, #2]
; BE-NEXT: orr r2, r3, r2, lsl #8
; BE-NEXT: orr r1, r2, r1, lsl #24
; BE-NEXT: orr r1, r1, #384
; BE-NEXT: strb r1, [r0, #2]
; BE-NEXT: lsr r1, r1, #8
; BE-NEXT: strh r1, [r0]
; BE-NEXT: orr r0, r2, r0, lsl #24
; BE-NEXT: orr r0, r0, #384
; BE-NEXT: strb r0, [r1, #2]
; BE-NEXT: lsr r0, r0, #8
; BE-NEXT: strh r0, [r1]
; BE-NEXT: mov pc, lr
%aa = load i56, i56* %a
%b = or i56 %aa, 384
Expand All @@ -118,16 +118,10 @@ define void @i56_and_or(i56* %a) {
;
; BE-LABEL: i56_and_or:
; BE: @ %bb.0:
; BE-NEXT: ldr r1, [r0]
; BE-NEXT: ldrh r1, [r0, #4]!
; BE-NEXT: mov r2, #128
; BE-NEXT: strb r1, [r0, #3]
; BE-NEXT: ldrh r12, [r0, #4]!
; BE-NEXT: ldrb r3, [r0, #2]
; BE-NEXT: orr r1, r1, #1
; BE-NEXT: strb r2, [r0, #2]
; BE-NEXT: orr r2, r3, r12, lsl #8
; BE-NEXT: orr r1, r2, r1, lsl #24
; BE-NEXT: orr r1, r1, #384
; BE-NEXT: lsr r1, r1, #8
; BE-NEXT: strh r1, [r0]
; BE-NEXT: mov pc, lr

Expand All @@ -149,13 +143,10 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
;
; BE-LABEL: i56_insert_bit:
; BE: @ %bb.0:
; BE-NEXT: ldr r2, [r0]
; BE-NEXT: strb r2, [r0, #3]
; BE-NEXT: ldrh r12, [r0, #4]!
; BE-NEXT: ldrb r3, [r0, #2]
; BE-NEXT: orr r3, r3, r12, lsl #8
; BE-NEXT: orr r2, r3, r2, lsl #24
; BE-NEXT: bic r2, r2, #8192
; BE-NEXT: ldrh r2, [r0, #4]!
; BE-NEXT: mov r3, #57088
; BE-NEXT: orr r3, r3, #16711680
; BE-NEXT: and r2, r3, r2, lsl #8
; BE-NEXT: orr r1, r2, r1, lsl #13
; BE-NEXT: lsr r1, r1, #8
; BE-NEXT: strh r1, [r0]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/ARM/parity.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ define i17 @parity_17(i17 %x) {
; CHECK-LABEL: parity_17:
; CHECK: @ %bb.0:
; CHECK-NEXT: bfc r0, #17, #15
; CHECK-NEXT: eor r0, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r0, r0, lsr #8
; CHECK-NEXT: eor r1, r0, r0, lsr #16
; CHECK-NEXT: eor r0, r1, r0, lsr #8
; CHECK-NEXT: eor r0, r0, r0, lsr #4
; CHECK-NEXT: eor r0, r0, r0, lsr #2
; CHECK-NEXT: eor r0, r0, r0, lsr #1
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,35 +84,35 @@ define float @fooul(float %X) #0 {
; PPC64-NEXT: addi 3, 5, 0
; PPC64-NEXT: .LBB2_2: # %entry
; PPC64-NEXT: sradi 4, 3, 53
; PPC64-NEXT: clrldi 5, 3, 63
; PPC64-NEXT: rldicl 5, 3, 63, 1
; PPC64-NEXT: addi 4, 4, 1
; PPC64-NEXT: clrldi 6, 3, 63
; PPC64-NEXT: cmpldi 4, 1
; PPC64-NEXT: rldicl 4, 3, 63, 1
; PPC64-NEXT: or 5, 5, 4
; PPC64-NEXT: rldicl 6, 5, 11, 53
; PPC64-NEXT: addi 6, 6, 1
; PPC64-NEXT: clrldi 7, 5, 53
; PPC64-NEXT: cmpldi 1, 6, 1
; PPC64-NEXT: clrldi 6, 3, 53
; PPC64-NEXT: clrldi 4, 3, 53
; PPC64-NEXT: or 6, 6, 5
; PPC64-NEXT: clrldi 7, 6, 53
; PPC64-NEXT: addi 4, 4, 2047
; PPC64-NEXT: addi 7, 7, 2047
; PPC64-NEXT: addi 6, 6, 2047
; PPC64-NEXT: or 4, 7, 4
; PPC64-NEXT: or 6, 6, 3
; PPC64-NEXT: rldicl 4, 4, 53, 11
; PPC64-NEXT: rldicr 6, 6, 0, 52
; PPC64-NEXT: or 4, 4, 3
; PPC64-NEXT: or 5, 7, 5
; PPC64-NEXT: rldicl 7, 3, 10, 54
; PPC64-NEXT: rldicr 4, 4, 0, 52
; PPC64-NEXT: addi 7, 7, 1
; PPC64-NEXT: bc 12, 1, .LBB2_4
; PPC64-NEXT: # %bb.3: # %entry
; PPC64-NEXT: ori 6, 3, 0
; PPC64-NEXT: ori 4, 3, 0
; PPC64-NEXT: b .LBB2_4
; PPC64-NEXT: .LBB2_4: # %entry
; PPC64-NEXT: rldicl 4, 4, 11, 1
; PPC64-NEXT: cmpdi 3, 0
; PPC64-NEXT: std 6, -32(1)
; PPC64-NEXT: bc 12, 5, .LBB2_6
; PPC64-NEXT: rldicl 5, 5, 53, 11
; PPC64-NEXT: std 4, -32(1)
; PPC64-NEXT: rldicl 4, 5, 11, 1
; PPC64-NEXT: cmpldi 7, 1
; PPC64-NEXT: bc 12, 1, .LBB2_6
; PPC64-NEXT: # %bb.5: # %entry
; PPC64-NEXT: ori 4, 5, 0
; PPC64-NEXT: ori 4, 6, 0
; PPC64-NEXT: b .LBB2_6
; PPC64-NEXT: .LBB2_6: # %entry
; PPC64-NEXT: cmpdi 3, 0
; PPC64-NEXT: std 4, -24(1)
; PPC64-NEXT: bc 12, 0, .LBB2_8
; PPC64-NEXT: # %bb.7: # %entry
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/bswap-bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ define i32 @test_bswap_i32(i32 %a) nounwind {
;
; RV64I-LABEL: test_bswap_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
Expand Down Expand Up @@ -491,7 +491,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
;
; RV64I-LABEL: test_bitreverse_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -285,7 +285,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64M-NEXT: lui a2, 349525
; RV64M-NEXT: addiw a2, a2, 1365
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: subw a0, a0, a1
; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: lui a1, 209715
; RV64M-NEXT: addiw a1, a1, 819
; RV64M-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -683,7 +683,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -739,7 +739,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64M-NEXT: lui a2, 349525
; RV64M-NEXT: addiw a2, a2, 1365
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: subw a0, a0, a1
; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: lui a1, 209715
; RV64M-NEXT: addiw a1, a1, 819
; RV64M-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -1214,7 +1214,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -1297,7 +1297,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64M-NEXT: lui a2, 349525
; RV64M-NEXT: addiw a2, a2, 1365
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: subw a0, a0, a1
; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: lui a1, 209715
; RV64M-NEXT: addiw a1, a1, 819
; RV64M-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -1805,7 +1805,7 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -1877,7 +1877,7 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
; RV64M-NEXT: lui a2, 349525
; RV64M-NEXT: addiw a2, a2, 1365
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: subw a0, a0, a1
; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: lui a1, 209715
; RV64M-NEXT: addiw a1, a1, 819
; RV64M-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -2300,7 +2300,7 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -2350,7 +2350,7 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
; RV64M-NEXT: lui a2, 349525
; RV64M-NEXT: addiw a2, a2, 1365
; RV64M-NEXT: and a1, a1, a2
; RV64M-NEXT: subw a0, a0, a1
; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: lui a1, 209715
; RV64M-NEXT: addiw a1, a1, 819
; RV64M-NEXT: and a2, a0, a1
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/RISCV/rv64zbb-zbp-zbkb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,11 @@ define i64 @roriw_bug(i64 %x) nounwind {
; CHECK-LABEL: roriw_bug:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a1, a0, 31
; CHECK-NEXT: andi a0, a0, -2
; CHECK-NEXT: srli a2, a0, 1
; CHECK-NEXT: or a1, a1, a2
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a2, a0, -2
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: or a0, a1, a0
; CHECK-NEXT: sext.w a0, a0
; CHECK-NEXT: xor a0, a2, a0
; CHECK-NEXT: ret
%a = shl i64 %x, 31
%b = and i64 %x, 18446744073709551614
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/RISCV/rv64zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -83,7 +83,7 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -146,7 +146,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -204,7 +204,7 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -273,7 +273,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -380,7 +380,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -423,7 +423,7 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -465,7 +465,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -520,7 +520,7 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -622,7 +622,7 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -660,7 +660,7 @@ define signext i32 @ctpop_i32_load(i32* %p) nounwind {
; RV64I-NEXT: lui a2, 349525
; RV64I-NEXT: addiw a2, a2, 1365
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: lui a1, 209715
; RV64I-NEXT: addiw a1, a1, 819
; RV64I-NEXT: and a2, a0, a1
Expand Down Expand Up @@ -1028,7 +1028,7 @@ declare i32 @llvm.bswap.i32(i32)
define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
Expand All @@ -1055,7 +1055,7 @@ define signext i32 @bswap_i32(i32 signext %a) nounwind {
define void @bswap_i32_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a2, a0, 8
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addiw a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rv64zbp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2447,7 +2447,7 @@ declare i32 @llvm.bswap.i32(i32)
define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
Expand All @@ -2473,7 +2473,7 @@ define signext i32 @bswap_i32(i32 signext %a) nounwind {
define void @bswap_i32_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a2, a0, 8
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addiw a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
Expand Down Expand Up @@ -2614,7 +2614,7 @@ declare i32 @llvm.bitreverse.i32(i32)
define signext i32 @bitreverse_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bitreverse_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 8
; RV64I-NEXT: srli a1, a0, 8
; RV64I-NEXT: lui a2, 16
; RV64I-NEXT: addiw a2, a2, -256
; RV64I-NEXT: and a1, a1, a2
Expand Down Expand Up @@ -2661,7 +2661,7 @@ define signext i32 @bitreverse_i32(i32 signext %a) nounwind {
define void @bitreverse_i32_nosext(i32 signext %a, i32* %x) nounwind {
; RV64I-LABEL: bitreverse_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a2, a0, 8
; RV64I-NEXT: srli a2, a0, 8
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: addiw a3, a3, -256
; RV64I-NEXT: and a2, a2, a3
Expand Down Expand Up @@ -2780,7 +2780,7 @@ define i32 @bswap_rotr_i32(i32 %a) {
; RV64I-NEXT: slli a2, a0, 24
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: srliw a2, a0, 24
; RV64I-NEXT: srliw a0, a0, 16
; RV64I-NEXT: srli a0, a0, 16
; RV64I-NEXT: slli a0, a0, 8
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: slliw a0, a0, 16
Expand All @@ -2801,7 +2801,7 @@ define i32 @bswap_rotl_i32(i32 %a) {
; RV64I-LABEL: bswap_rotl_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srliw a1, a0, 24
; RV64I-NEXT: srliw a2, a0, 16
; RV64I-NEXT: srli a2, a0, 16
; RV64I-NEXT: slli a2, a2, 8
; RV64I-NEXT: or a1, a2, a1
; RV64I-NEXT: slli a2, a0, 8
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/sextw-removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: srli a0, a0, 1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: subw a0, a1, a0
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: and a2, a0, s1
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: and a0, a0, s1
Expand Down
63 changes: 33 additions & 30 deletions llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,38 +76,41 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: vlgvf %r0, %v26, 3
; CHECK-NEXT: vlgvf %r4, %v24, 1
; CHECK-NEXT: vlgvf %r3, %v24, 2
; CHECK-NEXT: srlk %r1, %r0, 8
; CHECK-NEXT: vlgvf %r1, %v26, 3
; CHECK-NEXT: vlgvf %r0, %v26, 2
; CHECK-NEXT: stc %r1, 30(%r2)
; CHECK-NEXT: srlk %r3, %r1, 8
; CHECK-NEXT: risbgn %r1, %r1, 33, 167, 0
; CHECK-NEXT: vlgvf %r5, %v24, 2
; CHECK-NEXT: rosbg %r1, %r0, 2, 32, 31
; CHECK-NEXT: sth %r3, 28(%r2)
; CHECK-NEXT: srlg %r1, %r1, 24
; CHECK-NEXT: vlgvf %r3, %v24, 3
; CHECK-NEXT: st %r1, 24(%r2)
; CHECK-NEXT: vlgvf %r1, %v26, 0
; CHECK-NEXT: risbgn %r14, %r5, 6, 164, 27
; CHECK-NEXT: sllg %r4, %r3, 60
; CHECK-NEXT: rosbg %r14, %r3, 37, 63, 60
; CHECK-NEXT: sllg %r3, %r14, 8
; CHECK-NEXT: rosbg %r4, %r1, 4, 34, 29
; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8
; CHECK-NEXT: stg %r3, 8(%r2)
; CHECK-NEXT: vlgvf %r3, %v24, 1
; CHECK-NEXT: sllg %r4, %r3, 58
; CHECK-NEXT: rosbg %r4, %r5, 6, 36, 27
; CHECK-NEXT: vlgvf %r5, %v24, 0
; CHECK-NEXT: sth %r1, 28(%r2)
; CHECK-NEXT: sllg %r1, %r4, 58
; CHECK-NEXT: sllg %r5, %r5, 25
; CHECK-NEXT: stc %r0, 30(%r2)
; CHECK-NEXT: rosbg %r1, %r3, 6, 36, 27
; CHECK-NEXT: vlgvf %r3, %v24, 3
; CHECK-NEXT: rosbg %r5, %r4, 39, 63, 58
; CHECK-NEXT: sllg %r4, %r5, 8
; CHECK-NEXT: rosbg %r1, %r3, 37, 63, 60
; CHECK-NEXT: vlgvf %r5, %v26, 1
; CHECK-NEXT: rosbg %r4, %r1, 56, 63, 8
; CHECK-NEXT: stg %r4, 0(%r2)
; CHECK-NEXT: vlgvf %r4, %v26, 2
; CHECK-NEXT: sllg %r14, %r5, 62
; CHECK-NEXT: sllg %r3, %r3, 60
; CHECK-NEXT: rosbg %r14, %r4, 2, 32, 31
; CHECK-NEXT: rosbg %r14, %r0, 33, 63, 0
; CHECK-NEXT: srlg %r0, %r14, 24
; CHECK-NEXT: st %r0, 24(%r2)
; CHECK-NEXT: vlgvf %r0, %v26, 0
; CHECK-NEXT: rosbg %r3, %r0, 4, 34, 29
; CHECK-NEXT: sllg %r0, %r1, 8
; CHECK-NEXT: rosbg %r3, %r5, 35, 63, 62
; CHECK-NEXT: rosbg %r0, %r3, 56, 63, 8
; CHECK-NEXT: stg %r0, 8(%r2)
; CHECK-NEXT: sllg %r0, %r3, 8
; CHECK-NEXT: rosbg %r0, %r14, 56, 63, 8
; CHECK-NEXT: rosbg %r5, %r3, 39, 63, 58
; CHECK-NEXT: sllg %r3, %r5, 8
; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8
; CHECK-NEXT: stg %r3, 0(%r2)
; CHECK-NEXT: vlgvf %r3, %v26, 1
; CHECK-NEXT: sllg %r4, %r3, 62
; CHECK-NEXT: rosbg %r4, %r0, 2, 32, 31
; CHECK-NEXT: risbgn %r0, %r1, 4, 162, 29
; CHECK-NEXT: rosbg %r0, %r3, 35, 63, 62
; CHECK-NEXT: sllg %r0, %r0, 8
; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8
; CHECK-NEXT: stg %r0, 16(%r2)
; CHECK-NEXT: lmg %r14, %r15, 112(%r15)
; CHECK-NEXT: br %r14
Expand Down
33 changes: 16 additions & 17 deletions llvm/test/CodeGen/X86/bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -399,37 +399,36 @@ define i4 @test_bitreverse_i4(i4 %a) {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andb $15, %al
; X86-NEXT: andb $8, %al
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addb %cl, %dl
; X86-NEXT: andb $4, %dl
; X86-NEXT: shlb $3, %cl
; X86-NEXT: andb $8, %cl
; X86-NEXT: orb %dl, %cl
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shrb %dl
; X86-NEXT: andb $2, %dl
; X86-NEXT: orb %cl, %dl
; X86-NEXT: movb %cl, %ah
; X86-NEXT: shlb $3, %ah
; X86-NEXT: andb $8, %ah
; X86-NEXT: orb %dl, %ah
; X86-NEXT: shrb %cl
; X86-NEXT: andb $2, %cl
; X86-NEXT: orb %ah, %cl
; X86-NEXT: shrb $3, %al
; X86-NEXT: orb %dl, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i4:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: leal (%rdi,%rdi), %ecx
; X64-NEXT: leal (,%rdi,8), %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $15, %al
; X64-NEXT: andb $8, %al
; X64-NEXT: leal (%rdi,%rdi), %ecx
; X64-NEXT: andb $4, %cl
; X64-NEXT: leal (,%rdi,8), %edx
; X64-NEXT: andb $8, %dl
; X64-NEXT: orb %cl, %dl
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: shrb %cl
; X64-NEXT: andb $2, %cl
; X64-NEXT: orb %dl, %cl
; X64-NEXT: shrb %dil
; X64-NEXT: andb $2, %dil
; X64-NEXT: orb %dil, %dl
; X64-NEXT: shrb $3, %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: orb %dl, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i4:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/ctpop-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,16 @@ define i8 @test4(i8 %x) nounwind readnone {
;
; NO-POPCOUNT-LABEL: test4:
; NO-POPCOUNT: # %bb.0:
; NO-POPCOUNT-NEXT: andb $127, %dil
; NO-POPCOUNT-NEXT: movl %edi, %eax
; NO-POPCOUNT-NEXT: shrb %al
; NO-POPCOUNT-NEXT: andb $21, %al
; NO-POPCOUNT-NEXT: subb %al, %dil
; NO-POPCOUNT-NEXT: movl %edi, %ecx
; NO-POPCOUNT-NEXT: andb $127, %cl
; NO-POPCOUNT-NEXT: shrb %dil
; NO-POPCOUNT-NEXT: andb $21, %dil
; NO-POPCOUNT-NEXT: subb %dil, %cl
; NO-POPCOUNT-NEXT: movl %ecx, %eax
; NO-POPCOUNT-NEXT: andb $51, %al
; NO-POPCOUNT-NEXT: shrb $2, %cl
; NO-POPCOUNT-NEXT: andb $51, %cl
; NO-POPCOUNT-NEXT: shrb $2, %dil
; NO-POPCOUNT-NEXT: andb $51, %dil
; NO-POPCOUNT-NEXT: addb %dil, %cl
; NO-POPCOUNT-NEXT: addb %al, %cl
; NO-POPCOUNT-NEXT: movl %ecx, %eax
; NO-POPCOUNT-NEXT: shrb $4, %al
; NO-POPCOUNT-NEXT: addb %cl, %al
Expand Down
55 changes: 24 additions & 31 deletions llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ define void @i24_and_or(ptr %a) {
; X86-NEXT: shll $16, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl $384, %ecx # imm = 0x180
; X86-NEXT: andl $16777088, %ecx # imm = 0xFFFF80
; X86-NEXT: andl $-128, %ecx
; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: retl
;
Expand All @@ -53,7 +53,7 @@ define void @i24_and_or(ptr %a) {
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: orl $384, %ecx # imm = 0x180
; X64-NEXT: andl $16777088, %ecx # imm = 0xFFFF80
; X64-NEXT: andl $-128, %ecx
; X64-NEXT: movw %cx, (%rdi)
; X64-NEXT: retq
%b = load i24, ptr %a, align 1
Expand Down Expand Up @@ -114,19 +114,13 @@ define void @i56_or(ptr %a) {
;
; X64-LABEL: i56_or:
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
; X64-NEXT: movb %cl, 6(%rdi)
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: orq $384, %rax # imm = 0x180
; X64-NEXT: movl %eax, (%rdi)
; X64-NEXT: shrq $32, %rax
; X64-NEXT: movw %ax, 4(%rdi)
; X64-NEXT: movzbl 6(%rdi), %eax
; X64-NEXT: shll $16, %eax
; X64-NEXT: movzwl 4(%rdi), %ecx
; X64-NEXT: movw %cx, 4(%rdi)
; X64-NEXT: shrq $16, %rax
; X64-NEXT: movb %al, 6(%rdi)
; X64-NEXT: orl $384, (%rdi) # imm = 0x180
; X64-NEXT: retq
%aa = load i56, ptr %a, align 1
%b = or i56 %aa, 384
Expand Down Expand Up @@ -183,22 +177,21 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) {
;
; X64-LABEL: i56_insert_bit:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movzwl 4(%rdi), %ecx
; X64-NEXT: movzbl 6(%rdi), %edx
; X64-NEXT: movb %dl, 6(%rdi)
; X64-NEXT: # kill: def $edx killed $edx def $rdx
; X64-NEXT: shll $16, %edx
; X64-NEXT: orl %ecx, %edx
; X64-NEXT: shlq $32, %rdx
; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: orq %rdx, %rcx
; X64-NEXT: shlq $13, %rax
; X64-NEXT: andq $-8193, %rcx # imm = 0xDFFF
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movl %ecx, (%rdi)
; X64-NEXT: shrq $32, %rcx
; X64-NEXT: movw %cx, 4(%rdi)
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
; X64-NEXT: movb %cl, 6(%rdi)
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: shll $13, %esi
; X64-NEXT: andq $-8193, %rax # imm = 0xDFFF
; X64-NEXT: orl %eax, %esi
; X64-NEXT: shrq $32, %rax
; X64-NEXT: movw %ax, 4(%rdi)
; X64-NEXT: movl %esi, (%rdi)
; X64-NEXT: retq
%extbit = zext i1 %bit to i56
%b = load i56, ptr %a, align 1
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=-bmi | FileCheck %s

; TODO: This might not be testing the original issue anymore? Should the movl still be removed?
define fastcc i32 @t() nounwind {
; CHECK-LABEL: t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzwl 0, %eax
; CHECK-NEXT: orl $2, %eax
; CHECK-NEXT: movw %ax, 0
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: orl $2, %ecx
; CHECK-NEXT: movw %cx, 0
; CHECK-NEXT: shrl $3, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
Expand Down
19 changes: 9 additions & 10 deletions llvm/test/CodeGen/X86/load-local-v4i5.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,22 @@ define void @_start() {
; CHECK-NEXT: movzbl -9(%rsp), %ecx
; CHECK-NEXT: movzbl -10(%rsp), %edx
; CHECK-NEXT: movzbl -11(%rsp), %esi
; CHECK-NEXT: movzbl %cl, %edi
; CHECK-NEXT: shrb %cl
; CHECK-NEXT: movb %cl, -2(%rsp)
; CHECK-NEXT: andl $31, %eax
; CHECK-NEXT: andl $31, %esi
; CHECK-NEXT: shll $5, %esi
; CHECK-NEXT: orl %eax, %esi
; CHECK-NEXT: andl $31, %edx
; CHECK-NEXT: shll $10, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shll $15, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movw %cx, -4(%rsp)
; CHECK-NEXT: shrl $16, %ecx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: movb %cl, -2(%rsp)
; CHECK-NEXT: movb %al, -5(%rsp)
; CHECK-NEXT: cmpb $31, %al
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shll $15, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movw %ax, -4(%rsp)
; CHECK-NEXT: movb %dil, -5(%rsp)
; CHECK-NEXT: cmpb $31, %dil
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %Then
; CHECK-NEXT: int3
Expand Down
50 changes: 24 additions & 26 deletions llvm/test/CodeGen/X86/masked_compressstore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -517,21 +517,20 @@ define void @compressstore_v16f64_v16i1(ptr %base, <16 x double> %V, <16 x i1> %
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: movzbl %al, %eax
; AVX512F-NEXT: movl %eax, %ecx
; AVX512F-NEXT: shrl %ecx
; AVX512F-NEXT: andl $-43, %ecx
; AVX512F-NEXT: subl %ecx, %eax
; AVX512F-NEXT: movl %eax, %ecx
; AVX512F-NEXT: andl $858993459, %ecx ## imm = 0x33333333
; AVX512F-NEXT: shrl $2, %eax
; AVX512F-NEXT: movzbl %al, %ecx
; AVX512F-NEXT: shrl %eax
; AVX512F-NEXT: andl $85, %eax
; AVX512F-NEXT: subl %eax, %ecx
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: andl $858993459, %eax ## imm = 0x33333333
; AVX512F-NEXT: addl %ecx, %eax
; AVX512F-NEXT: movl %eax, %ecx
; AVX512F-NEXT: shrl $4, %ecx
; AVX512F-NEXT: shrl $2, %ecx
; AVX512F-NEXT: andl $858993459, %ecx ## imm = 0x33333333
; AVX512F-NEXT: addl %eax, %ecx
; AVX512F-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F
; AVX512F-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrl $4, %eax
; AVX512F-NEXT: addl %ecx, %eax
; AVX512F-NEXT: andl $252645135, %eax ## imm = 0xF0F0F0F
; AVX512F-NEXT: imull $16843009, %eax, %eax ## imm = 0x1010101
; AVX512F-NEXT: shrl $24, %eax
; AVX512F-NEXT: kshiftrw $8, %k1, %k2
; AVX512F-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
Expand Down Expand Up @@ -571,21 +570,20 @@ define void @compressstore_v16f64_v16i1(ptr %base, <16 x double> %V, <16 x i1> %
; AVX512VLBW-NEXT: vpsllw $7, %xmm2, %xmm2
; AVX512VLBW-NEXT: vpmovb2m %xmm2, %k1
; AVX512VLBW-NEXT: kmovd %k1, %eax
; AVX512VLBW-NEXT: movzbl %al, %eax
; AVX512VLBW-NEXT: movl %eax, %ecx
; AVX512VLBW-NEXT: shrl %ecx
; AVX512VLBW-NEXT: andl $-43, %ecx
; AVX512VLBW-NEXT: subl %ecx, %eax
; AVX512VLBW-NEXT: movl %eax, %ecx
; AVX512VLBW-NEXT: andl $858993459, %ecx ## imm = 0x33333333
; AVX512VLBW-NEXT: shrl $2, %eax
; AVX512VLBW-NEXT: movzbl %al, %ecx
; AVX512VLBW-NEXT: shrl %eax
; AVX512VLBW-NEXT: andl $85, %eax
; AVX512VLBW-NEXT: subl %eax, %ecx
; AVX512VLBW-NEXT: movl %ecx, %eax
; AVX512VLBW-NEXT: andl $858993459, %eax ## imm = 0x33333333
; AVX512VLBW-NEXT: addl %ecx, %eax
; AVX512VLBW-NEXT: movl %eax, %ecx
; AVX512VLBW-NEXT: shrl $4, %ecx
; AVX512VLBW-NEXT: shrl $2, %ecx
; AVX512VLBW-NEXT: andl $858993459, %ecx ## imm = 0x33333333
; AVX512VLBW-NEXT: addl %eax, %ecx
; AVX512VLBW-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F
; AVX512VLBW-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101
; AVX512VLBW-NEXT: movl %ecx, %eax
; AVX512VLBW-NEXT: shrl $4, %eax
; AVX512VLBW-NEXT: addl %ecx, %eax
; AVX512VLBW-NEXT: andl $252645135, %eax ## imm = 0xF0F0F0F
; AVX512VLBW-NEXT: imull $16843009, %eax, %eax ## imm = 0x1010101
; AVX512VLBW-NEXT: shrl $24, %eax
; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k2
; AVX512VLBW-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) {
; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000
; CHECK-NEXT: cmovll %ecx, %edx
; CHECK-NEXT: pextrw $1, %xmm0, %esi
; CHECK-NEXT: movswl %si, %edi
; CHECK-NEXT: leal (%rdi,%rdi), %eax
; CHECK-NEXT: leal (%rsi,%rsi), %edi
; CHECK-NEXT: movswl %si, %eax
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shrl $16, %esi
; CHECK-NEXT: shldw $1, %ax, %si
; CHECK-NEXT: sarl $15, %edi
; CHECK-NEXT: cmpl $16384, %edi # imm = 0x4000
; CHECK-NEXT: shldw $1, %di, %si
; CHECK-NEXT: sarl $16, %eax
; CHECK-NEXT: cmpl $16384, %eax # imm = 0x4000
; CHECK-NEXT: cmovgel %r8d, %esi
; CHECK-NEXT: cmpl $-16384, %edi # imm = 0xC000
; CHECK-NEXT: cmpl $-16384, %eax # imm = 0xC000
; CHECK-NEXT: cmovll %ecx, %esi
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: cwtl
Expand Down