3,066 changes: 187 additions & 2,879 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll

Large diffs are not rendered by default.

6 changes: 0 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
@@ -160,9 +160,6 @@ define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
; GFX8-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000
; GFX8-NEXT: v_xor_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
@@ -198,9 +195,6 @@ define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
; GFX8-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000
; GFX8-NEXT: v_xor_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
7 changes: 0 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -4583,10 +4583,6 @@ define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <
;
; GFX8-LABEL: s_fshr_v3i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s8, s4, 16
; GFX8-NEXT: s_and_b32 s4, s4, 0xffff
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s4, s4, s8
; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000
; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000
; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000
@@ -4844,9 +4840,6 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
; GFX8-LABEL: v_fshr_v3i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX8-NEXT: v_or_b32_sdwa v4, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v0
; GFX8-NEXT: v_lshrrev_b16_e32 v8, 15, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
6,890 changes: 1,039 additions & 5,851 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll

Large diffs are not rendered by default.

200 changes: 79 additions & 121 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir

Large diffs are not rendered by default.

12 changes: 3 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -1232,23 +1232,17 @@ body: |
; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; VI-LABEL: name: test_ashr_s128_s32_0
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; GFX9PLUS-LABEL: name: test_ashr_s128_s32_0
; GFX9PLUS: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX9PLUS-NEXT: {{ $}}
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; GFX9PLUS-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_CONSTANT i32 0
%3:_(s128) = G_ASHR %0, %1
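The checks retained above suggest that an unmerge whose pieces are immediately re-merged into the same type is now folded away, so the 128-bit shift by zero legalizes to a plain copy of its input. A minimal sketch of that identity in generic MIR (illustrative names, not taken from this patch):

; Before: the value is split and immediately re-assembled.
%src:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%lo:_(s64), %hi:_(s64) = G_UNMERGE_VALUES %src(s128)
%same:_(s128) = G_MERGE_VALUES %lo(s64), %hi(s64)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %same(s128)

; After folding the G_UNMERGE_VALUES/G_MERGE_VALUES pair:
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %src(s128)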
29 changes: 1 addition & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir
@@ -247,34 +247,7 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
%3:_(<6 x s16>) = G_CONCAT_VECTORS %1, %2
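The same kind of fold shows up here for vectors: unmerging a <6 x s16> into two <3 x s16> halves and concatenating those same halves back together is an identity, so the checks now expect a single copy instead of the earlier per-lane shift-and-or repacking. A hedged sketch of the pattern this test exercises:

%vec:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%lo:_(<3 x s16>), %hi:_(<3 x s16>) = G_UNMERGE_VALUES %vec(<6 x s16>)
%cat:_(<6 x s16>) = G_CONCAT_VECTORS %lo(<3 x s16>), %hi(<3 x s16>)
; %cat is just %vec re-assembled, so the result can be copied directly:
$vgpr0_vgpr1_vgpr2 = COPY %vec(<6 x s16>)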
753 changes: 253 additions & 500 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctpop.mir

Large diffs are not rendered by default.

334 changes: 26 additions & 308 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir

Large diffs are not rendered by default.

22 changes: 2 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -979,16 +979,7 @@ body: |
; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -1002,16 +993,7 @@
; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -401,17 +401,13 @@ body: |
; CHECK-LABEL: name: test_freeze_v33s32
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]]
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>)
; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32), [[UV48]](s32), [[UV49]](s32), [[UV50]](s32), [[UV51]](s32), [[UV52]](s32), [[UV53]](s32), [[UV54]](s32), [[UV55]](s32), [[UV56]](s32), [[UV57]](s32), [[UV58]](s32), [[UV59]](s32), [[UV60]](s32), [[UV61]](s32), [[UV62]](s32), [[UV63]](s32), [[FREEZE2]](s32)
; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<33 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>)
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE2]](s32)
; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(<33 x s32>) = G_FREEZE %0
S_NOP 0, implicit %1
110 changes: 45 additions & 65 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
@@ -839,75 +839,55 @@ body: |
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR4]], [[BUILD_VECTOR6]]
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16)
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR4]], [[BUILD_VECTOR7]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR6]]
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[AND]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR2]], [[BUILD_VECTOR8]](<2 x s16>)
; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR3]], [[AND1]](<2 x s16>)
; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR4]]
; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR5]], [[BUILD_VECTOR9]]
; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16)
; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR5]], [[BUILD_VECTOR10]]
; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR9]]
; GFX9-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR1]], [[AND2]](<2 x s16>)
; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR3]], [[BUILD_VECTOR11]](<2 x s16>)
; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>)
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]]
; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
; GFX9-NEXT: [[BUILD_VECTOR12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[TRUNC10]](s16)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY4]], [[BUILD_VECTOR3]]
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BUILD_VECTOR4]]
; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR3]]
; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY2]], [[BUILD_VECTOR5]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>)
; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]]
; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR6]]
; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR7]]
; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR6]]
; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[AND2]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR1]], [[BUILD_VECTOR8]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>)
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]]
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32)
; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32)
; GFX9-NEXT: [[BUILD_VECTOR13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC13]](s16), [[TRUNC14]](s16)
; GFX9-NEXT: [[BUILD_VECTOR14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC11]](s16), [[TRUNC12]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR12]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR14]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR13]](<2 x s16>)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C3]](s32)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; GFX9-NEXT: [[BUILD_VECTOR9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
; GFX9-NEXT: [[BUILD_VECTOR10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR10]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR9]](<2 x s16>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = COPY $vgpr2
506 changes: 236 additions & 270 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir

Large diffs are not rendered by default.

47 changes: 5 additions & 42 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -488,30 +488,23 @@ body: |
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
@@ -528,38 +521,8 @@
; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV3]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
53 changes: 12 additions & 41 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -829,47 +829,18 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C3]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[SHL3]](s32)
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[SHL3]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL5]], [[C5]]
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[XOR]]
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C]](s32)
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C1]](s32)
; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C2]](s32)
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]]
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL6]]
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]]
; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL7]]
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C3]]
; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL8]]
; CHECK-NEXT: $vgpr0 = COPY [[OR6]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[SHL]](s32)
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[SHL]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
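For the dynamic byte insert above, the surviving checks compute the insert directly on the packed 32-bit value: the lane index is masked to 0-3 and shifted left by 3 (multiplied by 8) to form a bit offset, the replacement byte and a 0xff mask are moved to that offset, and the old lane is cleared before the new byte is OR-ed in. A commented sketch of the same computation with illustrative names (for example, inserting 0xAB at index 2 of 0x44332211 yields 0x44AB2211):

%vec:_(s32) = COPY $vgpr0        ; <4 x s8> packed into one s32
%elt:_(s32) = COPY $vgpr1        ; byte to insert (in the low 8 bits)
%idx:_(s32) = COPY $vgpr2        ; dynamic lane index
%c3:_(s32) = G_CONSTANT i32 3
%lane:_(s32) = G_AND %idx, %c3           ; clamp the index to 0-3
%off:_(s32) = G_SHL %lane, %c3(s32)      ; bit offset = lane * 8
%c255:_(s32) = G_CONSTANT i32 255
%eltlo:_(s32) = G_AND %elt, %c255
%new:_(s32) = G_SHL %eltlo, %off(s32)    ; new byte moved to its lane
%mask:_(s32) = G_SHL %c255, %off(s32)    ; 0xff at the target lane
%allones:_(s32) = G_CONSTANT i32 -1
%keep:_(s32) = G_XOR %mask, %allones     ; ~mask preserves the other lanes
%hole:_(s32) = G_AND %vec, %keep         ; clear the old byte
%res:_(s32) = G_OR %hole, %new
$vgpr0 = COPY %res(s32)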
166 changes: 48 additions & 118 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir

Large diffs are not rendered by default.

@@ -400,24 +400,17 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -640,21 +633,13 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -707,21 +692,13 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x half> %tex
@@ -1210,22 +1187,14 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1287,22 +1256,14 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
@@ -1364,22 +1325,14 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>)
; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32)
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%tex = extractvalue { <3 x half>, i32 } %res, 0
333 changes: 37 additions & 296 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir

Large diffs are not rendered by default.

843 changes: 43 additions & 800 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir

Large diffs are not rendered by default.

1,766 changes: 180 additions & 1,586 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir

Large diffs are not rendered by default.

1,313 changes: 108 additions & 1,205 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir

Large diffs are not rendered by default.

667 changes: 25 additions & 642 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir

Large diffs are not rendered by default.

12 changes: 3 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -1222,23 +1222,17 @@ body: |
; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; VI-LABEL: name: test_lshr_s128_s32_0
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; GFX9-LABEL: name: test_lshr_s128_s32_0
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_CONSTANT i32 0
%3:_(s128) = G_LSHR %0, %1
210 changes: 84 additions & 126 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir

Large diffs are not rendered by default.

25 changes: 9 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -171,32 +171,25 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>)
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32)
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32)
; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C6]](s32)
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]]
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]]
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C7]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
73 changes: 26 additions & 47 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -399,63 +399,42 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>)
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]]
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]]
; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]]
; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C2]]
; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32)
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]]
; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C1]](s32)
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]]
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C2]]
; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
42 changes: 18 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
@@ -469,10 +469,9 @@ body: |
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
; GFX8-LABEL: name: test_sext_inreg_s96_8
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
@@ -483,10 +482,9 @@ body: |
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192)
; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
; GFX6-LABEL: name: test_sext_inreg_s96_8
; GFX6: liveins: $vgpr0_vgpr1_vgpr2
@@ -497,10 +495,9 @@ body: |
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV2]](s192)
; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64)
; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC1]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = G_SEXT_INREG %0, 8
@@ -563,10 +560,9 @@ body: |
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160)
; GFX8-LABEL: name: test_sext_inreg_s160_8
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
@@ -577,10 +573,9 @@ body: |
; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320)
; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64)
; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160)
; GFX6-LABEL: name: test_sext_inreg_s160_8
; GFX6: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
@@ -591,10 +586,9 @@ body: |
; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64)
; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320)
; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32)
; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[SEXT_INREG]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64), [[MV]](s64)
; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[TRUNC1]](s160)
%0:_(s160) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
%1:_(s160) = G_SEXT_INREG %0, 8
12 changes: 3 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -1153,23 +1153,17 @@ body: |
; SI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; SI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; VI-LABEL: name: test_shl_s128_s32_0
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; VI-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
; GFX9-LABEL: name: test_shl_s128_s32_0
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; GFX9-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[COPY]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s32) = G_CONSTANT i32 0
%3:_(s128) = G_SHL %0, %1
207 changes: 50 additions & 157 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir

Large diffs are not rendered by default.

137 changes: 37 additions & 100 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
@@ -109,38 +109,22 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR5]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY]], [[COPY2]]
; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%3:_(<2 x s16>) = COPY $vgpr0
%4:_(<2 x s16>) = COPY $vgpr1
@@ -209,34 +193,18 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR3]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[SHL]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%3:_(<2 x s16>) = COPY $vgpr0
%4:_(<2 x s16>) = COPY $vgpr1
@@ -421,62 +389,31 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>)
; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>)
; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC9]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR3]]
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]]
; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY3]]
; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR4]]
; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]]
; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY4]]
; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR2]]
; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR5]]
; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32)
; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
; GFX9-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32)
; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16)
; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC12]](s16), [[TRUNC13]](s16)
; GFX9-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC14]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR6]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR8]](<2 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%2:_(<2 x s16>) = COPY $vgpr0
%3:_(<2 x s16>) = COPY $vgpr1
198 changes: 78 additions & 120 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir

Large diffs are not rendered by default.

15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -792,16 +792,13 @@ body: |
; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32)
; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]]
; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32)
; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]]
; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]]
; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32)
; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]]
; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[OR12]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR12]](s32)
; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]]
; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32)
; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[UV8]](s32)
; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]]
; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]]
; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32)
; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384)
; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
63 changes: 26 additions & 37 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll
@@ -698,16 +698,12 @@ define amdgpu_ps i48 @s_orn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1)
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_lshr_b32 s5, s0, 16
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_or_b32 s0, s0, s4
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: s_lshr_b32 s2, s0, 16
@@ -756,14 +752,10 @@ define amdgpu_ps i48 @s_orn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inre
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_lshr_b32 s5, s0, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s5
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
@@ -808,30 +800,31 @@ define amdgpu_ps { i48, i48 } @s_orn2_v3i16_multi_use(<3 x i16> inreg %src0, <3
; GFX6-LABEL: s_orn2_v3i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_mov_b32 s3, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_lshr_b32 s5, s0, 16
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_xor_b64 s[2:3], s[6:7], s[2:3]
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s7, s4, 0xffff
; GFX6-NEXT: s_and_b32 s4, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_or_b32 s6, s2, s3
; GFX6-NEXT: s_or_b32 s2, s4, s5
; GFX6-NEXT: s_and_b32 s3, s1, 0xffff
; GFX6-NEXT: s_or_b64 s[0:1], s[6:7], s[2:3]
; GFX6-NEXT: s_lshr_b32 s2, s0, 16
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: s_lshr_b32 s4, s0, 16
; GFX6-NEXT: s_lshr_b32 s5, s2, 16
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_lshl_b32 s4, s4, 16
; GFX6-NEXT: s_or_b32 s0, s0, s4
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s4, s5, 16
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_or_b32 s2, s4, s5
; GFX6-NEXT: s_or_b32 s2, s2, s4
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_orn2_v3i16_multi_use:
@@ -883,22 +876,18 @@ define <3 x i16> @v_orn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_or_b32_e32 v2, v1, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
; GFX6-NEXT: v_or_b32_e32 v2, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;