[GlobalISel] Combine scalar unmerge(trunc)
Summary:
Combine unmerge(trunc) to enable other merge combines.
Without this combine, the scalar unmerge(trunc(merge))
pattern cannot be combined and easily leads to
hard-to-legalize merge/unmerge artifacts.

Reviewed By: arsenm

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79567
gargaroff committed Jun 2, 2020
1 parent b3c6a36 commit 052c962
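
The claim behind the combine can be sanity-checked outside of LLVM. Below is a minimal sketch in plain C++ (no LLVM APIs; the unmerge helper is a stand-in for G_UNMERGE_VALUES' lowest-bits-first piece order, and the concrete values are made up for illustration): truncating an s32 to s16 and unmerging into two s8 pieces yields the same values as unmerging the s32 directly into four s8 pieces and keeping the first two.

#include <cassert>
#include <cstdint>
#include <vector>

// Split Val into NumPieces chunks of Width bits each, lowest bits first.
static std::vector<uint64_t> unmerge(uint64_t Val, unsigned Width,
                                     unsigned NumPieces) {
  std::vector<uint64_t> Pieces;
  const uint64_t Mask =
      (Width == 64) ? ~uint64_t(0) : ((uint64_t(1) << Width) - 1);
  for (unsigned I = 0; I != NumPieces; ++I)
    Pieces.push_back((Val >> (I * Width)) & Mask);
  return Pieces;
}

int main() {
  const uint64_t Src = 0xAABBCCDD;     // %0:_(s32)
  const uint64_t Trunc = Src & 0xFFFF; // %1:_(s16) = G_TRUNC %0
  auto Narrow = unmerge(Trunc, 8, 2);  // %2:_(s8), %3:_(s8) from %1
  auto Wide = unmerge(Src, 8, 4);      // %2..%5:_(s8) from %0
  // The leading pieces agree, so the trunc can be folded away.
  assert(Narrow[0] == Wide[0] && Narrow[1] == Wide[1]);
  return 0;
}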
Showing 10 changed files with 613 additions and 1,063 deletions.
@@ -350,6 +350,9 @@ class LegalizationArtifactCombiner {
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
@@ -379,6 +382,41 @@ class LegalizationArtifactCombiner {
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        // %1:_(s16) = G_TRUNC %0(s32)
        // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 2> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
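As a rough model of the bookkeeping in the hunk above (plain C++, with integers standing in for virtual registers; the concrete sizes and register numbers are hypothetical): for the pattern in the in-code comment, CastSrcSize = 32 and DestSize = 8, so the rebuilt unmerge gets four defs, of which the first NumDefs = 2 replace the original defs and the rest are fresh registers covering the bits the trunc discarded.

#include <cassert>
#include <vector>

int main() {
  const unsigned CastSrcSize = 32; // size of %0, the trunc's source
  const unsigned DestSize = 8;     // size of each unmerge def
  const unsigned NumDefs = 2;      // defs on the original unmerge

  // The combine bails out unless the source splits evenly.
  assert(CastSrcSize % DestSize == 0);
  const unsigned NewNumDefs = CastSrcSize / DestSize; // 4 pieces

  // Slots 0..NumDefs-1 reuse the original def registers (%2, %3); the
  // remaining slots get fresh registers (%4, %5) for the dropped bits.
  unsigned NextVReg = 4; // hypothetical next unused vreg number
  std::vector<unsigned> DstRegs(NewNumDefs);
  for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx)
    DstRegs[Idx] = (Idx < NumDefs) ? 2 + Idx : NextVReg++;

  assert((DstRegs == std::vector<unsigned>{2, 3, 4, 5}));
  return 0;
}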
@@ -23,12 +23,11 @@ body: |
liveins: $w0
; CHECK-LABEL: name: test_unmerge_s4
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[C]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s8)
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[LSHR]](s32)
; CHECK: $x0 = COPY [[ANYEXT]](s64)
; CHECK: $x1 = COPY [[ANYEXT1]](s64)
@@ -929,9 +929,8 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -952,20 +951,19 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; CHECK: S_ENDPGM 0, implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16), implicit [[TRUNC6]](s16)
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = COPY $vgpr4_vgpr5
21 changes: 8 additions & 13 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -167,9 +167,8 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind {
; SI-LABEL: v_uitofp_v2i8_to_v2f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; SI-NEXT: s_movk_i32 s4, 0xff
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; SI-NEXT: v_and_b32_e32 v0, s4, v0
; SI-NEXT: v_and_b32_e32 v1, s4, v1
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
@@ -179,12 +178,9 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind {
; VI-LABEL: v_uitofp_v2i8_to_v2f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_movk_i32 s4, 0xff
; VI-NEXT: v_mov_b32_e32 v1, s4
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v0
; VI-NEXT: v_mov_b32_e32 v0, v2
; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: s_setpc_b64 s[30:31]
%val = bitcast i16 %arg0 to <2 x i8>
%cvt = uitofp <2 x i8> %val to <2 x float>
@@ -195,10 +191,9 @@ define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
; SI-LABEL: v_uitofp_v3i8_to_v3f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; SI-NEXT: s_movk_i32 s4, 0xff
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; SI-NEXT: v_and_b32_e32 v0, s4, v0
; SI-NEXT: v_and_b32_e32 v1, s4, v1
; SI-NEXT: v_and_b32_e32 v2, s4, v2
@@ -212,11 +207,11 @@ define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind {
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_movk_i32 s4, 0xff
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_and_b32_sdwa v1, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
; VI-NEXT: v_mov_b32_e32 v0, v3
; VI-NEXT: s_setpc_b64 s[30:31]
%trunc = trunc i32 %arg0 to i24
69 changes: 30 additions & 39 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir
@@ -291,26 +291,19 @@ body: |
; CHECK-LABEL: name: test_bitcast_s24_to_v3s8
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32)
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s24) = G_TRUNC %0
@@ -328,36 +321,31 @@ body: |
; CHECK-LABEL: name: test_bitcast_s48_to_v3s16
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192)
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
; CHECK: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
@@ -449,14 +437,17 @@ body: |
; CHECK-LABEL: name: test_bitcast_s16_to_v2s8
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0
