diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 095c75015d0a6..2129588d4aedc 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -100,6 +100,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f553ec92c3f56..4de5578ca7cf3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -484,7 +484,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { const unsigned SrcSize = SrcTy.getSizeInBits(); if (SrcSize < 32) - return false; + return selectImpl(MI, *CoverageInfo); const DebugLoc &DL = MI.getDebugLoc(); const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 9e76c47038b55..9db6946ceb4a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -158,20 +158,30 @@ def brtarget : Operand; class HasOneUseUnaryOp : PatFrag< (ops node:$src0), (op $src0), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} class HasOneUseBinOp : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} class HasOneUseTernaryOp : PatFrag< (ops node:$src0, node:$src1, node:$src2), (op $src0, $src1, $src2), - [{ return N->hasOneUse(); }] ->; + [{ return N->hasOneUse(); }]> { + let GISelPredicateCode = [{ + return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg()); + }]; +} let Properties = [SDNPCommutative, SDNPAssociative] in { def smax_oneuse : HasOneUseBinOp; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 32c05ae10b6ba..5bd11592df1f4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1793,54 +1793,59 @@ def : ExpPattern; // COPY is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. def : GCNPat < - (v2i16 (build_vector (i16 0), i16:$src1)), - (v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16)))) + (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))), + (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; def : GCNPat < - (v2i16 (build_vector i16:$src0, (i16 undef))), - (v2i16 (COPY $src0)) + (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))), + (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) +>; + +def : GCNPat < + (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))), + (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32) >; def : GCNPat < (v2f16 (build_vector f16:$src0, (f16 undef))), - (v2f16 (COPY $src0)) + (COPY $src0) >; def : GCNPat < - (v2i16 (build_vector (i16 undef), i16:$src1)), - (v2i16 (COPY (S_LSHL_B32 $src1, (i32 16)))) + (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))), + (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; def : GCNPat < - (v2f16 (build_vector (f16 undef), f16:$src1)), - (v2f16 (COPY (S_LSHL_B32 $src1, (i32 16)))) + (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))), + (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < - (v2i16 (build_vector i16:$src0, i16:$src1)), - (v2i16 (S_PACK_LL_B32_B16 $src0, $src1)) + (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))), + (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. def : GCNPat < - (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), - (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1)) + (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; def : GCNPat < - (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))), - (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), - (v2i16 (S_PACK_HH_B32_B16 $src0, $src1)) + (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), + (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1) >; // TODO: Should source modifiers be matched to v_pack_b32_f16? def : GCNPat < - (v2f16 (build_vector f16:$src0, f16:$src1)), - (v2f16 (S_PACK_LL_B32_B16 $src0, $src1)) + (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))), + (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; } // End SubtargetPredicate = HasVOP3PInsts diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.v2s16.mir new file mode 100644 index 0000000000000..b14b17fc4322e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.v2s16.mir @@ -0,0 +1,239 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s + +--- +name: test_build_vector_s_v2s16_s_s16_s_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16 + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_TRUNC %1 + + %4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: test_build_vector_s_pack_lh +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_s_pack_lh + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %1, %2 + + %4:sgpr(s16) = G_TRUNC %0 + %5:sgpr(s16) = G_TRUNC %3 + + %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5 + S_ENDPGM 0, implicit %6 +... + +# There is no s_pack_hl_b32 +--- +name: test_build_vector_s_pack_lh_swapped +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %1, %2 + + %4:sgpr(s16) = G_TRUNC %0 + %5:sgpr(s16) = G_TRUNC %3 + + %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4 + S_ENDPGM 0, implicit %6 +... + +--- +name: test_build_vector_s_pack_hh +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; GFX9-LABEL: name: test_build_vector_s_pack_hh + ; GFX9: liveins: $sgpr0, $sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + + %2:sgpr(s32) = G_CONSTANT i32 16 + %3:sgpr(s32) = G_LSHR %0, %2 + %4:sgpr(s32) = G_LSHR %1, %2 + + %5:sgpr(s16) = G_TRUNC %3 + %6:sgpr(s16) = G_TRUNC %4 + + %7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6 + S_ENDPGM 0, implicit %7 +... + +# TODO: Should this use an and instead? +--- +name: test_build_vector_s_v2s16_s_s16_s_0_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]] + ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]] + %0:sgpr(s32) = COPY $sgpr0 + + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_CONSTANT i16 0 + + %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_build_vector_s_v2s16_s_0_s16_s_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + + %1:sgpr(s16) = G_CONSTANT i16 0 + %2:sgpr(s16) = G_TRUNC %0 + + %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_build_vector_v_v2s16_v_s16_s_undef_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: S_ENDPGM 0, implicit [[COPY]] + %0:vgpr(s32) = COPY $vgpr0 + + %1:vgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_IMPLICIT_DEF + + %3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_build_vector_s_v2s16_s_s16_s_undef_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: S_ENDPGM 0, implicit [[COPY]] + %0:sgpr(s32) = COPY $sgpr0 + + %1:sgpr(s16) = G_TRUNC %0 + %2:sgpr(s16) = G_IMPLICIT_DEF + + %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_build_vector_s_v2s16_s_undef_s16_s_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16 + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + + %1:sgpr(s16) = G_IMPLICIT_DEF + %2:sgpr(s16) = G_TRUNC %0 + + %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2 + S_ENDPGM 0, implicit %3 +...