diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 83ebb045247427..7706b085a0be44 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4144,6 +4144,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, Register Use = MI.getOperand(3).getReg(); MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB(); + B.setInsertPt(B.getMBB(), BrCond->getIterator()); if (IntrID == Intrinsic::amdgcn_if) { B.buildInstr(AMDGPU::SI_IF) .addDef(Def) @@ -4184,6 +4185,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB(); Register Reg = MI.getOperand(2).getReg(); + + B.setInsertPt(B.getMBB(), BrCond->getIterator()); B.buildInstr(AMDGPU::SI_LOOP) .addUse(Reg) .addMBB(UncondBrTarget); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir index 52d44d2d08fb6d..068ad6780a427f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-brcond.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE64 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s --- name: legal_brcond_vcc @@ -310,3 +310,99 @@ body: | bb.2: ... 
+
+# A COPY separates the llvm.amdgcn.if intrinsic from its G_BRCOND, so the
+# legalizer must build SI_IF at the branch rather than at the intrinsic.
+---
+name: brcond_si_if_need_insert_terminator_point
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_if_need_insert_terminator_point
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE64:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; WAVE64:   [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.1
+  ; WAVE64: bb.1:
+  ; WAVE64:   S_ENDPGM 0, implicit [[COPY2]](s32)
+  ; WAVE32-LABEL: name: brcond_si_if_need_insert_terminator_point
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; WAVE32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; WAVE32:   [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.1
+  ; WAVE32: bb.1:
+  ; WAVE32:   S_ENDPGM 0, implicit [[COPY2]](s32)
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_ICMP intpred(ne), %0, %1
+    %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+    %5:_(s32) = COPY $vgpr2
+    G_BRCOND %3, %bb.1
+
+  bb.1:
+    S_ENDPGM 0, implicit %5
+...
+# S_NOPs separate llvm.amdgcn.loop from its G_BRCOND; the checks expect SI_LOOP at the branch, targeting %bb.1, with G_BR retargeted to %bb.2.
+---
+name: brcond_si_loop_need_terminator_insert_point
+tracksRegLiveness: true
+body:             |
+  ; WAVE64-LABEL: name: brcond_si_loop_need_terminator_insert_point
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE64:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE64:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE64:   [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE64: bb.1:
+  ; WAVE64:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   S_NOP 0
+  ; WAVE64:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE64:   G_BR %bb.2
+  ; WAVE64: bb.2:
+  ; WAVE64:   S_NOP 0
+  ; WAVE32-LABEL: name: brcond_si_loop_need_terminator_insert_point
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+  ; WAVE32:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; WAVE32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; WAVE32:   [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+  ; WAVE32: bb.1:
+  ; WAVE32:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   S_NOP 0
+  ; WAVE32:   SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; WAVE32:   G_BR %bb.2
+  ; WAVE32: bb.2:
+  ; WAVE32:   S_NOP 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s64) = COPY $sgpr0_sgpr1
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    S_NOP 0
+    %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+    S_NOP 0
+    S_NOP 0
+    G_BRCOND %3, %bb.2
+    G_BR %bb.1
+
+  bb.2:
+    S_NOP 0
+...