Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Split 64-bit G_CTPOP in RegBankSelect
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Feb 9, 2020
1 parent 6135f5e commit c437f6c
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 10 deletions.
41 changes: 38 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Expand Up @@ -2084,6 +2084,29 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MI.eraseFromParent();
return;
}
case AMDGPU::G_CTPOP: {
  // Split a wide G_CTPOP whose result was mapped to a non-SGPR bank into
  // 32-bit operations. Cases that need no rewriting simply leave the switch.
  MachineIRBuilder B(MI);
  MachineFunction &MF = B.getMF();

  // The bank chosen for the result (operand 0) decides whether any splitting
  // is required; only the first breakdown entry is consulted.
  const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  if (DstBank == &AMDGPU::SGPRRegBank)
    break;

  // A source that is already 32 bits wide has nothing to split.
  Register SrcReg = MI.getOperand(1).getReg();
  const LLT S32 = LLT::scalar(32);
  LLT Ty = MRI.getType(SrcReg);
  if (Ty == S32)
    break;

  // Drive the generic LegalizerHelper through an observer constructed for the
  // VGPR bank.
  // NOTE(review): presumably ApplyRegBankMapping assigns that bank to the
  // registers of the instructions the helper creates -- confirm against its
  // definition.
  ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&ApplyVALU);
  LegalizerHelper Helper(MF, Observer, B);

  // Narrow type index 1 (the source operand's type) down to 32 bits. Failure
  // here is treated as a programming error rather than a recoverable state.
  if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
    llvm_unreachable("narrowScalar should have succeeded");
  return;
}
case AMDGPU::G_SEXT:
case AMDGPU::G_ZEXT: {
Register SrcReg = MI.getOperand(1).getReg();
Expand Down Expand Up @@ -3172,9 +3195,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_BITCAST:
case AMDGPU::G_INTTOPTR:
case AMDGPU::G_PTRTOINT:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP:
case AMDGPU::G_BSWAP:
case AMDGPU::G_BITREVERSE:
case AMDGPU::G_FABS:
Expand All @@ -3184,6 +3204,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break;
}
case AMDGPU::G_CTLZ:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ:
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP: {
  // Both operand mappings are derived from the source register (operand 1).
  const Register SrcReg = MI.getOperand(1).getReg();
  const unsigned SrcBits = MRI.getType(SrcReg).getSizeInBits();
  const unsigned SrcBankID = getRegBankID(SrcReg, MRI, *TRI);

  // The result is mapped as a 32-bit value in the source's bank.
  OpdsMapping[0] = AMDGPU::getValueMapping(SrcBankID, 32);

  // getValueMappingSGPR64Only would be the more accurate mapping for the
  // source, but letting the generic code perform the register split makes
  // LegalizerHelper harder to use, so the source keeps its full width here.
  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcBits);
  break;
}
case AMDGPU::G_TRUNC: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
Expand Down
55 changes: 48 additions & 7 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir
Expand Up @@ -3,29 +3,70 @@
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s

---
name: ctpop_i32_s
name: ctpop_s32_s
legalized: true

body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: ctpop_i32_s
; CHECK-LABEL: name: ctpop_s32_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]]
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32)
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_CTPOP %0
S_ENDPGM 0, implicit %1
...

---
name: ctpop_i32_v
name: ctpop_s32_v
legalized: true

body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: ctpop_i32_v
liveins: $vgpr0
; CHECK-LABEL: name: ctpop_s32_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]]
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32)
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_CTPOP %0
S_ENDPGM 0, implicit %1
...

---
name: ctpop_s64_s
legalized: true

body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: ctpop_s64_s
; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64)
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
%0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CTPOP %0
S_ENDPGM 0, implicit %1
...

---
name: ctpop_s64_v
legalized: true

body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: ctpop_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32)
; CHECK: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32)
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]]
; CHECK: S_ENDPGM 0, implicit [[ADD]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CTPOP %0
S_ENDPGM 0, implicit %1
...

0 comments on commit c437f6c

Please sign in to comment.