GlobalISel: Add default implementation of assignValueToReg
Refactor insertion of the asserting ops. This enables using them for
AMDGPU.

This code should essentially be the same for every target. Mips, X86
and ARM all have different code there now, but this seems to be an
accident. The assignment functions are called with different types
than they would be in the DAG, so this is all likely an assortment of
hacks to get around that.
arsenm committed Mar 3, 2021
1 parent 8da0903 commit 78dcff4
Showing 6 changed files with 115 additions and 76 deletions.
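The shape of the change is easiest to see from the target side: a handler that previously open-coded the copy/assert/truncate sequence now does its own bookkeeping and delegates to the new default. A minimal sketch, modeled on the AArch64 hunk below (markPhysRegUsed is that target's existing hook; this is not a drop-in implementation):

void assignValueToReg(Register ValVReg, Register PhysReg,
                      CCValAssign &VA) override {
  // Target-specific bookkeeping first; the shared default in CallLowering.cpp
  // then emits the copy, the G_ASSERT_ZEXT/G_ASSERT_SEXT hint, and the trunc.
  markPhysRegUsed(PhysReg);
  IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}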
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -209,6 +209,14 @@ class CallLowering {
IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: ValueHandler(true, MIRBuilder, MRI, AssignFn) {}

/// Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on \p
/// VA, returning the new register if a hint was inserted.
Register buildExtensionHint(CCValAssign &VA, Register SrcReg, LLT NarrowTy);

/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
};

struct OutgoingValueHandler : public ValueHandler {
38 changes: 38 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -985,3 +985,41 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
}

void CallLowering::ValueHandler::anchor() {}

Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
Register SrcReg,
LLT NarrowTy) {
switch (VA.getLocInfo()) {
case CCValAssign::LocInfo::ZExt: {
return MIRBuilder
.buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
NarrowTy.getScalarSizeInBits())
.getReg(0);
}
case CCValAssign::LocInfo::SExt: {
return MIRBuilder
.buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
NarrowTy.getScalarSizeInBits())
.getReg(0);
}
default:
return SrcReg;
}
}

void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
const LLT LocTy(VA.getLocVT());
const LLT ValTy = MRI.getType(ValVReg);

if (ValTy.getSizeInBits() == LocTy.getSizeInBits()) {
MIRBuilder.buildCopy(ValVReg, PhysReg);
return;
}

auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
auto Hint = buildExtensionHint(VA, Copy.getReg(0), ValTy);
MIRBuilder.buildTrunc(ValVReg, Hint);
}
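Concretely, for a zeroext i16 argument assigned a 32-bit location, this default emits a 32-bit COPY, a G_ASSERT_ZEXT with width 16, and a G_TRUNC to s16; those are the exact sequences the test updates below check for. The same sequence built directly with MachineIRBuilder, as a standalone sketch (assuming B is a builder positioned at the insertion point and PhysReg is the incoming physical register):

// Sketch under the assumptions above; mirrors the default path.
LLT S32 = LLT::scalar(32);
LLT S16 = LLT::scalar(16);
auto Copy = B.buildCopy(S32, PhysReg);                 // COPY from the physreg
auto Hint = B.buildAssertZExt(S32, Copy, /*Size=*/16); // G_ASSERT_ZEXT ..., 16
B.buildTrunc(S16, Hint);                               // G_TRUNC to s16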
29 changes: 1 addition & 28 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -70,34 +70,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
switch (VA.getLocInfo()) {
default:
MIRBuilder.buildCopy(ValVReg, PhysReg);
break;
case CCValAssign::LocInfo::ZExt: {
auto WideTy = LLT{VA.getLocVT()};
auto NarrowTy = MRI.getType(ValVReg);
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertZExt(
WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::SExt: {
auto WideTy = LLT{VA.getLocVT()};
auto NarrowTy = MRI.getType(ValVReg);
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertSExt(
WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::AExt: {
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
break;
}
}
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}

void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
63 changes: 25 additions & 38 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -29,28 +29,22 @@ using namespace llvm;

namespace {

struct AMDGPUValueHandler : public CallLowering::ValueHandler {
AMDGPUValueHandler(bool IsIncoming, MachineIRBuilder &B,
MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
: ValueHandler(IsIncoming, B, MRI, AssignFn) {}

/// Wrapper around extendRegister to ensure we extend to a full 32-bit
/// register.
Register extendRegisterMin32(Register ValVReg, CCValAssign &VA) {
if (VA.getLocVT().getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to
// extend and do a 32-bit copy to avoid the verifier complaining about it.
return MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
}

return extendRegister(ValVReg, VA);
/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
Register ValVReg, CCValAssign &VA) {
if (VA.getLocVT().getSizeInBits() < 32) {
// 16-bit types are reported as legal for 32-bit registers. We need to
// extend and do a 32-bit copy to avoid the verifier complaining about it.
return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
}
};

struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
return Handler.extendRegister(ValVReg, VA);
}

struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: AMDGPUValueHandler(false, B, MRI, AssignFn), MIB(MIB) {}
: OutgoingValueHandler(B, MRI, AssignFn), MIB(MIB) {}

MachineInstrBuilder MIB;

@@ -66,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
Register ExtReg = extendRegisterMin32(ValVReg, VA);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

// If this is a scalar return, insert a readfirstlane just in case the value
// ends up in a VGPR.
@@ -93,12 +87,12 @@ struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
}
};

struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
uint64_t StackUsed = 0;

AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: AMDGPUValueHandler(true, B, MRI, AssignFn) {}
: IncomingValueHandler(B, MRI, AssignFn) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -119,22 +113,16 @@ struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
// 16-bit types are reported as legal for 32-bit registers. We need to do
// a 32-bit copy, and truncate to avoid the verifier complaining about it.
auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);

// If we have signext/zeroext, it applies to the whole 32-bit register
// before truncation.
auto Extended =
buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
MIRBuilder.buildTrunc(ValVReg, Extended);
return;
}

switch (VA.getLocInfo()) {
case CCValAssign::LocInfo::SExt:
case CCValAssign::LocInfo::ZExt:
case CCValAssign::LocInfo::AExt: {
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
break;
}
default:
MIRBuilder.buildCopy(ValVReg, PhysReg);
break;
}
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}

void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
@@ -180,8 +168,7 @@ struct CallReturnHandler : public AMDGPUIncomingArgHandler {
MachineInstrBuilder MIB;
};

struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
MachineInstrBuilder MIB;
struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
CCAssignFn *AssignFnVarArg;

/// For tail calls, the byte offset of the call's argument area from the
@@ -197,7 +184,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
bool IsTailCall = false, int FPDiff = 0)
: AMDGPUValueHandler(false, MIRBuilder, MRI, AssignFn), MIB(MIB),
: AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB, AssignFn),
AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
}

@@ -226,7 +213,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(ValVReg, VA);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
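The outgoing handlers above apply the mirror-image rule: values narrower than 32 bits are any-extended before the copy into the physical register, because sub-32-bit types are legalized in 32-bit registers on AMDGPU. A minimal sketch of that flow (assuming B is a MachineIRBuilder, ValVReg holds an s16 value, and PhysReg is its assigned 32-bit physical register):

// Outgoing s16 bound to a 32-bit location: widen first, then copy.
Register Ext = B.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0); // G_ANYEXT
B.buildCopy(PhysReg, Ext);                                        // COPY to the physreg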

48 changes: 42 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -50,7 +50,8 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -70,7 +71,8 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -139,7 +141,8 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -159,7 +162,8 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -194,7 +198,8 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -214,7 +219,8 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
@@ -288,6 +294,36 @@ define void @void_func_i32(i32 %arg0) #0 {
ret void
}

; The signext is a no-op
define void @void_func_i32_signext(i32 signext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_signext
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg0, i32 addrspace(1)* undef
ret void
}

; The zeroext is a no-op
define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
; CHECK-LABEL: name: void_func_i32_zeroext
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
store i32 %arg0, i32 addrspace(1)* undef
ret void
}

define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
; CHECK-LABEL: name: void_func_p3i8
; CHECK: bb.1 (%ir-block.0):
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -85,11 +85,8 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
; GFX7-LABEL: v_mul_i16_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_mul_u32_u24_e32 v0, v0, v1
; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_i16_zeroext:
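The mul.ll diff shows the payoff: with G_ASSERT_ZEXT recording that the upper 16 bits of each input are already zero, known-bits analysis can prove the two pre-multiply v_and instructions and the s_mov of the mask constant redundant, leaving only the mask of the result. A hedged illustration of that reasoning using llvm::KnownBits (hypothetical values; this snippet is not from the commit):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// A value asserted zero-extended from 16 bits has bits 16..31 known zero,
// so AND with 0xffff cannot change it.
KnownBits Known(32);
Known.Zero.setBitsFrom(16);  // effect of G_ASSERT_ZEXT ..., 16
APInt Mask(32, 0xffff);
// Bits the AND would clear that are not already known zero:
bool MaskIsRedundant = (~Mask & ~Known.Zero).isNullValue(); // true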
