From eb41627799b30667fe7fe06d485d5501c8923f50 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 22 Feb 2020 10:00:38 -0500
Subject: [PATCH] AMDGPU/GlobalISel: Improve handling of illegal return types

Most importantly, this fixes ret i8. Also make sure to handle
signext/zeroext for odd types > i32. The corresponding argument-passing
cases still need the same fixes.
---
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |  84 +++--
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h   |   8 +-
 .../AMDGPU/GlobalISel/function-returns.ll     | 312 ++++++++++++++++--
 3 files changed, 353 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index ec0f38afc48bd..f32f9ec0e6dc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -153,10 +153,26 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
   : CallLowering(&TLI) {
 }
 
+// FIXME: Compatibility shim
+static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
+  switch (MIOpc) {
+  case TargetOpcode::G_SEXT:
+    return ISD::SIGN_EXTEND;
+  case TargetOpcode::G_ZEXT:
+    return ISD::ZERO_EXTEND;
+  case TargetOpcode::G_ANYEXT:
+    return ISD::ANY_EXTEND;
+  default:
+    llvm_unreachable("not an extend opcode");
+  }
+}
+
 void AMDGPUCallLowering::splitToValueTypes(
-    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
-    SplitArgTy PerformArgSplit) const {
+    MachineIRBuilder &B,
+    const ArgInfo &OrigArg, unsigned OrigArgIdx,
+    SmallVectorImpl<ArgInfo> &SplitArgs,
+    const DataLayout &DL, CallingConv::ID CallConv,
+    SplitArgTy PerformArgSplit) const {
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
 
@@ -170,28 +186,46 @@ void AMDGPUCallLowering::splitToValueTypes(
   int SplitIdx = 0;
   for (EVT VT : SplitVTs) {
-    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+    Register Reg = OrigArg.Regs[SplitIdx];
     Type *Ty = VT.getTypeForEVT(Ctx);
+    LLT LLTy = getLLTForType(*Ty, DL);
+    if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
+      unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+      if (OrigArg.Flags[0].isSExt()) {
+        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_SEXT;
+      } else if (OrigArg.Flags[0].isZExt()) {
+        assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+        ExtendOp = TargetOpcode::G_ZEXT;
+      }
+      EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+                                          extOpcodeToISDExtOpcode(ExtendOp));
+      if (ExtVT != VT) {
+        VT = ExtVT;
+        Ty = ExtVT.getTypeForEVT(Ctx);
+        LLTy = getLLTForType(*Ty, DL);
+        Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
+      }
+    }
+
+    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+    MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
 
     if (NumParts == 1) {
       // No splitting to do, but we want to replace the original type (e.g. [1 x
       // double] -> double).
-      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
-                             OrigArg.Flags, OrigArg.IsFixed);
+      SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
       ++SplitIdx;
       continue;
     }
 
-    LLT LLTy = getLLTForType(*Ty, DL);
-
     SmallVector<Register, 8> SplitRegs;
-
-    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
-    Type *PartTy = PartVT.getTypeForEVT(Ctx);
+    Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
     LLT PartLLT = getLLTForType(*PartTy, DL);
+    MachineRegisterInfo &MRI = *B.getMRI();
 
     // FIXME: Should we be reporting all of the part registers for a single
     // argument, and let handleAssignments take care of the repacking?
@@ -201,7 +235,7 @@ void AMDGPUCallLowering::splitToValueTypes(
       SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
     }
 
-    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
+    PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
 
     ++SplitIdx;
   }
@@ -221,6 +255,7 @@ static LLT getMultipleType(LLT OrigTy, int Factor) {
 
 static void unpackRegsToOrigType(MachineIRBuilder &B,
                                  ArrayRef<Register> DstRegs,
                                  Register SrcReg,
+                                 const CallLowering::ArgInfo &Info,
                                  LLT SrcTy, LLT PartTy) {
   assert(DstRegs.size() > 1 && "Nothing to unpack");
 
@@ -266,24 +301,26 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
   auto &MF = B.getMF();
   const auto &F = MF.getFunction();
   const DataLayout &DL = MF.getDataLayout();
+  MachineRegisterInfo *MRI = B.getMRI();
 
   CallingConv::ID CC = F.getCallingConv();
   const SITargetLowering &TLI = *getTLI<SITargetLowering>();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
 
   ArgInfo OrigRetInfo(VRegs, Val->getType());
   setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
   SmallVector<ArgInfo, 4> SplitRetInfos;
 
   splitToValueTypes(
-    OrigRetInfo, SplitRetInfos, DL, MRI, CC,
-    [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
-      unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+    B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
+    [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
+        int VTSplitIdx) {
+      unpackRegsToOrigType(B, Regs, SrcReg,
+                           SplitRetInfos[VTSplitIdx],
+                           LLTy, PartLLT);
     });
 
   CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
-
-  OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn);
+  OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
   return handleAssignments(B, SplitRetInfos, RetHandler);
 }
 
@@ -308,7 +345,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
     return true;
   }
 
-  auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();
+  auto const &ST = MF.getSubtarget<GCNSubtarget>();
 
   unsigned ReturnOpc = IsShader ?
     AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
 
@@ -663,13 +700,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     }
 
     ArgInfo OrigArg(VRegs[Idx], Arg.getType());
-    setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+    const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
+    setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
     splitToValueTypes(
-      OrigArg, SplitArgs, DL, MRI, CC,
+      B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
       // FIXME: We should probably be passing multiple registers to
       // handleAssignments to do this
-      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+      [&](ArrayRef<Register> Regs, Register DstReg,
+          LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+        assert(DstReg == VRegs[Idx][VTSplitIdx]);
        packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs, LLTy,
                                PartLLT);
       });
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 53a562586bc06..3651dd40bc9f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -30,11 +30,13 @@ class AMDGPUCallLowering: public CallLowering {
                              unsigned Align, Register DstReg) const;
 
   /// A function of this type is used to perform value split action.
-  using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;
+  using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
 
-  void splitToValueTypes(const ArgInfo &OrigArgInfo,
+  void splitToValueTypes(MachineIRBuilder &B,
+                         const ArgInfo &OrigArgInfo,
+                         unsigned OrigArgIdx,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
-                         const DataLayout &DL, MachineRegisterInfo &MRI,
+                         const DataLayout &DL,
                          CallingConv::ID CallConv,
                          SplitArgTy SplitArg) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index 008b3c4912605..82ecb616aa114 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -46,41 +46,92 @@ define signext i1 @i1_signext_func_void() #0 {
   ret i1 %val
 }
 
+define i7 @i7_func_void() #0 {
+  ; CHECK-LABEL: name: i7_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7)
+  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i7, i7 addrspace(1)* undef
+  ret i7 %val
+}
+
+define zeroext i7 @i7_zeroext_func_void() #0 {
+  ; CHECK-LABEL: name: i7_zeroext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7)
+  ; CHECK: $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i7, i7 addrspace(1)* undef
+  ret i7 %val
+}
+
+define signext i7 @i7_signext_func_void() #0 {
+  ; CHECK-LABEL: name: i7_signext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7)
+  ; CHECK: $vgpr0 = COPY [[SEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+  %val = load i7, i7 addrspace(1)* undef
+  ret i7 %val
+}
+
 define i8 @i8_func_void() #0 {
   ; CHECK-LABEL: name: i8_func_void
-  ; CHECK: bb.0:
-  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: liveins: $sgpr30_sgpr31
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
   %val = load i8, i8 addrspace(1)* undef
   ret i8 %val
 }
 
 define zeroext i8 @i8_zeroext_func_void() #0 {
   ; CHECK-LABEL: name: i8_zeroext_func_void
-  ; CHECK: bb.0:
-  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: liveins: $sgpr30_sgpr31
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
+  ; CHECK: $vgpr0 = COPY [[ZEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
   %val = load i8, i8 addrspace(1)* undef
   ret i8 %val
 }
 
 define signext i8 @i8_signext_func_void() #0 {
   ; CHECK-LABEL: name: i8_signext_func_void
-  ; CHECK: bb.0:
-  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: liveins: $sgpr30_sgpr31
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
+  ; CHECK: $vgpr0 = COPY [[SEXT]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
   %val = load i8, i8 addrspace(1)* undef
   ret i8 %val
 }
@@ -151,12 +202,44 @@ define i48 @i48_func_void() #0 {
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-  ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[DEF1]], [[LOAD]](s48), 0
-  ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 0
-  ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 32
-  ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
-  ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  %val = load i48, i48 addrspace(1)* undef, align 8
+  ret i48 %val
+}
+
+define signext i48 @i48_signext_func_void() #0 {
+  ; CHECK-LABEL: name: i48_signext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+  %val = load i48, i48 addrspace(1)* undef, align 8
+  ret i48 %val
+}
+
+define zeroext i48 @i48_zeroext_func_void() #0 {
+  ; CHECK-LABEL: name: i48_zeroext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
   ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
   ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
   %val = load i48, i48 addrspace(1)* undef, align 8
   ret i48 %val
 }
@@ -186,14 +269,47 @@ define i65 @i65_func_void() #0 {
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
-  ; CHECK: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-  ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[LOAD]](s65), 0
-  ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 0
-  ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 32
-  ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 64
-  ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
-  ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32)
-  ; CHECK: $vgpr2 = COPY [[EXTRACT2]](s32)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+  %val = load i65, i65 addrspace(1)* undef
+  ret i65 %val
+}
+
+define signext i65 @i65_signext_func_void() #0 {
+  ; CHECK-LABEL: name: i65_signext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+  %val = load i65, i65 addrspace(1)* undef
+  ret i65 %val
+}
+
+define zeroext i65 @i65_zeroext_func_void() #0 {
+  ; CHECK-LABEL: name: i65_zeroext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
   ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
   ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
   %val = load i65, i65 addrspace(1)* undef
   ret i65 %val
 }
@@ -854,16 +970,19 @@ define <4 x i8> @v4i8_func_void() #0 {
 
 define {i8, i32} @struct_i8_i32_func_void() #0 {
   ; CHECK-LABEL: name: struct_i8_i32_func_void
-  ; CHECK: bb.0:
-  ; CHECK: successors: %bb.1(0x80000000)
+  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: liveins: $sgpr30_sgpr31
   ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK: bb.1 (%ir-block.0):
   ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
   ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
   ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+  ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+  ; CHECK: $vgpr1 = COPY [[LOAD1]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
   %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
   ret { i8, i32 } %val
 }
@@ -1060,4 +1179,145 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0
   ret void
 }
 
+define i1022 @i1022_func_void() #0 {
+  ; CHECK-LABEL: name: i1022_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+  %val = load i1022, i1022 addrspace(1)* undef
+  ret i1022 %val
+}
+
+define signext i1022 @i1022_signext_func_void() #0 {
+  ; CHECK-LABEL: name: i1022_signext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+  %val = load i1022, i1022 addrspace(1)* undef
+  ret i1022 %val
+}
+
+define zeroext i1022 @i1022_zeroext_func_void() #0 {
+  ; CHECK-LABEL: name: i1022_zeroext_func_void
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: liveins: $sgpr30_sgpr31
+  ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+  ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022)
+  ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024)
+  ; CHECK: $vgpr0 = COPY [[UV]](s32)
+  ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+  ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+  ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+  ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+  ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+  ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+  ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+  ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+  ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+  ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+  ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+  ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+  ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+  ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+  ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+  ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+  ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+  ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+  ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+  ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+  ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+  ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+  ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+  ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+  ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+  ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+  ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+  ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+  ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+  ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+  ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+  ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+  %val = load i1022, i1022 addrspace(1)* undef
+  ret i1022 %val
+}
+
 attributes #0 = { nounwind }
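
Illustration (a sketch distilled from the test updates above, not part of
the applied diff; the virtual register names %ptr, %load and %ext are
placeholders): the old checks for i8_func_void expected a stray
bb.0/successors block and stopped at the load, with nothing ever copied
into a return register. For IR such as

  define i8 @i8_func_void() {
    %val = load i8, i8 addrspace(1)* undef
    ret i8 %val
  }

the illegal s8 return value is now extended to the 32-bit register type
before the copy, producing MIR roughly like:

  %load:_(s8) = G_LOAD %ptr(p1) :: (load 1, addrspace 1)
  %ext:_(s32) = G_ANYEXT %load(s8)   ; G_SEXT/G_ZEXT when marked signext/zeroext
  $vgpr0 = COPY %ext(s32)
  S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0

Odd types wider than 32 bits are rounded up the same way (s48 -> s64,
s65 -> s96, s1022 -> s1024) and then split with G_UNMERGE_VALUES into s32
pieces returned in $vgpr0..$vgprN.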