48 changes: 30 additions & 18 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6855,7 +6855,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
State.getMachineFunction().getSubtarget());
const bool IsPPC64 = Subtarget.isPPC64();
const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
const unsigned PtrSize = IsPPC64 ? 8 : 4;
const Align PtrAlign(PtrSize);
const Align StackAlign(16);
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

if (ValVT == MVT::f128)
@@ -6876,12 +6878,16 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
PPC::V6, PPC::V7, PPC::V8, PPC::V9,
PPC::V10, PPC::V11, PPC::V12, PPC::V13};

const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

if (ArgFlags.isByVal()) {
if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
if (ByValAlign > StackAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
"register width are not supported.");
"16 are not supported.");

const unsigned ByValSize = ArgFlags.getByValSize();
const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;

// An empty aggregate parameter takes up no storage and no registers,
// but needs a MemLoc for a stack slot for the formal arguments side.
@@ -6891,11 +6897,23 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}

const unsigned StackSize = alignTo(ByValSize, PtrAlign);
unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
for (const unsigned E = Offset + StackSize; Offset < E;
Offset += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
// Shadow allocate any registers that are not properly aligned.
unsigned NextReg = State.getFirstUnallocated(GPRs);
while (NextReg != GPRs.size() &&
!isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
// Shadow allocate the next register since its alignment is not strict enough.
unsigned Reg = State.AllocateReg(GPRs);
// Allocate the stack space shadowed by said register.
State.AllocateStack(PtrSize, PtrAlign);
assert(Reg && "Allocating register unexpectedly failed.");
(void)Reg;
NextReg = State.getFirstUnallocated(GPRs);
}

const unsigned StackSize = alignTo(ByValSize, ObjAlign);
unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
@@ -6917,12 +6935,12 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
[[fallthrough]];
case MVT::i1:
case MVT::i32: {
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
// AIX integer arguments are always passed in register width.
if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
: CCValAssign::LocInfo::ZExt;
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
@@ -6942,8 +6960,8 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

// Reserve and initialize GPRs or initialize the PSA as required.
for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
for (unsigned I = 0; I < StoreSize; I += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs)) {
assert(FReg && "An FPR should be available when a GPR is reserved.");
if (State.isVarArg()) {
// Successfully reserved GPRs are only initialized for vararg calls.
@@ -6995,9 +7013,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}

const unsigned PtrSize = IsPPC64 ? 8 : 4;
ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
// Burn any underaligned registers and their shadowed stack space until
// we reach the required alignment.
@@ -7347,9 +7362,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
const MCPhysReg ArgReg = VA.getLocReg();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();

if (Flags.getNonZeroByValAlign() > PtrByteSize)
report_fatal_error("Over aligned byvals not supported yet.");

const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
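The byval lowering above proceeds in two steps: it first burns ("shadow allocates") any GPRs whose shadowed parameter-save-area slot is not sufficiently aligned for the object, together with the stack slots they shadow, and then allocates the aligned stack region, handing out one GPR per pointer-size piece. A minimal C++ sketch of that allocation discipline follows, using a toy stand-in for CCState; the names and types are illustrative, not the LLVM API, and the 24/48-byte parameter-save-area base offsets are an assumption taken from the AIX linkage-area layout.

    #include <algorithm>

    // Toy model of the CC_AIX byval logic above, assuming GPR index I
    // shadows the parameter-save-area (PSA) slot at Base + I * PtrSize.
    struct ToyState {
      const unsigned PtrSize;     // 4 in 32-bit mode, 8 in 64-bit mode
      const unsigned NumGPRs = 8; // r3..r10 / x3..x10
      const unsigned Base;        // PSA start past the linkage area
      unsigned NextReg = 0;       // first unallocated GPR index
      unsigned StackOffset;       // next free PSA offset

      explicit ToyState(bool IsPPC64)
          : PtrSize(IsPPC64 ? 8 : 4), Base(IsPPC64 ? 48 : 24),
            StackOffset(Base) {}

      // Offset of the stack slot shadowed by GPR index I.
      unsigned shadowOffset(unsigned I) const { return Base + I * PtrSize; }

      // Returns the PSA offset assigned to a byval of the given size and
      // alignment (a power of two, at most 16).
      unsigned allocateByVal(unsigned Size, unsigned Align) {
        // Burn GPRs whose shadowed offset is underaligned, along with the
        // slots they shadow (the while loop in CC_AIX above).
        while (NextReg != NumGPRs && shadowOffset(NextReg) % Align != 0) {
          ++NextReg;
          StackOffset += PtrSize;
        }
        StackOffset = (StackOffset + Align - 1) / Align * Align; // align up
        unsigned Offset = StackOffset;
        unsigned StackSize = (Size + Align - 1) / Align * Align; // alignTo
        StackOffset += StackSize;
        // Each pointer-size piece also consumes a GPR while any remain.
        NextReg = std::min(NumGPRs, NextReg + StackSize / PtrSize);
        return Offset;
      }
    };

For example, in 32-bit mode an i32 in r3 puts NextReg at index 1 (r4, shadowing offset 28); a following 16-byte-aligned byval burns r4 and lands at offset 32, occupying r5 through r8, which matches the liveins of vec_struct_test in the new aix-vector-byval-callee.ll test below.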
31 changes: 4 additions & 27 deletions llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4153,27 +4153,6 @@ class VPatBinaryNoMaskTU<string intrinsic_name,
(op2_type op2_kind:$rs2),
GPR:$vl, sew, TU_MU)>;

class VPatBinaryNoMaskRoundingMode<string intrinsic_name,
string inst,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
int sew,
VReg op1_reg_class,
DAGOperand op2_kind> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type (undef)),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(XLenVT timm:$round),
VLOpFrag)),
(!cast<Instruction>(inst)
(result_type (IMPLICIT_DEF)),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(XLenVT timm:$round),
GPR:$vl, sew, TA_MA)>;

class VPatBinaryNoMaskTURoundingMode<string intrinsic_name,
string inst,
ValueType result_type,
@@ -4827,8 +4806,6 @@ multiclass VPatBinaryRoundingMode<string intrinsic,
VReg result_reg_class,
VReg op1_reg_class,
DAGOperand op2_kind> {
def : VPatBinaryNoMaskRoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
sew, op1_reg_class, op2_kind>;
def : VPatBinaryNoMaskTURoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
sew, result_reg_class, op1_reg_class, op2_kind>;
def : VPatBinaryMaskTARoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
@@ -6306,7 +6283,7 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
GPR:$vl,
vti.Log2SEW, TA_MA)>;
vti.Log2SEW, TU_MU)>;
def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
@@ -6962,12 +6939,12 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef),
def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1), VLOpFrag)),
(!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TA_MA)>;
vti.RegClass:$merge, vti.RegClass:$rs1,
vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
(vti.Vector vti.RegClass:$rs1),
(XLenVT 1),
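With the undef-passthru pattern class VPatBinaryNoMaskRoundingMode removed, the folds in this file that rewrite one instruction as another (vsub by an immediate as vadd.vi, vsll by 1 as vadd.vv) now keep the intrinsic's merge operand and emit tail-undisturbed (TU_MU) pseudos instead of passing IMPLICIT_DEF with a tail-agnostic policy. A toy C++ model of the tail-policy difference, under deliberately simplified RVV semantics (not the LLVM or RVV API):

    #include <cstddef>
    #include <vector>

    // Tail-undisturbed (TU): lanes at index >= vl keep the merge operand's
    // old values. Tail-agnostic (TA) would be free to clobber them, which
    // is only sound when the passthru operand is undef.
    std::vector<int> vsllByOneTU(std::vector<int> merge,
                                 const std::vector<int> &src, size_t vl) {
      for (size_t i = 0; i < vl && i < src.size(); ++i)
        merge[i] = src[i] + src[i]; // shift-by-1 emitted as an add
      return merge;                 // tail lanes are undisturbed
    }

This is why the new intrinsic_vsll_1_tu test below expects "tu, ma" once the passthru is a real register value rather than undef, and why the vsetvli lines in the O0 spill tests flip from "ta" to "tu".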
17 changes: 9 additions & 8 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -419,7 +419,8 @@ struct AAReturnedFromReturnedValues : public BaseType {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
StateType S(StateType::getBestState(this->getState()));
clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>(
clampReturnedValueStates<AAType, StateType, IRAttributeKind,
RecurseForSelectAndPHI>(
A, *this, S,
PropagateCallBaseContext ? this->getCallBaseContext() : nullptr);
// TODO: If we know we visited all returned values, thus no are assumed
@@ -6973,10 +6974,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) {
Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call might not be executed with the allocation "
<< *UniqueFree << "\n");
LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed "
"with the allocation "
<< *UniqueFree << "\n");
return false;
}
}
@@ -10406,11 +10406,12 @@ struct AANoFPClassFloating : public AANoFPClassImpl {

struct AANoFPClassReturned final
: AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
AANoFPClassImpl::StateType, false, Attribute::None, false> {
AANoFPClassImpl::StateType, false,
Attribute::None, false> {
AANoFPClassReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
AANoFPClassImpl::StateType, false, Attribute::None, false>(
IRP, A) {}
AANoFPClassImpl::StateType, false,
Attribute::None, false>(IRP, A) {}

/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
273 changes: 63 additions & 210 deletions llvm/lib/Transforms/IPO/FunctionImport.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -13,4 +13,4 @@ entry:

declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, ptr byval(%struct.S) align 32)

; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=32BIT %s

; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=64BIT %s

%struct.vec_struct = type { <4 x i32> }

; Function Attrs: norecurse nounwind readonly
define i32 @vec_struct_test(i32 %i, ptr nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
; 32BIT-LABEL: name: vec_struct_test
; 32BIT: bb.0.entry:
; 32BIT-NEXT: liveins: $r3, $r5, $r6, $r7, $r8
; 32BIT-NEXT: {{ $}}
; 32BIT-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
; 32BIT-NEXT: STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
; 32BIT-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
; 32BIT-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
;
; 64BIT-LABEL: name: vec_struct_test
; 64BIT: bb.0.entry:
; 64BIT-NEXT: liveins: $x3, $x5, $x6
; 64BIT-NEXT: {{ $}}
; 64BIT-NEXT: STD renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
; 64BIT-NEXT: STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
; 64BIT-NEXT: renamable $x4 = RLDICL killed renamable $x5, 32, 32
; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4, implicit-def $x3
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
entry:
%vsi = getelementptr inbounds i8, ptr %vs, i32 0
%0 = load <4 x i32>, ptr %vsi, align 16
%vecext = extractelement <4 x i32> %0, i32 0
%add = add nsw i32 %vecext, %i
ret i32 %add
}
49 changes: 49 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=32BIT %s

; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
; RUN: -mattr=-altivec -verify-machineinstrs < %s | \
; RUN: FileCheck --check-prefix=64BIT %s

%struct.B = type { <8 x i16>, i32, i32, [8 x i8] }

; Function Attrs: nounwind
define i32 @caller() {
; 32BIT-LABEL: name: caller
; 32BIT: bb.0.entry:
; 32BIT-NEXT: renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
; 32BIT-NEXT: renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
; 32BIT-NEXT: renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
; 32BIT-NEXT: renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
; 32BIT-NEXT: renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
; 32BIT-NEXT: renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
; 32BIT-NEXT: renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
; 32BIT-NEXT: renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
; 32BIT-NEXT: $r3 = LI 0
; 32BIT-NEXT: BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3
;
; 64BIT-LABEL: name: caller
; 64BIT: bb.0.entry:
; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT-NEXT: renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
; 64BIT-NEXT: renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
; 64BIT-NEXT: renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
; 64BIT-NEXT: renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
; 64BIT-NEXT: $x3 = LI8 0
; 64BIT-NEXT: BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3
entry:
%vs = alloca %struct.B, align 16
%call = tail call i32 @vec_struct_test(i32 0, ptr nonnull byval(%struct.B) align 16 %vs)
ret i32 %call
}

declare i32 @vec_struct_test(i32, ptr byval(%struct.B) align 16)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -22,7 +22,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: addi a1, a1, 16
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -38,7 +38,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -25,7 +25,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: addi a1, a1, 32
; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: addi a0, sp, 32
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -41,7 +41,7 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O0-NEXT: # kill: def $x11 killed $x10
; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; SPILL-O0-NEXT: vfadd.vv v8, v9, v10
; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vsll.ll
@@ -2108,6 +2108,22 @@ entry:
ret <vscale x 1 x i8> %a
}

define <vscale x 1 x i8> @intrinsic_vsll_1_tu_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsll_1_tu_nxv1i8_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vv v8, v9, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
iXLen 1,
iXLen %2)

ret <vscale x 1 x i8> %a
}

define <vscale x 1 x i8> @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/RISCV/rvv/vsub.ll
@@ -2147,7 +2147,7 @@ entry:
define <vscale x 1 x i8> @intrinsic_vsub_vi_tu_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i8_nxv1i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2196,7 +2196,7 @@ entry:
define <vscale x 2 x i8> @intrinsic_vsub_vi_tu_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i8_nxv2i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2245,7 +2245,7 @@ entry:
define <vscale x 4 x i8> @intrinsic_vsub_vi_tu_nxv4i8_nxv4i8_i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i8_nxv4i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2294,7 +2294,7 @@ entry:
define <vscale x 8 x i8> @intrinsic_vsub_vi_tu_nxv8i8_nxv8i8_i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i8_nxv8i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2343,7 +2343,7 @@ entry:
define <vscale x 16 x i8> @intrinsic_vsub_vi_tu_nxv16i8_nxv16i8_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i8_nxv16i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
@@ -2392,7 +2392,7 @@ entry:
define <vscale x 32 x i8> @intrinsic_vsub_vi_tu_nxv32i8_nxv32i8_i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv32i8_nxv32i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
@@ -2441,7 +2441,7 @@ entry:
define <vscale x 64 x i8> @intrinsic_vsub_vi_tu_nxv64i8_nxv64i8_i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv64i8_nxv64i8_i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
@@ -2490,7 +2490,7 @@ entry:
define <vscale x 1 x i16> @intrinsic_vsub_vi_tu_nxv1i16_nxv1i16_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i16_nxv1i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2539,7 +2539,7 @@ entry:
define <vscale x 2 x i16> @intrinsic_vsub_vi_tu_nxv2i16_nxv2i16_i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i16_nxv2i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2588,7 +2588,7 @@ entry:
define <vscale x 4 x i16> @intrinsic_vsub_vi_tu_nxv4i16_nxv4i16_i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i16_nxv4i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2637,7 +2637,7 @@ entry:
define <vscale x 8 x i16> @intrinsic_vsub_vi_tu_nxv8i16_nxv8i16_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i16_nxv8i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
@@ -2686,7 +2686,7 @@ entry:
define <vscale x 16 x i16> @intrinsic_vsub_vi_tu_nxv16i16_nxv16i16_i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i16_nxv16i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
@@ -2735,7 +2735,7 @@ entry:
define <vscale x 32 x i16> @intrinsic_vsub_vi_tu_nxv32i16_nxv32i16_i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv32i16_nxv32i16_i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
@@ -2784,7 +2784,7 @@ entry:
define <vscale x 1 x i32> @intrinsic_vsub_vi_tu_nxv1i32_nxv1i32_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i32_nxv1i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2833,7 +2833,7 @@ entry:
define <vscale x 2 x i32> @intrinsic_vsub_vi_tu_nxv2i32_nxv2i32_i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i32_nxv2i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -2882,7 +2882,7 @@ entry:
define <vscale x 4 x i32> @intrinsic_vsub_vi_tu_nxv4i32_nxv4i32_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i32_nxv4i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
@@ -2931,7 +2931,7 @@ entry:
define <vscale x 8 x i32> @intrinsic_vsub_vi_tu_nxv8i32_nxv8i32_i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i32_nxv8i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
@@ -2980,7 +2980,7 @@ entry:
define <vscale x 16 x i32> @intrinsic_vsub_vi_tu_nxv16i32_nxv16i32_i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv16i32_nxv16i32_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
@@ -3029,7 +3029,7 @@ entry:
define <vscale x 1 x i64> @intrinsic_vsub_vi_tu_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv1i64_nxv1i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT: vadd.vi v8, v9, -9
; CHECK-NEXT: ret
entry:
@@ -3078,7 +3078,7 @@ entry:
define <vscale x 2 x i64> @intrinsic_vsub_vi_tu_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv2i64_nxv2i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT: vadd.vi v8, v10, -9
; CHECK-NEXT: ret
entry:
@@ -3127,7 +3127,7 @@ entry:
define <vscale x 4 x i64> @intrinsic_vsub_vi_tu_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv4i64_nxv4i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT: vadd.vi v8, v12, -9
; CHECK-NEXT: ret
entry:
@@ -3176,7 +3176,7 @@ entry:
define <vscale x 8 x i64> @intrinsic_vsub_vi_tu_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vsub_vi_tu_nxv8i64_nxv8i64_i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vadd.vi v8, v16, -9
; CHECK-NEXT: ret
entry:
4 changes: 2 additions & 2 deletions llvm/test/ThinLTO/X86/funcimport-stats.ll
@@ -9,8 +9,8 @@
; RUN: cat %t4 | grep 'Is importing aliasee' | count 1
; RUN: cat %t4 | FileCheck %s

; CHECK: - [[NUM_FUNCS:[0-9]+]] function definitions and 0 function declarations imported from
; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars definition and 0 global vars declaration imported from
; CHECK: - [[NUM_FUNCS:[0-9]+]] functions imported from
; CHECK-NEXT: - [[NUM_VARS:[0-9]+]] global vars imported from

; CHECK: [[NUM_FUNCS]] function-import - Number of functions imported in backend
; CHECK-NEXT: [[NUM_FUNCS]] function-import - Number of functions thin link decided to import
221 changes: 0 additions & 221 deletions llvm/test/ThinLTO/X86/import_callee_declaration.ll

This file was deleted.

5 changes: 2 additions & 3 deletions llvm/test/Transforms/FunctionImport/funcimport.ll
@@ -166,8 +166,7 @@ declare void @variadic_va_start(...)
; GUID-DAG: GUID {{.*}} is linkoncefunc

; DUMP: Module [[M1:.*]] imports from 1 module
; DUMP-NEXT: 15 function definitions and 0 function declarations imported from [[M2:.*]]
; DUMP-NEXT: 4 var definitions and 0 var declarations imported from [[M2]]

; DUMP-NEXT: 15 functions imported from [[M2:.*]]
; DUMP-NEXT: 4 vars imported from [[M2]]
; DUMP: Imported 15 functions for Module [[M1]]
; DUMP-NEXT: Imported 4 global variables for Module [[M1]]
6 changes: 1 addition & 5 deletions llvm/tools/llvm-link/llvm-link.cpp
@@ -377,13 +377,9 @@ static bool importFunctions(const char *argv0, Module &DestModule) {
if (Verbose)
errs() << "Importing " << FunctionName << " from " << FileName << "\n";

// `-import` specifies the `<filename,function-name>` pairs to import as
// definition, so make the import type definition directly.
// FIXME: A follow-up patch should add test coverage for import declaration
// in `llvm-link` CLI (e.g., by introducing a new command line option).
auto &Entry =
ImportList[FileNameStringCache.insert(FileName).first->getKey()];
Entry[F->getGUID()] = GlobalValueSummary::Definition;
Entry.insert(F->getGUID());
}
auto CachedModuleLoader = [&](StringRef Identifier) {
return ModuleLoaderCache.takeModule(std::string(Identifier));
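The llvm-link change above reverts the per-GUID import kind: with declaration import removed, every entry in the import list again means "import this definition", so a plain set of GUIDs suffices where a GUID-to-kind map was used before. A sketch of the shape of that revert, with simplified stand-in type names (the real code uses FunctionImporter's import-map types, not these aliases):

    #include <cstdint>
    #include <map>
    #include <set>
    #include <string>

    using GUID = uint64_t;

    // Before: each GUID carried an import kind (definition vs declaration).
    enum class ImportKind { Definition, Declaration };
    using ImportMapWithKind =
        std::map<std::string, std::map<GUID, ImportKind>>;

    // After: importing a GUID always imports its definition.
    using ImportMap = std::map<std::string, std::set<GUID>>;

    void recordImport(ImportMap &Imports, const std::string &FromFile,
                      GUID G) {
      Imports[FromFile].insert(G); // mirrors Entry.insert(F->getGUID()) above
    }

The same revert removes the DecSummaries out-parameter from gatherImportedSummariesForModule and the extra argument to writeIndexToFile in llvm-lto.cpp below.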
5 changes: 2 additions & 3 deletions llvm/tools/llvm-lto/llvm-lto.cpp
@@ -692,9 +692,8 @@ class ThinLTOProcessing {
// Build a map of module to the GUIDs and summary objects that should
// be written to its index.
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
GVSummaryPtrSet DecSummaries;
ThinGenerator.gatherImportedSummariesForModule(
*TheModule, *Index, ModuleToSummariesForIndex, DecSummaries, *Input);
*TheModule, *Index, ModuleToSummariesForIndex, *Input);

std::string OutputName = OutputFilename;
if (OutputName.empty()) {
Expand All @@ -704,7 +703,7 @@ class ThinLTOProcessing {
std::error_code EC;
raw_fd_ostream OS(OutputName, EC, sys::fs::OpenFlags::OF_None);
error(EC, "error opening the file '" + OutputName + "'");
writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex, &DecSummaries);
writeIndexToFile(*Index, OS, &ModuleToSummariesForIndex);
}
}

1 change: 1 addition & 0 deletions utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -287,6 +287,7 @@ cc_library(
linkopts = select({
"@platforms//os:windows": [
"ws2_32.lib",
"ntdll.lib",
],
"@platforms//os:freebsd": [
"-pthread",