50 changes: 36 additions & 14 deletions clang/lib/AST/Interp/Interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E,
llvm::ArrayRef<int64_t> ArrayIndices, int64_t &Result);

inline bool Invalid(InterpState &S, CodePtr OpPC);

enum class ArithOp { Add, Sub };

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -522,6 +524,11 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
if (Ptr.isDummy())
return false;

if constexpr (std::is_same_v<T, Boolean>) {
if (!S.getLangOpts().CPlusPlus14)
return Invalid(S, OpPC);
}

const T &Value = Ptr.deref<T>();
T Result;

Expand Down Expand Up @@ -572,7 +579,8 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool Inc(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Increment))
return false;

Expand All @@ -585,7 +593,8 @@ bool Inc(InterpState &S, CodePtr OpPC) {
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool IncPop(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Increment))
return false;

Expand All @@ -599,7 +608,8 @@ bool IncPop(InterpState &S, CodePtr OpPC) {
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool Dec(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Decrement))
return false;

Expand All @@ -612,7 +622,8 @@ bool Dec(InterpState &S, CodePtr OpPC) {
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool DecPop(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Decrement))
return false;

Expand Down Expand Up @@ -641,7 +652,8 @@ bool IncDecFloatHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr,

inline bool Incf(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Increment))
return false;

Expand All @@ -650,7 +662,8 @@ inline bool Incf(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {

inline bool IncfPop(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Increment))
return false;

Expand All @@ -660,6 +673,9 @@ inline bool IncfPop(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
inline bool Decf(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;

if (!CheckInitialized(S, OpPC, Ptr, AK_Decrement))
return false;

Expand All @@ -669,6 +685,8 @@ inline bool Decf(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
inline bool DecfPop(InterpState &S, CodePtr OpPC, llvm::RoundingMode RM) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy())
return false;
if (!CheckInitialized(S, OpPC, Ptr, AK_Decrement))
return false;

Expand Down Expand Up @@ -774,9 +792,9 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
// element in the same array are NOT equal. They have the same Base value,
// but a different Offset. This is a pretty rare case, so we fix this here
// by comparing pointers to the first elements.
if (LHS.isArrayRoot())
if (!LHS.isDummy() && LHS.isArrayRoot())
VL = LHS.atIndex(0).getByteOffset();
if (RHS.isArrayRoot())
if (!RHS.isDummy() && RHS.isArrayRoot())
VR = RHS.atIndex(0).getByteOffset();

S.Stk.push<BoolT>(BoolT::from(Fn(Compare(VL, VR))));
Expand Down Expand Up @@ -1680,7 +1698,7 @@ bool CastFloatingIntegral(InterpState &S, CodePtr OpPC) {
auto Status = F.convertToInteger(Result);

// Float-to-Integral overflow check.
if ((Status & APFloat::opStatus::opInvalidOp) && F.isFinite()) {
if ((Status & APFloat::opStatus::opInvalidOp)) {
const Expr *E = S.Current->getExpr(OpPC);
QualType Type = E->getType();

Expand Down Expand Up @@ -1895,7 +1913,7 @@ inline bool ArrayElemPtr(InterpState &S, CodePtr OpPC) {
const T &Offset = S.Stk.pop<T>();
const Pointer &Ptr = S.Stk.peek<Pointer>();

if (!CheckDummy(S, OpPC, Ptr))
if (Ptr.isDummy())
return true;

if (!OffsetHelper<T, ArithOp::Add>(S, OpPC, Offset, Ptr))
Expand All @@ -1909,7 +1927,7 @@ inline bool ArrayElemPtrPop(InterpState &S, CodePtr OpPC) {
const T &Offset = S.Stk.pop<T>();
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (!CheckDummy(S, OpPC, Ptr)) {
if (Ptr.isDummy()) {
S.Stk.push<Pointer>(Ptr);
return true;
}
Expand All @@ -1933,7 +1951,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) {
inline bool ArrayDecay(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

if (Ptr.isDummy()) {
if (Ptr.isZero() || Ptr.isDummy()) {
S.Stk.push<Pointer>(Ptr);
return true;
}
Expand Down Expand Up @@ -2056,8 +2074,12 @@ inline bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
size_t ThisOffset = ArgSize - (Func->hasRVO() ? primSize(PT_Ptr) : 0);
Pointer &ThisPtr = S.Stk.peek<Pointer>(ThisOffset);

const CXXRecordDecl *DynamicDecl =
ThisPtr.getDeclDesc()->getType()->getAsCXXRecordDecl();
QualType DynamicType = ThisPtr.getDeclDesc()->getType();
const CXXRecordDecl *DynamicDecl;
if (DynamicType->isPointerType() || DynamicType->isReferenceType())
DynamicDecl = DynamicType->getPointeeCXXRecordDecl();
else
DynamicDecl = ThisPtr.getDeclDesc()->getType()->getAsCXXRecordDecl();
const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl());
const auto *InitialFunction = cast<CXXMethodDecl>(Func->getDecl());
const CXXMethodDecl *Overrider = S.getContext().getOverridingFunction(
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/AST/Interp/Opcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,10 @@ def Inv: Opcode {
}

// Increment and decrement.
def Inc: IntegerOpcode;
def IncPop : IntegerOpcode;
def Dec: IntegerOpcode;
def DecPop: IntegerOpcode;
def Inc: AluOpcode;
def IncPop : AluOpcode;
def Dec: AluOpcode;
def DecPop: AluOpcode;

// Float increment and decrement.
def Incf: FloatOpcode;
Expand Down
1 change: 0 additions & 1 deletion clang/lib/AST/Interp/Pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ class Pointer {
assert(Offset == PastEndMark && "cannot get base of a block");
return Pointer(Pointee, Base, 0);
}
assert(Offset == Base && "not an inner field");
unsigned NewBase = Base - getInlineDesc()->Offset;
return Pointer(Pointee, NewBase, NewBase);
}
Expand Down
25 changes: 20 additions & 5 deletions clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
if (const auto *FD = dyn_cast<FieldDecl>(VD))
Fields.insert(FD);
} else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
for (const auto *FD : getFieldsForInitListExpr(RD))
if (InitList->getType()->isRecordType())
for (const auto *FD : getFieldsForInitListExpr(InitList))
Fields.insert(FD);
}
}
Expand Down Expand Up @@ -983,7 +983,7 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
}

Value *Val = nullptr;
if (InitExpr)
if (InitExpr) {
// In the (few) cases where an expression is intentionally
// "uninterpreted", `InitExpr` is not associated with a value. There are
// two ways to handle this situation: propagate the status, so that
Expand All @@ -998,6 +998,11 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
// default value (assuming we don't update the environment API to return
// references).
Val = getValue(*InitExpr);

if (!Val && isa<ImplicitValueInitExpr>(InitExpr) &&
InitExpr->getType()->isPointerType())
Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType());
}
if (!Val)
Val = createValue(Ty);

Expand Down Expand Up @@ -1104,12 +1109,22 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
return Env.get<RecordStorageLocation>(*Base);
}

std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
std::vector<const FieldDecl *>
getFieldsForInitListExpr(const InitListExpr *InitList) {
const RecordDecl *RD = InitList->getType()->getAsRecordDecl();
assert(RD != nullptr);

std::vector<const FieldDecl *> Fields;

if (InitList->getType()->isUnionType()) {
Fields.push_back(InitList->getInitializedFieldInUnion());
return Fields;
}

// Unnamed bitfields are only used for padding and do not appear in
// `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
// field list, and we thus need to remove them before mapping inits to
// fields to avoid mapping inits to the wrong fields.
std::vector<FieldDecl *> Fields;
llvm::copy_if(
RD->fields(), std::back_inserter(Fields),
[](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
Expand Down
40 changes: 28 additions & 12 deletions clang/lib/Analysis/FlowSensitive/Transfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,14 +663,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
void VisitInitListExpr(const InitListExpr *S) {
QualType Type = S->getType();

if (Type->isUnionType()) {
// FIXME: Initialize unions properly.
if (auto *Val = Env.createValue(Type))
Env.setValue(*S, *Val);
return;
}

if (!Type->isStructureOrClassType()) {
if (!Type->isRecordType()) {
// Until array initialization is implemented, we skip arrays and don't
// need to care about cases where `getNumInits() > 1`.
if (!Type->isArrayType() && S->getNumInits() == 1)
Expand All @@ -688,14 +681,26 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;

// This only contains the direct fields for the given type.
std::vector<FieldDecl *> FieldsForInit =
getFieldsForInitListExpr(Type->getAsRecordDecl());
std::vector<const FieldDecl *> FieldsForInit = getFieldsForInitListExpr(S);

// `S->inits()` contains all the initializer epressions, including the
// `S->inits()` contains all the initializer expressions, including the
// ones for direct base classes.
auto Inits = S->inits();
ArrayRef<Expr *> Inits = S->inits();
size_t InitIdx = 0;

// Unions initialized with an empty initializer list need special treatment.
// For structs/classes initialized with an empty initializer list, Clang
// puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions,
// it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves.
std::optional<ImplicitValueInitExpr> ImplicitValueInitForUnion;
SmallVector<Expr *> InitsForUnion;
if (S->getType()->isUnionType() && Inits.empty()) {
assert(FieldsForInit.size() == 1);
ImplicitValueInitForUnion.emplace(FieldsForInit.front()->getType());
InitsForUnion.push_back(&*ImplicitValueInitForUnion);
Inits = InitsForUnion;
}

// Initialize base classes.
if (auto* R = S->getType()->getAsCXXRecordDecl()) {
assert(FieldsForInit.size() + R->getNumBases() == Inits.size());
Expand Down Expand Up @@ -731,6 +736,17 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
FieldLocs.insert({Field, &Loc});
}

// In the case of a union, we don't in general have initializers for all
// of the fields. Create storage locations for the remaining fields (but
// don't associate them with values).
if (Type->isUnionType()) {
for (const FieldDecl *Field :
Env.getDataflowAnalysisContext().getModeledFields(Type)) {
if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted)
it->second = &Env.createStorageLocation(Field->getType());
}
}

// Check that we satisfy the invariant that a `RecordStorageLocation`
// contains exactly the set of modeled fields for that type.
// `ModeledFields` includes fields from all the bases, but only the
Expand Down
259 changes: 129 additions & 130 deletions clang/lib/Analysis/UnsafeBufferUsage.cpp

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions clang/lib/CodeGen/ABIInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,58 @@ ABIArgInfo ABIInfo::getNaturalAlignIndirectInReg(QualType Ty,
/*ByVal*/ false, Realign);
}

/// Appends the multiversion mangling suffix for a `target` attribute to
/// \p Out.
///
/// A default-version attribute contributes nothing. For any other version the
/// attribute's feature string is handed to the overload that mangles a
/// feature string.
void ABIInfo::appendAttributeMangling(TargetAttr *Attr,
                                      raw_ostream &Out) const {
  const bool NeedsSuffix = !Attr->isDefaultVersion();
  if (NeedsSuffix)
    appendAttributeMangling(Attr->getFeaturesStr(), Out);
}

/// Appends the multiversion mangling suffix for a `target_version` attribute
/// to \p Out by mangling the string returned by the attribute's
/// getNamesStr().
void ABIInfo::appendAttributeMangling(TargetVersionAttr *Attr,
                                      raw_ostream &Out) const {
  const auto Names = Attr->getNamesStr();
  appendAttributeMangling(Names, Out);
}

/// Appends the multiversion mangling suffix for version \p Index of a
/// `target_clones` attribute to \p Out.
///
/// The suffix is the mangling of that version's feature string, followed by
/// '.' and the attribute's mangled index for \p Index.
void ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
                                      raw_ostream &Out) const {
  const auto VersionFeatures = Attr->getFeatureStr(Index);
  appendAttributeMangling(VersionFeatures, Out);

  // The index is queried only after the feature part has been written,
  // matching the original evaluation order.
  Out << '.';
  Out << Attr->getMangledIndex(Index);
}

/// Appends the multiversion mangling suffix derived from the attribute string
/// \p AttrStr to \p Out.
///
/// The literal "default" produces ".default". Any other string is parsed with
/// the target's parseTargetAttr(); the suffix is then '.', followed by
/// "arch_<cpu>" when a CPU was parsed, followed by the parsed features with
/// their leading '+' removed, ordered by descending
/// multiVersionSortPriority(). All components are joined with '_'.
void ABIInfo::appendAttributeMangling(StringRef AttrStr,
                                      raw_ostream &Out) const {
  if (AttrStr == "default") {
    Out << ".default";
    return;
  }

  Out << '.';
  const TargetInfo &Target = CGT.getTarget();
  ParsedTargetAttr Parsed = Target.parseTargetAttr(AttrStr);

  // Comparator placing higher-priority features first.
  auto HigherPriorityFirst = [&Target](StringRef A, StringRef B) {
    // Multiversioning doesn't allow "no-${feature}", so we can
    // only have "+" prefixes here.
    assert(A.starts_with("+") && B.starts_with("+") &&
           "Features should always have a prefix.");
    return Target.multiVersionSortPriority(A.substr(1)) >
           Target.multiVersionSortPriority(B.substr(1));
  };
  llvm::sort(Parsed.Features, HigherPriorityFirst);

  // Empty until the first component has been written, then "_".
  const char *Separator = "";
  if (!Parsed.CPU.empty()) {
    Out << "arch_" << Parsed.CPU;
    Separator = "_";
  }

  for (StringRef Feature : Parsed.Features) {
    Out << Separator << Feature.substr(1);
    Separator = "_";
  }
}

// Pin the vtable to this file.
SwiftABIInfo::~SwiftABIInfo() = default;

Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/ABIInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_ABIINFO_H
#define LLVM_CLANG_LIB_CODEGEN_ABIINFO_H

#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Type.h"
#include "llvm/IR/CallingConv.h"
Expand Down Expand Up @@ -111,6 +112,15 @@ class ABIInfo {

CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty,
bool Realign = false) const;

virtual void appendAttributeMangling(TargetAttr *Attr,
raw_ostream &Out) const;
virtual void appendAttributeMangling(TargetVersionAttr *Attr,
raw_ostream &Out) const;
virtual void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
raw_ostream &Out) const;
virtual void appendAttributeMangling(StringRef AttrStr,
raw_ostream &Out) const;
};

/// Target specific hooks for defining how a type should be passed or returned
Expand Down
32 changes: 16 additions & 16 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ class EmitAssemblyHelper {
TargetTriple.getVendor() != llvm::Triple::Apple;
}

/// Decide whether the "UnifiedLTO" module flag has to be emitted.
/// The flag is wanted only when UnifiedLTO is enabled and the module is
/// either being prepared for ThinLTO or gets a regular-LTO summary.
bool shouldEmitUnifiedLTOModueFlag() const {
  if (!CodeGenOpts.UnifiedLTO)
    return false;
  if (CodeGenOpts.PrepareForThinLTO)
    return true;
  return shouldEmitRegularLTOSummary();
}

public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags,
const HeaderSearchOptions &HeaderSearchOpts,
Expand Down Expand Up @@ -1036,7 +1044,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
MPM.addPass(VerifierPass());

if (Action == Backend_EmitBC || Action == Backend_EmitLL) {
if (Action == Backend_EmitBC || Action == Backend_EmitLL ||
CodeGenOpts.FatLTO) {
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
Expand All @@ -1047,11 +1056,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!ThinLinkOS)
return;
}
if (CodeGenOpts.UnifiedLTO)
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
MPM.addPass(ThinLTOBitcodeWriterPass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else {
} else if (Action == Backend_EmitLL) {
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
/*EmitLTOSummary=*/true));
}
Expand All @@ -1065,24 +1072,17 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(1));
if (CodeGenOpts.UnifiedLTO)
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
}
if (Action == Backend_EmitBC)
if (Action == Backend_EmitBC) {
MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
EmitLTOSummary));
else
} else if (Action == Backend_EmitLL) {
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
EmitLTOSummary));
}
}
}
if (CodeGenOpts.FatLTO) {
// Set the EnableSplitLTOUnit and UnifiedLTO module flags, since FatLTO
// uses a different action than Backend_EmitBC or Backend_EmitLL.
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(CodeGenOpts.EnableSplitLTOUnit));
if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO"))

if (shouldEmitUnifiedLTOModueFlag())
TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
}

Expand Down
52 changes: 51 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13952,6 +13952,8 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
/// Emits the value of a cpu-supports builtin call whose first argument is a
/// string literal naming the feature.
///
/// If the target's validateCpuSupports() rejects the feature string, the
/// result is the constant `false`; otherwise emission is delegated to the
/// overload that takes the feature string.
Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
  const Expr *Arg = E->getArg(0)->IgnoreParenCasts();
  StringRef Feature = cast<StringLiteral>(Arg)->getString();

  const auto &Target = getContext().getTargetInfo();
  if (Target.validateCpuSupports(Feature))
    return EmitX86CpuSupports(Feature);
  return Builder.getFalse();
}

Expand Down Expand Up @@ -14041,6 +14043,8 @@ Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
ArgStr.split(Features, "+");
for (auto &Feature : Features) {
Feature = Feature.trim();
if (!llvm::AArch64::parseArchExtension(Feature))
return Builder.getFalse();
if (Feature != "default")
Features.push_back(Feature);
}
Expand Down Expand Up @@ -16639,7 +16643,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
.Case(Name, {FA_WORD, Bitmask})
#include "llvm/TargetParser/PPCTargetParser.def"
.Default({0, 0});
assert(BitMask && "Invalid target feature string. Missed by SemaChecking?");
if (!BitMask)
return Builder.getFalse();
Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
Expand Down Expand Up @@ -18007,6 +18012,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType*/ T0->getScalarType(), Intrinsic::dx_dot,
ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
} break;
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *S = EmitScalarExpr(E->getArg(2));
llvm::Type *Xty = X->getType();
llvm::Type *Yty = Y->getType();
llvm::Type *Sty = S->getType();
if (!Xty->isVectorTy() && !Yty->isVectorTy() && !Sty->isVectorTy()) {
if (Xty->isFloatingPointTy()) {
auto V = Builder.CreateFSub(Y, X);
V = Builder.CreateFMul(S, V);
return Builder.CreateFAdd(X, V, "dx.lerp");
}
llvm_unreachable("Scalar Lerp is only supported on floats.");
}
// A VectorSplat should have happened
assert(Xty->isVectorTy() && Yty->isVectorTy() && Sty->isVectorTy() &&
"Lerp of vector and scalar is not supported.");

[[maybe_unused]] auto *XVecTy =
E->getArg(0)->getType()->getAs<VectorType>();
[[maybe_unused]] auto *YVecTy =
E->getArg(1)->getType()->getAs<VectorType>();
[[maybe_unused]] auto *SVecTy =
E->getArg(2)->getType()->getAs<VectorType>();
// A HLSLVectorTruncation should have happened
assert(XVecTy->getNumElements() == YVecTy->getNumElements() &&
XVecTy->getNumElements() == SVecTy->getNumElements() &&
"Lerp requires vectors to be of the same size.");
assert(XVecTy->getElementType()->isRealFloatingType() &&
XVecTy->getElementType() == YVecTy->getElementType() &&
XVecTy->getElementType() == SVecTy->getElementType() &&
"Lerp requires float vectors to be of the same type.");
return Builder.CreateIntrinsic(
/*ReturnType*/ Xty, Intrinsic::dx_lerp, ArrayRef<Value *>{X, Y, S},
nullptr, "dx.lerp");
}
case Builtin::BI__builtin_hlsl_elementwise_frac: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("frac operand must have a float representation");
return Builder.CreateIntrinsic(
/*ReturnType*/ Op0->getType(), Intrinsic::dx_frac,
ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
}
}
return nullptr;
}
Expand Down
17 changes: 8 additions & 9 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3221,12 +3221,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,

llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
llvm::TypeSize StructSize;
llvm::TypeSize PtrElementSize;
if (ArgI.isDirect() && !ArgI.getCanBeFlattened() && STy &&
STy->getNumElements() > 1) {
StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
PtrElementSize =
[[maybe_unused]] llvm::TypeSize StructSize =
CGM.getDataLayout().getTypeAllocSize(STy);
[[maybe_unused]] llvm::TypeSize PtrElementSize =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
if (STy->containsHomogeneousScalableVectorTypes()) {
assert(StructSize == PtrElementSize &&
Expand Down Expand Up @@ -5310,12 +5309,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,

llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
llvm::TypeSize SrcTypeSize;
llvm::TypeSize DstTypeSize;
if (STy && ArgInfo.isDirect() && !ArgInfo.getCanBeFlattened()) {
SrcTypeSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
[[maybe_unused]] llvm::TypeSize SrcTypeSize =
CGM.getDataLayout().getTypeAllocSize(SrcTy);
[[maybe_unused]] llvm::TypeSize DstTypeSize =
CGM.getDataLayout().getTypeAllocSize(STy);
if (STy->containsHomogeneousScalableVectorTypes()) {
assert(SrcTypeSize == DstTypeSize &&
"Only allow non-fractional movement of structure with "
Expand Down
64 changes: 40 additions & 24 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7023,31 +7023,47 @@ void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
S.getSingleClause<OMPUseClause>())) &&
"OMPNowaitClause clause is used separately in OMPInteropDirective.");

if (const auto *C = S.getSingleClause<OMPInitClause>()) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
llvm::omp::OMPInteropType InteropType = llvm::omp::OMPInteropType::Unknown;
if (C->getIsTarget()) {
InteropType = llvm::omp::OMPInteropType::Target;
} else {
assert(C->getIsTargetSync() && "Expected interop-type target/targetsync");
InteropType = llvm::omp::OMPInteropType::TargetSync;
auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
if (!ItOMPInitClause.empty()) {
// Look at the multiple init clauses
for (const OMPInitClause *C : ItOMPInitClause) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
llvm::omp::OMPInteropType InteropType =
llvm::omp::OMPInteropType::Unknown;
if (C->getIsTarget()) {
InteropType = llvm::omp::OMPInteropType::Target;
} else {
assert(C->getIsTargetSync() &&
"Expected interop-type target/targetsync");
InteropType = llvm::omp::OMPInteropType::TargetSync;
}
OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
Device, NumDependences, DependenceList,
Data.HasNowaitClause);
}
}
auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
if (!ItOMPDestroyClause.empty()) {
// Look at the multiple destroy clauses
for (const OMPDestroyClause *C : ItOMPDestroyClause) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
NumDependences, DependenceList,
Data.HasNowaitClause);
}
}
auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
if (!ItOMPUseClause.empty()) {
// Look at the multiple use clauses
for (const OMPUseClause *C : ItOMPUseClause) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
NumDependences, DependenceList,
Data.HasNowaitClause);
}
OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, Device,
NumDependences, DependenceList,
Data.HasNowaitClause);
} else if (const auto *C = S.getSingleClause<OMPDestroyClause>()) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
NumDependences, DependenceList,
Data.HasNowaitClause);
} else if (const auto *C = S.getSingleClause<OMPUseClause>()) {
llvm::Value *InteropvarPtr =
EmitLValue(C->getInteropVar()).getPointer(*this);
OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
NumDependences, DependenceList,
Data.HasNowaitClause);
}
}

Expand Down
115 changes: 18 additions & 97 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ CodeGenModule::CodeGenModule(ASTContext &C,
// Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
(!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
TBAA.reset(new CodeGenTBAA(Context, TheModule, CodeGenOpts, getLangOpts(),
getCXXABI().getMangleContext()));
TBAA.reset(new CodeGenTBAA(Context, getTypes(), TheModule, CodeGenOpts,
getLangOpts(), getCXXABI().getMangleContext()));

// If debug info or coverage generation is enabled, create the CGDebugInfo
// object.
Expand Down Expand Up @@ -1727,59 +1727,6 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
Out << ".resolver";
}

/// Writes the mangling suffix for a `target_version` attribute to \p Out.
///
/// The default version is written as ".default". Otherwise the suffix is "._"
/// followed by one "M<feature>" entry per feature reported by
/// Attr->getFeatures(), in ascending multiVersionSortPriority() order.
static void AppendTargetVersionMangling(const CodeGenModule &CGM,
const TargetVersionAttr *Attr,
raw_ostream &Out) {
if (Attr->isDefaultVersion()) {
Out << ".default";
return;
}
Out << "._";
const TargetInfo &TI = CGM.getTarget();
llvm::SmallVector<StringRef, 8> Feats;
Attr->getFeatures(Feats);
// Stable sort: features with equal priority keep the order in which
// getFeatures() produced them.
llvm::stable_sort(Feats, [&TI](const StringRef FeatL, const StringRef FeatR) {
return TI.multiVersionSortPriority(FeatL) <
TI.multiVersionSortPriority(FeatR);
});
// Each feature is prefixed with 'M'; no separator is written between them.
for (const auto &Feat : Feats) {
Out << 'M';
Out << Feat;
}
}

/// Writes the mangling suffix for a `target` attribute to \p Out.
///
/// The default version writes nothing. Otherwise the suffix is '.', then
/// "arch_<cpu>" if the parsed attribute names a CPU, then the parsed features
/// (leading '+' removed) in descending multiVersionSortPriority() order, with
/// '_' between consecutive components.
static void AppendTargetMangling(const CodeGenModule &CGM,
const TargetAttr *Attr, raw_ostream &Out) {
if (Attr->isDefaultVersion())
return;

Out << '.';
const TargetInfo &Target = CGM.getTarget();
ParsedTargetAttr Info = Target.parseTargetAttr(Attr->getFeaturesStr());
llvm::sort(Info.Features, [&Target](StringRef LHS, StringRef RHS) {
// Multiversioning doesn't allow "no-${feature}", so we can
// only have "+" prefixes here.
assert(LHS.starts_with("+") && RHS.starts_with("+") &&
"Features should always have a prefix.");
return Target.multiVersionSortPriority(LHS.substr(1)) >
Target.multiVersionSortPriority(RHS.substr(1));
});

// Tracks whether a '_' separator is needed before the next component.
bool IsFirst = true;

if (!Info.CPU.empty()) {
IsFirst = false;
Out << "arch_" << Info.CPU;
}

for (StringRef Feat : Info.Features) {
if (!IsFirst)
Out << '_';
IsFirst = false;
// Drop the leading '+' asserted in the sort comparator above.
Out << Feat.substr(1);
}
}

// Returns true if GD is a function decl with internal linkage and
// needs a unique suffix after the mangled name.
static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
Expand All @@ -1789,41 +1736,6 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}

/// Writes the mangling suffix for version \p VersionIndex of a
/// `target_clones` attribute to \p Out.
///
/// On AArch64 triples: "default" is written as ".default"; any other feature
/// string is split on '+', and the suffix is "._" followed by one
/// "M<feature>" entry per piece in ascending multiVersionSortPriority()
/// order. No mangled index is appended on this path.
///
/// On all other triples: the suffix is '.', then the feature string (with a
/// leading "arch=" rewritten to "arch_"), then '.' and the attribute's
/// mangled index for \p VersionIndex.
static void AppendTargetClonesMangling(const CodeGenModule &CGM,
const TargetClonesAttr *Attr,
unsigned VersionIndex,
raw_ostream &Out) {
const TargetInfo &TI = CGM.getTarget();
if (TI.getTriple().isAArch64()) {
StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
if (FeatureStr == "default") {
Out << ".default";
return;
}
Out << "._";
SmallVector<StringRef, 8> Features;
FeatureStr.split(Features, "+");
// Stable sort: pieces with equal priority keep their order from the split.
llvm::stable_sort(Features,
[&TI](const StringRef FeatL, const StringRef FeatR) {
return TI.multiVersionSortPriority(FeatL) <
TI.multiVersionSortPriority(FeatR);
});
for (auto &Feat : Features) {
Out << 'M';
Out << Feat;
}
} else {
Out << '.';
StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
// sizeof("arch=") - 1 is the length of the "arch=" prefix being skipped.
if (FeatureStr.starts_with("arch="))
Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
else
Out << FeatureStr;

Out << '.' << Attr->getMangledIndex(VersionIndex);
}
}

static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
Expand Down Expand Up @@ -1877,16 +1789,25 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
FD->getAttr<CPUSpecificAttr>(),
GD.getMultiVersionIndex(), Out);
break;
case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
case MultiVersionKind::Target: {
auto *Attr = FD->getAttr<TargetAttr>();
const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
Info.appendAttributeMangling(Attr, Out);
break;
case MultiVersionKind::TargetVersion:
AppendTargetVersionMangling(CGM, FD->getAttr<TargetVersionAttr>(), Out);
}
case MultiVersionKind::TargetVersion: {
auto *Attr = FD->getAttr<TargetVersionAttr>();
const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
Info.appendAttributeMangling(Attr, Out);
break;
case MultiVersionKind::TargetClones:
AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(),
GD.getMultiVersionIndex(), Out);
}
case MultiVersionKind::TargetClones: {
auto *Attr = FD->getAttr<TargetClonesAttr>();
unsigned Index = GD.getMultiVersionIndex();
const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo();
Info.appendAttributeMangling(Attr, Index, Out);
break;
}
case MultiVersionKind::None:
llvm_unreachable("None multiversion type isn't valid here");
}
Expand Down
44 changes: 33 additions & 11 deletions clang/lib/CodeGen/CodeGenTBAA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
//===----------------------------------------------------------------------===//

#include "CodeGenTBAA.h"
#include "CGRecordLayout.h"
#include "CodeGenTypes.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Mangle.h"
Expand All @@ -26,16 +28,16 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
using namespace clang;
using namespace CodeGen;

CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M,
const CodeGenOptions &CGO,
CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes,
llvm::Module &M, const CodeGenOptions &CGO,
const LangOptions &Features, MangleContext &MContext)
: Context(Ctx), Module(M), CodeGenOpts(CGO),
Features(Features), MContext(MContext), MDHelper(M.getContext()),
Root(nullptr), Char(nullptr)
{}
: Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO),
Features(Features), MContext(MContext), MDHelper(M.getContext()),
Root(nullptr), Char(nullptr) {}

// Out-of-line destructor; it performs no work beyond destroying the members.
CodeGenTBAA::~CodeGenTBAA() = default;
Expand Down Expand Up @@ -294,14 +296,34 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset,
return false;

const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD);

unsigned idx = 0;
for (RecordDecl::field_iterator i = RD->field_begin(),
e = RD->field_end(); i != e; ++i, ++idx) {
if ((*i)->isZeroSize(Context) || (*i)->isUnnamedBitfield())
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
if ((*i)->isZeroSize(Context))
continue;
uint64_t Offset = BaseOffset +
Layout.getFieldOffset(idx) / Context.getCharWidth();

uint64_t Offset =
BaseOffset + Layout.getFieldOffset(idx) / Context.getCharWidth();

// Create a single field for consecutive named bitfields using char as
// base type.
if ((*i)->isBitField()) {
const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(*i);
if (Info.Offset != 0)
continue;
unsigned CurrentBitFieldSize = Info.StorageSize;
uint64_t Size =
llvm::divideCeil(CurrentBitFieldSize, Context.getCharWidth());
llvm::MDNode *TBAAType = getChar();
llvm::MDNode *TBAATag =
getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
Fields.push_back(
llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
continue;
}

QualType FieldQTy = i->getType();
if (!CollectFields(Offset, FieldQTy, Fields,
MayAlias || TypeHasMayAlias(FieldQTy)))
Expand Down
7 changes: 5 additions & 2 deletions clang/lib/CodeGen/CodeGenTBAA.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ namespace clang {
class Type;

namespace CodeGen {
class CodeGenTypes;

// TBAAAccessKind - A kind of TBAA memory access descriptor.
enum class TBAAAccessKind : unsigned {
Expand Down Expand Up @@ -115,6 +116,7 @@ struct TBAAAccessInfo {
/// while lowering AST types to LLVM types.
class CodeGenTBAA {
ASTContext &Context;
CodeGenTypes &CGTypes;
llvm::Module &Module;
const CodeGenOptions &CodeGenOpts;
const LangOptions &Features;
Expand Down Expand Up @@ -167,8 +169,9 @@ class CodeGenTBAA {
llvm::MDNode *getBaseTypeInfoHelper(const Type *Ty);

public:
CodeGenTBAA(ASTContext &Ctx, llvm::Module &M, const CodeGenOptions &CGO,
const LangOptions &Features, MangleContext &MContext);
CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, llvm::Module &M,
const CodeGenOptions &CGO, const LangOptions &Features,
MangleContext &MContext);
~CodeGenTBAA();

/// getTypeInfo - Get metadata used to describe accesses to objects of the
Expand Down
38 changes: 36 additions & 2 deletions clang/lib/CodeGen/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace clang;
using namespace clang::CodeGen;
Expand Down Expand Up @@ -75,6 +76,12 @@ class AArch64ABIInfo : public ABIInfo {
/// Answers the bfloat argument/return query by forwarding to the target's
/// hasBFloat16Type().
bool allowBFloatArgsAndRet() const override {
return getTarget().hasBFloat16Type();
}

using ABIInfo::appendAttributeMangling;
void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index,
raw_ostream &Out) const override;
void appendAttributeMangling(StringRef AttrStr,
raw_ostream &Out) const override;
};

class AArch64SwiftABIInfo : public SwiftABIInfo {
Expand Down Expand Up @@ -125,8 +132,7 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
assert(Error.empty());

auto *Fn = cast<llvm::Function>(GV);
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
Fn->addFnAttr("sign-return-address", BPI.getSignReturnAddrStr());

if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
Fn->addFnAttr("sign-return-address-key",
Expand Down Expand Up @@ -857,6 +863,34 @@ void AArch64TargetCodeGenInfo::checkFunctionCallABI(
<< Callee->getDeclName();
}

/// Appends the mangling for the target_clones version selected by \p Index.
///
/// The feature string of that version is looked up on \p Attr and handed to
/// the StringRef overload, which writes the suffix to \p Out.
void AArch64ABIInfo::appendAttributeMangling(TargetClonesAttr *Attr,
                                             unsigned Index,
                                             raw_ostream &Out) const {
  const auto FeatureStr = Attr->getFeatureStr(Index);
  appendAttributeMangling(FeatureStr, Out);
}

/// Appends the multiversion mangling suffix described by \p AttrStr to \p Out.
///
/// The literal string "default" produces ".default". Any other string
/// produces "._" followed by one "M<name>" entry per recognised feature:
/// \p AttrStr is split on '+', each piece is trimmed, the pieces are sorted,
/// and every piece accepted by llvm::AArch64::parseArchExtension contributes
/// the extension's Name. Pieces that do not parse are skipped.
void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
                                             raw_ostream &Out) const {
  if (AttrStr == "default") {
    Out << ".default";
    return;
  }

  Out << "._";

  SmallVector<StringRef, 8> Pieces;
  AttrStr.split(Pieces, "+");
  for (StringRef &Piece : Pieces)
    Piece = Piece.trim();

  // Sort so the emitted suffix does not depend on the order in which the
  // features were written. StringRef's operator< is compare() < 0.
  llvm::sort(Pieces);

  for (StringRef Piece : Pieces) {
    auto Ext = llvm::AArch64::parseArchExtension(Piece);
    if (!Ext)
      continue;
    Out << 'M' << Ext->Name;
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
AArch64ABIKind Kind) {
Expand Down
8 changes: 1 addition & 7 deletions clang/lib/CodeGen/Targets/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,7 @@ class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
diag::warn_target_unsupported_branch_protection_attribute)
<< Arch;
} else {
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 &&
"Unexpected SignReturnAddressScopeKind");
Fn->addFnAttr(
"sign-return-address",
SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);

Fn->addFnAttr("sign-return-address", BPI.getSignReturnAddrStr());
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Arch/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,14 @@ getAArch64MicroArchFeaturesFromMtune(const Driver &D, StringRef Mtune,
// Handle CPU name is 'native'.
if (MtuneLowerCase == "native")
MtuneLowerCase = std::string(llvm::sys::getHostCPUName());

// 'cyclone' and later have zero-cycle register moves and zeroing.
if (MtuneLowerCase == "cyclone" ||
StringRef(MtuneLowerCase).starts_with("apple")) {
Features.push_back("+zcm");
Features.push_back("+zcz");
}

return true;
}

Expand Down
9 changes: 4 additions & 5 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4479,10 +4479,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
options::OPT_gpubnames, options::OPT_gno_pubnames);
if (DwarfFission != DwarfFissionKind::None ||
(PubnamesArg && checkDebugInfoOption(PubnamesArg, Args, D, TC)))
if (DebuggerTuning != llvm::DebuggerKind::LLDB &&
(!PubnamesArg ||
(!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
!PubnamesArg->getOption().matches(options::OPT_gno_pubnames))))
if (!PubnamesArg ||
(!PubnamesArg->getOption().matches(options::OPT_gno_gnu_pubnames) &&
!PubnamesArg->getOption().matches(options::OPT_gno_pubnames)))
CmdArgs.push_back(PubnamesArg && PubnamesArg->getOption().matches(
options::OPT_gpubnames)
? "-gpubnames"
Expand Down Expand Up @@ -5959,7 +5958,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,

if (Arg *A = Args.getLastArg(options::OPT_fbasic_block_address_map,
options::OPT_fno_basic_block_address_map)) {
if (Triple.isX86() && Triple.isOSBinFormatELF()) {
if ((Triple.isX86() || Triple.isAArch64()) && Triple.isOSBinFormatELF()) {
if (A->getOption().matches(options::OPT_fbasic_block_address_map))
A->render(Args, CmdArgs);
} else {
Expand Down
12 changes: 6 additions & 6 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2763,14 +2763,10 @@ void tools::addOpenMPDeviceRTL(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix,
const llvm::Triple &Triple) {
const llvm::Triple &Triple,
const ToolChain &HostTC) {
SmallVector<StringRef, 8> LibraryPaths;

// Add path to clang lib / lib64 folder.
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
LibraryPaths.emplace_back(DefaultLibPath.c_str());

// Add user defined library paths from LIBRARY_PATH.
std::optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
Expand All @@ -2782,6 +2778,10 @@ void tools::addOpenMPDeviceRTL(const Driver &D,
LibraryPaths.emplace_back(Path.trim());
}

// Check all of the standard library search paths used by the compiler.
for (const auto &LibPath : HostTC.getFilePaths())
LibraryPaths.emplace_back(LibPath);

OptSpecifier LibomptargetBCPathOpt =
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgpu_bc_path_EQ
: options::OPT_libomptarget_nvptx_bc_path_EQ;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/CommonArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,8 @@ void addMachineOutlinerArgs(const Driver &D, const llvm::opt::ArgList &Args,

void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix, const llvm::Triple &Triple);
StringRef BitcodeSuffix, const llvm::Triple &Triple,
const ToolChain &HostTC);

void addOutlineAtomicsArgs(const Driver &D, const ToolChain &TC,
const llvm::opt::ArgList &Args,
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ void CudaToolChain::addClangTargetOptions(
return;

addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
getTriple());
getTriple(), HostTC);
}
}

Expand Down
17 changes: 12 additions & 5 deletions clang/lib/Driver/ToolChains/Linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,18 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
ExtraOpts.push_back("relro");
}

// Android ARM/AArch64 use max-page-size=4096 to reduce VMA usage. Note, lld
// from 11 onwards default max-page-size to 65536 for both ARM and AArch64.
if ((Triple.isARM() || Triple.isAArch64()) && Triple.isAndroid()) {
ExtraOpts.push_back("-z");
ExtraOpts.push_back("max-page-size=4096");
// Note: lld from 11 onwards defaults max-page-size to 65536 for both ARM and
// AArch64.
if (Triple.isAndroid()) {
if (Triple.isARM()) {
// Android ARM uses max-page-size=4096 to reduce VMA usage.
ExtraOpts.push_back("-z");
ExtraOpts.push_back("max-page-size=4096");
} else if (Triple.isAArch64()) {
// Android AArch64 uses max-page-size=16384 to support 4k/16k page sizes.
ExtraOpts.push_back("-z");
ExtraOpts.push_back("max-page-size=16384");
}
}

if (GCCInstallation.getParentLibPath().contains("opt/rh/"))
Expand Down
55 changes: 31 additions & 24 deletions clang/lib/Frontend/CompilerInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,30 +1061,7 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
}
}

if (getDiagnosticOpts().ShowCarets) {
// We can have multiple diagnostics sharing one diagnostic client.
// Get the total number of warnings/errors from the client.
unsigned NumWarnings = getDiagnostics().getClient()->getNumWarnings();
unsigned NumErrors = getDiagnostics().getClient()->getNumErrors();

if (NumWarnings)
OS << NumWarnings << " warning" << (NumWarnings == 1 ? "" : "s");
if (NumWarnings && NumErrors)
OS << " and ";
if (NumErrors)
OS << NumErrors << " error" << (NumErrors == 1 ? "" : "s");
if (NumWarnings || NumErrors) {
OS << " generated";
if (getLangOpts().CUDA) {
if (!getLangOpts().CUDAIsDevice) {
OS << " when compiling for host";
} else {
OS << " when compiling for " << getTargetOpts().CPU;
}
}
OS << ".\n";
}
}
printDiagnosticStats();

if (getFrontendOpts().ShowStats) {
if (hasFileManager()) {
Expand Down Expand Up @@ -1112,6 +1089,36 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
return !getDiagnostics().getClient()->getNumErrors();
}

void CompilerInstance::printDiagnosticStats() {
if (!getDiagnosticOpts().ShowCarets)
return;

raw_ostream &OS = getVerboseOutputStream();

// We can have multiple diagnostics sharing one diagnostic client.
// Get the total number of warnings/errors from the client.
unsigned NumWarnings = getDiagnostics().getClient()->getNumWarnings();
unsigned NumErrors = getDiagnostics().getClient()->getNumErrors();

if (NumWarnings)
OS << NumWarnings << " warning" << (NumWarnings == 1 ? "" : "s");
if (NumWarnings && NumErrors)
OS << " and ";
if (NumErrors)
OS << NumErrors << " error" << (NumErrors == 1 ? "" : "s");
if (NumWarnings || NumErrors) {
OS << " generated";
if (getLangOpts().CUDA) {
if (!getLangOpts().CUDAIsDevice) {
OS << " when compiling for host";
} else {
OS << " when compiling for " << getTargetOpts().CPU;
}
}
OS << ".\n";
}
}

void CompilerInstance::LoadRequestedPlugins() {
// Load any requested plugins.
for (const std::string &Path : getFrontendOpts().Plugins) {
Expand Down
19 changes: 11 additions & 8 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) {
return *this;
}

/// Builds a CompilerInvocation from a CowCompilerInvocation.
///
/// The base subobject is created through its EmptyConstructor overload and is
/// then filled in by CompilerInvocationBase::deep_copy_assign(X).
CompilerInvocation::CompilerInvocation(const CowCompilerInvocation &X)
: CompilerInvocationBase(EmptyConstructor{}) {
CompilerInvocationBase::deep_copy_assign(X);
}

/// Assigns from a CowCompilerInvocation by calling
/// CompilerInvocationBase::deep_copy_assign(X); returns *this.
CompilerInvocation &
CompilerInvocation::operator=(const CowCompilerInvocation &X) {
CompilerInvocationBase::deep_copy_assign(X);
return *this;
}

namespace {
template <typename T>
T &ensureOwned(std::shared_ptr<T> &Storage) {
Expand Down Expand Up @@ -1975,14 +1986,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.LinkBitcodeFiles.push_back(F);
}

if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) {
if (T.isOSAIX()) {
StringRef Name = A->getValue();
if (Name == "local-dynamic")
Diags.Report(diag::err_aix_unsupported_tls_model) << Name;
}
}

if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) {
StringRef Val = A->getValue();
Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val);
Expand Down
72 changes: 6 additions & 66 deletions clang/lib/Headers/__clang_hip_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -1306,75 +1306,15 @@ float min(float __x, float __y) { return __builtin_fminf(__x, __y); }
__DEVICE__
double min(double __x, double __y) { return __builtin_fmin(__x, __y); }

// Define host min/max functions.
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) && \
!defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)

#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
#define DEFINE_MIN_MAX_FUNCTIONS(ret_type, type1, type2) \
inline ret_type min(const type1 __a, const type2 __b) { \
return (__a < __b) ? __a : __b; \
} \
inline ret_type max(const type1 __a, const type2 __b) { \
return (__a > __b) ? __a : __b; \
}

// Define min and max functions for same type comparisons
DEFINE_MIN_MAX_FUNCTIONS(int, int, int)
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, unsigned int)
DEFINE_MIN_MAX_FUNCTIONS(long, long, long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, unsigned long)
DEFINE_MIN_MAX_FUNCTIONS(long long, long long, long long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long,
unsigned long long)

// The host min/max functions below accept mixed signed/unsigned integer
// parameters and perform unsigned comparisons, which may produce unexpected
// results if a signed integer was passed unintentionally. To avoid this
// happening silently, these overloaded functions are not defined by default.
// However, for compatibility with CUDA, they will be defined if users define
// __HIP_DEFINE_MIXED_HOST_MIN_MAX__.
#ifdef __HIP_DEFINE_MIXED_HOST_MIN_MAX__
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, int, unsigned int)
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, int)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, long, unsigned long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, long long, unsigned long long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long, long long)
#endif // ifdef __HIP_DEFINE_MIXED_HOST_MIN_MAX__

// Floating-point comparisons using built-in functions
inline float min(float const __a, float const __b) {
return __builtin_fminf(__a, __b);
}
inline double min(double const __a, double const __b) {
return __builtin_fmin(__a, __b);
}
inline double min(float const __a, double const __b) {
return __builtin_fmin(__a, __b);
}
inline double min(double const __a, float const __b) {
return __builtin_fmin(__a, __b);
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return __arg1 < __arg2 ? __arg1 : __arg2;
}

inline float max(float const __a, float const __b) {
return __builtin_fmaxf(__a, __b);
}
inline double max(double const __a, double const __b) {
return __builtin_fmax(__a, __b);
}
inline double max(float const __a, double const __b) {
return __builtin_fmax(__a, __b);
__host__ inline static int max(int __arg1, int __arg2) {
return __arg1 > __arg2 ? __arg1 : __arg2;
}
inline double max(double const __a, float const __b) {
return __builtin_fmax(__a, __b);
}

#pragma pop_macro("DEFINE_MIN_MAX_FUNCTIONS")

#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) &&
// !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif

#pragma pop_macro("__DEVICE__")
Expand Down
121 changes: 62 additions & 59 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2099,9 +2099,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// signed [16 x i8] vectors, saving each sum in the corresponding element of
/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are
/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.
/// signed [16 x i8] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [16 x i8].
///
/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -2119,10 +2121,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a,
}

/// Adds, with saturation, the corresponding elements of two 128-bit
/// signed [8 x i16] vectors, saving each sum in the corresponding element of
/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF
/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
/// signed [8 x i16] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [8 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -2141,8 +2144,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a,

/// Adds, with saturation, the corresponding elements of two 128-bit
/// unsigned [16 x i8] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF
/// are saturated to 0xFF. Negative sums are saturated to 0x00.
/// of a 128-bit result vector of [16 x i8].
///
/// Positive sums greater than 0xFF are saturated to 0xFF. Negative sums are
/// saturated to 0x00.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -2161,8 +2166,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a,

/// Adds, with saturation, the corresponding elements of two 128-bit
/// unsigned [8 x i16] vectors, saving each sum in the corresponding element
/// of a 128-bit result vector of [8 x i16]. Positive sums greater than
/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.
/// of a 128-bit result vector of [8 x i16].
///
/// Positive sums greater than 0xFFFF are saturated to 0xFFFF. Negative sums
/// are saturated to 0x0000.
///
/// \headerfile <x86intrin.h>
///
Expand Down Expand Up @@ -2518,10 +2525,12 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
return (__m128i)((__v2du)__a - (__v2du)__b);
}

/// Subtracts corresponding 8-bit signed integer values in the input and
/// returns the differences in the corresponding bytes in the destination.
/// Differences greater than 0x7F are saturated to 0x7F, and differences less
/// than 0x80 are saturated to 0x80.
/// Subtracts, with saturation, corresponding 8-bit signed integer values in
/// the input and returns the differences in the corresponding bytes in the
/// destination.
///
/// Differences greater than 0x7F are saturated to 0x7F, and differences
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -2538,8 +2547,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a,
return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
}

/// Subtracts corresponding 16-bit signed integer values in the input and
/// returns the differences in the corresponding bytes in the destination.
/// Subtracts, with saturation, corresponding 16-bit signed integer values in
/// the input and returns the differences in the corresponding bytes in the
/// destination.
///
/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less
/// than 0x8000 are saturated to 0x8000.
///
Expand All @@ -2558,9 +2569,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a,
return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
}

/// Subtracts corresponding 8-bit unsigned integer values in the input
/// and returns the differences in the corresponding bytes in the
/// destination. Differences less than 0x00 are saturated to 0x00.
/// Subtracts, with saturation, corresponding 8-bit unsigned integer values in
/// the input and returns the differences in the corresponding bytes in the
/// destination.
///
/// Differences less than 0x00 are saturated to 0x00.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -2577,9 +2590,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a,
return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
}

/// Subtracts corresponding 16-bit unsigned integer values in the input
/// and returns the differences in the corresponding bytes in the
/// destination. Differences less than 0x0000 are saturated to 0x0000.
/// Subtracts, with saturation, corresponding 16-bit unsigned integer values in
/// the input and returns the differences in the corresponding bytes in the
/// destination.
///
/// Differences less than 0x0000 are saturated to 0x0000.
///
/// \headerfile <x86intrin.h>
///
Expand Down Expand Up @@ -4050,80 +4065,68 @@ void _mm_mfence(void);
} // extern "C"
#endif

/// Converts 16-bit signed integers from both 128-bit integer vector
/// operands into 8-bit signed integers, and packs the results into the
/// destination. Positive values greater than 0x7F are saturated to 0x7F.
/// Negative values less than 0x80 are saturated to 0x80.
/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
/// vector operands into 8-bit signed integers, and packs the results into
/// the destination.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.
///
/// \param __a
/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
/// a signed integer and is converted to a 8-bit signed integer with
/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the lower 64 bits of the result.
/// \param __b
/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
/// a signed integer and is converted to a 8-bit signed integer with
/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less
/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a,
__m128i __b) {
// Both operands are passed to the builtin as __v8hi; its result is cast back
// to __m128i for the caller.
return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
}

/// Converts 32-bit signed integers from both 128-bit integer vector
/// operands into 16-bit signed integers, and packs the results into the
/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.
/// Negative values less than 0x8000 are saturated to 0x8000.
/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
/// vector operands into 16-bit signed integers, and packs the results into
/// the destination.
///
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
/// values less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.
///
/// \param __a
/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
/// a signed integer and is converted to a 16-bit signed integer with
/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
/// are written to the lower 64 bits of the result.
/// \param __b
/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as
/// a signed integer and is converted to a 16-bit signed integer with
/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values
/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values
/// A 128-bit integer vector of [4 x i32]. The converted [4 x i16] values
/// are written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a,
__m128i __b) {
// Both operands are passed to the builtin as __v4si; its result is cast back
// to __m128i for the caller.
return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
}

/// Converts 16-bit signed integers from both 128-bit integer vector
/// operands into 8-bit unsigned integers, and packs the results into the
/// destination. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0x00 are saturated to 0x00.
/// Converts, with saturation, 16-bit signed integers from both 128-bit integer
/// vector operands into 8-bit unsigned integers, and packs the results into
/// the destination.
///
/// Values greater than 0xFF are saturated to 0xFF. Values less than 0x00
/// are saturated to 0x00.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.
///
/// \param __a
/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
/// a signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the lower 64 bits of the result.
/// \param __b
/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as
/// a signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are
/// A 128-bit integer vector of [8 x i16]. The converted [8 x i8] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [16 x i8] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a,
Expand Down
48 changes: 32 additions & 16 deletions clang/lib/Headers/fmaintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a scalar multiply-add of the single-precision values in the
/// low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
Expand Down Expand Up @@ -88,7 +89,8 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)

/// Computes a scalar multiply-add of the double-precision values in the
/// low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
Expand Down Expand Up @@ -156,7 +158,8 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a scalar multiply-subtract of the single-precision values in
/// the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
Expand Down Expand Up @@ -184,7 +187,8 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)

/// Computes a scalar multiply-subtract of the double-precision values in
/// the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
Expand Down Expand Up @@ -252,7 +256,8 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a scalar negated multiply-add of the single-precision values in
/// the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
Expand Down Expand Up @@ -280,7 +285,8 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)

/// Computes a scalar negated multiply-add of the double-precision values
/// in the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
Expand Down Expand Up @@ -348,7 +354,8 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a scalar negated multiply-subtract of the single-precision
/// values in the low 32 bits of 128-bit vectors of [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
/// result[127:32] = __A[127:32]
/// \endcode
Expand Down Expand Up @@ -376,7 +383,8 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)

/// Computes a scalar negated multiply-subtract of the double-precision
/// values in the low 64 bits of 128-bit vectors of [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = __A[127:64]
/// \endcode
Expand Down Expand Up @@ -404,7 +412,8 @@ _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
Expand All @@ -430,7 +439,8 @@ _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)

/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// \endcode
Expand All @@ -454,7 +464,8 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)

/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [4 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
Expand All @@ -480,7 +491,8 @@ _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)

/// Computes a multiply with alternating add/subtract of 128-bit vectors of
/// [2 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// \endcode
Expand Down Expand Up @@ -664,7 +676,8 @@ _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)

/// Computes a multiply with alternating add/subtract of 256-bit vectors of
/// [8 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
Expand Down Expand Up @@ -694,7 +707,8 @@ _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)

/// Computes a multiply with alternating add/subtract of 256-bit vectors of
/// [4 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
Expand All @@ -720,7 +734,8 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)

/// Computes a vector multiply with alternating add/subtract of 256-bit
/// vectors of [8 x float].
/// \code
///
/// \code{.operation}
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
Expand Down Expand Up @@ -750,7 +765,8 @@ _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)

/// Computes a vector multiply with alternating add/subtract of 256-bit
/// vectors of [4 x double].
/// \code
///
/// \code{.operation}
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
Expand Down
102 changes: 102 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,74 @@ double3 floor(double3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_floor)
double4 floor(double4);

//===----------------------------------------------------------------------===//
// frac builtins
//===----------------------------------------------------------------------===//

/// \fn T frac(T x)
/// \brief Returns the fractional (or decimal) part of the \a x parameter.
/// \param x The specified input value.
///
/// The return value is greater than or equal to 0 and less than 1.

// half, half2, half3 and half4 overloads; each one is annotated with
// _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2).
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
half frac(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
half2 frac(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
half3 frac(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
half4 frac(half4);

// float, float2, float3 and float4 overloads; like the half overloads they
// alias __builtin_hlsl_elementwise_frac, but carry no availability annotation.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
float frac(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
float2 frac(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
float3 frac(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_frac)
float4 frac(float4);

//===----------------------------------------------------------------------===//
// lerp builtins
//===----------------------------------------------------------------------===//

/// \fn T lerp(T x, T y, T s)
/// \brief Returns the linear interpolation of x to y by s.
/// \param x [in] The first floating-point value.
/// \param y [in] The second floating-point value.
/// \param s [in] A value that linearly interpolates between the x parameter and
/// the y parameter.
///
/// Linear interpolation is based on the following formula: x*(1-s) + y*s which
/// can equivalently be written as x + s(y-x).

// half, half2, half3 and half4 overloads; each one is annotated with
// _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2).
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
half lerp(half, half, half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
half2 lerp(half2, half2, half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
half3 lerp(half3, half3, half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
half4 lerp(half4, half4, half4);

// float, float2, float3 and float4 overloads; like the half overloads they
// alias __builtin_hlsl_lerp, but carry no availability annotation.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
float lerp(float, float, float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
float2 lerp(float2, float2, float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
float3 lerp(float3, float3, float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp)
float4 lerp(float4, float4, float4);

//===----------------------------------------------------------------------===//
// log builtins
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -763,6 +831,40 @@ uint64_t3 reversebits(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse)
uint64_t4 reversebits(uint64_t4);

//===----------------------------------------------------------------------===//
// round builtins
//===----------------------------------------------------------------------===//

/// \fn T round(T x)
/// \brief Rounds the specified value \a x to the nearest integer.
/// \param x The specified input value.
///
/// The return value is the \a x parameter, rounded to the nearest integer
/// within a floating-point type. Halfway cases are rounded to the nearest
/// even value.

// half, half2, half3 and half4 overloads; each one is annotated with
// _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2).
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
half round(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
half2 round(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
half3 round(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
half4 round(half4);

// float, float2, float3 and float4 overloads; like the half overloads they
// alias __builtin_elementwise_round, but carry no availability annotation.
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
float round(float);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
float2 round(float2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
float3 round(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_round)
float4 round(float4);

//===----------------------------------------------------------------------===//
// sin builtins
//===----------------------------------------------------------------------===//
Expand Down
148 changes: 70 additions & 78 deletions clang/lib/Headers/mmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,23 @@ _mm_cvtm64_si64(__m64 __m)
return (long long)__m;
}

/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
/// a 64-bit integer vector of [8 x i8] as the result. Positive values
/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
/// are saturated to 0x80.
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
/// vector parameters of [4 x i16] into 8-bit signed integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit signed integer with
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
/// Negative values less than 0x80 are saturated to 0x80. The converted
/// [4 x i8] values are written to the lower 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit signed integer with
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
/// Negative values less than 0x80 are saturated to 0x80. The converted
/// [4 x i8] values are written to the upper 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
Expand All @@ -135,28 +130,23 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}

/// Converts 32-bit signed integers from both 64-bit integer vector
/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
/// a 64-bit integer vector of [4 x i16] as the result. Positive values
/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
/// 0x8000 are saturated to 0x8000.
/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
/// vector parameters of [2 x i32] into 16-bit signed integer values, and
/// constructs a 64-bit integer vector of [4 x i16] as the result.
///
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
/// values less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
/// 32-bit signed integer and is converted to a 16-bit signed integer with
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
/// [2 x i16] values are written to the lower 32 bits of the result.
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
/// 32-bit signed integer and is converted to a 16-bit signed integer with
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
/// [2 x i16] values are written to the upper 32 bits of the result.
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
Expand All @@ -165,28 +155,23 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}

/// Converts 16-bit signed integers from both 64-bit integer vector
/// parameters of [4 x i16] into 8-bit unsigned integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
/// to 0.
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
/// constructs a 64-bit integer vector of [8 x i8] as the result.
///
/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
/// saturated to 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
///
/// \param __m1
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
/// the lower 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the lower 32 bits of the result.
/// \param __m2
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
/// the upper 32 bits of the result.
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
/// written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
/// values.
static __inline__ __m64 __DEFAULT_FN_ATTRS
Expand Down Expand Up @@ -400,11 +385,13 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
}

/// Adds each 8-bit signed integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
/// Adds, with saturation, each 8-bit signed integer element of the first
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
/// integer element of the second 64-bit integer vector of [8 x i8].
///
/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
/// less than 0x80 are saturated to 0x80. The results are packed into a
/// 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -422,12 +409,13 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}

/// Adds each 16-bit signed integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
/// saturated to 0x8000. The results are packed into a 64-bit integer vector
/// of [4 x i16].
/// Adds, with saturation, each 16-bit signed integer element of the first
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
/// integer element of the second 64-bit integer vector of [4 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000. The results are packed into a
/// 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -445,11 +433,12 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}

/// Adds each 8-bit unsigned integer element of the first 64-bit integer
/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
/// [8 x i8].
/// Adds, with saturation, each 8-bit unsigned integer element of the first
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
/// integer element of the second 64-bit integer vector of [8 x i8].
///
/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
/// into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -467,11 +456,12 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}

/// Adds each 16-bit unsigned integer element of the first 64-bit integer
/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
/// integer vector of [4 x i16].
/// Adds, with saturation, each 16-bit unsigned integer element of the first
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
/// integer element of the second 64-bit integer vector of [4 x i16].
///
/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
/// into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
Expand Down Expand Up @@ -552,12 +542,13 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
}

/// Subtracts each 8-bit signed integer element of the second 64-bit
/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
/// element of the first 64-bit integer vector of [8 x i8]. Positive results
/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
/// are saturated to 0x80. The results are packed into a 64-bit integer
/// vector of [8 x i8].
/// Subtracts, with saturation, each 8-bit signed integer element of the second
/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
/// integer element of the first 64-bit integer vector of [8 x i8].
///
/// Positive results greater than 0x7F are saturated to 0x7F. Negative
/// results less than 0x80 are saturated to 0x80. The results are packed
/// into a 64-bit integer vector of [8 x i8].
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -575,12 +566,13 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}

/// Subtracts each 16-bit signed integer element of the second 64-bit
/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
/// element of the first 64-bit integer vector of [4 x i16]. Positive results
/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
/// integer vector of [4 x i16].
/// Subtracts, with saturation, each 16-bit signed integer element of the
/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
/// signed integer element of the first 64-bit integer vector of [4 x i16].
///
/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
/// results less than 0x8000 are saturated to 0x8000. The results are packed
/// into a 64-bit integer vector of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
Expand Down
16 changes: 9 additions & 7 deletions clang/lib/Headers/prfchwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
#define __PRFCHWINTRIN_H

/// Loads a memory sequence containing the specified memory address into
/// all data cache levels. The cache-coherency state is set to exclusive.
/// Data can be read from and written to the cache line without additional
/// delay.
/// all data cache levels.
///
/// The cache-coherency state is set to exclusive. Data can be read from
/// and written to the cache line without additional delay.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -32,10 +33,11 @@ _m_prefetch(void *__P)
}

/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency to modified. This
/// provides a hint to the processor that the cache line will be modified.
/// It is intended for use when the cache line will be written to shortly
/// after the prefetch is performed.
/// the L1 data cache and sets the cache-coherency state to modified.
///
/// This provides a hint to the processor that the cache line will be
/// modified. It is intended for use when the cache line will be written to
/// shortly after the prefetch is performed.
///
/// Note that the effect of this intrinsic is dependent on the processor
/// implementation.
Expand Down
20 changes: 8 additions & 12 deletions clang/lib/Headers/smmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1431,8 +1431,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
}

/* SSE4 Pack with Unsigned Saturation. */
/// Converts 32-bit signed integers from both 128-bit integer vector
/// operands into 16-bit unsigned integers, and returns the packed result.
/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
/// vector operands into 16-bit unsigned integers, and returns the packed
/// result.
///
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
/// 0x0000 are saturated to 0x0000.
///
Expand All @@ -1441,17 +1443,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
///
/// \param __V1
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
/// signed integer and is converted to a 16-bit unsigned integer with
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
/// are written to the lower 64 bits of the result.
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
/// written to the lower 64 bits of the result.
/// \param __V2
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
/// signed integer and is converted to a 16-bit unsigned integer with
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
/// are written to the higher 64 bits of the result.
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
/// written to the higher 64 bits of the result.
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
__m128i __V2) {
Expand Down
36 changes: 20 additions & 16 deletions clang/lib/Headers/tmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,10 +271,11 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
/// Horizontally adds, with saturation, the adjacent pairs of values contained
/// in two packed 128-bit vectors of [8 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -296,10 +297,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
/// 0x8000.
/// Horizontally adds, with saturation, the adjacent pairs of values contained
/// in two packed 64-bit vectors of [4 x i16].
///
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
/// less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
Expand Down Expand Up @@ -413,10 +415,11 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
/// Horizontally subtracts, with saturation, the adjacent pairs of values
/// contained in two packed 128-bit vectors of [8 x i16].
///
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
/// Negative differences less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
Expand All @@ -438,10 +441,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
/// saturated to 0x8000.
/// Horizontally subtracts, with saturation, the adjacent pairs of values
/// contained in two packed 64-bit vectors of [4 x i16].
///
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
/// Negative differences less than 0x8000 are saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
Expand Down
3 changes: 1 addition & 2 deletions clang/lib/Index/IndexSymbol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,7 @@ StringRef index::getSymbolSubKindString(SymbolSubKind K) {
case SymbolSubKind::AccessorSetter: return "acc-set";
case SymbolSubKind::UsingTypename: return "using-typename";
case SymbolSubKind::UsingValue: return "using-value";
case SymbolSubKind::UsingEnum:
return "using-enum";
case SymbolSubKind::UsingEnum: return "using-enum";
}
llvm_unreachable("invalid symbol subkind");
}
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Index/IndexingAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ index::createIndexingAction(std::shared_ptr<IndexDataConsumer> DataConsumer,
}

static bool topLevelDeclVisitor(void *context, const Decl *D) {
IndexingContext &IndexCtx = *static_cast<IndexingContext*>(context);
IndexingContext &IndexCtx = *static_cast<IndexingContext *>(context);
return IndexCtx.indexTopLevelDecl(D);
}

Expand Down
4 changes: 4 additions & 0 deletions clang/lib/InstallAPI/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
set(LLVM_LINK_COMPONENTS
Support
TextAPI
Core
)

add_clang_library(clangInstallAPI
FileList.cpp
Frontend.cpp
HeaderFile.cpp
Visitor.cpp

LINK_LIBS
clangAST
clangBasic
clangLex
)
129 changes: 129 additions & 0 deletions clang/lib/InstallAPI/Frontend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//===- Frontend.cpp ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/InstallAPI/Frontend.h"
#include "clang/AST/Availability.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"

using namespace llvm;
using namespace llvm::MachO;

namespace clang::installapi {

/// Adds a global symbol through llvm::MachO::RecordsSlice::addGlobal and
/// stores the frontend-side attributes (availability, declaration and header
/// access level) in FrontendRecords, keyed by the resulting record.
///
/// \param Name    Symbol name forwarded to the MachO slice.
/// \param Linkage Record linkage forwarded to the MachO slice.
/// \param GV      Kind of global forwarded to the MachO slice.
/// \param Avail   Availability information stored with the record.
/// \param D       Declaration stored with the record.
/// \param Access  Header access level stored with the record.
/// \param Flags   Symbol flags forwarded to the MachO slice.
/// \returns The record returned by the MachO slice.
GlobalRecord *FrontendRecordsSlice::addGlobal(
    StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV,
    const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access,
    SymbolFlags Flags) {
  GlobalRecord *Global =
      llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags);

  // Keep the clang-level attributes next to the MachO record.
  FrontendRecords.insert({Global, FrontendAttrs{Avail, D, Access}});

  return Global;
}

/// Adds an Objective-C interface through
/// llvm::MachO::RecordsSlice::addObjCInterface and stores the frontend-side
/// attributes (availability, declaration and header access level) in
/// FrontendRecords, keyed by the resulting record.
///
/// The requested symbol kinds are always Class and MetaClass; EHType is added
/// when \p IsEHType is true.
///
/// \returns The record returned by the MachO slice.
ObjCInterfaceRecord *FrontendRecordsSlice::addObjCInterface(
    StringRef Name, RecordLinkage Linkage, const clang::AvailabilityInfo Avail,
    const Decl *D, HeaderType Access, bool IsEHType) {
  // Build up the symbol-kind mask one flag at a time.
  ObjCIFSymbolKind Kinds = ObjCIFSymbolKind::Class;
  Kinds |= ObjCIFSymbolKind::MetaClass;
  if (IsEHType)
    Kinds |= ObjCIFSymbolKind::EHType;

  auto *Interface =
      llvm::MachO::RecordsSlice::addObjCInterface(Name, Linkage, Kinds);

  // Keep the clang-level attributes next to the MachO record.
  FrontendRecords.insert({Interface, FrontendAttrs{Avail, D, Access}});

  return Interface;
}

/// Determines the header type recorded for \p FE, caching the answer in
/// KnownFiles.
///
/// Lookup order:
///   1. KnownFiles, keyed by the file entry itself.
///   2. KnownIncludes, keyed by the include name that the preprocessor's
///      header search reports for \p FE.
///
/// \returns The recorded header type, or std::nullopt when \p FE is null, is
/// cached as HeaderType::Unknown, or is found in neither table.
std::optional<HeaderType>
InstallAPIContext::findAndRecordFile(const FileEntry *FE,
                                     const Preprocessor &PP) {
  if (FE == nullptr)
    return std::nullopt;

  // A previous lookup already classified this file; HeaderType::Unknown is the
  // cached marker for "not a header installapi should use".
  if (auto Cached = KnownFiles.find(FE); Cached != KnownFiles.end()) {
    if (Cached->second == HeaderType::Unknown)
      return std::nullopt;
    return Cached->second;
  }

  // Not known by file entry: fall back to the spelling used to include the
  // header. This primarily resolves headers found in a different location
  // than the one passed directly as input.
  StringRef IncludeName = PP.getHeaderSearchInfo().getIncludeNameForHeader(FE);
  auto ByInclude = KnownIncludes.find(IncludeName.str());
  if (ByInclude == KnownIncludes.end()) {
    // Remember the miss so the same file is not searched by string again.
    KnownFiles.insert({FE, HeaderType::Unknown});
    return std::nullopt;
  }

  KnownFiles[FE] = ByInclude->second;
  return ByInclude->second;
}

void InstallAPIContext::addKnownHeader(const HeaderFile &H) {
auto FE = FM->getFile(H.getPath());
if (!FE)
return; // File does not exist.
KnownFiles[*FE] = H.getType();

if (!H.useIncludeName())
return;

KnownIncludes[H.getIncludeName()] = H.getType();
}

/// Maps a source language to the file extension used for the synthesized
/// input buffer name.
///
/// Only C, C++, Objective-C and Objective-C++ are handled; any other language
/// reaches llvm_unreachable.
static StringRef getFileExtension(clang::Language Lang) {
  switch (Lang) {
  case clang::Language::ObjCXX:
    return ".mm";
  case clang::Language::ObjC:
    return ".m";
  case clang::Language::CXX:
    return ".cpp";
  case clang::Language::C:
    return ".c";
  default:
    llvm_unreachable("Unexpected language option.");
  }
}

/// Synthesizes an in-memory source file that includes every input header whose
/// type matches Ctx.Type.
///
/// C and C++ inputs use `#include`; all other language modes use `#import`.
/// Headers that use an include name are written as `<name>`, the others as
/// `"path"`. Every directive is terminated with a newline so that each header
/// gets its own preprocessor line. Each emitted header is also registered with
/// Ctx.addKnownHeader.
///
/// \param Ctx The InstallAPI context; its known-header tables are updated.
/// \returns A copy of the generated text in a named memory buffer, or nullptr
/// when no header matched.
std::unique_ptr<MemoryBuffer> createInputBuffer(InstallAPIContext &Ctx) {
  assert(Ctx.Type != HeaderType::Unknown &&
         "unexpected access level for parsing");
  SmallString<4096> Contents;
  raw_svector_ostream OS(Contents);
  for (const HeaderFile &H : Ctx.InputHeaders) {
    if (H.getType() != Ctx.Type)
      continue;
    if (Ctx.LangMode == Language::C || Ctx.LangMode == Language::CXX)
      OS << "#include ";
    else
      OS << "#import ";
    // A preprocessor directive ends at the end of its line, so each one must
    // be followed by a newline; without it, several headers would run
    // together on a single line.
    if (H.useIncludeName())
      OS << "<" << H.getIncludeName() << ">\n";
    else
      OS << "\"" << H.getPath() << "\"\n";

    Ctx.addKnownHeader(H);
  }
  if (Contents.empty())
    return nullptr;

  // The buffer name is built from the target triple, the header type and the
  // extension for the current language mode.
  SmallString<64> BufferName(
      {"installapi-includes-", Ctx.Slice->getTriple().str(), "-",
       getName(Ctx.Type), getFileExtension(Ctx.LangMode)});
  return llvm::MemoryBuffer::getMemBufferCopy(Contents, BufferName);
}

} // namespace clang::installapi
Loading