131 changes: 113 additions & 18 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGRecordLayout.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
Expand All @@ -25,8 +26,10 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Type.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
Expand Down Expand Up @@ -67,6 +70,7 @@
#include "llvm/TargetParser/X86TargetParser.h"
#include <optional>
#include <sstream>
#include <utility>

using namespace clang;
using namespace CodeGen;
Expand Down Expand Up @@ -95,6 +99,76 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
I->addAnnotationMetadata("auto-init");
}

/// Emit IR for the HLSL `splitdouble` builtin.
///
/// Splits the double (or vector of doubles) in argument 0 into two 32-bit
/// integer halves, stores the half this code calls "low" through out-argument
/// 1 and the "high" half through out-argument 2, and finally emits the
/// writebacks for both out-arguments.
///
/// \param E   The builtin call expression. Arguments 1 and 2 must be
///            HLSLOutArgExpr nodes.
/// \param CGF The function currently being emitted.
/// \returns The store instruction that writes the high half to its temporary.
static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
  Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
  // Both out-arguments are dereferenced unconditionally below, so use cast<>
  // (which asserts on a type mismatch) rather than an unchecked dyn_cast<>.
  const auto *OutArg1 = cast<HLSLOutArgExpr>(E->getArg(1));
  const auto *OutArg2 = cast<HLSLOutArgExpr>(E->getArg(2));

  // Materialize the out-argument temporaries and register their writebacks.
  CallArgList Args;
  LValue Op1TmpLValue =
      CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
  LValue Op2TmpLValue =
      CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());

  // On ABIs where the callee destroys arguments left to right, the recorded
  // writebacks are reversed before being emitted.
  if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
    Args.reverseWritebacks();

  Value *LowBits = nullptr;
  Value *HighBits = nullptr;

  if (CGF->CGM.getTarget().getTriple().isDXIL()) {
    // DXIL: call the dx.splitdouble intrinsic, which yields a two-member
    // struct of i32 (or of i32 vectors with the input's element count).
    llvm::Type *RetElementTy = CGF->Int32Ty;
    if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
      RetElementTy = llvm::VectorType::get(
          CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
    auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);

    CallInst *CI = CGF->Builder.CreateIntrinsic(
        RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");

    LowBits = CGF->Builder.CreateExtractValue(CI, 0);
    HighBits = CGF->Builder.CreateExtractValue(CI, 1);
  } else {
    // For non-DXIL targets the split is expanded inline: bitcast the input to
    // a vector of i32 and pick out the even (low) and odd (high) lanes.
    if (!Op0->getType()->isVectorTy()) {
      FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
      Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);

      LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
      HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
    } else {
      int NumElements = 1;
      if (const auto *VecTy =
              E->getArg(0)->getType()->getAs<clang::VectorType>())
        NumElements = VecTy->getNumElements();

      // Each double contributes two consecutive i32 lanes.
      FixedVectorType *Uint32VecTy =
          FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
      Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
      if (NumElements == 1) {
        LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
        HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
      } else {
        SmallVector<int> EvenMask, OddMask;
        // The index is named Lane so it does not shadow the parameter E.
        for (int Lane = 0; Lane != NumElements; ++Lane) {
          EvenMask.push_back(Lane * 2);
          OddMask.push_back(Lane * 2 + 1);
        }
        LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
        HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
      }
    }
  }

  // Store both halves into the out-argument temporaries, then copy the
  // temporaries back to the caller-visible l-values.
  CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
  auto *LastInst =
      CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
  CGF->EmitWritebacks(Args);
  return LastInst;
}

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
Expand Down Expand Up @@ -18959,6 +19033,14 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
nullptr, "hlsl.radians");
}
case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
  // splitdouble takes a floating-point source and two unsigned-integer
  // out-arguments; anything else is rejected by this assertion.
  assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
          E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
          E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
         "splitdouble operands types mismatch");
  return handleHlslSplitdouble(E, this);
}
}
return nullptr;
}
Expand Down Expand Up @@ -19055,7 +19137,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Args.push_back(llvm::PoisonValue::get(IntTy));
for (unsigned I = 0; I != E->getNumArgs(); ++I) {
llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
if (I <= !InsertOld && Size < 32) {
if (I <= (InsertOld ? 0u : 1u) && Size < 32) {
if (!DataTy->isIntegerTy())
V = Builder.CreateBitCast(
V, llvm::IntegerType::get(Builder.getContext(), Size));
Expand Down Expand Up @@ -20492,8 +20574,8 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
#undef MMA_VARIANTS_B1_XOR
}

static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
const CallExpr *E) {
static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
const CallExpr *E) {
Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
QualType ArgType = E->getArg(0)->getType();
clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
Expand All @@ -20503,6 +20585,21 @@ static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
{Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
}

/// Emit an `ldg`-style builtin as a plain aligned load.
///
/// The pointer operand (argument 0 of \p E) is cast into address space 1 and
/// its pointee is loaded with the pointee type's natural alignment. The load
/// is tagged with `!invariant.load` metadata.
///
/// \returns The emitted load instruction.
static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
  const auto *PtrExpr = E->getArg(0);
  Value *SrcPtr = CGF.EmitScalarExpr(PtrExpr);
  QualType PtrTy = PtrExpr->getType();
  clang::CharUnits NaturalAlign =
      CGF.CGM.getNaturalPointeeTypeAlignment(PtrTy);
  llvm::Type *LoadTy = CGF.ConvertTypeForMem(PtrTy->getPointeeType());

  // addrspace(1) is NVPTX ADDRESS_SPACE_GLOBAL.
  Value *GlobalPtr =
      CGF.Builder.CreateAddrSpaceCast(SrcPtr, CGF.Builder.getPtrTy(1));
  auto *Load = CGF.Builder.CreateAlignedLoad(LoadTy, GlobalPtr,
                                             NaturalAlign.getAsAlign());

  // An empty metadata node is attached as the invariant-load marker.
  Load->setMetadata(LLVMContext::MD_invariant_load,
                    MDNode::get(CGF.Builder.getContext(), {}));
  return Load;
}

static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
const CallExpr *E) {
Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
Expand Down Expand Up @@ -20536,9 +20633,11 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
return nullptr;
}

if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
IntrinsicID == Intrinsic::nvvm_ldu_global_f)
return MakeLdgLdu(IntrinsicID, CGF, E);
if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
return MakeLdg(CGF, E);

if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
return MakeLdu(IntrinsicID, CGF, E);

SmallVector<Value *, 16> Args;
auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
Expand Down Expand Up @@ -20675,16 +20774,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_ldg_ul2:
case NVPTX::BI__nvvm_ldg_ull:
case NVPTX::BI__nvvm_ldg_ull2:
// PTX Interoperability section 2.2: "For a vector with an even number of
// elements, its alignment is set to number of elements times the alignment
// of its member: n*alignof(t)."
return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
case NVPTX::BI__nvvm_ldg_f:
case NVPTX::BI__nvvm_ldg_f2:
case NVPTX::BI__nvvm_ldg_f4:
case NVPTX::BI__nvvm_ldg_d:
case NVPTX::BI__nvvm_ldg_d2:
return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
// PTX Interoperability section 2.2: "For a vector with an even number of
// elements, its alignment is set to number of elements times the alignment
// of its member: n*alignof(t)."
return MakeLdg(*this, E);

case NVPTX::BI__nvvm_ldu_c:
case NVPTX::BI__nvvm_ldu_sc:
Expand Down Expand Up @@ -20715,13 +20813,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_ldu_ul2:
case NVPTX::BI__nvvm_ldu_ull:
case NVPTX::BI__nvvm_ldu_ull2:
return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
case NVPTX::BI__nvvm_ldu_f:
case NVPTX::BI__nvvm_ldu_f2:
case NVPTX::BI__nvvm_ldu_f4:
case NVPTX::BI__nvvm_ldu_d:
case NVPTX::BI__nvvm_ldu_d2:
return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);

case NVPTX::BI__nvvm_atom_cta_add_gen_i:
case NVPTX::BI__nvvm_atom_cta_add_gen_l:
Expand Down Expand Up @@ -21195,14 +21293,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
case NVPTX::BI__nvvm_ldg_h:
return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
case NVPTX::BI__nvvm_ldg_h2:
return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
case NVPTX::BI__nvvm_ldu_h:
case NVPTX::BI__nvvm_ldu_h2:
return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
case NVPTX::BI__nvvm_ldu_h2: {
return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
}
case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
Expand Down
105 changes: 76 additions & 29 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Path.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>
using namespace clang;
Expand Down Expand Up @@ -1410,6 +1411,30 @@ static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr,
return addr;
}

/// Try to turn the scalable vector \p V (of type \p FromTy) into the fixed
/// vector type \p ToTy.
///
/// A scalable i1 vector whose element count is a known multiple of 8 is first
/// bitcast to a scalable i8 vector when \p ToTy has i8 elements. If the
/// element types then agree, the source value is renamed to
/// `<Name>.coerce` and the fixed vector is extracted at index 0.
///
/// \returns {extracted value, true} on success, or {\p V (possibly bitcast),
///          false} when the element types do not match.
static std::pair<llvm::Value *, bool>
CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
                      llvm::ScalableVectorType *FromTy, llvm::Value *V,
                      StringRef Name = "") {
  llvm::Type *ToEltTy = ToTy->getElementType();

  // Predicate-to-bytes case: reinterpret every 8 i1 lanes as one i8 lane.
  const bool IsPredicateToBytes =
      FromTy->getElementType()->isIntegerTy(1) &&
      FromTy->getElementCount().isKnownMultipleOf(8) &&
      ToEltTy == CGF.Builder.getInt8Ty();
  if (IsPredicateToBytes) {
    const auto ByteLanes = FromTy->getElementCount().getKnownMinValue() / 8;
    FromTy = llvm::ScalableVectorType::get(ToEltTy, ByteLanes);
    V = CGF.Builder.CreateBitCast(V, FromTy);
  }

  // Without matching element types no extract is possible; hand back the
  // (possibly bitcast) value untouched.
  if (FromTy->getElementType() != ToEltTy)
    return {V, false};

  llvm::Value *ZeroIdx = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
  V->setName(Name + ".coerce");
  llvm::Value *Fixed =
      CGF.Builder.CreateExtractVector(ToTy, V, ZeroIdx, "cast.fixed");
  return {Fixed, true};
}

namespace {

/// Encapsulates information about the way function arguments from
Expand Down Expand Up @@ -3196,26 +3221,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// a VLAT at the function boundary and the types match up, use
// llvm.vector.extract to convert back to the original VLST.
if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
llvm::Value *Coerced = Fn->getArg(FirstIRArg);
llvm::Value *ArgVal = Fn->getArg(FirstIRArg);
if (auto *VecTyFrom =
dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, bitcast the source and use a vector extract.
if (VecTyFrom->getElementType()->isIntegerTy(1) &&
VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
VecTyTo->getElementType() == Builder.getInt8Ty()) {
VecTyFrom = llvm::ScalableVectorType::get(
VecTyTo->getElementType(),
VecTyFrom->getElementCount().getKnownMinValue() / 8);
Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
}
if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);

dyn_cast<llvm::ScalableVectorType>(ArgVal->getType())) {
auto [Coerced, Extracted] = CoerceScalableToFixed(
*this, VecTyTo, VecTyFrom, ArgVal, Arg->getName());
if (Extracted) {
assert(NumIRArgs == 1);
Coerced->setName(Arg->getName() + ".coerce");
ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
VecTyTo, Coerced, Zero, "cast.fixed")));
ArgVals.push_back(ParamValue::forDirect(Coerced));
break;
}
}
Expand Down Expand Up @@ -3326,16 +3339,33 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
ArgVals.push_back(ParamValue::forIndirect(alloca));

auto coercionType = ArgI.getCoerceAndExpandType();
auto unpaddedCoercionType = ArgI.getUnpaddedCoerceAndExpandType();
auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);

alloca = alloca.withElementType(coercionType);

unsigned argIndex = FirstIRArg;
unsigned unpaddedIndex = 0;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
llvm::Type *eltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType))
continue;

auto eltAddr = Builder.CreateStructGEP(alloca, i);
auto elt = Fn->getArg(argIndex++);
llvm::Value *elt = Fn->getArg(argIndex++);

auto paramType = unpaddedStruct
? unpaddedStruct->getElementType(unpaddedIndex++)
: unpaddedCoercionType;

if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(eltType)) {
if (auto *VecTyFrom = dyn_cast<llvm::ScalableVectorType>(paramType)) {
bool Extracted;
std::tie(elt, Extracted) = CoerceScalableToFixed(
*this, VecTyTo, VecTyFrom, elt, elt->getName());
assert(Extracted && "Unexpected scalable to fixed vector coercion");
}
}
Builder.CreateStore(elt, eltAddr);
}
assert(argIndex == FirstIRArg + NumIRArgs);
Expand Down Expand Up @@ -3930,17 +3960,24 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,

case ABIArgInfo::CoerceAndExpand: {
auto coercionType = RetAI.getCoerceAndExpandType();
auto unpaddedCoercionType = RetAI.getUnpaddedCoerceAndExpandType();
auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);

// Load all of the coerced elements out into results.
llvm::SmallVector<llvm::Value*, 4> results;
Address addr = ReturnValue.withElementType(coercionType);
unsigned unpaddedIndex = 0;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
auto coercedEltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
continue;

auto eltAddr = Builder.CreateStructGEP(addr, i);
auto elt = Builder.CreateLoad(eltAddr);
llvm::Value *elt = CreateCoercedLoad(
eltAddr,
unpaddedStruct ? unpaddedStruct->getElementType(unpaddedIndex++)
: unpaddedCoercionType,
*this);
results.push_back(elt);
}

Expand Down Expand Up @@ -4207,12 +4244,6 @@ static void emitWriteback(CodeGenFunction &CGF,
CGF.EmitBlock(contBB);
}

static void emitWritebacks(CodeGenFunction &CGF,
const CallArgList &args) {
for (const auto &I : args.writebacks())
emitWriteback(CGF, I);
}

static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
const CallArgList &CallArgs) {
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
Expand Down Expand Up @@ -4681,6 +4712,11 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
IsUsed = true;
}

/// Emit every writeback recorded in \p args, in the order in which
/// CallArgList::writebacks() yields them.
void CodeGenFunction::EmitWritebacks(const CallArgList &args) {
  for (const auto &Writeback : args.writebacks()) {
    emitWriteback(*this, Writeback);
  }
}

void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
DisableDebugLocationUpdates Dis(*this, E);
Expand Down Expand Up @@ -5112,7 +5148,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
// For virtual function pointer thunks and musttail calls, we must always
// forward an incoming SRet pointer to the callee, because a local alloca
// would be de-allocated before the call. These cases both guarantee that
// there will be an incoming SRet argument of the correct type.
if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
Expand Down Expand Up @@ -5468,6 +5508,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
case ABIArgInfo::CoerceAndExpand: {
auto coercionType = ArgInfo.getCoerceAndExpandType();
auto layout = CGM.getDataLayout().getStructLayout(coercionType);
auto unpaddedCoercionType = ArgInfo.getUnpaddedCoerceAndExpandType();
auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);

llvm::Value *tempSize = nullptr;
Address addr = Address::invalid();
Expand Down Expand Up @@ -5498,11 +5540,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
addr = addr.withElementType(coercionType);

unsigned IRArgPos = FirstIRArg;
unsigned unpaddedIndex = 0;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
llvm::Type *eltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
Address eltAddr = Builder.CreateStructGEP(addr, i);
llvm::Value *elt = Builder.CreateLoad(eltAddr);
llvm::Value *elt = CreateCoercedLoad(
eltAddr,
unpaddedStruct ? unpaddedStruct->getElementType(unpaddedIndex++)
: unpaddedCoercionType,
*this);
if (ArgHasMaybeUndefAttr)
elt = Builder.CreateFreeze(elt);
IRCallArgs[IRArgPos++] = elt;
Expand Down Expand Up @@ -5893,7 +5940,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit any call-associated writebacks immediately. Arguably this
// should happen after any return-value munging.
if (CallArgs.hasWritebacks())
emitWritebacks(*this, CallArgs);
EmitWritebacks(CallArgs);

// The stack cleanup for inalloca arguments has to run out of the normal
// lexical order, so deactivate it and run it manually here.
Expand Down
16 changes: 14 additions & 2 deletions clang/lib/CodeGen/CGDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,8 +624,6 @@ void CGDebugInfo::CreateCompileUnit() {
} else if (LO.OpenCL && (!CGM.getCodeGenOpts().DebugStrictDwarf ||
CGM.getCodeGenOpts().DwarfVersion >= 5)) {
LangTag = llvm::dwarf::DW_LANG_OpenCL;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
} else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) {
LangTag = llvm::dwarf::DW_LANG_C11;
} else if (LO.C99) {
Expand Down Expand Up @@ -783,6 +781,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
{
if (BT->getKind() == BuiltinType::MFloat8) {
Encoding = llvm::dwarf::DW_ATE_unsigned_char;
BTName = BT->getName(CGM.getLangOpts());
// Bit size and offset of the type.
uint64_t Size = CGM.getContext().getTypeSize(BT);
return DBuilder.createBasicType(BTName, Size, Encoding);
}
ASTContext::BuiltinVectorTypeInfo Info =
// For svcount_t, only the lower 2 bytes are relevant.
BT->getKind() == BuiltinType::SveCount
Expand Down Expand Up @@ -909,6 +914,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
TheCU, TheCU->getFile(), 0); \
return SingletonId; \
}
#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \
case BuiltinType::Id: { \
if (!SingletonId) \
SingletonId = \
DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \
return SingletonId; \
}
#include "clang/Basic/AMDGPUTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:
Expand Down
13 changes: 10 additions & 3 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5460,9 +5460,8 @@ LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) {
return getOrCreateOpaqueLValueMapping(e);
}

void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
CallArgList &Args, QualType Ty) {

std::pair<LValue, LValue>
CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) {
// Emitting the casted temporary through an opaque value.
LValue BaseLV = EmitLValue(E->getArgLValue());
OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV);
Expand All @@ -5476,6 +5475,13 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
TempLV);

OpaqueValueMappingData::bind(*this, E->getCastedTemporary(), TempLV);
return std::make_pair(BaseLV, TempLV);
}

LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
CallArgList &Args, QualType Ty) {

auto [BaseLV, TempLV] = EmitHLSLOutArgLValues(E, Ty);

llvm::Value *Addr = TempLV.getAddress().getBasePointer();
llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType());
Expand All @@ -5488,6 +5494,7 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(),
LifetimeSize);
Args.add(RValue::get(TmpAddr, *this), Ty);
return TempLV;
}

LValue
Expand Down
41 changes: 37 additions & 4 deletions clang/lib/CodeGen/CGHLSLRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,16 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
BasicBlock *BB = BasicBlock::Create(Ctx, "entry", EntryFn);
IRBuilder<> B(BB);
llvm::SmallVector<Value *> Args;

SmallVector<OperandBundleDef, 1> OB;
if (CGM.shouldEmitConvergenceTokens()) {
assert(EntryFn->isConvergent());
llvm::Value *I = B.CreateIntrinsic(
llvm::Intrinsic::experimental_convergence_entry, {}, {});
llvm::Value *bundleArgs[] = {I};
OB.emplace_back("convergencectrl", bundleArgs);
}

// FIXME: support struct parameters where semantics are on members.
// See: https://github.com/llvm/llvm-project/issues/57874
unsigned SRetOffset = 0;
Expand All @@ -419,7 +429,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
Args.push_back(emitInputSemantic(B, *PD, Param.getType()));
}

CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args);
CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args, OB);
CI->setCallingConv(Fn->getCallingConv());
// FIXME: Handle codegen for return type semantics.
// See: https://github.com/llvm/llvm-project/issues/57875
Expand Down Expand Up @@ -474,14 +484,22 @@ void CGHLSLRuntime::generateGlobalCtorDtorCalls() {
for (auto &F : M.functions()) {
if (!F.hasFnAttribute("hlsl.shader"))
continue;
IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
auto *Token = getConvergenceToken(F.getEntryBlock());
Instruction *IP = &*F.getEntryBlock().begin();
SmallVector<OperandBundleDef, 1> OB;
if (Token) {
llvm::Value *bundleArgs[] = {Token};
OB.emplace_back("convergencectrl", bundleArgs);
IP = Token->getNextNode();
}
IRBuilder<> B(IP);
for (auto *Fn : CtorFns)
B.CreateCall(FunctionCallee(Fn));
B.CreateCall(FunctionCallee(Fn), {}, OB);

// Insert global dtors before the terminator of the last basic block
B.SetInsertPoint(F.back().getTerminator());
for (auto *Fn : DtorFns)
B.CreateCall(FunctionCallee(Fn));
B.CreateCall(FunctionCallee(Fn), {}, OB);
}

// No need to keep global ctors/dtors for non-lib profile after call to
Expand Down Expand Up @@ -579,3 +597,18 @@ llvm::Function *CGHLSLRuntime::createResourceBindingInitFn() {
Builder.CreateRetVoid();
return InitResBindingsFunc;
}

/// Find the convergence control token in \p BB.
///
/// \returns nullptr when convergence tokens are not being emitted for this
///          module; otherwise the first convergence-control intrinsic call in
///          \p BB. Reaching the end of the block without finding one is
///          treated as unreachable.
llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) {
  if (!CGM.shouldEmitConvergenceTokens())
    return nullptr;

  for (llvm::Instruction &Inst : BB) {
    auto *Intr = dyn_cast<llvm::IntrinsicInst>(&Inst);
    if (!Intr)
      continue;
    if (llvm::isConvergenceControlIntrinsic(Intr->getIntrinsicID()))
      return Intr;
  }
  llvm_unreachable("Convergence token should have been emitted.");
  return nullptr;
}
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ class CGHLSLRuntime {

bool needsResourceBindingInitFn();
llvm::Function *createResourceBindingInitFn();
llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);

private:
void addBufferResourceAnnotation(llvm::GlobalVariable *GV,
Expand Down
32 changes: 22 additions & 10 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,7 @@ struct PushAndPopStackRAII {
CodeGenFunction::JumpDest Dest =
CGF.getOMPCancelDestination(OMPD_parallel);
CGF.EmitBranchThroughCleanup(Dest);
return llvm::Error::success();
};

// TODO: Remove this once we emit parallel regions through the
Expand Down Expand Up @@ -2331,8 +2332,11 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
CGF.Builder.restoreIP(OMPBuilder.createBarrier(
CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
EmitChecks);
assert(AfterIP && "unexpected error creating barrier");
CGF.Builder.restoreIP(*AfterIP);
return;
}

Expand Down Expand Up @@ -5928,8 +5932,10 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
};

OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
IsOffloadEntry, OutlinedFn, OutlinedFnID);
llvm::Error Err = OMPBuilder.emitTargetRegionFunction(
EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
OutlinedFnID);
assert(!Err && "unexpected error creating target region");

if (!OutlinedFn)
return;
Expand Down Expand Up @@ -9670,9 +9676,12 @@ static void emitTargetCallKernelLaunch(
NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
DynCGGroupMem, HasNoWait);

CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
RTLoc, AllocaIP));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPRuntime->getOMPBuilder().emitKernelLaunch(
CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
RTLoc, AllocaIP);
assert(AfterIP && "unexpected error creating kernel launch");
CGF.Builder.restoreIP(*AfterIP);
};

if (RequiresOuterTask)
Expand Down Expand Up @@ -10349,9 +10358,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
CGF.Builder.GetInsertPoint());
llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
CGF.Builder.restoreIP(OMPBuilder.createTargetData(
OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createTargetData(
OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc);
assert(AfterIP && "unexpected error creating target data");
CGF.Builder.restoreIP(*AfterIP);
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
Expand Down
13 changes: 8 additions & 5 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,11 +1753,14 @@ void CGOpenMPRuntimeGPU::emitReduction(
Idx++;
}

CGF.Builder.restoreIP(OMPBuilder.createReductionsGPU(
OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum,
RTLoc));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createReductionsGPU(
OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
CGF.getTarget().getGridValue(),
C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc);
assert(AfterIP && "unexpected error creating GPU reductions");
CGF.Builder.restoreIP(*AfterIP);
return;
}

Expand Down
90 changes: 67 additions & 23 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,6 +1809,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

// Privatization callback that performs appropriate action for
Expand All @@ -1831,15 +1832,18 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
*this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
return llvm::Error::success();
};

CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
Builder.restoreIP(
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
IfCond, NumThreads, ProcBind, S.hasCancel()));
IfCond, NumThreads, ProcBind, S.hasCancel());
assert(AfterIP && "unexpected error creating parallel");
Builder.restoreIP(*AfterIP);
return;
}

Expand Down Expand Up @@ -2128,9 +2132,13 @@ void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {

RunCleanupsScope BodyScope(*this);
EmitStmt(BodyStmt);
return llvm::Error::success();
};
llvm::CanonicalLoopInfo *CL =

llvm::Expected<llvm::CanonicalLoopInfo *> Result =
OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
assert(Result && "unexpected error creating canonical loop");
llvm::CanonicalLoopInfo *CL = *Result;

// Finish up the loop.
Builder.restoreIP(CL->getAfterIP());
Expand Down Expand Up @@ -4016,11 +4024,13 @@ static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
CGM.getOpenMPRuntime().getOMPBuilder();
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
OMPBuilder.applyWorkshareLoop(
CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
SchedKind, ChunkSize, /*HasSimdModifier=*/false,
/*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
/*HasOrderedClause=*/false);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.applyWorkshareLoop(
CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP,
NeedsBarrier, SchedKind, ChunkSize, /*HasSimdModifier=*/false,
/*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
/*HasOrderedClause=*/false);
assert(AfterIP && "unexpected error creating workshare loop");
return;
}

Expand Down Expand Up @@ -4257,6 +4267,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {

auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

const CapturedStmt *ICS = S.getInnermostCapturedStmt();
Expand All @@ -4269,6 +4280,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, SubStmt, AllocaIP, CodeGenIP, "section");
return llvm::Error::success();
};
SectionCBVector.push_back(SectionCB);
}
Expand All @@ -4277,6 +4289,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CapturedStmt, AllocaIP, CodeGenIP, "section");
return llvm::Error::success();
};
SectionCBVector.push_back(SectionCB);
}
Expand All @@ -4298,9 +4311,12 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
Builder.restoreIP(OMPBuilder.createSections(
Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
S.getSingleClause<OMPNowaitClause>()));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createSections(Builder, AllocaIP, SectionCBVector, PrivCB,
FiniCB, S.hasCancel(),
S.getSingleClause<OMPNowaitClause>());
assert(AfterIP && "unexpected error creating sections");
Builder.restoreIP(*AfterIP);
return;
}
{
Expand All @@ -4326,17 +4342,22 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
return llvm::Error::success();
};

LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createSection(Builder, BodyGenCB, FiniCB);
assert(AfterIP && "unexpected error creating section");
Builder.restoreIP(*AfterIP);

return;
}
Expand Down Expand Up @@ -4407,17 +4428,22 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {

auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
return llvm::Error::success();
};

LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB);
assert(AfterIP && "unexpected error creating master");
Builder.restoreIP(*AfterIP);

return;
}
Expand Down Expand Up @@ -4453,18 +4479,22 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {

auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
return llvm::Error::success();
};

LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
Builder.restoreIP(
OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal);
assert(AfterIP && "unexpected error creating masked");
Builder.restoreIP(*AfterIP);

return;
}
Expand Down Expand Up @@ -4493,19 +4523,23 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {

auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
return llvm::Error::success();
};

LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
Builder.restoreIP(OMPBuilder.createCritical(
Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
HintInst));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
S.getDirectiveName().getAsString(), HintInst);
assert(AfterIP && "unexpected error creating critical");
Builder.restoreIP(*AfterIP);

return;
}
Expand Down Expand Up @@ -5464,11 +5498,15 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
InsertPointTy CodeGenIP) {
Builder.restoreIP(CodeGenIP);
EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
return llvm::Error::success();
};
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
if (!CapturedStmtInfo)
CapturedStmtInfo = &CapStmtInfo;
Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB);
assert(AfterIP && "unexpected error creating taskgroup");
Builder.restoreIP(*AfterIP);
return;
}
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Expand Down Expand Up @@ -6041,6 +6079,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {

auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
return llvm::Error::success();
};

auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
Expand All @@ -6064,11 +6103,14 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
*this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
}
return llvm::Error::success();
};

OMPLexicalScope Scope(*this, S, OMPD_unknown);
Builder.restoreIP(
OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C);
assert(AfterIP && "unexpected error creating ordered");
Builder.restoreIP(*AfterIP);
}
return;
}
Expand Down Expand Up @@ -7344,8 +7386,10 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
if (IfCond)
IfCondition = EmitScalarExpr(IfCond,
/*IgnoreResultAssign=*/true);
return Builder.restoreIP(
OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion());
assert(AfterIP && "unexpected error creating cancel");
return Builder.restoreIP(*AfterIP);
}
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (Fe.Effect.kind() == FunctionEffect::Kind::NonBlocking)
Fn->addFnAttr(llvm::Attribute::SanitizeRealtime);
else if (Fe.Effect.kind() == FunctionEffect::Kind::Blocking)
Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeUnsafe);
Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeBlocking);
}

// Apply fuzzing attribute to the function.
Expand Down
10 changes: 8 additions & 2 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4296,8 +4296,11 @@ class CodeGenFunction : public CodeGenTypeCache {
LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
LValue EmitHLSLArrayAssignLValue(const BinaryOperator *E);
void EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
QualType Ty);

std::pair<LValue, LValue> EmitHLSLOutArgLValues(const HLSLOutArgExpr *E,
QualType Ty);
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
QualType Ty);

Address EmitExtVectorElementLValue(LValue V);

Expand Down Expand Up @@ -5147,6 +5150,9 @@ class CodeGenFunction : public CodeGenTypeCache {
SourceLocation ArgLoc, AbstractCallee AC,
unsigned ParmNum);

/// EmitWritebacks - Emit callbacks for function.
void EmitWritebacks(const CallArgList &Args);

/// EmitCallArg - Emit a single call argument.
void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);

Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CodeGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
#define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \
case BuiltinType::Id: \
return llvm::PointerType::get(getLLVMContext(), AS);
#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \
case BuiltinType::Id: \
return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \
{}, {Scope});
#include "clang/Basic/AMDGPUTypes.def"
#define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/HLSLIntangibleTypes.def"
Expand Down
385 changes: 329 additions & 56 deletions clang/lib/CodeGen/Targets/AArch64.cpp

Large diffs are not rendered by default.

11 changes: 0 additions & 11 deletions clang/lib/CodeGen/Targets/ARM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,12 +420,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
/*Realign=*/TyAlign > ABIAlign);
}

// On RenderScript, coerce Aggregates <= 64 bytes to an integer array of
// same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(Ty, getContext(), getVMContext());
}

// Otherwise, pass by coercing to a structure of the appropriate size.
llvm::Type* ElemTy;
unsigned SizeRegs;
Expand Down Expand Up @@ -609,11 +603,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
// are returned indirectly.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 32) {
// On RenderScript, coerce Aggregates <= 4 bytes to an integer array of
// same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
if (getDataLayout().isBigEndian())
// Return in 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
Expand Down
7 changes: 5 additions & 2 deletions clang/lib/CodeGen/Targets/RISCV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,11 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;

auto *Fn = cast<llvm::Function>(GV);

if (CGM.getCodeGenOpts().CFProtectionReturn)
Fn->addFnAttr("hw-shadow-stack");

const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
if (!Attr)
return;
Expand All @@ -604,8 +609,6 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
case RISCVInterruptAttr::machine: Kind = "machine"; break;
}

auto *Fn = cast<llvm::Function>(GV);

Fn->addFnAttr("interrupt", Kind);
}
};
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
Args.hasArg(options::OPT_fprofile_instr_generate) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_forder_file_instrumentation);
Args.hasArg(options::OPT_forder_file_instrumentation) ||
Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage) ||
Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage_EQ);
}

bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,17 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
}

if (Args.hasArg(options::OPT_stdlib))
CmdArgs.append({"-lc", "-lm"});
if (Args.hasArg(options::OPT_startfiles)) {
std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
if (!IncludePath)
IncludePath = "/lib";
SmallString<128> P(*IncludePath);
llvm::sys::path::append(P, "crt1.o");
CmdArgs.push_back(Args.MakeArgString(P));
}

CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
C.addCommand(std::make_unique<Command>(
Expand Down
30 changes: 25 additions & 5 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,26 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
}

if (auto *ColdFuncCoverageArg = Args.getLastArg(
options::OPT_fprofile_generate_cold_function_coverage,
options::OPT_fprofile_generate_cold_function_coverage_EQ)) {
SmallString<128> Path(
ColdFuncCoverageArg->getOption().matches(
options::OPT_fprofile_generate_cold_function_coverage_EQ)
? ColdFuncCoverageArg->getValue()
: "");
llvm::sys::path::append(Path, "default_%m.profraw");
// FIXME: Ideally the file path should be passed through
// `-fprofile-instrument-path=`(InstrProfileOutput), however, this field is
// shared with other profile use path(see PGOOptions), we need to refactor
// PGOOptions to make it work.
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(
Twine("--instrument-cold-function-only-path=") + Path));
CmdArgs.push_back("-mllvm");
CmdArgs.push_back("--pgo-function-entry-coverage");
}

Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
Expand Down Expand Up @@ -2838,7 +2858,7 @@ static std::string ComplexArithmeticStr(LangOptions::ComplexRangeKind Range) {

static void EmitComplexRangeDiag(const Driver &D, std::string str1,
std::string str2) {
if ((str1.compare(str2) != 0) && !str2.empty() && !str1.empty()) {
if (str1 != str2 && !str2.empty() && !str1.empty()) {
D.Diag(clang::diag::warn_drv_overriding_option) << str1 << str2;
}
}
Expand Down Expand Up @@ -2996,8 +3016,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
EmitComplexRangeDiag(D, RenderComplexRangeOption(Range),
"-fno-cx-limited-range");
} else {
if (GccRangeComplexOption.compare("-fcx-limited-range") != 0 &&
GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0)
if (GccRangeComplexOption != "-fcx-limited-range" &&
GccRangeComplexOption != "-fno-cx-fortran-rules")
EmitComplexRangeDiag(D, GccRangeComplexOption,
"-fno-cx-limited-range");
}
Expand Down Expand Up @@ -3042,8 +3062,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
break;
}
if (!GccRangeComplexOption.empty()) {
if (GccRangeComplexOption.compare("-fcx-limited-range") != 0) {
if (GccRangeComplexOption.compare("-fcx-fortran-rules") != 0) {
if (GccRangeComplexOption != "-fcx-limited-range") {
if (GccRangeComplexOption != "-fcx-fortran-rules") {
if (RangeVal != LangOptions::ComplexRangeKind::CX_Improved)
EmitComplexRangeDiag(D, GccRangeComplexOption,
ComplexArithmeticStr(RangeVal));
Expand Down
12 changes: 8 additions & 4 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1623,10 +1623,14 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
}

bool tools::addXRayRuntime(const ToolChain&TC, const ArgList &Args, ArgStringList &CmdArgs) {
if (Args.hasArg(options::OPT_shared))
return false;

if (TC.getXRayArgs().needsXRayRt()) {
if (Args.hasArg(options::OPT_shared)) {
if (TC.getXRayArgs().needsXRayDSORt()) {
CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray-dso"));
CmdArgs.push_back("--no-whole-archive");
return true;
}
} else if (TC.getXRayArgs().needsXRayRt()) {
CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
for (const auto &Mode : TC.getXRayArgs().modeList())
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,17 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

if (Args.hasArg(options::OPT_stdlib))
CmdArgs.append({"-lc", "-lm"});
if (Args.hasArg(options::OPT_startfiles)) {
std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
if (!IncludePath)
IncludePath = "/lib";
SmallString<128> P(*IncludePath);
llvm::sys::path::append(P, "crt1.o");
CmdArgs.push_back(Args.MakeArgString(P));
}

C.addCommand(std::make_unique<Command>(
JA, *this,
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
Expand Down
31 changes: 31 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,32 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
}
}

void Flang::AddPPCTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
bool VecExtabi = false;

if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
StringRef V = A->getValue();
if (V == "vec-extabi")
VecExtabi = true;
else if (V == "vec-default")
VecExtabi = false;
else
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << V;
}

const llvm::Triple &T = getToolChain().getTriple();
if (VecExtabi) {
if (!T.isOSAIX()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< "-mabi=vec-extabi" << T.str();
}
CmdArgs.push_back("-mabi=vec-extabi");
}
}

void Flang::AddRISCVTargetArgs(const ArgList &Args,
ArgStringList &CmdArgs) const {
const llvm::Triple &Triple = getToolChain().getTriple();
Expand Down Expand Up @@ -383,6 +409,11 @@ void Flang::addTargetOptions(const ArgList &Args,
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
AddX86_64TargetArgs(Args, CmdArgs);
break;
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
AddPPCTargetArgs(Args, CmdArgs);
break;
}

if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
void AddX86_64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;

/// Add specific options for PPC target.
///
/// \param [in] Args The list of input driver arguments
/// \param [out] CmdArgs The list of output command arguments
void AddPPCTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;

/// Extract offload options from the driver arguments and add them to
/// the command arguments.
/// \param [in] C The current compilation for the driver invocation
Expand Down
17 changes: 15 additions & 2 deletions clang/lib/Driver/ToolChains/PS4CPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,20 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-pie");

if (!Relocatable) {
CmdArgs.push_back("--eh-frame-hdr");
CmdArgs.push_back("--hash-style=sysv");

// Add a build-id by default to allow the PlayStation symbol server to
// index the symbols. `uuid` is the cheapest fool-proof method.
// (The non-determinism and alternative methods are noted in the downstream
// PlayStation docs).
CmdArgs.push_back("--build-id=uuid");

// All references are expected to be resolved at static link time for both
// executables and dynamic libraries. This has been the default linking
// behaviour for numerous PlayStation generations.
CmdArgs.push_back("--unresolved-symbols=report-all");

// Lazy binding of PLTs is not supported on PlayStation. They are placed in
// the RelRo segment.
CmdArgs.push_back("-z");
Expand Down Expand Up @@ -416,8 +430,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple,
}

void toolchains::PS4PS5Base::AddClangSystemIncludeArgs(
const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const ArgList &DriverArgs, ArgStringList &CC1Args) const {
const Driver &D = getDriver();

if (DriverArgs.hasArg(options::OPT_nostdinc))
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Driver/Types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ bool types::isDerivedFromC(ID Id) {
case TY_PP_ObjCXX:
case TY_PP_ObjCXX_Alias:
case TY_ObjCXX:
case TY_RenderScript:
case TY_PP_CHeader:
case TY_CHeader:
case TY_CLHeader:
Expand Down Expand Up @@ -328,7 +327,6 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) {
.Case("ll", TY_LLVM_IR)
.Case("mi", TY_PP_ObjC)
.Case("mm", TY_ObjCXX)
.Case("rs", TY_RenderScript)
.Case("adb", TY_Ada)
.Case("ads", TY_Ada)
.Case("asm", TY_PP_Asm)
Expand Down
21 changes: 21 additions & 0 deletions clang/lib/Driver/XRayArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
<< XRayInstrument->getSpelling() << Triple.str();
}

if (Args.hasFlag(options::OPT_fxray_shared, options::OPT_fno_xray_shared,
false)) {
XRayShared = true;

// DSO instrumentation is currently limited to x86_64
if (Triple.getArch() != llvm::Triple::x86_64) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< "-fxray-shared" << Triple.str();
}

unsigned PICLvl = std::get<1>(tools::ParsePICArgs(TC, Args));
if (!PICLvl) {
D.Diag(diag::err_opt_not_valid_without_opt) << "-fxray-shared"
<< "-fPIC";
}
}

// Both XRay and -fpatchable-function-entry use
// TargetOpcode::PATCHABLE_FUNCTION_ENTER.
if (Arg *A = Args.getLastArg(options::OPT_fpatchable_function_entry_EQ))
Expand Down Expand Up @@ -177,6 +194,10 @@ void XRayArgs::addArgs(const ToolChain &TC, const ArgList &Args,
Args.addOptOutFlag(CmdArgs, options::OPT_fxray_function_index,
options::OPT_fno_xray_function_index);

if (XRayShared)
Args.addOptInFlag(CmdArgs, options::OPT_fxray_shared,
options::OPT_fno_xray_shared);

if (const Arg *A =
Args.getLastArg(options::OPT_fxray_instruction_threshold_EQ)) {
int Value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ StringRef getLanguageName(Language Lang) {
case Language::OpenCL:
case Language::OpenCLCXX:
case Language::CUDA:
case Language::RenderScript:
case Language::HIP:
case Language::HLSL:

Expand Down
12 changes: 6 additions & 6 deletions clang/lib/Format/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ add_clang_library(clangFormat
file(GLOB_RECURSE files
*.cpp
*.h
../../include/clang/Format/*.h
../../tools/clang-format/*.cpp
../../unittests/Format/*.cpp
../../unittests/Format/*.h
${CLANG_SOURCE_DIR}/include/clang/Format/*.h
${CLANG_SOURCE_DIR}/tools/clang-format/*.cpp
${CLANG_SOURCE_DIR}/unittests/Format/*.cpp
${CLANG_SOURCE_DIR}/unittests/Format/*.h
)

set(check_format_depends)
Expand All @@ -46,8 +46,8 @@ foreach (file IN LISTS files)
COMMAND clang-format ${file} | diff -u ${file} -
VERBATIM
COMMENT "Checking format of ${file}..."
)
list(APPEND check_format_depends "clang-format-check-format${i}")
)
list(APPEND check_format_depends clang-format-check-format${i})

math(EXPR i ${i}+1)
endforeach ()
Expand Down
2 changes: 0 additions & 2 deletions clang/lib/Frontend/ASTUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2699,8 +2699,6 @@ InputKind ASTUnit::getInputKind() const {
Lang = Language::OpenCL;
else if (LangOpts.CUDA)
Lang = Language::CUDA;
else if (LangOpts.RenderScript)
Lang = Language::RenderScript;
else if (LangOpts.CPlusPlus)
Lang = LangOpts.ObjC ? Language::ObjCXX : Language::CXX;
else
Expand Down
7 changes: 0 additions & 7 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2846,9 +2846,6 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts,
case Language::ObjCXX:
Lang = "objective-c++";
break;
case Language::RenderScript:
Lang = "renderscript";
break;
case Language::Asm:
Lang = "assembler-with-cpp";
break;
Expand Down Expand Up @@ -3071,7 +3068,6 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
.Case("c++", Language::CXX)
.Case("objective-c", Language::ObjC)
.Case("objective-c++", Language::ObjCXX)
.Case("renderscript", Language::RenderScript)
.Case("hlsl", Language::HLSL)
.Default(Language::Unknown);

Expand Down Expand Up @@ -3499,7 +3495,6 @@ static bool IsInputCompatibleWithStandard(InputKind IK,

case Language::C:
case Language::ObjC:
case Language::RenderScript:
return S.getLanguage() == Language::C;

case Language::OpenCL:
Expand Down Expand Up @@ -3551,8 +3546,6 @@ static StringRef GetInputKindName(InputKind IK) {
return "C++ for OpenCL";
case Language::CUDA:
return "CUDA";
case Language::RenderScript:
return "RenderScript";
case Language::HIP:
return "HIP";

Expand Down
3 changes: 1 addition & 2 deletions clang/lib/Frontend/FrontendAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes(

// Add includes for each of these headers.
for (auto HK : {Module::HK_Normal, Module::HK_Private}) {
for (Module::Header &H : Module->Headers[HK]) {
for (const Module::Header &H : Module->getHeaders(HK)) {
Module->addTopHeader(H.Entry);
// Use the path as specified in the module map file. We'll look for this
// file relative to the module build directory (the directory containing
Expand Down Expand Up @@ -534,7 +534,6 @@ static Module *prepareToBuildModule(CompilerInstance &CI,
}
if (*OriginalModuleMap != CI.getSourceManager().getFileEntryRefForID(
CI.getSourceManager().getMainFileID())) {
M->IsInferred = true;
auto FileCharacter =
M->IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
FileID OriginalModuleMapFID = CI.getSourceManager().getOrCreateFileID(
Expand Down
1 change: 0 additions & 1 deletion clang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,6 @@ void PrintPreambleAction::ExecuteAction() {
case Language::Unknown:
case Language::Asm:
case Language::LLVM_IR:
case Language::RenderScript:
// We can't do anything with these.
return;
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Headers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ set(x86_files
mm3dnow.h
mmintrin.h
movdirintrin.h
movrs_avx10_2_512intrin.h
movrs_avx10_2intrin.h
mwaitxintrin.h
nmmintrin.h
pconfigintrin.h
Expand All @@ -241,6 +243,7 @@ set(x86_files
shaintrin.h
sm3intrin.h
sm4intrin.h
sm4evexintrin.h
smmintrin.h
tbmintrin.h
tmmintrin.h
Expand Down
62 changes: 33 additions & 29 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2110,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
/// A 128-bit vector of [4 x i32].
/// \returns A 128-bit vector of [4 x i32] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_add_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4su)__a + (__v4su)__b);
}

Expand Down Expand Up @@ -2147,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
/// A 128-bit vector of [2 x i64].
/// \returns A 128-bit vector of [2 x i64] containing the sums of both
/// parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_add_epi64(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a + (__v2du)__b);
}

Expand Down Expand Up @@ -2539,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sub_epi32(__m128i __a, __m128i __b) {
return (__m128i)((__v4su)__a - (__v4su)__b);
}

Expand Down Expand Up @@ -2573,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
/// A 128-bit integer vector containing the subtrahends.
/// \returns A 128-bit integer vector containing the differences of the values
/// in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
__m128i __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sub_epi64(__m128i __a, __m128i __b) {
return (__m128i)((__v2du)__a - (__v2du)__b);
}

Expand Down Expand Up @@ -3512,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) {
/// destination vector of [2 x i64].
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
long long __q0) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi64x(long long __q1, long long __q0) {
return __extension__(__m128i)(__v2di){__q0, __q1};
}

Expand All @@ -3533,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
/// destination vector of [2 x i64].
/// \returns An initialized 128-bit vector of [2 x i64] containing the values
/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
__m64 __q0) {
return _mm_set_epi64x((long long)__q1, (long long)__q0);
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi64(__m64 __q1, __m64 __q0) {
return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]);
}

/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
Expand All @@ -3560,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
/// vector.
/// \returns An initialized 128-bit vector of [4 x i32] containing the values
/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
int __i1, int __i0) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3,
int __i2,
int __i1,
int __i0) {
return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3};
}

Expand Down Expand Up @@ -3599,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
/// vector.
/// \returns An initialized 128-bit vector of [8 x i16] containing the values
/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
short __w2, short __w1, short __w0) {
return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3,
Expand Down Expand Up @@ -3648,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
/// Initializes bits [7:0] of the destination vector.
/// \returns An initialized 128-bit vector of [16 x i8] containing the values
/// provided in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
char __b10, char __b9, char __b8, char __b7, char __b6, char __b5,
char __b4, char __b3, char __b2, char __b1, char __b0) {
Expand All @@ -3670,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
/// vector.
/// \returns An initialized 128-bit integer vector of [2 x i64] with both
/// elements containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set1_epi64x(long long __q) {
return _mm_set_epi64x(__q, __q);
}

Expand All @@ -3687,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
/// vector.
/// \returns An initialized 128-bit vector of [2 x i64] with all elements
/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set1_epi64(__m64 __q) {
return _mm_set_epi64(__q, __q);
}

Expand All @@ -3704,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
/// vector.
/// \returns An initialized 128-bit vector of [4 x i32] with all elements
/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) {
return _mm_set_epi32(__i, __i, __i, __i);
}

Expand All @@ -3721,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
/// vector.
/// \returns An initialized 128-bit vector of [8 x i16] with all elements
/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set1_epi16(short __w) {
return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
}

Expand All @@ -3738,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
/// vector.
/// \returns An initialized 128-bit vector of [16 x i8] with all elements
/// containing the value provided in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) {
return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
__b, __b, __b, __b, __b);
}
Expand All @@ -3757,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
/// A 64-bit integral value used to initialize the upper 64 bits of the
/// result.
/// \returns An initialized 128-bit integer vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
__m64 __q1) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi64(__m64 __q0, __m64 __q1) {
return _mm_set_epi64(__q1, __q0);
}

Expand All @@ -3779,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
/// \param __i3
/// A 32-bit integral value used to initialize bits [127:96] of the result.
/// \returns An initialized 128-bit integer vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
int __i2,
int __i3) {
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) {
return _mm_set_epi32(__i3, __i2, __i1, __i0);
}

Expand Down Expand Up @@ -3810,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
/// \param __w7
/// A 16-bit integral value used to initialize bits [127:112] of the result.
/// \returns An initialized 128-bit integer vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
short __w5, short __w6, short __w7) {
return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
Expand Down Expand Up @@ -3857,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
/// \param __b15
/// An 8-bit integral value used to initialize bits [127:120] of the result.
/// \returns An initialized 128-bit integer vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
char __b6, char __b7, char __b8, char __b9, char __b10,
char __b11, char __b12, char __b13, char __b14, char __b15) {
Expand Down
142 changes: 91 additions & 51 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,24 @@ template <typename T> constexpr uint asuint(T F) {
return __detail::bit_cast<uint, T>(F);
}

//===----------------------------------------------------------------------===//
// asuint splitdouble builtins
//===----------------------------------------------------------------------===//

/// \fn void asuint(double D, out uint lowbits, out uint highbits)
/// \brief Splits the double D and reinterprets its low bits and high bits as
/// uints. The double2/double3/double4 overloads take uint2/uint3/uint4
/// outputs with the same element count as the input.
/// \param D The input double.
/// \param lowbits The output lowbits of D.
/// \param highbits The output highbits of D.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
void asuint(double, out uint, out uint);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
void asuint(double2, out uint2, out uint2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
void asuint(double3, out uint3, out uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
void asuint(double4, out uint4, out uint4);

//===----------------------------------------------------------------------===//
// atan builtins
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -705,66 +723,88 @@ float4 cosh(float4);

#ifdef __HLSL_ENABLE_16_BIT
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t countbits(int16_t);
const inline uint countbits(int16_t x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t2 countbits(int16_t2);
const inline uint2 countbits(int16_t2 x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t3 countbits(int16_t3);
const inline uint3 countbits(int16_t3 x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t4 countbits(int16_t4);
const inline uint4 countbits(int16_t4 x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t countbits(uint16_t);
const inline uint countbits(uint16_t x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t2 countbits(uint16_t2);
const inline uint2 countbits(uint16_t2 x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t3 countbits(uint16_t3);
const inline uint3 countbits(uint16_t3 x) {
return __builtin_elementwise_popcount(x);
}
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t4 countbits(uint16_t4);
const inline uint4 countbits(uint16_t4 x) {
return __builtin_elementwise_popcount(x);
}
#endif

_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int countbits(int);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int2 countbits(int2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int3 countbits(int3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int4 countbits(int4);

_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint countbits(uint);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint2 countbits(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint3 countbits(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint4 countbits(uint4);

_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t countbits(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t2 countbits(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t3 countbits(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t4 countbits(int64_t4);

_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t countbits(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t2 countbits(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t3 countbits(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t4 countbits(uint64_t4);
/// countbits overloads for int, int2, int3 and int4. Each forwards its
/// argument to __builtin_elementwise_popcount and returns the result as the
/// uint/uintN type with the same element count as the argument.
const inline uint countbits(int x) { return __builtin_elementwise_popcount(x); }
const inline uint2 countbits(int2 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint3 countbits(int3 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint4 countbits(int4 x) {
return __builtin_elementwise_popcount(x);
}

/// countbits overloads for uint, uint2, uint3 and uint4. Each forwards its
/// argument to __builtin_elementwise_popcount and returns the result with the
/// same type as the argument.
const inline uint countbits(uint x) {
return __builtin_elementwise_popcount(x);
}
const inline uint2 countbits(uint2 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint3 countbits(uint3 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint4 countbits(uint4 x) {
return __builtin_elementwise_popcount(x);
}

/// countbits overloads for int64_t, int64_t2, int64_t3 and int64_t4. Each
/// forwards its argument to __builtin_elementwise_popcount; the declared
/// return type is uint/uintN with the same element count, so the builtin's
/// result is converted on return.
const inline uint countbits(int64_t x) {
return __builtin_elementwise_popcount(x);
}
const inline uint2 countbits(int64_t2 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint3 countbits(int64_t3 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint4 countbits(int64_t4 x) {
return __builtin_elementwise_popcount(x);
}

/// countbits overloads for uint64_t, uint64_t2, uint64_t3 and uint64_t4. Each
/// forwards its argument to __builtin_elementwise_popcount; the declared
/// return type is uint/uintN with the same element count, so the builtin's
/// result is converted on return.
const inline uint countbits(uint64_t x) {
return __builtin_elementwise_popcount(x);
}
const inline uint2 countbits(uint64_t2 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint3 countbits(uint64_t3 x) {
return __builtin_elementwise_popcount(x);
}
const inline uint4 countbits(uint64_t4 x) {
return __builtin_elementwise_popcount(x);
}

//===----------------------------------------------------------------------===//
// degrees builtins
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Headers/immintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,16 @@ _storebe_i64(void * __P, long long __D) {
#include <movdirintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX10_2__) && defined(__MOVRS__))
#include <movrs_avx10_2intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX10_2_512__) && defined(__MOVRS__))
#include <movrs_avx10_2_512intrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif
Expand Down Expand Up @@ -667,6 +677,11 @@ _storebe_i64(void * __P, long long __D) {
#include <avx10_2_512satcvtintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX10_2_512__) && defined(__SM4__))
#include <sm4evexintrin.h>
#endif

#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif
Expand Down
98 changes: 98 additions & 0 deletions clang/lib/Headers/movrs_avx10_2_512intrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*===----- movrs_avx10_2_512intrin.h - AVX10.2-512-MOVRS intrinsics --------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error \
"Never use <movrs_avx10_2_512intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __MOVRS_AVX10_2_512INTRIN_H
#define __MOVRS_AVX10_2_512INTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file: always
 * inlined, no debug info, requiring the movrs and avx10.2-512 target features,
 * with a declared minimum vector width of 512. The feature list is written
 * without whitespace after the comma. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("movrs,avx10.2-512"), __min_vector_width__(512)))

/// Returns, as an __m512i, the result of __builtin_ia32_vmovrsb512 applied to
/// __A viewed as a pointer to const __v64qi.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_loadrs_epi8(void const *__A) {
  const __v64qi *__ptr = (const __v64qi *)(__A);
  return (__m512i)__builtin_ia32_vmovrsb512(__ptr);
}

/// Combines the vector from _mm512_loadrs_epi8(__A) with __W under mask __U
/// using __builtin_ia32_selectb_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) {
  __v64qi __loaded = (__v64qi)_mm512_loadrs_epi8(__A);
  __v64qi __src = (__v64qi)__W;
  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, __loaded, __src);
}

/// Combines the vector from _mm512_loadrs_epi8(__A) with
/// _mm512_setzero_si512() under mask __U using __builtin_ia32_selectb_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) {
  __v64qi __loaded = (__v64qi)_mm512_loadrs_epi8(__A);
  __v64qi __zero = (__v64qi)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, __loaded, __zero);
}

/// Returns, as an __m512i, the result of __builtin_ia32_vmovrsd512 applied to
/// __A viewed as a pointer to const __v16si.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_loadrs_epi32(void const *__A) {
  const __v16si *__ptr = (const __v16si *)(__A);
  return (__m512i)__builtin_ia32_vmovrsd512(__ptr);
}

/// Combines the vector from _mm512_loadrs_epi32(__A) with __W under mask __U
/// using __builtin_ia32_selectd_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) {
  __v16si __loaded = (__v16si)_mm512_loadrs_epi32(__A);
  __v16si __src = (__v16si)__W;
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, __loaded, __src);
}

/// Combines the vector from _mm512_loadrs_epi32(__A) with
/// _mm512_setzero_si512() under mask __U using __builtin_ia32_selectd_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) {
  __v16si __loaded = (__v16si)_mm512_loadrs_epi32(__A);
  __v16si __zero = (__v16si)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, __loaded, __zero);
}

/// Returns, as an __m512i, the result of __builtin_ia32_vmovrsq512 applied to
/// __A viewed as a pointer to const __v8di.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_loadrs_epi64(void const *__A) {
  const __v8di *__ptr = (const __v8di *)(__A);
  return (__m512i)__builtin_ia32_vmovrsq512(__ptr);
}

/// Combines the vector from _mm512_loadrs_epi64(__A) with __W under mask __U
/// using __builtin_ia32_selectq_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) {
  __v8di __loaded = (__v8di)_mm512_loadrs_epi64(__A);
  __v8di __src = (__v8di)__W;
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm512_loadrs_epi64(__A) with
/// _mm512_setzero_si512() under mask __U using __builtin_ia32_selectq_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
  __v8di __loaded = (__v8di)_mm512_loadrs_epi64(__A);
  __v8di __zero = (__v8di)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m512i, the result of __builtin_ia32_vmovrsw512 applied to
/// __A viewed as a pointer to const __v32hi.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_loadrs_epi16(void const *__A) {
  const __v32hi *__ptr = (const __v32hi *)(__A);
  return (__m512i)__builtin_ia32_vmovrsw512(__ptr);
}

/// Combines the vector from _mm512_loadrs_epi16(__A) with __W under mask __U
/// using __builtin_ia32_selectw_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) {
  __v32hi __loaded = (__v32hi)_mm512_loadrs_epi16(__A);
  __v32hi __src = (__v32hi)__W;
  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __loaded, __src);
}

/// Combines the vector from _mm512_loadrs_epi16(__A) with
/// _mm512_setzero_si512() under mask __U using __builtin_ia32_selectw_512.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) {
  __v32hi __loaded = (__v32hi)_mm512_loadrs_epi16(__A);
  __v32hi __zero = (__v32hi)_mm512_setzero_si512();
  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __loaded, __zero);
}

#undef __DEFAULT_FN_ATTRS512

#endif /* __x86_64__ */
#endif /* __MOVRS_AVX10_2_512INTRIN_H */
174 changes: 174 additions & 0 deletions clang/lib/Headers/movrs_avx10_2intrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*===--------- movrs_avx10_2intrin.h - AVX10.2-MOVRS intrinsics ------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error \
"Never use <movrs_avx10_2intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __MOVRS_AVX10_2INTRIN_H
#define __MOVRS_AVX10_2INTRIN_H
#ifdef __x86_64__

/* Define the default attributes for the functions in this file. Both variants
 * are always-inline, nodebug and require the movrs and avx10.2-256 target
 * features; they differ only in the declared __min_vector_width__
 * (128 for the _mm_* functions, 256 for the _mm256_* functions). */
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("movrs,avx10.2-256"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("movrs,avx10.2-256"), __min_vector_width__(256)))

/// Returns, as an __m128i, the result of __builtin_ia32_vmovrsb128 applied to
/// __A viewed as a pointer to const __v16qi.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_loadrs_epi8(void const *__A) {
  const __v16qi *__ptr = (const __v16qi *)(__A);
  return (__m128i)__builtin_ia32_vmovrsb128(__ptr);
}

/// Combines the vector from _mm_loadrs_epi8(__A) with __W under mask __U
/// using __builtin_ia32_selectb_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) {
  __v16qi __loaded = (__v16qi)_mm_loadrs_epi8(__A);
  __v16qi __src = (__v16qi)__W;
  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, __loaded, __src);
}

/// Combines the vector from _mm_loadrs_epi8(__A) with _mm_setzero_si128()
/// under mask __U using __builtin_ia32_selectb_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) {
  __v16qi __loaded = (__v16qi)_mm_loadrs_epi8(__A);
  __v16qi __zero = (__v16qi)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, __loaded, __zero);
}

/// Returns, as an __m256i, the result of __builtin_ia32_vmovrsb256 applied to
/// __A viewed as a pointer to const __v32qi.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_loadrs_epi8(void const *__A) {
  const __v32qi *__ptr = (const __v32qi *)(__A);
  return (__m256i)__builtin_ia32_vmovrsb256(__ptr);
}

/// Combines the vector from _mm256_loadrs_epi8(__A) with __W under mask __U
/// using __builtin_ia32_selectb_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) {
  __v32qi __loaded = (__v32qi)_mm256_loadrs_epi8(__A);
  __v32qi __src = (__v32qi)__W;
  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, __loaded, __src);
}

/// Combines the vector from _mm256_loadrs_epi8(__A) with
/// _mm256_setzero_si256() under mask __U using __builtin_ia32_selectb_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) {
  __v32qi __loaded = (__v32qi)_mm256_loadrs_epi8(__A);
  __v32qi __zero = (__v32qi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, __loaded, __zero);
}

/// Returns, as an __m128i, the result of __builtin_ia32_vmovrsd128 applied to
/// __A viewed as a pointer to const __v4si.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_loadrs_epi32(void const *__A) {
  const __v4si *__ptr = (const __v4si *)(__A);
  return (__m128i)__builtin_ia32_vmovrsd128(__ptr);
}

/// Combines the vector from _mm_loadrs_epi32(__A) with __W under mask __U
/// using __builtin_ia32_selectd_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) {
  __v4si __loaded = (__v4si)_mm_loadrs_epi32(__A);
  __v4si __src = (__v4si)__W;
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm_loadrs_epi32(__A) with _mm_setzero_si128()
/// under mask __U using __builtin_ia32_selectd_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
  __v4si __loaded = (__v4si)_mm_loadrs_epi32(__A);
  __v4si __zero = (__v4si)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m256i, the result of __builtin_ia32_vmovrsd256 applied to
/// __A viewed as a pointer to const __v8si.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_loadrs_epi32(void const *__A) {
  const __v8si *__ptr = (const __v8si *)(__A);
  return (__m256i)__builtin_ia32_vmovrsd256(__ptr);
}

/// Combines the vector from _mm256_loadrs_epi32(__A) with __W under mask __U
/// using __builtin_ia32_selectd_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) {
  __v8si __loaded = (__v8si)_mm256_loadrs_epi32(__A);
  __v8si __src = (__v8si)__W;
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm256_loadrs_epi32(__A) with
/// _mm256_setzero_si256() under mask __U using __builtin_ia32_selectd_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
  __v8si __loaded = (__v8si)_mm256_loadrs_epi32(__A);
  __v8si __zero = (__v8si)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m128i, the result of __builtin_ia32_vmovrsq128 applied to
/// __A viewed as a pointer to const __v2di.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_loadrs_epi64(void const *__A) {
  const __v2di *__ptr = (const __v2di *)(__A);
  return (__m128i)__builtin_ia32_vmovrsq128(__ptr);
}

/// Combines the vector from _mm_loadrs_epi64(__A) with __W under mask __U
/// using __builtin_ia32_selectq_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) {
  __v2di __loaded = (__v2di)_mm_loadrs_epi64(__A);
  __v2di __src = (__v2di)__W;
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm_loadrs_epi64(__A) with _mm_setzero_si128()
/// under mask __U using __builtin_ia32_selectq_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
  __v2di __loaded = (__v2di)_mm_loadrs_epi64(__A);
  __v2di __zero = (__v2di)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m256i, the result of __builtin_ia32_vmovrsq256 applied to
/// __A viewed as a pointer to const __v4di.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_loadrs_epi64(void const *__A) {
  const __v4di *__ptr = (const __v4di *)(__A);
  return (__m256i)__builtin_ia32_vmovrsq256(__ptr);
}

/// Combines the vector from _mm256_loadrs_epi64(__A) with __W under mask __U
/// using __builtin_ia32_selectq_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) {
  __v4di __loaded = (__v4di)_mm256_loadrs_epi64(__A);
  __v4di __src = (__v4di)__W;
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm256_loadrs_epi64(__A) with
/// _mm256_setzero_si256() under mask __U using __builtin_ia32_selectq_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
  __v4di __loaded = (__v4di)_mm256_loadrs_epi64(__A);
  __v4di __zero = (__v4di)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m128i, the result of __builtin_ia32_vmovrsw128 applied to
/// __A viewed as a pointer to const __v8hi.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_loadrs_epi16(void const *__A) {
  const __v8hi *__ptr = (const __v8hi *)(__A);
  return (__m128i)__builtin_ia32_vmovrsw128(__ptr);
}

/// Combines the vector from _mm_loadrs_epi16(__A) with __W under mask __U
/// using __builtin_ia32_selectw_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) {
  __v8hi __loaded = (__v8hi)_mm_loadrs_epi16(__A);
  __v8hi __src = (__v8hi)__W;
  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, __loaded, __src);
}

/// Combines the vector from _mm_loadrs_epi16(__A) with _mm_setzero_si128()
/// under mask __U using __builtin_ia32_selectw_128.
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) {
  __v8hi __loaded = (__v8hi)_mm_loadrs_epi16(__A);
  __v8hi __zero = (__v8hi)_mm_setzero_si128();
  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, __loaded, __zero);
}

/// Returns, as an __m256i, the result of __builtin_ia32_vmovrsw256 applied to
/// __A viewed as a pointer to const __v16hi.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_loadrs_epi16(void const *__A) {
  const __v16hi *__ptr = (const __v16hi *)(__A);
  return (__m256i)__builtin_ia32_vmovrsw256(__ptr);
}

/// Combines the vector from _mm256_loadrs_epi16(__A) with __W under mask __U
/// using __builtin_ia32_selectw_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) {
  __v16hi __loaded = (__v16hi)_mm256_loadrs_epi16(__A);
  __v16hi __src = (__v16hi)__W;
  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, __loaded, __src);
}

/// Combines the vector from _mm256_loadrs_epi16(__A) with
/// _mm256_setzero_si256() under mask __U using __builtin_ia32_selectw_256.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) {
  __v16hi __loaded = (__v16hi)_mm256_loadrs_epi16(__A);
  __v16hi __zero = (__v16hi)_mm256_setzero_si256();
  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, __loaded, __zero);
}

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif /* __x86_64__ */
#endif /* __MOVRS_AVX10_2INTRIN_H */
32 changes: 32 additions & 0 deletions clang/lib/Headers/sm4evexintrin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H

#ifndef __SM4EVEXINTRIN_H
#define __SM4EVEXINTRIN_H

/* Define the default attributes for the functions in this file: always-inline,
 * nodebug, requiring the sm4 and avx10.2-512 target features, with a declared
 * minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("sm4,avx10.2-512"), __min_vector_width__(512)))

/// Returns, as an __m512i, the result of __builtin_ia32_vsm4key4512 applied to
/// __A and __B, each reinterpreted as a __v16su vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
  __v16su __lhs = (__v16su)__A;
  __v16su __rhs = (__v16su)__B;
  return (__m512i)__builtin_ia32_vsm4key4512(__lhs, __rhs);
}

/// Returns, as an __m512i, the result of __builtin_ia32_vsm4rnds4512 applied
/// to __A and __B, each reinterpreted as a __v16su vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
  __v16su __lhs = (__v16su)__A;
  __v16su __rhs = (__v16su)__B;
  return (__m512i)__builtin_ia32_vsm4rnds4512(__lhs, __rhs);
}

#undef __DEFAULT_FN_ATTRS512

#endif // __SM4EVEXINTRIN_H
2 changes: 2 additions & 0 deletions clang/lib/Interpreter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set(LLVM_LINK_COMPONENTS

if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS)
set(WASM_SRC Wasm.cpp)
set(WASM_LINK lldWasm)
endif()

add_clang_library(clangInterpreter
Expand Down Expand Up @@ -44,6 +45,7 @@ add_clang_library(clangInterpreter
clangParse
clangSema
clangSerialization
${WASM_LINK}
)

if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS)
Expand Down
1 change: 0 additions & 1 deletion clang/lib/Lex/HeaderSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1582,7 +1582,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
}
}

FileInfo.IsLocallyIncluded = true;
IsFirstIncludeOfFile = PP.markIncluded(File);
return true;
}
Expand Down
56 changes: 31 additions & 25 deletions clang/lib/Lex/ModuleMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule,
// as obtained from the lookup and as obtained from the module.
// This check is not cheap, so enable it only for debugging.
bool IsPrivate = false;
SmallVectorImpl<Module::Header> *HeaderList[] = {
&Header.getModule()->Headers[Module::HK_Private],
&Header.getModule()->Headers[Module::HK_PrivateTextual]};
for (auto *Hs : HeaderList)
ArrayRef<Module::Header> HeaderList[] = {
Header.getModule()->getHeaders(Module::HK_Private),
Header.getModule()->getHeaders(Module::HK_PrivateTextual)};
for (auto Hs : HeaderList)
IsPrivate |= llvm::any_of(
*Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
assert(IsPrivate && "inconsistent headers and roles");
}
#endif
Expand Down Expand Up @@ -655,10 +655,9 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
Explicit).first;
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false,
Explicit);
setInferredModuleAllowedBy(Result, UmbrellaModuleMap);

// Associate the module and the directory.
UmbrellaDirs[SkippedDir] = Result;
Expand All @@ -673,10 +672,9 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
llvm::sys::path::stem(File.getName()), NameBuf);
Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
Explicit).first;
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false,
Explicit);
setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
Result->addTopHeader(File);

// If inferred submodules export everything they import, add a
Expand Down Expand Up @@ -868,6 +866,15 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
return std::make_pair(Sub, false);

// Create a new module with this name.
Module *M = createModule(Name, Parent, IsFramework, IsExplicit);
return std::make_pair(M, true);
}

Module *ModuleMap::createModule(StringRef Name, Module *Parent,
bool IsFramework, bool IsExplicit) {
assert(lookupModuleQualified(Name, Parent) == nullptr &&
"Creating duplicate submodule");

Module *Result = new (ModulesAlloc.Allocate())
Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
IsFramework, IsExplicit, NumCreatedModules++);
Expand All @@ -877,7 +884,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
Modules[Name] = Result;
ModuleScopeIDs[Result] = CurrentModuleScopeID;
}
return std::make_pair(Result, true);
return Result;
}

Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
Expand Down Expand Up @@ -1097,8 +1104,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
Module *Result = new (ModulesAlloc.Allocate())
Module(ModuleConstructorTag{}, ModuleName, SourceLocation(), Parent,
/*IsFramework=*/true, /*IsExplicit=*/false, NumCreatedModules++);
InferredModuleAllowedBy[Result] = ModuleMapFID;
Result->IsInferred = true;
setInferredModuleAllowedBy(Result, ModuleMapFID);
if (!Parent) {
if (LangOpts.CurrentModule == ModuleName)
SourceModule = Result;
Expand Down Expand Up @@ -1296,27 +1302,28 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
ModuleHeaderRole Role, bool Imported) {
KnownHeader KH(Mod, Role);

FileEntryRef HeaderEntry = Header.Entry;

// Only add each header to the headers list once.
// FIXME: Should we diagnose if a header is listed twice in the
// same module definition?
auto &HeaderList = Headers[Header.Entry];
auto &HeaderList = Headers[HeaderEntry];
if (llvm::is_contained(HeaderList, KH))
return;

HeaderList.push_back(KH);
Mod->Headers[headerRoleToKind(Role)].push_back(Header);
Mod->addHeader(headerRoleToKind(Role), std::move(Header));

bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts);
if (!Imported || isCompilingModuleHeader) {
// When we import HeaderFileInfo, the external source is expected to
// set the isModuleHeader flag itself.
HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
isCompilingModuleHeader);
HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role, isCompilingModuleHeader);
}

// Notify callbacks that we just added a new header.
for (const auto &Cb : Callbacks)
Cb->moduleMapAddHeader(Header.Entry.getName());
Cb->moduleMapAddHeader(HeaderEntry.getName());
}

FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const {
Expand Down Expand Up @@ -1345,7 +1352,7 @@ ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
}

void ModuleMap::setInferredModuleAllowedBy(Module *M, FileID ModMapFID) {
assert(M->IsInferred && "module not inferred");
M->IsInferred = true;
InferredModuleAllowedBy[M] = ModMapFID;
}

Expand Down Expand Up @@ -2125,9 +2132,8 @@ void ModuleMapParser::parseModuleDecl() {
ActiveModule =
Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
} else {
ActiveModule =
Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
.first;
ActiveModule = Map.findOrCreateModuleFirst(ModuleName, ActiveModule,
Framework, Explicit);
}

ActiveModule->DefinitionLoc = ModuleNameLoc;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/CheckExprLifetime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) {
}

// Return true if this is a "normal" assignment operator.
// We assuments that a normal assingment operator always returns *this, that is,
// We assume that a normal assignment operator always returns *this, that is,
// an lvalue reference that is the same type as the implicit object parameter
// (or the LHS for a non-member operator$=).
static bool isNormalAssignmentOperator(const FunctionDecl *FD) {
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Sema/SemaAvailability.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ static bool ShouldDiagnoseAvailabilityInContext(
return false;
}

if (K == AR_Deprecated) {
if (const auto *VD = dyn_cast<VarDecl>(OffendingDecl))
if (VD->isLocalVarDeclOrParm() && VD->isDeprecated())
return true;
}

// Checks if we should emit the availability diagnostic in the context of C.
auto CheckContext = [&](const Decl *C) {
if (K == AR_NotYetIntroduced) {
Expand Down
Loading