13 changes: 12 additions & 1 deletion clang/lib/CodeGen/CGAtomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,

llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());

// For __atomic_*_fetch operations, perform the operation again to
Expand Down Expand Up @@ -2034,6 +2034,17 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
IsWeak);
}

/// Build an atomicrmw through the IR builder and give the target's codegen
/// hooks a chance to annotate the new instruction before it is returned.
llvm::AtomicRMWInst *CodeGenFunction::emitAtomicRMWInst(
    llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
    llvm::AtomicOrdering Order, llvm::SyncScope::ID SSID) {
  llvm::AtomicRMWInst *const Inst =
      Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);

  // The hook runs only after the instruction exists, so it can attach
  // metadata directly to it.
  const auto &Hooks = getTargetHooks();
  Hooks.setTargetAtomicMetadata(*this, *Inst);
  return Inst;
}

void CodeGenFunction::EmitAtomicUpdate(
LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
Expand Down
13 changes: 7 additions & 6 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2841,9 +2841,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
llvm::Value *amt = llvm::ConstantFP::get(
VMContext, llvm::APFloat(static_cast<float>(1.0)));
llvm::Value *old =
Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
llvm::AtomicOrdering::SequentiallyConsistent);
llvm::AtomicRMWInst *old =
CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
llvm::AtomicOrdering::SequentiallyConsistent);

return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
value = EmitLoadOfLValue(LV, E->getExprLoc());
Expand Down Expand Up @@ -3583,9 +3584,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy,
E->getExprLoc()),
LHSTy);
Value *OldVal = Builder.CreateAtomicRMW(
AtomicOp, LHSLV.getAddress(), Amt,
llvm::AtomicOrdering::SequentiallyConsistent);

llvm::AtomicRMWInst *OldVal =
CGF.emitAtomicRMWInst(AtomicOp, LHSLV.getAddress(), Amt);

// Since operation is atomic, the result type is guaranteed to be the
// same as the input in LLVM terms.
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6326,8 +6326,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
X.getAddress().getElementType());
}
llvm::Value *Res =
CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
llvm::AtomicRMWInst *Res =
CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}

Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4160,6 +4160,13 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());

/// Emit an atomicrmw instruction and let the target attach any relevant
/// metadata to it.
///
/// \p Order defaults to sequentially consistent and \p SSID defaults to the
/// system synchronization scope.
/// \returns the newly created instruction.
llvm::AtomicRMWInst *emitAtomicRMWInst(
llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::ID SSID = llvm::SyncScope::System);

void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ class TargetCodeGenInfo {
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const;

/// Allow the target to apply other metadata to an atomic instruction.
/// The default implementation does nothing; targets override it as needed.
virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
llvm::AtomicRMWInst &RMW) const {}

/// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
public:
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
void setTargetAtomicMetadata(CodeGenFunction &CGF,
llvm::AtomicRMWInst &RMW) const override;
llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
Expand Down Expand Up @@ -546,6 +548,23 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
return Ctx.getOrInsertSyncScopeID(Name);
}

/// Tag floating-point atomicrmw instructions with AMDGPU-specific metadata
/// when the target allows unsafe FP atomics. Any other instruction, and every
/// instruction when the option is off, is left untouched.
void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
    CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
  const bool UnsafeFPAtomics = CGF.getTarget().allowAMDGPUUnsafeFPAtomics();
  if (!UnsafeFPAtomics)
    return;

  // TODO: Introduce new, more controlled options that also work for integers,
  // and deprecate allowAMDGPUUnsafeFPAtomics.
  const llvm::AtomicRMWInst::BinOp Kind = RMW.getOperation();
  if (!llvm::AtomicRMWInst::isFPOperation(Kind))
    return;

  // Both markers are empty metadata nodes; only their presence matters.
  llvm::MDNode *EmptyNode = llvm::MDNode::get(CGF.getLLVMContext(), {});
  RMW.setMetadata("amdgpu.no.fine.grained.memory", EmptyNode);

  // Only a float-typed fadd additionally receives the denormal-mode marker.
  const bool IsFloatFAdd =
      Kind == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy();
  if (IsFloatFAdd)
    RMW.setMetadata("amdgpu.ignore.denormal.mode", EmptyNode);
}

bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,8 +620,10 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const char *LinkingOutput) const {
std::string Linker = getToolChain().GetLinkerPath();
ArgStringList CmdArgs;
CmdArgs.push_back("--no-undefined");
CmdArgs.push_back("-shared");
if (!Args.hasArg(options::OPT_r)) {
CmdArgs.push_back("--no-undefined");
CmdArgs.push_back("-shared");
}

addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
Args.AddAllArgs(CmdArgs, options::OPT_L);
Expand Down
28 changes: 26 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3384,9 +3384,28 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
CmdArgs.push_back("-ffast-math");

// Handle __FINITE_MATH_ONLY__ similarly.
if (!HonorINFs && !HonorNaNs)
// The -ffinite-math-only is added to CmdArgs when !HonorINFs && !HonorNaNs.
// Otherwise process the Xclang arguments to determine if -menable-no-infs and
// -menable-no-nans are set by the user.
bool shouldAddFiniteMathOnly = false;
if (!HonorINFs && !HonorNaNs) {
shouldAddFiniteMathOnly = true;
} else {
bool InfValues = true;
bool NanValues = true;
for (const auto *Arg : Args.filtered(options::OPT_Xclang)) {
StringRef ArgValue = Arg->getValue();
if (ArgValue == "-menable-no-nans")
NanValues = false;
else if (ArgValue == "-menable-no-infs")
InfValues = false;
}
if (!NanValues && !InfValues)
shouldAddFiniteMathOnly = true;
}
if (shouldAddFiniteMathOnly) {
CmdArgs.push_back("-ffinite-math-only");

}
if (const Arg *A = Args.getLastArg(options::OPT_mfpmath_EQ)) {
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
Expand Down Expand Up @@ -3755,6 +3774,11 @@ static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs,
CmdArgs.push_back(Args.MakeArgString(CLExtStr));
}

if (Args.hasArg(options::OPT_cl_finite_math_only)) {
CmdArgs.push_back("-menable-no-infs");
CmdArgs.push_back("-menable-no-nans");
}

for (const auto &Arg : ForwardedArguments)
if (const auto *A = Args.getLastArg(Arg))
CmdArgs.push_back(Args.MakeArgString(A->getOption().getPrefixedName()));
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/PS4CPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (UseJMC)
AddLTOFlag("-enable-jmc-instrument");

if (Args.hasFlag(options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section, false))
AddLTOFlag("-stack-size-section");

if (Arg *A = Args.getLastArg(options::OPT_fcrash_diagnostics_dir))
AddLTOFlag(Twine("-crash-diagnostics-dir=") + A->getValue());

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1318,7 +1318,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (!LangOpts.MathErrno)
Builder.defineMacro("__NO_MATH_ERRNO__");

if (LangOpts.FastMath || LangOpts.FiniteMathOnly)
if (LangOpts.FastMath || (LangOpts.NoHonorInfs && LangOpts.NoHonorNaNs))
Builder.defineMacro("__FINITE_MATH_ONLY__", "1");
else
Builder.defineMacro("__FINITE_MATH_ONLY__", "0");
Expand Down
1 change: 0 additions & 1 deletion clang/lib/Frontend/PrintPreprocessedOutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,6 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
*Callbacks->OS << static_cast<unsigned>(*Iter);
PrintComma = true;
}
IsStartOfLine = true;
} else if (Tok.isAnnotation()) {
// Ignore annotation tokens created by pragmas - the pragmas themselves
// will be reproduced in the preprocessed output.
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Lex/PPMacroExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,16 @@ static bool isTargetVariantEnvironment(const TargetInfo &TI,
return false;
}

#if defined(__sun__) && defined(__svr4__)
// GCC mangles std::tm as tm for binary compatibility on Solaris (Issue
// #33114). We need to match this to allow the std::put_time calls to link
// (PR #99075).
//
// The two symbol names below differ only in how the tm parameter is mangled:
// "PKSt2tm" (std::tm) on the left-hand side versus "PK2tm" (tm) on the right.
// The file-scope asm equates the left-hand symbol to the right-hand one.
asm("_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_"
"RSt8ios_basecPKSt2tmPKcSB_ = "
"_ZNKSt8time_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE3putES3_"
"RSt8ios_basecPK2tmPKcSB_");
#endif

/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,18 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
return false;
}

// For OpenCL, retype a __builtin_alloca_* call so that the returned pointer
// points into the private address space, i.e. the address space that
// corresponds to the stack.
static void builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) {
  const QualType CallTy = TheCall->getType();
  assert((CallTy->isPointerType() &&
          !(CallTy->getPointeeType().hasAddressSpace())) &&
         "__builtin_alloca has invalid address space");

  // Re-qualify the pointee first, then rebuild the pointer type around it.
  const QualType PrivatePointee = S.Context.getAddrSpaceQualType(
      CallTy->getPointeeType(), LangAS::opencl_private);
  TheCall->setType(S.Context.getPointerType(PrivatePointee));
}

namespace {
enum PointerAuthOpKind {
PAO_Strip,
Expand Down Expand Up @@ -2214,6 +2226,9 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
case Builtin::BI__builtin_alloca_uninitialized:
Diag(TheCall->getBeginLoc(), diag::warn_alloca)
<< TheCall->getDirectCallee();
if (getLangOpts().OpenCL) {
builtinAllocaAddrSpace(*this, TheCall);
}
break;
case Builtin::BI__arithmetic_fence:
if (BuiltinArithmeticFence(TheCall))
Expand Down
24 changes: 20 additions & 4 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11009,6 +11009,9 @@ static bool AttrCompatibleWithMultiVersion(attr::Kind Kind,
switch (Kind) {
default:
return false;
case attr::ArmLocallyStreaming:
return MVKind == MultiVersionKind::TargetVersion ||
MVKind == MultiVersionKind::TargetClones;
case attr::Used:
return MVKind == MultiVersionKind::Target;
case attr::NonNull:
Expand Down Expand Up @@ -11145,7 +11148,21 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
FunctionType::ExtInfo OldTypeInfo = OldType->getExtInfo();
FunctionType::ExtInfo NewTypeInfo = NewType->getExtInfo();

if (OldTypeInfo.getCC() != NewTypeInfo.getCC())
const auto *OldFPT = OldFD->getType()->getAs<FunctionProtoType>();
const auto *NewFPT = NewFD->getType()->getAs<FunctionProtoType>();

bool ArmStreamingCCMismatched = false;
if (OldFPT && NewFPT) {
unsigned Diff =
OldFPT->getAArch64SMEAttributes() ^ NewFPT->getAArch64SMEAttributes();
// Arm-streaming, arm-streaming-compatible and non-streaming versions
// cannot be mixed.
if (Diff & (FunctionType::SME_PStateSMEnabledMask |
FunctionType::SME_PStateSMCompatibleMask))
ArmStreamingCCMismatched = true;
}

if (OldTypeInfo.getCC() != NewTypeInfo.getCC() || ArmStreamingCCMismatched)
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << CallingConv;

QualType OldReturnType = OldType->getReturnType();
Expand All @@ -11165,9 +11182,8 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << LanguageLinkage;

if (CheckEquivalentExceptionSpec(
OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(),
NewFD->getType()->getAs<FunctionProtoType>(), NewFD->getLocation()))
if (CheckEquivalentExceptionSpec(OldFPT, OldFD->getLocation(), NewFPT,
NewFD->getLocation()))
return true;
}
return false;
Expand Down
7 changes: 0 additions & 7 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3034,9 +3034,6 @@ bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D,
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << CurFeature << TargetVersion;
}
if (IsArmStreamingFunction(cast<FunctionDecl>(D),
/*IncludeLocallyStreaming=*/false))
return Diag(LiteralLoc, diag::err_sme_streaming_cannot_be_multiversioned);
return false;
}

Expand Down Expand Up @@ -3133,10 +3130,6 @@ bool Sema::checkTargetClonesAttrString(
HasNotDefault = true;
}
}
if (IsArmStreamingFunction(cast<FunctionDecl>(D),
/*IncludeLocallyStreaming=*/false))
return Diag(LiteralLoc,
diag::err_sme_streaming_cannot_be_multiversioned);
} else {
// Other targets ( currently X86 )
if (Cur.starts_with("arch=")) {
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10385,7 +10385,7 @@ void Sema::checkIncorrectVTablePointerAuthenticationAttribute(
while (1) {
assert(PrimaryBase);
const CXXRecordDecl *Base = nullptr;
for (auto BasePtr : PrimaryBase->bases()) {
for (const CXXBaseSpecifier &BasePtr : PrimaryBase->bases()) {
if (!BasePtr.getType()->getAsCXXRecordDecl()->isDynamicClass())
continue;
Base = BasePtr.getType()->getAsCXXRecordDecl();
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/Sema/SemaExprCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6030,6 +6030,32 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceI
return cast<CXXRecordDecl>(rhsRecord->getDecl())
->isDerivedFrom(cast<CXXRecordDecl>(lhsRecord->getDecl()));
}
case BTT_IsVirtualBaseOf: {
const RecordType *BaseRecord = LhsT->getAs<RecordType>();
const RecordType *DerivedRecord = RhsT->getAs<RecordType>();

if (!BaseRecord || !DerivedRecord) {
DiagnoseVLAInCXXTypeTrait(Self, Lhs,
tok::kw___builtin_is_virtual_base_of);
DiagnoseVLAInCXXTypeTrait(Self, Rhs,
tok::kw___builtin_is_virtual_base_of);
return false;
}

if (BaseRecord->isUnionType() || DerivedRecord->isUnionType())
return false;

if (!BaseRecord->isStructureOrClassType() ||
!DerivedRecord->isStructureOrClassType())
return false;

if (Self.RequireCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT,
diag::err_incomplete_type))
return false;

return cast<CXXRecordDecl>(DerivedRecord->getDecl())
->isVirtuallyDerivedFrom(cast<CXXRecordDecl>(BaseRecord->getDecl()));
}
case BTT_IsSame:
return Self.Context.hasSameType(LhsT, RhsT);
case BTT_TypeCompatible: {
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Sema/SemaTemplateDeduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -951,9 +951,11 @@ class PackDeductionScope {

// Skip over the pack elements that were expanded into separate arguments.
// If we partially expanded, this is the number of partial arguments.
// FIXME: `&& FixedNumExpansions` is a workaround for UB described in
// https://github.com/llvm/llvm-project/issues/100095
if (IsPartiallyExpanded)
PackElements += NumPartialPackArgs;
else if (IsExpanded)
else if (IsExpanded && FixedNumExpansions)
PackElements += *FixedNumExpansions;

for (auto &Pack : Packs) {
Expand Down
58 changes: 29 additions & 29 deletions clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,49 +21,56 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"

using namespace clang;
using namespace ento;

namespace {
class MmapWriteExecChecker : public Checker<check::PreCall> {
class MmapWriteExecChecker
: public Checker<check::ASTDecl<TranslationUnitDecl>, check::PreCall> {
CallDescription MmapFn{CDM::CLibrary, {"mmap"}, 6};
CallDescription MprotectFn{CDM::CLibrary, {"mprotect"}, 3};
static int ProtWrite;
static int ProtExec;
static int ProtRead;
const BugType BT{this, "W^X check fails, Write Exec prot flags set",
"Security"};

// Default values are used if definition of the flags is not found.
mutable int ProtRead = 0x01;
mutable int ProtWrite = 0x02;
mutable int ProtExec = 0x04;

public:
void checkASTDecl(const TranslationUnitDecl *TU, AnalysisManager &Mgr,
BugReporter &BR) const;
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
int ProtExecOv;
int ProtReadOv;
};
}

int MmapWriteExecChecker::ProtWrite = 0x02;
int MmapWriteExecChecker::ProtExec = 0x04;
int MmapWriteExecChecker::ProtRead = 0x01;
void MmapWriteExecChecker::checkASTDecl(const TranslationUnitDecl *TU,
AnalysisManager &Mgr,
BugReporter &BR) const {
Preprocessor &PP = Mgr.getPreprocessor();
const std::optional<int> FoundProtRead = tryExpandAsInteger("PROT_READ", PP);
const std::optional<int> FoundProtWrite =
tryExpandAsInteger("PROT_WRITE", PP);
const std::optional<int> FoundProtExec = tryExpandAsInteger("PROT_EXEC", PP);
if (FoundProtRead && FoundProtWrite && FoundProtExec) {
ProtRead = *FoundProtRead;
ProtWrite = *FoundProtWrite;
ProtExec = *FoundProtExec;
}
}

void MmapWriteExecChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
CheckerContext &C) const {
if (matchesAny(Call, MmapFn, MprotectFn)) {
SVal ProtVal = Call.getArgSVal(2);
auto ProtLoc = ProtVal.getAs<nonloc::ConcreteInt>();
if (!ProtLoc)
return;
int64_t Prot = ProtLoc->getValue().getSExtValue();
if (ProtExecOv != ProtExec)
ProtExec = ProtExecOv;
if (ProtReadOv != ProtRead)
ProtRead = ProtReadOv;

// Wrong settings
if (ProtRead == ProtExec)
return;

if ((Prot & (ProtWrite | ProtExec)) == (ProtWrite | ProtExec)) {
if ((Prot & ProtWrite) && (Prot & ProtExec)) {
ExplodedNode *N = C.generateNonFatalErrorNode();
if (!N)
return;
Expand All @@ -80,17 +87,10 @@ void MmapWriteExecChecker::checkPreCall(const CallEvent &Call,
}
}

void ento::registerMmapWriteExecChecker(CheckerManager &mgr) {
MmapWriteExecChecker *Mwec =
mgr.registerChecker<MmapWriteExecChecker>();
Mwec->ProtExecOv =
mgr.getAnalyzerOptions()
.getCheckerIntegerOption(Mwec, "MmapProtExec");
Mwec->ProtReadOv =
mgr.getAnalyzerOptions()
.getCheckerIntegerOption(Mwec, "MmapProtRead");
void ento::registerMmapWriteExecChecker(CheckerManager &Mgr) {
Mgr.registerChecker<MmapWriteExecChecker>();
}

bool ento::shouldRegisterMmapWriteExecChecker(const CheckerManager &mgr) {
bool ento::shouldRegisterMmapWriteExecChecker(const CheckerManager &) {
return true;
}
5 changes: 5 additions & 0 deletions clang/test/AST/Interp/builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,8 @@ constexpr bool assume() {
return true;
}
static_assert(assume(), "");

// Checks that __builtin_os_log_format_buffer_size can initialize a constexpr
// variable even though its trailing arguments are runtime values, and that
// the computed size is positive.
void test_builtin_os_log(void *buf, int i, const char *data) {
constexpr int len = __builtin_os_log_format_buffer_size("%d %{public}s %{private}.16P", i, data, data);
static_assert(len > 0, "Expect len > 0");
}
2 changes: 0 additions & 2 deletions clang/test/Analysis/analyzer-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
// CHECK-NEXT: alpha.clone.CloneChecker:ReportNormalClones = true
// CHECK-NEXT: alpha.cplusplus.STLAlgorithmModeling:AggressiveStdFindModeling = false
// CHECK-NEXT: alpha.osx.cocoa.DirectIvarAssignment:AnnotatedFunctions = false
// CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtExec = 0x04
// CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtRead = 0x01
// CHECK-NEXT: alpha.security.taint.TaintPropagation:Config = ""
// CHECK-NEXT: apply-fixits = false
// CHECK-NEXT: assume-controlled-environment = false
Expand Down
11 changes: 6 additions & 5 deletions clang/test/Analysis/mmap-writeexec.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=alpha.security.MmapWriteExec -analyzer-config alpha.security.MmapWriteExec:MmapProtExec=1 -analyzer-config alpha.security.MmapWriteExec:MmapProtRead=4 -DUSE_ALTERNATIVE_PROT_EXEC_DEFINITION -verify %s
// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=alpha.security.MmapWriteExec -DUSE_ALTERNATIVE_PROT_EXEC_DEFINITION -verify %s
// RUN: %clang_analyze_cc1 -triple x86_64-unknown-apple-darwin10 -analyzer-checker=alpha.security.MmapWriteExec -verify %s

#define PROT_WRITE 0x02
#ifndef USE_ALTERNATIVE_PROT_EXEC_DEFINITION
#define PROT_EXEC 0x04
#define PROT_READ 0x01
#else
#define PROT_EXEC 0x01
#define PROT_WRITE 0x02
#define PROT_READ 0x04
#else
#define PROT_EXEC 0x08
#define PROT_WRITE 0x04
#define PROT_READ 0x02
#endif
#define MAP_PRIVATE 0x0002
#define MAP_ANON 0x1000
Expand Down
316 changes: 316 additions & 0 deletions clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c

Large diffs are not rendered by default.

107 changes: 107 additions & 0 deletions clang/test/CodeGen/aarch64-fmv-streaming.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -emit-llvm -o - %s | FileCheck %s


// CHECK-LABEL: define {{[^@]+}}@n_callee._Msve
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
//
// CHECK-LABEL: define {{[^@]+}}@n_callee._Msimd
// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
//
__arm_locally_streaming __attribute__((target_clones("sve", "simd"))) void n_callee(void) {}
// CHECK-LABEL: define {{[^@]+}}@n_callee._Msme2
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
//
__attribute__((target_version("sme2"))) void n_callee(void) {}
// CHECK-LABEL: define {{[^@]+}}@n_callee.default
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
//
__attribute__((target_version("default"))) void n_callee(void) {}


// CHECK-LABEL: define {{[^@]+}}@s_callee._Msve
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
//
// CHECK-LABEL: define {{[^@]+}}@s_callee._Msimd
// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
//
__attribute__((target_clones("sve", "simd"))) void s_callee(void) __arm_streaming {}
// CHECK-LABEL: define {{[^@]+}}@s_callee._Msme2
// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
//
__arm_locally_streaming __attribute__((target_version("sme2"))) void s_callee(void) __arm_streaming {}
// CHECK-LABEL: define {{[^@]+}}@s_callee.default
// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
//
__attribute__((target_version("default"))) void s_callee(void) __arm_streaming {}


// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msve
// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
//
// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msimd
// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
//
__attribute__((target_clones("sve", "simd"))) void sc_callee(void) __arm_streaming_compatible {}
// CHECK-LABEL: define {{[^@]+}}@sc_callee._Msme2
// CHECK-SAME: () #[[ATTR10:[0-9]+]] {
//
__arm_locally_streaming __attribute__((target_version("sme2"))) void sc_callee(void) __arm_streaming_compatible {}
// CHECK-LABEL: define {{[^@]+}}@sc_callee.default
// CHECK-SAME: () #[[ATTR11:[0-9]+]] {
//
__attribute__((target_version("default"))) void sc_callee(void) __arm_streaming_compatible {}


// CHECK-LABEL: define {{[^@]+}}@n_caller
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK: call void @n_callee()
// CHECK: call void @s_callee() #[[ATTR12:[0-9]+]]
// CHECK: call void @sc_callee() #[[ATTR13:[0-9]+]]
//
// Caller with no streaming attribute. It calls one function from each of the
// three multiversioned callee families (non-streaming, streaming,
// streaming-compatible) defined earlier in this file.
void n_caller(void) {
n_callee();
s_callee();
sc_callee();
}


// CHECK-LABEL: define {{[^@]+}}@s_caller
// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
// CHECK: call void @n_callee()
// CHECK: call void @s_callee() #[[ATTR12]]
// CHECK: call void @sc_callee() #[[ATTR13]]
//
// __arm_streaming caller. It calls one function from each of the three
// multiversioned callee families defined earlier in this file.
void s_caller(void) __arm_streaming {
n_callee();
s_callee();
sc_callee();
}


// CHECK-LABEL: define {{[^@]+}}@sc_caller
// CHECK-SAME: () #[[ATTR11:[0-9]+]] {
// CHECK: call void @n_callee()
// CHECK: call void @s_callee() #[[ATTR12]]
// CHECK: call void @sc_callee() #[[ATTR13]]
//
// __arm_streaming_compatible caller. It calls one function from each of the
// three multiversioned callee families defined earlier in this file.
void sc_caller(void) __arm_streaming_compatible {
n_callee();
s_callee();
sc_callee();
}


// CHECK: attributes #[[ATTR0:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body"
// CHECK: attributes #[[ATTR1:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body"
// CHECK: attributes #[[ATTR2:[0-9]+]] = {{.*}}
// CHECK: attributes #[[ATTR3]] = {{.*}}
// CHECK: attributes #[[ATTR4:[0-9]+]] = {{.*}} "aarch64_pstate_sm_enabled"
// CHECK: attributes #[[ATTR5:[0-9]+]] = {{.*}} "aarch64_pstate_sm_enabled"
// CHECK: attributes #[[ATTR6:[0-9]+]] = {{.*}} "aarch64_pstate_sm_body" "aarch64_pstate_sm_enabled"
// CHECK: attributes #[[ATTR7]] = {{.*}} "aarch64_pstate_sm_enabled"
// CHECK: attributes #[[ATTR8:[0-9]+]] = {{.*}} "aarch64_pstate_sm_compatible"
// CHECK: attributes #[[ATTR9:[0-9]+]] = {{.*}} "aarch64_pstate_sm_compatible"
// CHECK: attributes #[[ATTR10]] = {{.*}} "aarch64_pstate_sm_body" "aarch64_pstate_sm_compatible"
// CHECK: attributes #[[ATTR11]] = {{.*}} "aarch64_pstate_sm_compatible"
// CHECK: attributes #[[ATTR12]] = {{.*}} "aarch64_pstate_sm_enabled"
// CHECK: attributes #[[ATTR13]] = {{.*}} "aarch64_pstate_sm_compatible"
2 changes: 1 addition & 1 deletion clang/test/CodeGen/finite-math.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -ffinite-math-only -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=FINITE
// RUN: %clang_cc1 -menable-no-infs -menable-no-nans -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=FINITE
// RUN: %clang_cc1 -fno-signed-zeros -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=NSZ
// RUN: %clang_cc1 -freciprocal-math -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=RECIP
// RUN: %clang_cc1 -mreassociate -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=REASSOC
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/fp-floatcontrol-stack.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffp-contract=on -DDEFAULT=1 -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-DDEFAULT %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffp-contract=on -DEBSTRICT=1 -ffp-exception-behavior=strict -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-DEBSTRICT %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -DFAST=1 -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-FAST %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffp-contract=on -DNOHONOR=1 -menable-no-infs -menable-no-nans -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOHONOR %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffp-contract=on -DNOHONOR=1 -ffinite-math-only -menable-no-infs -menable-no-nans -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-NOHONOR %s

#define FUN(n) \
(float z) { return n * z + n; }
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/fp-options-to-fast-math-flags.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck -check-prefix CHECK-PRECISE %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -menable-no-nans -emit-llvm -o - %s | FileCheck -check-prefix CHECK-NO-NANS %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -menable-no-infs -emit-llvm -o - %s | FileCheck -check-prefix CHECK-NO-INFS %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ffinite-math-only -emit-llvm -o - %s | FileCheck -check-prefix CHECK-FINITE %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -menable-no-infs -menable-no-nans -emit-llvm -o - %s | FileCheck -check-prefix CHECK-FINITE %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fno-signed-zeros -emit-llvm -o - %s | FileCheck -check-prefix CHECK-NO-SIGNED-ZEROS %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -mreassociate -emit-llvm -o - %s | FileCheck -check-prefix CHECK-REASSOC %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -freciprocal-math -emit-llvm -o - %s | FileCheck -check-prefix CHECK-RECIP %s
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/nofpclass.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --version 2
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -ffinite-math-only -emit-llvm -o - %s | FileCheck -check-prefixes=CFINITEONLY %s
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -cl-finite-math-only -emit-llvm -o - %s | FileCheck -check-prefixes=CLFINITEONLY %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -menable-no-infs -menable-no-nans -emit-llvm -o - %s | FileCheck -check-prefixes=CFINITEONLY %s
// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -menable-no-nans -menable-no-infs -emit-llvm -o - %s | FileCheck -check-prefixes=CLFINITEONLY %s

// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -menable-no-nans -emit-llvm -o - %s | FileCheck -check-prefixes=NONANS %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +avx -fenable-matrix -menable-no-infs -emit-llvm -o - %s | FileCheck -check-prefixes=NOINFS %s
Expand Down
102 changes: 79 additions & 23 deletions clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns | FileCheck %s
// RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s

// RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
// RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s

// RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
Expand All @@ -18,24 +22,38 @@

__global__ void ffp1(float *p) {
// CHECK-LABEL: @_Z4ffp1Pf
// CHECK: atomicrmw fadd ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} monotonic
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE: _Z4ffp1Pf
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap

// UNSAFE: _Z4ffp1Pf
// UNSAFE: global_atomic_add_f32
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap

__atomic_fetch_add(p, 1.0f, memory_order_relaxed);
__atomic_fetch_sub(p, 1.0f, memory_order_relaxed);
__atomic_fetch_max(p, 1.0f, memory_order_relaxed);
__atomic_fetch_min(p, 1.0f, memory_order_relaxed);
__hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
Expand All @@ -44,23 +62,36 @@ __global__ void ffp1(float *p) {

__global__ void ffp2(double *p) {
// CHECK-LABEL: @_Z4ffp2Pd
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} monotonic
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE-LABEL: @_Z4ffp2Pd
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64

// UNSAFE-LABEL: @_Z4ffp2Pd
// UNSAFE: global_atomic_add_f64
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_max_f64
// UNSAFE: global_atomic_min_f64
__atomic_fetch_add(p, 1.0, memory_order_relaxed);
__atomic_fetch_sub(p, 1.0, memory_order_relaxed);
__atomic_fetch_max(p, 1.0, memory_order_relaxed);
__atomic_fetch_min(p, 1.0, memory_order_relaxed);
Expand All @@ -71,11 +102,20 @@ __global__ void ffp2(double *p) {
// long double is the same as double for amdgcn.
__global__ void ffp3(long double *p) {
// CHECK-LABEL: @_Z4ffp3Pe
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} monotonic
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE-LABEL: @_Z4ffp3Pe
// SAFE: global_atomic_cmpswap_b64
// SAFE: global_atomic_cmpswap_b64
Expand All @@ -88,6 +128,7 @@ __global__ void ffp3(long double *p) {
// UNSAFE: global_atomic_cmpswap_x2
// UNSAFE: global_atomic_max_f64
// UNSAFE: global_atomic_min_f64
__atomic_fetch_add(p, 1.0L, memory_order_relaxed);
__atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
__atomic_fetch_max(p, 1.0L, memory_order_relaxed);
__atomic_fetch_min(p, 1.0L, memory_order_relaxed);
Expand All @@ -98,37 +139,52 @@ __global__ void ffp3(long double *p) {
// Mixed-type fetch_sub: the float operand `f` is widened to double (the fpext
// check below) before the relaxed atomic subtraction on `*p`. The safe-IR and
// unsafe-IR runs differ only in the trailing !amdgpu.no.fine.grained.memory
// metadata on the atomicrmw.
__device__ double ffp4(double *p, float f) {
// CHECK-LABEL: @_Z4ffp4Pdf
// CHECK: fpext float {{.*}} to double
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, f, memory_order_relaxed);
}

// Mixed-type fetch_sub with an integer operand: `i` is converted to double
// (the sitofp check below) before the relaxed atomic subtraction on `*p`.
// With -munsafe-fp-atomics the atomicrmw additionally carries
// !amdgpu.no.fine.grained.memory metadata.
__device__ double ffp5(double *p, int i) {
// CHECK-LABEL: @_Z4ffp5Pdi
// CHECK: sitofp i32 {{.*}} to double
// CHECK: atomicrmw fsub ptr {{.*}} monotonic
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
return __atomic_fetch_sub(p, i, memory_order_relaxed);
}

__global__ void ffp6(_Float16 *p) {
// CHECK-LABEL: @_Z4ffp6PDF16
// CHECK: atomicrmw fadd ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} monotonic
// CHECK: atomicrmw fmin ptr {{.*}} monotonic
// CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
// CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2{{$}}
// SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2{{$}}

// UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}

// SAFE: _Z4ffp6PDF16
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap
// SAFE: global_atomic_cmpswap

// UNSAFE: _Z4ffp6PDF16
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
// UNSAFE: global_atomic_cmpswap
__atomic_fetch_add(p, 1.0, memory_order_relaxed);
__atomic_fetch_sub(p, 1.0, memory_order_relaxed);
__atomic_fetch_max(p, 1.0, memory_order_relaxed);
__atomic_fetch_min(p, 1.0, memory_order_relaxed);
__hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
Expand Down
141 changes: 141 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-alloca.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \
// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \
// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \
// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s

// OPENCL-LABEL: define dso_local void @test1_builtin_alloca(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL-NEXT: ret void
//
void test1_builtin_alloca(unsigned n) {
// Requests n * sizeof(int) bytes from __builtin_alloca and stores the result,
// cast to a __private float pointer, in a local. The checks above pin the
// dynamic i8 alloca (size n zero-extended and multiplied by 4) in addrspace(5).
__private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int));
}

// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_uninitialized(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL-NEXT: ret void
//
void test1_builtin_alloca_uninitialized(unsigned n) {
// Same request as test1_builtin_alloca, but through the _uninitialized builtin.
// The checks above pin an i8 alloca of n * 4 bytes, align 8, in addrspace(5).
__private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int));
}

// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL-NEXT: ret void
//
void test1_builtin_alloca_with_align(unsigned n) {
// Requests n * sizeof(int) bytes with an explicit alignment argument of 8.
// The alignment is given in bits, which matches the `align 1` on the alloca
// in the checks above.
__private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8);
}

// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL-NEXT: ret void
//
void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
// Aligned request (alignment argument 8, i.e. `align 1` in the checks above)
// through the _uninitialized builtin; the result is kept in a __private
// float pointer.
__private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8);
}

// OPENCL-LABEL: define dso_local void @test2_builtin_alloca(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL-NEXT: ret void
//
void test2_builtin_alloca(unsigned n) {
// Passes n straight through as the byte count (no multiplication in the
// checks above) and keeps the result as a __private void pointer, so no cast
// is needed.
__private void *alloc_ptr = __builtin_alloca(n);
}

// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_uninitialized(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL-NEXT: ret void
//
void test2_builtin_alloca_uninitialized(unsigned n) {
// _uninitialized variant of test2_builtin_alloca: n bytes, result kept as a
// __private void pointer. The checks above pin an align-8 alloca in
// addrspace(5).
__private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n);
}

// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL-NEXT: ret void
//
void test2_builtin_alloca_with_align(unsigned n) {
// n bytes with an alignment argument of 8 (bits), which appears as `align 1`
// on the alloca in the checks above; the result is a __private void pointer.
__private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);
}

// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized(
// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] {
// OPENCL-NEXT: [[ENTRY:.*:]]
// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL-NEXT: ret void
//
void test2_builtin_alloca_with_align_uninitialized(unsigned n) {
// Aligned, _uninitialized variant: n bytes, alignment argument 8 (bits),
// result kept as a __private void pointer.
__private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8);
}
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/relaxed-fpmath.cl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
// RUN: %clang_cc1 %s -emit-llvm -cl-fast-relaxed-math -o - | FileCheck %s -check-prefix=FAST
// RUN: %clang_cc1 %s -emit-llvm -cl-finite-math-only -o - | FileCheck %s -check-prefix=FINITE
// RUN: %clang_cc1 %s -emit-llvm -menable-no-infs -menable-no-nans -cl-finite-math-only -o - | FileCheck %s -check-prefix=FINITE
// RUN: %clang_cc1 %s -emit-llvm -cl-unsafe-math-optimizations -o - | FileCheck %s -check-prefix=UNSAFE
// RUN: %clang_cc1 %s -emit-llvm -cl-mad-enable -o - | FileCheck %s -check-prefix=MAD
// RUN: %clang_cc1 %s -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NOSIGNED
Expand All @@ -9,7 +9,7 @@
// RUN: %clang_cc1 %s -DGEN_PCH=1 -finclude-default-header -triple spir-unknown-unknown -emit-pch -o %t.pch
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -o - | FileCheck %s -check-prefix=NORMAL
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -cl-fast-relaxed-math -o - | FileCheck %s -check-prefix=FAST
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -cl-finite-math-only -o - | FileCheck %s -check-prefix=FINITE
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -menable-no-infs -menable-no-nans -cl-finite-math-only -o - | FileCheck %s -check-prefix=FINITE
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -cl-unsafe-math-optimizations -o - | FileCheck %s -check-prefix=UNSAFE
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -cl-mad-enable -o - | FileCheck %s -check-prefix=MAD
// RUN: %clang_cc1 %s -include-pch %t.pch -fno-validate-pch -emit-llvm -cl-no-signed-zeros -o - | FileCheck %s -check-prefix=NOSIGNED
Expand Down
4 changes: 4 additions & 0 deletions clang/test/Driver/amdgpu-toolchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@
// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
// RUN: -fuse-ld=ld %s 2>&1 | FileCheck -check-prefixes=LD %s
// LD: ld.lld

// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
// RUN: -r %s 2>&1 | FileCheck -check-prefixes=RELO %s
// RELO-NOT: -shared
2 changes: 1 addition & 1 deletion clang/test/Driver/opencl.cl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
// CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable"
// CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing"
// CHECK-SINGLE-PRECISION-CONST: "-cc1" {{.*}} "-cl-single-precision-constant"
// CHECK-FINITE-MATH-ONLY: "-cc1" {{.*}} "-cl-finite-math-only"
// CHECK-FINITE-MATH-ONLY: "-cc1" {{.*}} "-menable-no-infs" "-menable-no-nans" "-cl-finite-math-only"
// CHECK-KERNEL-ARG-INFO: "-cc1" {{.*}} "-cl-kernel-arg-info"
// CHECK-UNSAFE-MATH-OPT: "-cc1" {{.*}} "-cl-unsafe-math-optimizations"
// CHECK-FAST-RELAXED-MATH: "-cc1" {{.*}} "-cl-fast-relaxed-math"
Expand Down
1 change: 1 addition & 0 deletions clang/test/Driver/stack-size-section.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

// RUN: %clang -### --target=x86_64-linux-gnu -flto -fstack-size-section %s 2>&1 | FileCheck %s --check-prefix=LTO
// RUN: %clang -### --target=x86_64-linux-gnu -flto -fstack-size-section -fno-stack-size-section %s 2>&1 | FileCheck %s --check-prefix=LTO-NO
// RUN: %clang -### --target=x86_64-sie-ps5 -fstack-size-section %s 2>&1 | FileCheck %s --check-prefix=LTO

// LTO: "-plugin-opt=-stack-size-section"
// LTO-NO-NOT: "-plugin-opt=-stack-size-section"
Expand Down
3 changes: 2 additions & 1 deletion clang/test/Headers/__clang_hip_cmath.hip
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -ffinite-math-only -o - \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -menable-no-infs \
// RUN: -menable-no-nans -o - \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefix=FINITEONLY %s

// DEFAULT-LABEL: @test_fma_f16(
Expand Down
3 changes: 2 additions & 1 deletion clang/test/Headers/__clang_hip_math.hip
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \
// RUN: -internal-isystem %S/Inputs/include \
// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -ffinite-math-only -o - \
// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -O1 -menable-no-infs \
// RUN: -menable-no-nans -o - \
// RUN: -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,FINITEONLY %s

// Check that we end up with -fapprox-func set on intrinsic calls
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Headers/float.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: %clang_cc1 -fsyntax-only -verify -std=c99 -ffreestanding %s
// RUN: %clang_cc1 -fsyntax-only -verify -std=c11 -ffreestanding %s
// RUN: %clang_cc1 -fsyntax-only -verify -std=c23 -ffreestanding %s
// RUN: %clang_cc1 -fsyntax-only -verify=finite -std=c23 -ffreestanding -ffinite-math-only %s
// RUN: %clang_cc1 -fsyntax-only -verify=finite -std=c23 -ffreestanding -menable-no-nans -menable-no-infs %s
// RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++11 -ffreestanding %s
// RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++14 -ffreestanding %s
// RUN: %clang_cc1 -fsyntax-only -verify -xc++ -std=c++17 -ffreestanding %s
Expand Down
59 changes: 59 additions & 0 deletions clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=DEFAULT %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -munsafe-fp-atomics -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s

#pragma omp declare target

float fv, fx;
double dv, dx;

// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f32v(
// DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4
// DEFAULT-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// DEFAULT-NEXT: ret void
//
// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f32v(
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0:[0-9]+]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.ignore.denormal.mode [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// UNSAFE-FP-ATOMICS-NEXT: ret void
//
void atomic_fadd_f32() {
// Atomic capture on float: fx is atomically updated to fx + fv and the new
// value of fx is captured into fv. The checks above pin this as an
// atomicrmw fadd (which yields the old value) followed by a plain fadd that
// recomputes the updated value. With -munsafe-fp-atomics the atomicrmw carries
// both !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
#pragma omp atomic capture
fv = fx = fx + fv;
}

// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f64v(
// DEFAULT-SAME: ) #[[ATTR0]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8
// DEFAULT-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// DEFAULT-NEXT: ret void
//
// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f64v(
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// UNSAFE-FP-ATOMICS-NEXT: ret void
//
void atomic_fadd_f64() {
// Double-precision counterpart of atomic_fadd_f32: dx is atomically updated to
// dx + dv and the new value is captured into dv. With -munsafe-fp-atomics the
// checks above show only !amdgpu.no.fine.grained.memory on the atomicrmw
// (no !amdgpu.ignore.denormal.mode, unlike the float case).
#pragma omp atomic capture
dv = dx = dx + dv;
}

#pragma omp end declare target
//.
// UNSAFE-FP-ATOMICS: [[META5]] = !{}
//.
6 changes: 6 additions & 0 deletions clang/test/Parser/namelookup-anonymous-struct.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// RUN: %clang_cc1 -std=c11 -verify %s

// Regression test (presumably for GitHub issue 31295): while the enclosing
// struct is still being declared, a member of its anonymous struct is not
// visible to unqualified name lookup, so the use of `x` inside sizeof is
// diagnosed as an undeclared identifier.
struct GH31295 {
struct { int x; };
int arr[sizeof(x)]; // expected-error{{use of undeclared identifier 'x'}}
};
4 changes: 2 additions & 2 deletions clang/test/Preprocessor/predefined-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-MATH-ERRNO
// CHECK-NO-MATH-ERRNO: #define __NO_MATH_ERRNO__ 1
//
// RUN: %clang_cc1 %s -E -dM -ffinite-math-only -o - \
// RUN: %clang_cc1 %s -E -dM -menable-no-nans -menable-no-infs -o - \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FINITE-MATH-ONLY
// CHECK-FINITE-MATH-ONLY: #define __FINITE_MATH_ONLY__ 1
//
Expand Down Expand Up @@ -316,4 +316,4 @@
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \
// RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1
46 changes: 46 additions & 0 deletions clang/test/Sema/aarch64-fmv-streaming.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -Waarch64-sme-attributes -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s

// Accepted case: every version of the function (target_clones, the sme2
// target_version and the default) has the same __arm_streaming interface, so
// no diagnostic is attached. The sme2 version is also __arm_locally_streaming.
__attribute__((target_clones("sve", "simd"))) void ok_arm_streaming(void) __arm_streaming {}
__arm_locally_streaming __attribute__((target_version("sme2"))) void ok_arm_streaming(void) __arm_streaming {}
__attribute__((target_version("default"))) void ok_arm_streaming(void) __arm_streaming {}

// Accepted case: all versions share the __arm_streaming_compatible interface,
// so no diagnostic is attached. The sme2 version is also
// __arm_locally_streaming.
__attribute__((target_clones("sve", "simd"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}
__arm_locally_streaming __attribute__((target_version("sme2"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}
__attribute__((target_version("default"))) void ok_arm_streaming_compatible(void) __arm_streaming_compatible {}

// Accepted case: all versions have a non-streaming interface. Here the
// target_clones declaration is the one marked __arm_locally_streaming; no
// diagnostic is attached to any version.
__arm_locally_streaming __attribute__((target_clones("sve", "simd"))) void ok_no_streaming(void) {}
__attribute__((target_version("sme2"))) void ok_no_streaming(void) {}
__attribute__((target_version("default"))) void ok_no_streaming(void) {}

// Rejected case: the first declaration (target_clones) is non-streaming, and
// each later version changes the streaming interface (__arm_streaming or
// __arm_streaming_compatible). Every such version is diagnosed as having a
// different calling convention, in both the C and the C++ run.
__attribute__((target_clones("sve", "simd"))) void bad_mixed_streaming(void) {}
// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
__attribute__((target_version("sme2"))) void bad_mixed_streaming(void) __arm_streaming {}
// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
__attribute__((target_version("default"))) void bad_mixed_streaming(void) __arm_streaming_compatible {}
// expected-cpp-error@+2 {{multiversioned function declaration has a different calling convention}}
// expected-error@+1 {{multiversioned function declaration has a different calling convention}}
__arm_locally_streaming __attribute__((target_version("dotprod"))) void bad_mixed_streaming(void) __arm_streaming {}

// Calls each multiversioned function from a non-streaming caller; no
// diagnostic is attached to any of the calls.
void n_caller(void) {
ok_arm_streaming();
ok_arm_streaming_compatible();
ok_no_streaming();
bad_mixed_streaming();
}

// Calls each multiversioned function from an __arm_streaming caller; no
// diagnostic is attached to any of the calls.
void s_caller(void) __arm_streaming {
ok_arm_streaming();
ok_arm_streaming_compatible();
ok_no_streaming();
bad_mixed_streaming();
}

// Calls each multiversioned function from an __arm_streaming_compatible
// caller; no diagnostic is attached to any of the calls.
void sc_caller(void) __arm_streaming_compatible {
ok_arm_streaming();
ok_arm_streaming_compatible();
ok_no_streaming();
bad_mixed_streaming();
}
42 changes: 0 additions & 42 deletions clang/test/Sema/aarch64-sme-func-attrs.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,48 +455,6 @@ void unimplemented_spill_fill_za(void (*share_zt0_only)(void) __arm_inout("zt0")
share_zt0_only();
}

// A target_version on an __arm_streaming function is diagnosed ("streaming
// function cannot be multi-versioned"). The following non-streaming default
// version is then diagnosed as a redeclaration with different SME function
// attributes, with a note pointing back at the first declaration.
// expected-cpp-error@+2 {{streaming function cannot be multi-versioned}}
// expected-error@+1 {{streaming function cannot be multi-versioned}}
__attribute__((target_version("sme2")))
void cannot_work_version(void) __arm_streaming {}
// expected-cpp-error@+5 {{function declared 'void ()' was previously declared 'void () __arm_streaming', which has different SME function attributes}}
// expected-cpp-note@-2 {{previous declaration is here}}
// expected-error@+3 {{function declared 'void (void)' was previously declared 'void (void) __arm_streaming', which has different SME function attributes}}
// expected-note@-4 {{previous declaration is here}}
__attribute__((target_version("default")))
void cannot_work_version(void) {}


// Same restriction through target_clones: a cloned __arm_streaming function is
// diagnosed as not multi-versionable in both the C and the C++ run.
// expected-cpp-error@+2 {{streaming function cannot be multi-versioned}}
// expected-error@+1 {{streaming function cannot be multi-versioned}}
__attribute__((target_clones("sme2")))
void cannot_work_clones(void) __arm_streaming {}


// Accepted combination (no diagnostics attached): the streaming function uses
// the plain `target` attribute, while the versioned function just_fine is
// non-streaming and its sme2 version merely calls the streaming function.
__attribute__((target("sme2")))
void just_fine_streaming(void) __arm_streaming {}
__attribute__((target_version("sme2")))
void just_fine(void) { just_fine_streaming(); }
__attribute__((target_version("default")))
void just_fine(void) {}


__arm_locally_streaming
__attribute__((target_version("sme2")))
void incompatible_locally_streaming(void) {}
// expected-error@-1 {{attribute 'target_version' multiversioning cannot be combined with attribute '__arm_locally_streaming'}}
// expected-cpp-error@-2 {{attribute 'target_version' multiversioning cannot be combined with attribute '__arm_locally_streaming'}}
__attribute__((target_version("default")))
void incompatible_locally_streaming(void) {}


void fmv_caller() {
cannot_work_version();
cannot_work_clones();
just_fine();
incompatible_locally_streaming();
}

void sme_streaming_with_vl_arg(__SVInt8_t a) __arm_streaming { }

__SVInt8_t sme_streaming_returns_vl(void) __arm_streaming { __SVInt8_t r; return r; }
Expand Down
9 changes: 5 additions & 4 deletions clang/test/Sema/warn-infinity-nan-disabled-lnx.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
// RUN: %clang_cc1 -x c++ -verify=no-inf-no-nan \
// RUN: -triple powerpc64le-unknown-unknown %s -menable-no-infs \
// RUN: -menable-no-nans -std=c++23
// RUN: -triple powerpc64le-unknown-unknown %s \
// RUN: -menable-no-infs -menable-no-nans -std=c++23

// RUN: %clang_cc1 -x c++ -verify=no-inf-no-nan \
// RUN: -triple powerpc64le-unknown-unknown %s -menable-no-infs \
// RUN: -menable-no-nans -funsafe-math-optimizations -std=c++23
// RUN: -triple powerpc64le-unknown-unknown %s \
// RUN: -menable-no-infs -menable-no-nans -funsafe-math-optimizations \
// RUN: -std=c++23

// RUN: %clang_cc1 -x c++ -verify=no-fast -triple powerpc64le-unknown-unknown \
// RUN: %s -std=c++23
Expand Down
5 changes: 3 additions & 2 deletions clang/test/Sema/warn-infinity-nan-disabled-win.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
// RUN: -menable-no-nans -std=c++23

// RUN: %clang_cc1 -x c++ -verify=no-inf-no-nan \
// RUN: -triple powerpc64le-unknown-unknown %s -menable-no-infs \
// RUN: -menable-no-nans -funsafe-math-optimizations -std=c++23
// RUN: -triple powerpc64le-unknown-unknown %s \
// RUN: -menable-no-infs -menable-no-nans -funsafe-math-optimizations \
// RUN: -std=c++23

// RUN: %clang_cc1 -x c++ -verify=no-fast -triple powerpc64le-unknown-unknown \
// RUN: %s -std=c++23
Expand Down
17 changes: 17 additions & 0 deletions clang/test/SemaCXX/pr100095.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s
// XFAIL: asserts

// Reduced reproducer (PR100095, going by the file name). The run line above
// uses neither -verify nor FileCheck, so the test only requires the compiler
// to exit successfully on this input; the second directive marks the test as a
// known failure in assertion-enabled builds.

template <class> struct Pair;
template <class...> struct Tuple {
template <class _Up> Tuple(_Up);
};
template <typename> struct StatusOr;
template <int> using ElementType = int;
// Key expands the ElementType alias over a non-type pack inside another alias
// template.
template <int... fields>
using Key = Tuple<ElementType<fields>...>;
// Declared only; the return type nests Key<fields...> inside two templates
// that are never defined.
template <int... fields>
StatusOr<Pair<Key<fields...>>> Parser();
struct Helper { Helper(Tuple<>, Tuple<>, int, int); };
struct D : Helper {
// `Parser<>` is passed by name (not called) where Helper expects a Tuple<>.
D(Key<> f, int n, int e) : Helper(f, Parser<>, n, e) {}
};
74 changes: 72 additions & 2 deletions clang/test/SemaCXX/type-traits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2402,11 +2402,11 @@ template<typename T> struct DerivedB : BaseA<T> { };
template<typename T> struct CrazyDerived : T { };


class class_forward; // expected-note 2 {{forward declaration of 'class_forward'}}
class class_forward; // expected-note 4 {{forward declaration of 'class_forward'}}

template <class T> class DerivedTemp : Base {};
template <class T> class NonderivedTemp {};
template <class T> class UndefinedTemp; // expected-note {{declared here}}
template <class T> class UndefinedTemp; // expected-note 2 {{declared here}}

void is_base_of() {
static_assert(__is_base_of(Base, Derived));
Expand Down Expand Up @@ -2457,6 +2457,76 @@ void is_base_of() {
static_assert(!__is_base_of(DerivedB<int>, BaseA<int>));
}

// Classes that reach Derived3 (and whatever it inherits) through one more
// level of non-virtual or virtual inheritance.
struct DerivedTransitiveViaNonVirtual : Derived3 {};
struct DerivedTransitiveViaVirtual : virtual Derived3 {};

// Virtual counterpart of CrazyDerived: inherits virtually from its own
// template argument.
template <typename T>
struct CrazyDerivedVirtual : virtual T {};

// Base inherited virtually under restricted access, then inherited again
// under restricted access. The two-word suffix reads
// <outer access><inner access>, so all four combinations are covered:
// previously DerivedProtectedPrivate duplicated DerivedProtectedProtected and
// the protected-over-private case was never exercised.
struct DerivedPrivate : private virtual Base {};
struct DerivedProtected : protected virtual Base {};
struct DerivedPrivatePrivate : private DerivedPrivate {};
struct DerivedPrivateProtected : private DerivedProtected {};
struct DerivedProtectedPrivate : protected DerivedPrivate {};
struct DerivedProtectedProtected : protected DerivedProtected {};

// Exercises __builtin_is_virtual_base_of(B, D) over class hierarchies,
// non-class types, incomplete types and unions. The parameter `n` exists only
// so that variable-length array types can be formed for the VLA checks.
void is_virtual_base_of(int n) {
// Non-virtual inheritance, unrelated types, reversed operands and a class
// paired with itself all yield false.
static_assert(!__builtin_is_virtual_base_of(Base, Derived));
static_assert(!__builtin_is_virtual_base_of(const Base, Derived));
static_assert(!__builtin_is_virtual_base_of(Derived, Base));
static_assert(!__builtin_is_virtual_base_of(Derived, int));
static_assert(!__builtin_is_virtual_base_of(Base, Base));
static_assert(!__builtin_is_virtual_base_of(Base, Derived3));
static_assert(!__builtin_is_virtual_base_of(Derived, Derived3));
// Derived2a and Derived2b are the virtual bases of Derived3.
static_assert(__builtin_is_virtual_base_of(Derived2b, Derived3));
static_assert(__builtin_is_virtual_base_of(Derived2a, Derived3));
static_assert(!__builtin_is_virtual_base_of(BaseA<int>, DerivedB<int>));
static_assert(!__builtin_is_virtual_base_of(DerivedB<int>, BaseA<int>));
static_assert(!__builtin_is_virtual_base_of(Union, Union));
static_assert(!__builtin_is_virtual_base_of(Empty, Empty));
// An incomplete class as the second operand is diagnosed; an incomplete class
// as the first operand next to a complete second operand is accepted.
static_assert(!__builtin_is_virtual_base_of(class_forward, class_forward)); // expected-error {{incomplete type 'class_forward' where a complete type is required}}
static_assert(!__builtin_is_virtual_base_of(Empty, class_forward)); // expected-error {{incomplete type 'class_forward' where a complete type is required}}
static_assert(!__builtin_is_virtual_base_of(class_forward, Empty));
// References, arrays and scalar types are never in a base/derived relation;
// variable-length arrays are rejected outright, once per operand.
static_assert(!__builtin_is_virtual_base_of(Base&, Derived&));
static_assert(!__builtin_is_virtual_base_of(Base[10], Derived[10]));
static_assert(!__builtin_is_virtual_base_of(Base[n], Derived[n])); // expected-error 2 {{variable length arrays are not supported in '__builtin_is_virtual_base_of'}}
static_assert(!__builtin_is_virtual_base_of(int, int));
static_assert(!__builtin_is_virtual_base_of(int[], int[]));
static_assert(!__builtin_is_virtual_base_of(long, int));
// Class template specializations as the second operand; naming an undefined
// template there is an error.
static_assert(!__builtin_is_virtual_base_of(Base, DerivedTemp<int>));
static_assert(!__builtin_is_virtual_base_of(Base, NonderivedTemp<int>));
static_assert(!__builtin_is_virtual_base_of(Base, UndefinedTemp<int>)); // expected-error {{implicit instantiation of undefined template 'UndefinedTemp<int>'}}
// The access specifier on the inheritance path does not affect the result.
static_assert(__builtin_is_virtual_base_of(Base, DerivedPrivate));
static_assert(__builtin_is_virtual_base_of(Base, DerivedProtected));
static_assert(__builtin_is_virtual_base_of(Base, DerivedPrivatePrivate));
static_assert(__builtin_is_virtual_base_of(Base, DerivedPrivateProtected));
static_assert(__builtin_is_virtual_base_of(Base, DerivedProtectedPrivate));
static_assert(__builtin_is_virtual_base_of(Base, DerivedProtectedProtected));
// A virtual base of an intermediate class is still reported for classes that
// derive from that intermediate class, virtually or not.
static_assert(__builtin_is_virtual_base_of(Derived2a, DerivedTransitiveViaNonVirtual));
static_assert(__builtin_is_virtual_base_of(Derived2b, DerivedTransitiveViaNonVirtual));
static_assert(__builtin_is_virtual_base_of(Derived2a, DerivedTransitiveViaVirtual));
static_assert(__builtin_is_virtual_base_of(Derived2b, DerivedTransitiveViaVirtual));
// Templates that inherit from their own argument: only the virtual flavour
// makes the argument a virtual base.
static_assert(!__builtin_is_virtual_base_of(Base, CrazyDerived<Base>));
static_assert(!__builtin_is_virtual_base_of(CrazyDerived<Base>, Base));
static_assert(__builtin_is_virtual_base_of(Base, CrazyDerivedVirtual<Base>));
static_assert(!__builtin_is_virtual_base_of(CrazyDerivedVirtual<Base>, Base));

// Union operands (IncompleteUnion / IncompleteStruct are declared elsewhere in
// this file) yield false, and none of these lines carries a diagnostic
// directive.
static_assert(!__builtin_is_virtual_base_of(IncompleteUnion, IncompleteUnion));
static_assert(!__builtin_is_virtual_base_of(Union, IncompleteUnion));
static_assert(!__builtin_is_virtual_base_of(IncompleteUnion, Union));
static_assert(!__builtin_is_virtual_base_of(IncompleteStruct, IncompleteUnion));
static_assert(!__builtin_is_virtual_base_of(IncompleteUnion, IncompleteStruct));
static_assert(!__builtin_is_virtual_base_of(Empty, IncompleteUnion));
static_assert(!__builtin_is_virtual_base_of(IncompleteUnion, Empty));
static_assert(!__builtin_is_virtual_base_of(int, IncompleteUnion));
static_assert(!__builtin_is_virtual_base_of(IncompleteUnion, int));
static_assert(!__builtin_is_virtual_base_of(Empty, Union));
static_assert(!__builtin_is_virtual_base_of(Union, Empty));
static_assert(!__builtin_is_virtual_base_of(int, Empty));
static_assert(!__builtin_is_virtual_base_of(Union, int));
// Only the second operand is a VLA here, so a single diagnostic is produced.
static_assert(!__builtin_is_virtual_base_of(IncompleteStruct, IncompleteStruct[n])); // expected-error {{variable length arrays are not supported in '__builtin_is_virtual_base_of'}}
}

template<class T, class U>
class TemplateClass {};

Expand Down
7 changes: 4 additions & 3 deletions clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,10 +598,11 @@ Expected<SmallVector<StringRef>> getInput(const ArgList &Args) {
Res.Prevailing = !Sym.isUndefined() && ObjSym.File == *BitcodeFile;

// We need LTO to preserve the following global symbols:
// 1) Symbols used in regular objects.
// 2) Prevailing symbols that are needed visible to the gpu runtime.
// 1) All symbols during a relocatable link.
// 2) Symbols used in regular objects.
// 3) Prevailing symbols that are needed visible to the gpu runtime.
Res.VisibleToRegularObj =
ObjSym.UsedInRegularObj ||
Args.hasArg(OPT_relocatable) || ObjSym.UsedInRegularObj ||
(Res.Prevailing &&
(Sym.getVisibility() != GlobalValue::HiddenVisibility &&
!Sym.canBeOmittedFromSymbolTable()));
Expand Down
7 changes: 5 additions & 2 deletions clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def plugin_opt : Joined<["--", "-"], "plugin-opt=">, Flags<[WrapperOnlyOption]>,
def save_temps : Flag<["--", "-"], "save-temps">,
Flags<[WrapperOnlyOption]>, HelpText<"Save intermediate results">;

def relocatable : Flag<["--", "-"], "relocatable">,
Flags<[WrapperOnlyOption]>, HelpText<"Perform a relocatable link (LTO only)">;
def r : Flag<["-"], "r">, Flags<[WrapperOnlyOption]>, Alias<relocatable>;

def whole_archive : Flag<["--", "-"], "whole-archive">,
Flags<[WrapperOnlyOption, HelpHidden]>;
def no_whole_archive : Flag<["--", "-"], "no-whole-archive">,
Expand All @@ -83,8 +87,7 @@ def mllvm : Separate<["-"], "mllvm">, Flags<[WrapperOnlyOption]>,
HelpText<"Arguments passed to LLVM, including Clang invocations, for which "
"the '-mllvm' prefix is preserved. Use '-mllvm --help' for a list "
"of options.">;
def mllvm_EQ : Joined<["-"], "mllvm=">, Flags<[HelpHidden]>,
Alias<mllvm>;
def mllvm_EQ : Joined<["-"], "mllvm=">, Flags<[HelpHidden]>, Alias<mllvm>;

def dry_run : Flag<["--", "-"], "dry-run">, Flags<[WrapperOnlyOption]>,
HelpText<"Print generated commands without running.">;
37 changes: 37 additions & 0 deletions clang/unittests/AST/ASTImporterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9783,6 +9783,43 @@ TEST_P(ASTImporterOptionSpecificTestBase, ImportExistingEmptyAnonymousEnums) {
EXPECT_EQ(ImportedE2, ToE1);
}

// Imports one enumerator from each of two anonymous enums that live in the
// same struct. The imported enumerators must land in the same record, but in
// two distinct EnumDecls.
TEST_P(ASTImporterOptionSpecificTestBase, ImportMultipleAnonymousEnumDecls) {
  Decl *ToTU = getToTuDecl("", Lang_CXX03);
  Decl *FromTU = getTuDecl(
      R"(
struct foo {
enum { A };
enum { B };
};
)",
      Lang_CXX03);

  // Matchers shared by the "from" lookups and the "to" checks.
  auto MatchConstA = enumConstantDecl(hasName("A"));
  auto MatchConstB = enumConstantDecl(hasName("B"));
  auto MatchEnumWithA = enumDecl(has(MatchConstA));
  auto MatchEnumWithB = enumDecl(has(MatchConstB));

  auto *FromConstA =
      FirstDeclMatcher<EnumConstantDecl>().match(FromTU, MatchConstA);
  auto *FromConstB =
      FirstDeclMatcher<EnumConstantDecl>().match(FromTU, MatchConstB);

  auto *ImportedA = Import(FromConstA, Lang_CXX03);
  auto *ImportedB = Import(FromConstB, Lang_CXX03);

  ASSERT_TRUE(ImportedA);
  ASSERT_TRUE(ImportedB);

  // Both enumerators belong to the same enclosing record...
  auto *RecordOfA = FirstDeclMatcher<CXXRecordDecl>().match(
      ToTU, tagDecl(has(MatchEnumWithA)));
  auto *RecordOfB = FirstDeclMatcher<CXXRecordDecl>().match(
      ToTU, tagDecl(has(MatchEnumWithB)));
  ASSERT_EQ(RecordOfA, RecordOfB);

  // ...but each one sits in its own EnumDecl.
  auto *EnumOfA = FirstDeclMatcher<EnumDecl>().match(ToTU, MatchEnumWithA);
  auto *EnumOfB = FirstDeclMatcher<EnumDecl>().match(ToTU, MatchEnumWithB);
  ASSERT_NE(EnumOfA, EnumOfB);
}

INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest,
DefaultTestValuesForRunOptions);

Expand Down
14 changes: 14 additions & 0 deletions clang/unittests/AST/StructuralEquivalenceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,20 @@ TEST_F(StructuralEquivalenceEnumTest, EnumsWithDifferentBody) {
EXPECT_FALSE(testStructuralMatch(t));
}

// Two structs whose anonymous member enums declare the same enumerator are
// structurally equivalent.
TEST_F(StructuralEquivalenceEnumTest, AnonymousEnumsWithSameConsts) {
  // The member `x` is what makes the comparison descend into the anonymous
  // enum; without a field of that type the enum would not be compared.
  auto DeclPair = makeNamedDecls("struct foo { enum { A } x; };",
                                 "struct foo { enum { A } x;};", Lang_CXX11);
  const auto Matches = testStructuralMatch(DeclPair);
  EXPECT_TRUE(Matches);
}

// Two structs whose anonymous member enums declare different enumerators are
// not structurally equivalent.
TEST_F(StructuralEquivalenceEnumTest, AnonymousEnumsWithDiffConsts) {
  // The member `x` is what makes the comparison descend into the anonymous
  // enum; without a field of that type the enum would not be compared.
  auto DeclPair = makeNamedDecls("struct foo { enum { A } x; };",
                                 "struct foo { enum { B } x;};", Lang_CXX11);
  const auto Matches = testStructuralMatch(DeclPair);
  EXPECT_FALSE(Matches);
}

struct StructuralEquivalenceEnumConstantTest : StructuralEquivalenceTest {};

TEST_F(StructuralEquivalenceEnumConstantTest, EnumConstantsWithSameValues) {
Expand Down
49 changes: 49 additions & 0 deletions clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,55 @@ TEST_F(EnvironmentTest, CXXDefaultInitExprResultObjIsWrappedExprResultObj) {
&Env.getResultObjectLocation(*DefaultInit->getExpr()));
}

// Verifies `getResultObjectLocation()` for a `CXXInheritedCtorInitExpr`.
//
// The constructor that contains the expression is implicitly generated, so
// there is no statement inside it that could carry an annotation the way
// TransferTest-based tests need. The `Environment` is therefore constructed
// and initialized by hand, which is why this test lives in EnvironmentTest
// rather than next to the other `getResultObjectLocation()` tests in
// TransferTest.
TEST_F(EnvironmentTest, ResultObjectLocationForInheritedCtorInitExpr) {
  using namespace ast_matchers;

  std::string Code = R"(
struct Base {
Base(int b) {}
};
struct Derived : Base {
using Base::Base;
};
Derived d = Derived(0);
)";

  auto Unit =
      tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++20"});
  auto &Context = Unit->getASTContext();

  ASSERT_EQ(Context.getDiagnostics().getClient()->getNumErrors(), 0U);

  // Find a constructor that has a member/base initializer and bind both the
  // constructor and the initializer expression.
  auto Results =
      match(cxxConstructorDecl(
                hasAnyConstructorInitializer(cxxCtorInitializer(
                    withInitializer(expr().bind("inherited_ctor_init_expr")))))
                .bind("ctor"),
            Context);
  const auto *Constructor = selectFirst<CXXConstructorDecl>("ctor", Results);
  const auto *InheritedCtorInit = selectFirst<CXXInheritedCtorInitExpr>(
      "inherited_ctor_init_expr", Results);

  // Both pointers are dereferenced below; fail the test cleanly instead of
  // crashing if the matcher did not find the expected nodes.
  ASSERT_NE(Constructor, nullptr);
  ASSERT_NE(InheritedCtorInit, nullptr);

  // The expression has no child expressions.
  EXPECT_EQ(InheritedCtorInit->child_begin(), InheritedCtorInit->child_end());

  Environment Env(DAContext, *Constructor);
  Env.initialize();

  RecordStorageLocation &Loc = Env.getResultObjectLocation(*InheritedCtorInit);
  EXPECT_NE(&Loc, nullptr);

  // The result object of the inherited-constructor call is the object that
  // `this` points to.
  EXPECT_EQ(&Loc, Env.getThisPointeeStorageLocation());
}

TEST_F(EnvironmentTest, Stmt) {
using namespace ast_matchers;

Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,7 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
opts.setFPContractMode(fpContractMode);
}

if (args.getLastArg(clang::driver::options::OPT_menable_no_infinities)) {
if (args.getLastArg(clang::driver::options::OPT_menable_no_infs)) {
opts.NoHonorInfs = true;
}

Expand Down
5 changes: 3 additions & 2 deletions flang/runtime/transformational.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,8 @@ void RTDEF(CshiftVector)(Descriptor &result, const Descriptor &source,
SubscriptValue lb{sourceDim.LowerBound()};
for (SubscriptValue j{0}; j < extent; ++j) {
SubscriptValue resultAt{1 + j};
SubscriptValue sourceAt{lb + (j + shift) % extent};
SubscriptValue sourceAt{
lb + static_cast<SubscriptValue>(j + shift) % extent};
if (sourceAt < lb) {
sourceAt += extent;
}
Expand Down Expand Up @@ -619,7 +620,7 @@ void RTDEF(EoshiftVector)(Descriptor &result, const Descriptor &source,
}
SubscriptValue lb{source.GetDimension(0).LowerBound()};
for (SubscriptValue j{1}; j <= extent; ++j) {
SubscriptValue sourceAt{lb + j - 1 + shift};
SubscriptValue sourceAt{lb + j - 1 + static_cast<SubscriptValue>(shift)};
if (sourceAt >= lb && sourceAt < lb + extent) {
CopyElement(result, &j, source, &sourceAt, terminator);
} else if (boundary) {
Expand Down
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
1 change: 1 addition & 0 deletions libc/config/windows/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.acoshf
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
libc.src.math.atan2f
libc.src.math.atanf
libc.src.math.atanhf
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan | |check| | | | | | 7.12.4.3 | F.10.1.3 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan2 | |check| | | | | | 7.12.4.4 | F.10.1.4 |
| atan2 | |check| | 1 ULP | | | | 7.12.4.4 | F.10.1.4 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| atan2pi | | | | | | 7.12.4.11 | F.10.1.11 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
Expand Down
1 change: 1 addition & 0 deletions libc/spec/stdc.td
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ def StdC : StandardSpec<"stdc"> {

FunctionSpec<"atanf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

FunctionSpec<"atan2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"atan2f", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,

FunctionSpec<"acoshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
Expand Down
20 changes: 20 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3848,6 +3848,26 @@ add_entrypoint_object(
libc.src.__support.macros.optimization
)

add_entrypoint_object(
atan2
SRCS
atan2.cpp
HDRS
../atan2.h
COMPILE_OPTIONS
-O3
DEPENDS
.inv_trigf_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
)

add_entrypoint_object(
scalblnf16
SRCS
Expand Down
313 changes: 313 additions & 0 deletions libc/src/math/generic/atan2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
//===-- Double-precision atan2 function -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/atan2.h"
#include "inv_trigf_utils.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY

namespace LIBC_NAMESPACE_DECL {

namespace {

using DoubleDouble = fputil::DoubleDouble;

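// atan(i/64) with i = 0..64, stored as double-double pairs in {lo, hi} order
// (low-order correction first, leading double second), generated by Sollya
// with:
// > for i from 0 to 64 do {
// a = round(atan(i/64), D, RN);
// b = round(atan(i/64) - a, D, RN);
// print("{", b, ",", a, "},");
// };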
constexpr fputil::DoubleDouble ATAN_I[65] = {
{0.0, 0.0},
{-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7},
{-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6},
{-0x1.86ef8f794f105p-63, 0x1.7fb818430da2ap-5},
{-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5},
{0x1.ac4ce285df847p-58, 0x1.3f59f0e7c559dp-4},
{-0x1.cfb654c0c3d98p-58, 0x1.7ee182602f10fp-4},
{0x1.f7b8f29a05987p-58, 0x1.be39ebe6f07c3p-4},
{-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4},
{-0x1.b485914dacf8cp-59, 0x1.1e1fafb043727p-3},
{0x1.61a3b0ce9281bp-57, 0x1.3d6eee8c6626cp-3},
{-0x1.054ab2c010f3dp-58, 0x1.5c9811e3ec26ap-3},
{0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3},
{0x1.cf601e7b4348ep-59, 0x1.9a6a8e96c8626p-3},
{0x1.17b10d2e0e5abp-61, 0x1.b90d7529260a2p-3},
{0x1.c648d1534597ep-57, 0x1.d77d5df205736p-3},
{0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3},
{0x1.62e47390cb865p-56, 0x1.09dc597d86362p-2},
{0x1.30ca4748b1bf9p-57, 0x1.18bf5a30bf178p-2},
{-0x1.077cdd36dfc81p-56, 0x1.278372057ef46p-2},
{-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2},
{-0x1.5d5e43c55b3bap-56, 0x1.44aa436c2af0ap-2},
{-0x1.2566480884082p-57, 0x1.530ad9951cd4ap-2},
{-0x1.a725715711fp-56, 0x1.614840309cfe2p-2},
{-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2},
{0x1.69c885c2b249ap-56, 0x1.7d5604b63b3f7p-2},
{0x1.b6d0ba3748fa8p-56, 0x1.8b24d394a1b25p-2},
{0x1.9e6c988fd0a77p-56, 0x1.98cd5454d6b18p-2},
{-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2},
{0x1.ae187b1ca504p-56, 0x1.b3a911da65c6cp-2},
{-0x1.cc1ce70934c34p-56, 0x1.c0db4c94ec9fp-2},
{-0x1.a2cfa4418f1adp-56, 0x1.cde53432c1351p-2},
{0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2},
{0x1.0e53dc1bf3435p-56, 0x1.e77eb7f175a34p-2},
{-0x1.a3992dc382a23p-57, 0x1.f40dd0b541418p-2},
{-0x1.b32c949c9d593p-55, 0x1.0039c73c1a40cp-1},
{-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1},
{0x1.974fa13b5404fp-58, 0x1.0c6145b5b43dap-1},
{-0x1.2bdaee1c0ee35p-58, 0x1.1255d9bfbd2a9p-1},
{0x1.c621cec00c301p-55, 0x1.1835a88be7c13p-1},
{-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1},
{0x1.c421c9f38224ep-57, 0x1.23b71e2cc9e6ap-1},
{-0x1.09e73b0c6c087p-56, 0x1.2958e59308e31p-1},
{0x1.c5d5e9ff0cf8dp-55, 0x1.2ee628406cbcap-1},
{0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1},
{-0x1.2304331d8bf46p-55, 0x1.39c391cd4171ap-1},
{0x1.ecf8b492644fp-56, 0x1.3f13fb89e96f4p-1},
{-0x1.f76d0163f79c8p-56, 0x1.445065b795b56p-1},
{0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1},
{0x1.4a33dbeb3796cp-55, 0x1.4e8de5bb6ec04p-1},
{-0x1.1bb74abda520cp-55, 0x1.538f57b89061fp-1},
{-0x1.5e5c9d8c5a95p-56, 0x1.587d81f732fbbp-1},
{0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1},
{-0x1.2b785350ee8c1p-57, 0x1.6220d115d7b8ep-1},
{-0x1.6ea6febe8bbbap-56, 0x1.66d663923e087p-1},
{-0x1.a80386188c50ep-55, 0x1.6b798920b3d99p-1},
{-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1},
{0x1.7b2a6165884a1p-59, 0x1.748978fba8e0fp-1},
{0x1.406a08980374p-55, 0x1.78f6bbd5d315ep-1},
{0x1.560821e2f3aa9p-55, 0x1.7d528289fa093p-1},
{-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1},
{0x1.6b66e7fc8b8c3p-57, 0x1.85d69576cc2c5p-1},
{-0x1.55b9a5e177a1bp-55, 0x1.89ff5ff57f1f8p-1},
{-0x1.ec182ab042f61p-56, 0x1.8e17aa99cc05ep-1},
{0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1},
};

// Approximate atan(x) for |x| <= 2^-7.
// Using degree-9 Taylor polynomial:
// P = x - x^3/3 + x^5/5 - x^7/7 + x^9/9;
// Then the absolute error is bounded by:
// |atan(x) - P(x)| < |x|^11/11 < 2^(-7*11) / 11 < 2^-80.
// And the relative error is bounded by:
// |(atan(x) - P(x))/atan(x)| < |x|^10 / 10 < 2^-73.
// For x = x_hi + x_lo, fully expanding the polynomial and dropping any terms
// smaller than ulp(x_hi^3 / 3) gives us:
// P(x) ~ x_hi - x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 +
// + x_lo * (1 - x_hi^2 + x_hi^4)
// The result keeps hi = x_hi unchanged and accumulates every other term into
// lo; no renormalization of the pair is performed here.
DoubleDouble atan_eval(const DoubleDouble &x) {
DoubleDouble p;
p.hi = x.hi;
double x_hi_sq = x.hi * x.hi;
// c0 ~ x_hi^2 * 1/5 - 1/3
double c0 = fputil::multiply_add(x_hi_sq, 0x1.999999999999ap-3,
-0x1.5555555555555p-2);
// c1 ~ x_hi^2 * 1/9 - 1/7
double c1 = fputil::multiply_add(x_hi_sq, 0x1.c71c71c71c71cp-4,
-0x1.2492492492492p-3);
// x_hi^3
double x_hi_3 = x_hi_sq * x.hi;
// x_hi^4
double x_hi_4 = x_hi_sq * x_hi_sq;
// d0 = x_hi^4 * c1 + c0 ~ -1/3 + x_hi^2 / 5 - x_hi^4 / 7 + x_hi^6 / 9
double d0 = fputil::multiply_add(x_hi_4, c1, c0);
// x_lo - x_lo * x_hi^2 + x_lo * x_hi^4
double d1 = fputil::multiply_add(x_hi_4 - x_hi_sq, x.lo, x.lo);
// p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 +
// + x_lo * (1 - x_hi^2 + x_hi^4)
p.lo = fputil::multiply_add(x_hi_3, d0, d1);
return p;
}

} // anonymous namespace

// There are several range reduction steps we can take for atan2(y, x) as
// follows:

// * Range reduction 1: signedness
// atan2(y, x) will return a number between -PI and PI representing the angle
// formed by the Ox axis and the vector (x, y) on the Oxy-plane.
// In particular, we have that:
// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant)
// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
// Since atan function is odd, we can use the formula:
// atan(-u) = -atan(u)
// to adjust the above conditions a bit further:
// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant)
// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant)
// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant)
// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant)
// Which can be simplified to:
// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0
// = sign(y) * (pi - atan( |y|/|x| )) if x < 0

// * Range reduction 2: reciprocal
// Now that the argument inside atan is positive, we can use the formula:
// atan(1/x) = pi/2 - atan(x)
// to make the argument inside atan <= 1 as follows:
// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x
// = sign(y) * (pi/2 - atan( |x|/|y| )) if 0 <= x < |y|
// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x
// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y|

// * Range reduction 3: look up table.
// After the previous two range reduction steps, we reduce the problem to
// compute atan(u) with 0 <= u <= 1, or to be precise:
// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|).
// An accurate polynomial approximation for the whole [0, 1] input range will
// require a very large degree. To make it more efficient, we reduce the input
// range further by finding an integer idx such that:
// | n/d - idx/64 | <= 1/128.
// In particular,
// idx := round(2^6 * n/d)
// Then for the fast pass, we find a polynomial approximation for:
// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64)
// For the accurate pass, we use the addition formula:
// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) )
// = atan( (n - d*(idx/64))/(d + n*(idx/64)) )
// And for the fast pass, we use degree-9 Taylor polynomial to compute the RHS:
// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9
// with absolute errors bounded by:
// |atan(u) - P(u)| < |u|^11 / 11 < 2^-80
// and relative errors bounded by:
// |(atan(u) - P(u)) / P(u)| < u^10 / 11 < 2^-73.

LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) {
using FPBits = fputil::FPBits<double>;

constexpr double IS_NEG[2] = {1.0, -1.0};
constexpr DoubleDouble ZERO = {0.0, 0.0};
constexpr DoubleDouble MZERO = {-0.0, -0.0};
constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p+1};
constexpr DoubleDouble MPI = {-0x1.1a62633145c07p-53, -0x1.921fb54442d18p+1};
constexpr DoubleDouble PI_OVER_2 = {0x1.1a62633145c07p-54,
0x1.921fb54442d18p0};
constexpr DoubleDouble MPI_OVER_2 = {-0x1.1a62633145c07p-54,
-0x1.921fb54442d18p0};
constexpr DoubleDouble PI_OVER_4 = {0x1.1a62633145c07p-55,
0x1.921fb54442d18p-1};
constexpr DoubleDouble THREE_PI_OVER_4 = {0x1.a79394c9e8a0ap-54,
0x1.2d97c7f3321d2p+1};
// Adjustment for constant term:
// CONST_ADJ[x_sign][y_sign][recip]
constexpr DoubleDouble CONST_ADJ[2][2][2] = {
{{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}},
{{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}};

FPBits x_bits(x), y_bits(y);
bool x_sign = x_bits.sign().is_neg();
bool y_sign = y_bits.sign().is_neg();
x_bits = x_bits.abs();
y_bits = y_bits.abs();
uint64_t x_abs = x_bits.uintval();
uint64_t y_abs = y_bits.uintval();
bool recip = x_abs < y_abs;
uint64_t min_abs = recip ? x_abs : y_abs;
uint64_t max_abs = !recip ? x_abs : y_abs;
unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);

double num = FPBits(min_abs).get_val();
double den = FPBits(max_abs).get_val();

// Check for exceptional cases, whether inputs are 0, inf, nan, or close to
// overflow, or close to underflow.
if (LIBC_UNLIKELY(max_exp > 0x7ffU - 128U || min_exp < 128U)) {
if (x_bits.is_nan() || y_bits.is_nan())
return FPBits::quiet_nan().get_val();
unsigned x_except = x_abs == 0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1);
unsigned y_except = y_abs == 0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1);

// Exceptional cases:
// EXCEPT[y_except][x_except][x_is_neg]
// with x_except & y_except:
// 0: zero
// 1: finite, non-zero
// 2: infinity
constexpr DoubleDouble EXCEPTS[3][3][2] = {
{{ZERO, PI}, {ZERO, PI}, {ZERO, PI}},
{{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}},
{{PI_OVER_2, PI_OVER_2},
{PI_OVER_2, PI_OVER_2},
{PI_OVER_4, THREE_PI_OVER_4}},
};

if ((x_except != 1) || (y_except != 1)) {
DoubleDouble r = EXCEPTS[y_except][x_except][x_sign];
return fputil::multiply_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
}
bool scale_up = min_exp < 128U;
bool scale_down = max_exp > 0x7ffU - 128U;
// At least one input is denormal, multiply both numerator and denominator
// by some large enough power of 2 to normalize denormal inputs.
if (scale_up) {
num *= 0x1.0p64;
if (!scale_down)
den *= 0x1.0p64;
} else if (scale_down) {
den *= 0x1.0p-64;
if (!scale_up)
num *= 0x1.0p-64;
}

min_abs = FPBits(num).uintval();
max_abs = FPBits(den).uintval();
min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);
}

double final_sign = IS_NEG[(x_sign != y_sign) != recip];
DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
unsigned exp_diff = max_exp - min_exp;
// We have the following bound for normalized n and d:
// 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
if (LIBC_UNLIKELY(exp_diff > 54)) {
return fputil::multiply_add(final_sign, const_term.hi,
final_sign * (const_term.lo + num / den));
}

double k = fputil::nearest_integer(64.0 * num / den);
unsigned idx = static_cast<unsigned>(k);
// k = idx / 64
k *= 0x1.0p-6;

// Range reduction:
// atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64)))
// = atan((n - d * k/64) / (d + n * k/64))
DoubleDouble num_k = fputil::exact_mult(num, k);
DoubleDouble den_k = fputil::exact_mult(den, k);

// num_dd = n - d * k
DoubleDouble num_dd = fputil::exact_add(num - den_k.hi, -den_k.lo);
// den_dd = d + n * k
DoubleDouble den_dd = fputil::exact_add(den, num_k.hi);
den_dd.lo += num_k.lo;

// q = (n - d * k) / (d + n * k)
DoubleDouble q = fputil::div(num_dd, den_dd);
// p ~ atan(q)
DoubleDouble p = atan_eval(q);

DoubleDouble r = fputil::add(const_term, fputil::add(ATAN_I[idx], p));
r.hi *= final_sign;
r.lo *= final_sign;

return r.hi + r.lo;
}

} // namespace LIBC_NAMESPACE_DECL
10 changes: 10 additions & 0 deletions libc/startup/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ function(add_startup_object name)
PROPERTIES
OUTPUT_NAME ${name}.o
)

# Make an executable target of relocatable bitcode for clang if needed.
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR)
add_executable(${fq_target_name}.exe $<TARGET_OBJECTS:${fq_target_name}>)
set_target_properties(${fq_target_name}.exe PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR}
RUNTIME_OUTPUT_NAME ${name}.o)
target_link_options(${fq_target_name}.exe PRIVATE
"-nostdlib" "-flto" "-Wl,--lto-emit-llvm" "-march= ")
endif()
endfunction()

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
Expand Down
12 changes: 12 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,18 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)

# MPFR-backed unit test for atan2 (requests NEED_MPFR), registered in the
# libc-math-unittests suite and built from atan2_test.cpp.
add_fp_unittest(
atan2_test
NEED_MPFR
SUITE
libc-math-unittests
SRCS
atan2_test.cpp
DEPENDS
libc.src.math.atan2
libc.src.__support.FPUtil.fp_bits
)

add_fp_unittest(
f16add_test
NEED_MPFR
Expand Down
125 changes: 125 additions & 0 deletions libc/test/src/math/atan2_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//===-- Unittests for atan2 -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/FPUtil/FPBits.h"
#include "src/math/atan2.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"

using LlvmLibcAtan2Test = LIBC_NAMESPACE::testing::FPTest<double>;
using LIBC_NAMESPACE::testing::tlog;

namespace mpfr = LIBC_NAMESPACE::testing::mpfr;

// Checks a short list of hand-picked atan2 inputs against MPFR with a 0.5 ULP
// tolerance. Each input pair is tested as given and then with the sign of the
// first argument, both arguments, and the second argument flipped.
TEST_F(LlvmLibcAtan2Test, TrickyInputs) {
  mpfr::BinaryInput<double> inputs[] = {
      {0x1.0853408534085p-2, 0x1.e7b54166c6126p-2},
      {FPBits::inf().get_val(), 0x0.0000000000001p-1022},
  };

  for (mpfr::BinaryInput<double> &input : inputs) {
    // Keep the original magnitudes; `input` itself is mutated below so that
    // the MPFR reference sees the same signs as the call under test.
    double x = input.x;
    double y = input.y;

    // (x, y): checked in downward rounding only.
    mpfr::RoundingMode rm = mpfr::RoundingMode::Downward;
    mpfr::ForceRoundingMode rr(rm);
    // Only compare when the rounding mode was actually applied (`success` is
    // presumably false when the platform cannot set it). Without this guard
    // the result would be compared against a downward-rounded reference it
    // was not computed under. InDoubleRange performs the same check.
    if (rr.success) {
      ASSERT_MPFR_MATCH(mpfr::Operation::Atan2, input,
                        LIBC_NAMESPACE::atan2(x, y), 0.5, rm);
    }

    // (-x, y)
    input.x = -input.x;
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, input,
                                   LIBC_NAMESPACE::atan2(-x, y), 0.5);
    // (-x, -y)
    input.y = -input.y;
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, input,
                                   LIBC_NAMESPACE::atan2(-x, -y), 0.5);
    // (x, -y)
    input.x = -input.x;
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, input,
                                   LIBC_NAMESPACE::atan2(x, -y), 0.5);
  }
}

// Sweeps atan2 over a grid of argument pairs whose bit patterns run from
// FPBits(0.25) to FPBits(4.0) in both arguments. Every finite result is
// compared against MPFR at 0.5 ULP, and the sweep is repeated for each of the
// four rounding modes.
TEST_F(LlvmLibcAtan2Test, InDoubleRange) {
  // Grid for the first argument, stepped in bit-pattern space.
  constexpr uint64_t X_COUNT = 123;
  constexpr uint64_t X_START = FPBits(0.25).uintval();
  constexpr uint64_t X_STOP = FPBits(4.0).uintval();
  constexpr uint64_t X_STEP = (X_STOP - X_START) / X_COUNT;

  // Grid for the second argument, stepped in bit-pattern space.
  constexpr uint64_t Y_COUNT = 137;
  constexpr uint64_t Y_START = FPBits(0.25).uintval();
  constexpr uint64_t Y_STOP = FPBits(4.0).uintval();
  constexpr uint64_t Y_STEP = (Y_STOP - Y_START) / Y_COUNT;

  auto test = [&](mpfr::RoundingMode rounding_mode) {
    // Skip this mode entirely when it cannot be applied.
    mpfr::ForceRoundingMode force_rounding(rounding_mode);
    if (!force_rounding.success)
      return;

    uint64_t fail_count = 0;
    uint64_t finite_results = 0;
    uint64_t evaluated = 0;
    // Last input/result pair that exceeded the current tolerance.
    double failed_x = 0.0, failed_y = 0.0, failed_r = 0.0;
    // Running tolerance; doubled on failures to estimate the worst error.
    double tol = 0.5;

    for (uint64_t xi = 0; xi <= X_COUNT; ++xi) {
      double x = FPBits(X_START + xi * X_STEP).get_val();
      if (FPBits(x).is_inf_or_nan())
        continue;
      if (x < 0.0)
        continue;

      for (uint64_t yi = 0; yi <= Y_COUNT; ++yi) {
        double y = FPBits(Y_START + yi * Y_STEP).get_val();
        if (FPBits(y).is_inf_or_nan())
          continue;

        double result = LIBC_NAMESPACE::atan2(x, y);
        ++evaluated;
        // Non-finite results are counted but not compared.
        if (FPBits(result).is_inf_or_nan())
          continue;
        ++finite_results;

        mpfr::BinaryInput<double> inputs{x, y};

        if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Atan2, inputs,
                                               result, 0.5, rounding_mode)) {
          ++fail_count;
          // Widen the tolerance until this result fits (or the cap is hit),
          // remembering the offending inputs on each step.
          while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(
              mpfr::Operation::Atan2, inputs, result, tol, rounding_mode)) {
            failed_x = x;
            failed_y = y;
            failed_r = result;

            if (tol > 1000.0)
              break;

            tol *= 2.0;
          }
        }
      }
    }

    const bool some_non_finite = finite_results < evaluated;
    if (fail_count != 0 || some_non_finite) {
      tlog << " Atan2 failed: " << fail_count << "/" << finite_results << "/"
           << evaluated << " tests.\n"
           << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
    }

    if (fail_count == 0)
      return;

    // Re-run the recorded failure through the reporting matcher so it is
    // registered as a test failure.
    mpfr::BinaryInput<double> inputs{failed_x, failed_y};
    EXPECT_MPFR_MATCH(mpfr::Operation::Atan2, inputs, failed_r, 0.5,
                      rounding_mode);
  };

  tlog << " Test Rounding To Nearest...\n";
  test(mpfr::RoundingMode::Nearest);

  tlog << " Test Rounding Downward...\n";
  test(mpfr::RoundingMode::Downward);

  tlog << " Test Rounding Upward...\n";
  test(mpfr::RoundingMode::Upward);

  tlog << " Test Rounding Toward Zero...\n";
  test(mpfr::RoundingMode::TowardZero);
}
10 changes: 10 additions & 0 deletions libc/test/src/math/smoke/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3531,6 +3531,16 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)

# Smoke test for atan2, registered in the libc-math-smoke-tests suite and
# built from atan2_test.cpp. It does not request NEED_MPFR and depends only on
# libc.src.math.atan2.
add_fp_unittest(
atan2_test
SUITE
libc-math-smoke-tests
SRCS
atan2_test.cpp
DEPENDS
libc.src.math.atan2
)

add_fp_unittest(
scalblnf16_test
SUITE
Expand Down
22 changes: 22 additions & 0 deletions libc/test/src/math/smoke/atan2_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Unittests for atan2 -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/atan2.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"

using LlvmLibcAtan2Test = LIBC_NAMESPACE::testing::FPTest<double>;

// Checks atan2 on NaN, signed-zero and infinite arguments against exact
// expected values using the EXPECT_FP_EQ_ALL_ROUNDING matcher. `aNaN`, `zero`
// and `inf` come from the FPTest<double> fixture (presumably a NaN, +0.0 and
// +infinity respectively).
TEST_F(LlvmLibcAtan2Test, SpecialNumbers) {
// A NaN in either argument position is expected to produce NaN.
EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atan2(aNaN, zero));
EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atan2(1.0, aNaN));
// A zero first argument with `zero` as the second argument is expected to
// return a zero carrying the sign of the first argument.
EXPECT_FP_EQ_ALL_ROUNDING(0.0, LIBC_NAMESPACE::atan2(zero, zero));
EXPECT_FP_EQ_ALL_ROUNDING(-0.0, LIBC_NAMESPACE::atan2(-0.0, zero));
// A finite non-zero first argument with `inf` as the second argument is
// expected to return a zero carrying the sign of the first argument.
EXPECT_FP_EQ_ALL_ROUNDING(0.0, LIBC_NAMESPACE::atan2(1.0, inf));
EXPECT_FP_EQ_ALL_ROUNDING(-0.0, LIBC_NAMESPACE::atan2(-1.0, inf));
}
Loading