24 changes: 14 additions & 10 deletions clang/lib/AST/ByteCode/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
std::nullopt, true, false,
/*IsMutable=*/false, nullptr);
}
return this->emitNull(classifyPrim(CE->getType()), Desc, CE);

uint64_t Val = Ctx.getASTContext().getTargetNullPointerValue(CE->getType());
return this->emitNull(classifyPrim(CE->getType()), Val, Desc, CE);
}

case CK_PointerToIntegral: {
Expand Down Expand Up @@ -3817,7 +3819,7 @@ template <class Emitter> bool Compiler<Emitter>::visitBool(const Expr *E) {

// Convert pointers to bool.
if (T == PT_Ptr || T == PT_FnPtr) {
if (!this->emitNull(*T, nullptr, E))
if (!this->emitNull(*T, 0, nullptr, E))
return false;
return this->emitNE(*T, E);
}
Expand Down Expand Up @@ -3857,11 +3859,12 @@ bool Compiler<Emitter>::visitZeroInitializer(PrimType T, QualType QT,
case PT_IntAPS:
return this->emitZeroIntAPS(Ctx.getBitWidth(QT), E);
case PT_Ptr:
return this->emitNullPtr(nullptr, E);
return this->emitNullPtr(Ctx.getASTContext().getTargetNullPointerValue(QT),
nullptr, E);
case PT_FnPtr:
return this->emitNullFnPtr(nullptr, E);
return this->emitNullFnPtr(0, nullptr, E);
case PT_MemberPtr:
return this->emitNullMemberPtr(nullptr, E);
return this->emitNullMemberPtr(0, nullptr, E);
case PT_Float:
return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E);
case PT_FixedPoint: {
Expand Down Expand Up @@ -4421,7 +4424,7 @@ bool Compiler<Emitter>::visitAPValue(const APValue &Val, PrimType ValType,

if (Val.isLValue()) {
if (Val.isNullPointer())
return this->emitNull(ValType, nullptr, E);
return this->emitNull(ValType, 0, nullptr, E);
APValue::LValueBase Base = Val.getLValueBase();
if (const Expr *BaseExpr = Base.dyn_cast<const Expr *>())
return this->visit(BaseExpr);
Expand All @@ -4431,7 +4434,7 @@ bool Compiler<Emitter>::visitAPValue(const APValue &Val, PrimType ValType,
} else if (Val.isMemberPointer()) {
if (const ValueDecl *MemberDecl = Val.getMemberPointerDecl())
return this->emitGetMemberPtr(MemberDecl, E);
return this->emitNullMemberPtr(nullptr, E);
return this->emitNullMemberPtr(0, nullptr, E);
}

return false;
Expand Down Expand Up @@ -4783,7 +4786,8 @@ bool Compiler<Emitter>::VisitCXXNullPtrLiteralExpr(
if (DiscardResult)
return true;

return this->emitNullPtr(nullptr, E);
uint64_t Val = Ctx.getASTContext().getTargetNullPointerValue(E->getType());
return this->emitNullPtr(Val, nullptr, E);
}

template <class Emitter>
Expand Down Expand Up @@ -5333,7 +5337,7 @@ bool Compiler<Emitter>::emitLambdaStaticInvokerBody(const CXXMethodDecl *MD) {
// one here, and we don't need one either because the lambda cannot have
// any captures, as verified above. Emit a null pointer. This is then
// special-cased when interpreting to not emit any misleading diagnostics.
if (!this->emitNullPtr(nullptr, MD))
if (!this->emitNullPtr(0, nullptr, MD))
return false;

// Forward all arguments from the static invoker to the lambda call operator.
Expand Down Expand Up @@ -6483,7 +6487,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
if (!this->discard(SubExpr))
return false;

return this->emitNullPtr(nullptr, E);
return this->emitNullPtr(0, nullptr, E);
}

if (FromType->isNullPtrType() && ToT) {
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/AST/ByteCode/Interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2432,9 +2432,11 @@ static inline bool ZeroIntAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) {
}

template <PrimType Name, class T = typename PrimConv<Name>::T>
inline bool Null(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
// Note: Desc can be null.
S.Stk.push<T>(0, Desc);
inline bool Null(InterpState &S, CodePtr OpPC, uint64_t Value,
const Descriptor *Desc) {
// FIXME(perf): This is a somewhat often-used function and the value of a
// null pointer is almost always 0.
S.Stk.push<T>(Value, Desc);
return true;
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1831,7 +1831,7 @@ static bool interp__builtin_memcpy(InterpState &S, CodePtr OpPC,
if (DestPtr.isDummy() || SrcPtr.isDummy())
return false;

if (!DoBitCastPtr(S, OpPC, SrcPtr, DestPtr))
if (!DoBitCastPtr(S, OpPC, SrcPtr, DestPtr, Size.getZExtValue()))
return false;

S.Stk.push<Pointer>(DestPtr);
Expand Down
24 changes: 17 additions & 7 deletions clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,13 @@ static bool readPointerToBuffer(const Context &Ctx, const Pointer &FromPtr,
}

assert(P.isInitialized());
// nullptr_t is a PT_Ptr for us, but it's still not std::is_pointer_v.
if (T == PT_Ptr)
assert(false && "Implement casting to pointer types");
if (T == PT_Ptr) {
assert(P.getType()->isNullPtrType());
// Clang treats nullptr_t as having NO bits in its value
// representation. So, we accept it here and leave its bits
// uninitialized.
return true;
}

auto Buff =
std::make_unique<std::byte[]>(ObjectReprChars.getQuantity());
Expand Down Expand Up @@ -315,9 +319,17 @@ bool clang::interp::DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,

return Success;
}

bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC,
                                 const Pointer &FromPtr, Pointer &ToPtr) {
  // No explicit size was supplied: use the size (in chars) of the destination
  // pointer's type and forward to the overload that takes the byte count.
  const size_t NumBytes =
      S.getASTContext().getTypeSizeInChars(ToPtr.getType()).getQuantity();
  return DoBitCastPtr(S, OpPC, FromPtr, ToPtr, NumBytes);
}

bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC,
const Pointer &FromPtr, Pointer &ToPtr,
size_t Size) {
assert(FromPtr.isLive());
assert(FromPtr.isBlockPointer());
assert(ToPtr.isBlockPointer());
Expand All @@ -331,9 +343,7 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC,
return false;

const ASTContext &ASTCtx = S.getASTContext();

CharUnits ObjectReprChars = ASTCtx.getTypeSizeInChars(ToType);
BitcastBuffer Buffer(Bits(ASTCtx.toBits(ObjectReprChars)));
BitcastBuffer Buffer(Bytes(Size).toBits());
readPointerToBuffer(S.getContext(), FromPtr, Buffer,
/*ReturnOnUninit=*/false);

Expand Down
2 changes: 2 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltinBitCast.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ bool DoBitCast(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
std::byte *Buff, size_t BuffSize, bool &HasIndeterminateBits);
bool DoBitCastPtr(InterpState &S, CodePtr OpPC, const Pointer &FromPtr,
Pointer &ToPtr);
bool DoBitCastPtr(InterpState &S, CodePtr OpPC, const Pointer &FromPtr,
Pointer &ToPtr, size_t Size);

} // namespace interp
} // namespace clang
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/AST/ByteCode/Opcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def ZeroIntAPS : Opcode {
// [] -> [Pointer]
def Null : Opcode {
let Types = [PtrTypeClass];
let Args = [ArgDesc];
let Args = [ArgUint64, ArgDesc];
let HasGroup = 1;
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/MicrosoftMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) {
case APFloat::S_x87DoubleExtended: Out << 'X'; break;
case APFloat::S_IEEEquad: Out << 'Y'; break;
case APFloat::S_PPCDoubleDouble: Out << 'Z'; break;
case APFloat::S_PPCDoubleDoubleLegacy:
case APFloat::S_Float8E5M2:
case APFloat::S_Float8E4M3:
case APFloat::S_Float8E4M3FN:
Expand Down
29 changes: 8 additions & 21 deletions clang/lib/Analysis/ExprMutationAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,21 @@ static bool canExprResolveTo(const Expr *Source, const Expr *Target) {
// This is matched by `IgnoreDerivedToBase(canResolveToExpr(InnerMatcher))`
// below.
const auto ConditionalOperatorM = [Target](const Expr *E) {
if (const auto *OP = dyn_cast<ConditionalOperator>(E)) {
if (const auto *TE = OP->getTrueExpr()->IgnoreParens())
if (canExprResolveTo(TE, Target))
return true;
if (const auto *FE = OP->getFalseExpr()->IgnoreParens())
if (canExprResolveTo(FE, Target))
return true;
}
return false;
};

const auto ElvisOperator = [Target](const Expr *E) {
if (const auto *OP = dyn_cast<BinaryConditionalOperator>(E)) {
if (const auto *TE = OP->getTrueExpr()->IgnoreParens())
if (canExprResolveTo(TE, Target))
return true;
if (const auto *FE = OP->getFalseExpr()->IgnoreParens())
if (canExprResolveTo(FE, Target))
return true;
if (const auto *CO = dyn_cast<AbstractConditionalOperator>(E)) {
const auto *TE = CO->getTrueExpr()->IgnoreParens();
if (TE && canExprResolveTo(TE, Target))
return true;
const auto *FE = CO->getFalseExpr()->IgnoreParens();
if (FE && canExprResolveTo(FE, Target))
return true;
}
return false;
};

const Expr *SourceExprP = Source->IgnoreParens();
return IgnoreDerivedToBase(SourceExprP,
[&](const Expr *E) {
return E == Target || ConditionalOperatorM(E) ||
ElvisOperator(E);
return E == Target || ConditionalOperatorM(E);
}) ||
EvalCommaExpr(SourceExprP, [&](const Expr *E) {
return IgnoreDerivedToBase(
Expand Down
65 changes: 35 additions & 30 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,8 +522,15 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,

Address DestAddr = CheckAtomicAlignment(CGF, E);

auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
auto *RTy = Exchange->getType();

auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));

if (RTy->isPointerTy()) {
Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
}

// For Release ordering, the failure ordering should be Monotonic.
auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
Expand All @@ -534,10 +541,16 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
// blocks the few atomics optimizations that LLVM has. If we want to optimize
// _Interlocked* operations in the future, we will have to remove the volatile
// marker.
auto *Result = CGF.Builder.CreateAtomicCmpXchg(
auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
Result->setVolatile(true);
return CGF.Builder.CreateExtractValue(Result, 0);
CmpXchg->setVolatile(true);

auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
if (RTy->isPointerTy()) {
Result = CGF.Builder.CreateIntToPtr(Result, RTy);
}

return Result;
}

// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
Expand Down Expand Up @@ -1620,6 +1633,7 @@ enum class CodeGenFunction::MSVCIntrin {
_BitScanForward,
_BitScanReverse,
_InterlockedAnd,
_InterlockedCompareExchange,
_InterlockedDecrement,
_InterlockedExchange,
_InterlockedExchangeAdd,
Expand Down Expand Up @@ -1705,26 +1719,31 @@ translateArmToMsvcIntrin(unsigned BuiltinID) {
case clang::ARM::BI_InterlockedExchange16_acq:
case clang::ARM::BI_InterlockedExchange_acq:
case clang::ARM::BI_InterlockedExchange64_acq:
case clang::ARM::BI_InterlockedExchangePointer_acq:
return MSVCIntrin::_InterlockedExchange_acq;
case clang::ARM::BI_InterlockedExchange8_rel:
case clang::ARM::BI_InterlockedExchange16_rel:
case clang::ARM::BI_InterlockedExchange_rel:
case clang::ARM::BI_InterlockedExchange64_rel:
case clang::ARM::BI_InterlockedExchangePointer_rel:
return MSVCIntrin::_InterlockedExchange_rel;
case clang::ARM::BI_InterlockedExchange8_nf:
case clang::ARM::BI_InterlockedExchange16_nf:
case clang::ARM::BI_InterlockedExchange_nf:
case clang::ARM::BI_InterlockedExchange64_nf:
case clang::ARM::BI_InterlockedExchangePointer_nf:
return MSVCIntrin::_InterlockedExchange_nf;
case clang::ARM::BI_InterlockedCompareExchange8_acq:
case clang::ARM::BI_InterlockedCompareExchange16_acq:
case clang::ARM::BI_InterlockedCompareExchange_acq:
case clang::ARM::BI_InterlockedCompareExchange64_acq:
case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
return MSVCIntrin::_InterlockedCompareExchange_acq;
case clang::ARM::BI_InterlockedCompareExchange8_rel:
case clang::ARM::BI_InterlockedCompareExchange16_rel:
case clang::ARM::BI_InterlockedCompareExchange_rel:
case clang::ARM::BI_InterlockedCompareExchange64_rel:
case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
return MSVCIntrin::_InterlockedCompareExchange_rel;
case clang::ARM::BI_InterlockedCompareExchange8_nf:
case clang::ARM::BI_InterlockedCompareExchange16_nf:
Expand Down Expand Up @@ -1851,26 +1870,31 @@ translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
case clang::AArch64::BI_InterlockedExchange16_acq:
case clang::AArch64::BI_InterlockedExchange_acq:
case clang::AArch64::BI_InterlockedExchange64_acq:
case clang::AArch64::BI_InterlockedExchangePointer_acq:
return MSVCIntrin::_InterlockedExchange_acq;
case clang::AArch64::BI_InterlockedExchange8_rel:
case clang::AArch64::BI_InterlockedExchange16_rel:
case clang::AArch64::BI_InterlockedExchange_rel:
case clang::AArch64::BI_InterlockedExchange64_rel:
case clang::AArch64::BI_InterlockedExchangePointer_rel:
return MSVCIntrin::_InterlockedExchange_rel;
case clang::AArch64::BI_InterlockedExchange8_nf:
case clang::AArch64::BI_InterlockedExchange16_nf:
case clang::AArch64::BI_InterlockedExchange_nf:
case clang::AArch64::BI_InterlockedExchange64_nf:
case clang::AArch64::BI_InterlockedExchangePointer_nf:
return MSVCIntrin::_InterlockedExchange_nf;
case clang::AArch64::BI_InterlockedCompareExchange8_acq:
case clang::AArch64::BI_InterlockedCompareExchange16_acq:
case clang::AArch64::BI_InterlockedCompareExchange_acq:
case clang::AArch64::BI_InterlockedCompareExchange64_acq:
case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
return MSVCIntrin::_InterlockedCompareExchange_acq;
case clang::AArch64::BI_InterlockedCompareExchange8_rel:
case clang::AArch64::BI_InterlockedCompareExchange16_rel:
case clang::AArch64::BI_InterlockedCompareExchange_rel:
case clang::AArch64::BI_InterlockedCompareExchange64_rel:
case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
return MSVCIntrin::_InterlockedCompareExchange_rel;
case clang::AArch64::BI_InterlockedCompareExchange8_nf:
case clang::AArch64::BI_InterlockedCompareExchange16_nf:
Expand Down Expand Up @@ -2073,6 +2097,8 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
case MSVCIntrin::_InterlockedExchange_nf:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
AtomicOrdering::Monotonic);
case MSVCIntrin::_InterlockedCompareExchange:
return EmitAtomicCmpXchgForMSIntrin(*this, E);
case MSVCIntrin::_InterlockedCompareExchange_acq:
return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
case MSVCIntrin::_InterlockedCompareExchange_rel:
Expand Down Expand Up @@ -5720,32 +5746,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
case Builtin::BI_InterlockedCompareExchangePointer:
case Builtin::BI_InterlockedCompareExchangePointer_nf: {
llvm::Type *RTy;
llvm::IntegerType *IntType = IntegerType::get(
getLLVMContext(), getContext().getTypeSize(E->getType()));

Address DestAddr = CheckAtomicAlignment(*this, E);

llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
RTy = Exchange->getType();
Exchange = Builder.CreatePtrToInt(Exchange, IntType);

llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);

auto Ordering =
BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;

auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
Ordering, Ordering);
Result->setVolatile(true);

return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
0),
RTy));
}
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
case Builtin::BI_InterlockedCompareExchangePointer_nf:
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
case Builtin::BI_InterlockedCompareExchange8:
case Builtin::BI_InterlockedCompareExchange16:
case Builtin::BI_InterlockedCompareExchange:
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->addFnAttr("ptrauth-auth-traps");
if (CodeGenOpts.PointerAuth.IndirectGotos)
Fn->addFnAttr("ptrauth-indirect-gotos");
if (CodeGenOpts.PointerAuth.AArch64JumpTableHardening)
Fn->addFnAttr("aarch64-jump-table-hardening");

// Apply xray attributes to the function (as a string, for now)
bool AlwaysXRayAttr = false;
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,8 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
Args.addOptInFlag(CmdArgs,
options::OPT_fptrauth_init_fini_address_discrimination,
options::OPT_fno_ptrauth_init_fini_address_discrimination);
Args.addOptInFlag(CmdArgs, options::OPT_faarch64_jump_table_hardening,
options::OPT_fno_aarch64_jump_table_hardening);
}

void Clang::AddLoongArchTargetArgs(const ArgList &Args,
Expand Down Expand Up @@ -5937,9 +5939,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (!Args.hasFlag(options::OPT_fstrict_aliasing, StrictAliasingAliasOption,
options::OPT_fno_strict_aliasing, !IsWindowsMSVC))
CmdArgs.push_back("-relaxed-aliasing");
if (Args.hasFlag(options::OPT_fpointer_tbaa, options::OPT_fno_pointer_tbaa,
if (Args.hasFlag(options::OPT_fno_pointer_tbaa, options::OPT_fpointer_tbaa,
false))
CmdArgs.push_back("-pointer-tbaa");
CmdArgs.push_back("-no-pointer-tbaa");
if (!Args.hasFlag(options::OPT_fstruct_path_tbaa,
options::OPT_fno_struct_path_tbaa, true))
CmdArgs.push_back("-no-struct-path-tbaa");
Expand Down
30 changes: 15 additions & 15 deletions clang/lib/Driver/ToolChains/Fuchsia.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,19 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,
addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);

// Sample these options first so they are claimed even under -nostdlib et al.
bool NoLibc = Args.hasArg(options::OPT_nolibc);
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
bool Pthreads = Args.hasArg(options::OPT_pthread, options::OPT_pthreads);
bool SplitStack = Args.hasArg(options::OPT_fsplit_stack);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
options::OPT_r)) {
if (Args.hasArg(options::OPT_static))
CmdArgs.push_back("-Bdynamic");

if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
CmdArgs.push_back("--push-state");
CmdArgs.push_back("--as-needed");
if (OnlyLibstdcxxStatic)
Expand All @@ -188,14 +192,13 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,

AddRunTimeLibs(ToolChain, D, CmdArgs, Args);

if (Args.hasArg(options::OPT_pthread) ||
Args.hasArg(options::OPT_pthreads))
if (Pthreads)
CmdArgs.push_back("-lpthread");

if (Args.hasArg(options::OPT_fsplit_stack))
if (SplitStack)
CmdArgs.push_back("--wrap=pthread_create");

if (!Args.hasArg(options::OPT_nolibc))
if (!NoLibc)
CmdArgs.push_back("-lc");
}

Expand Down Expand Up @@ -229,7 +232,7 @@ void fuchsia::StaticLibTool::ConstructJob(Compilation &C, const JobAction &JA,

for (const auto &II : Inputs) {
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
CmdArgs.push_back(II.getFilename());
}
}

Expand Down Expand Up @@ -343,16 +346,14 @@ std::string Fuchsia::ComputeEffectiveClangTriple(const ArgList &Args,
return Triple.str();
}

Tool *Fuchsia::buildLinker() const {
return new tools::fuchsia::Linker(*this);
}
Tool *Fuchsia::buildLinker() const { return new tools::fuchsia::Linker(*this); }

Tool *Fuchsia::buildStaticLibTool() const {
return new tools::fuchsia::StaticLibTool(*this);
}

ToolChain::RuntimeLibType Fuchsia::GetRuntimeLibType(
const ArgList &Args) const {
ToolChain::RuntimeLibType
Fuchsia::GetRuntimeLibType(const ArgList &Args) const {
if (Arg *A = Args.getLastArg(clang::driver::options::OPT_rtlib_EQ)) {
StringRef Value = A->getValue();
if (Value != "compiler-rt")
Expand All @@ -363,13 +364,12 @@ ToolChain::RuntimeLibType Fuchsia::GetRuntimeLibType(
return ToolChain::RLT_CompilerRT;
}

ToolChain::CXXStdlibType
Fuchsia::GetCXXStdlibType(const ArgList &Args) const {
ToolChain::CXXStdlibType Fuchsia::GetCXXStdlibType(const ArgList &Args) const {
if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
StringRef Value = A->getValue();
if (Value != "libc++")
getDriver().Diag(diag::err_drv_invalid_stdlib_name)
<< A->getAsString(Args);
<< A->getAsString(Args);
}

return ToolChain::CST_Libcxx;
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/Driver/ToolChains/Linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,24 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
ExtraOpts.push_back("-z");
ExtraOpts.push_back("max-page-size=16384");
}
if (Triple.isAndroidVersionLT(29)) {
// https://github.com/android/ndk/issues/1196
// The unwinder used by the crash handler on versions of Android prior to
// API 29 did not correctly handle binaries built with rosegment, which is
// enabled by default for LLD. Android only supports LLD, so it's not an
// issue that this flag is not accepted by other linkers.
ExtraOpts.push_back("--no-rosegment");
}
if (!Triple.isAndroidVersionLT(28)) {
// Android supports relr packing starting with API 28 and had its own
// flavor (--pack-dyn-relocs=android) starting in API 23.
// TODO: It's possible to use both with --pack-dyn-relocs=android+relr,
// but we need to gather some data on the impact of that form before we
// can know if it's a good default.
// On the other hand, relr should always be an improvement.
ExtraOpts.push_back("--use-android-relr-tags");
ExtraOpts.push_back("--pack-dyn-relocs=relr");
}
}

if (GCCInstallation.getParentLibPath().contains("opt/rh/"))
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1511,14 +1511,16 @@ void CompilerInvocation::setDefaultPointerAuthOptions(
Opts.ReturnAddresses = LangOpts.PointerAuthReturns;
Opts.AuthTraps = LangOpts.PointerAuthAuthTraps;
Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos;
Opts.AArch64JumpTableHardening = LangOpts.AArch64JumpTableHardening;
}

static void parsePointerAuthOptions(PointerAuthOptions &Opts,
const LangOptions &LangOpts,
const llvm::Triple &Triple,
DiagnosticsEngine &Diags) {
if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthReturns &&
!LangOpts.PointerAuthAuthTraps && !LangOpts.PointerAuthIndirectGotos)
!LangOpts.PointerAuthAuthTraps && !LangOpts.PointerAuthIndirectGotos &&
!LangOpts.AArch64JumpTableHardening)
return;

CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple);
Expand Down Expand Up @@ -3454,6 +3456,8 @@ static void GeneratePointerAuthArgs(const LangOptions &Opts,
GenerateArg(Consumer, OPT_fptrauth_init_fini_address_discrimination);
if (Opts.PointerAuthELFGOT)
GenerateArg(Consumer, OPT_fptrauth_elf_got);
if (Opts.AArch64JumpTableHardening)
GenerateArg(Consumer, OPT_faarch64_jump_table_hardening);
}

static void ParsePointerAuthArgs(LangOptions &Opts, ArgList &Args,
Expand All @@ -3475,6 +3479,8 @@ static void ParsePointerAuthArgs(LangOptions &Opts, ArgList &Args,
Opts.PointerAuthInitFiniAddressDiscrimination =
Args.hasArg(OPT_fptrauth_init_fini_address_discrimination);
Opts.PointerAuthELFGOT = Args.hasArg(OPT_fptrauth_elf_got);
Opts.AArch64JumpTableHardening =
Args.hasArg(OPT_faarch64_jump_table_hardening);
}

/// Check if input file kind and language standard are compatible.
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Headers/intrin0.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ long _InterlockedExchange_rel(long volatile *_Target, long _Value);
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
void *_InterlockedExchangePointer_acq(void *volatile *_Target, void *_Value);
void *_InterlockedExchangePointer_nf(void *volatile *_Target, void *_Value);
void *_InterlockedExchangePointer_rel(void *volatile *_Target, void *_Value);

/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
Expand Down Expand Up @@ -237,6 +240,12 @@ __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange,
__int64 _Comparand);
void *_InterlockedCompareExchangePointer_acq(void *volatile *_Destination,
void *_Exchange, void *_Comparand);
void *_InterlockedCompareExchangePointer_nf(void *volatile *_Destination,
void *_Exchange, void *_Comparand);
void *_InterlockedCompareExchangePointer_rel(void *volatile *_Destination,
void *_Exchange, void *_Comparand);
#endif

#ifdef __cplusplus
Expand Down
12 changes: 12 additions & 0 deletions clang/test/AST/ByteCode/amdgpu-nullptr.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -triple amdgcn -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -triple amdgcn -emit-llvm -fexperimental-new-constant-interpreter -o - | FileCheck %s


// A literal 0 cast through the global and generic address spaces into the
// private one: the initializer is expected to be emitted as an addrspacecast
// of the addrspace(1) null constant to addrspace(5).
// CHECK: @fold_priv ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) addrspacecast (ptr addrspace(1) null to ptr addrspace(5)), align 4
private short *fold_priv = (private short*)(generic int*)(global void*)0;

// Pointer arithmetic on a private null pointer. The expected constant is 9 for
// `null + 10`, so the private null value is presumably all-ones (-1) on this
// target rather than 0 -- NOTE(review): confirm against the amdgcn target.
// CHECK: @fold_priv_arith ={{.*}} local_unnamed_addr addrspace(1) global ptr addrspace(5) inttoptr (i32 9 to ptr addrspace(5)), align 4
private char *fold_priv_arith = (private char*)0 + 10;


2 changes: 1 addition & 1 deletion clang/test/AST/ByteCode/builtin-bit-cast-bitfields.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter %s
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple x86_64-linux-gnu -fexperimental-new-constant-interpreter %s
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple armv8 -fexperimental-new-constant-interpreter %s
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -triple aarch64_be-linux-gnu -fexperimental-new-constant-interpreter %s
// RUN: %clang_cc1 -verify=expected,both -std=c++2a -fsyntax-only -fexperimental-new-constant-interpreter -triple powerpc64le-unknown-unknown -mabi=ieeelongdouble %s
Expand Down
6 changes: 6 additions & 0 deletions clang/test/AST/ByteCode/builtin-bit-cast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,14 @@ namespace simple {

/// This works in GCC and in the bytecode interpreter, but the current interpreter
/// diagnoses it.
/// FIXME: Should also be rejected in the bytecode interpreter.
static_assert(__builtin_bit_cast(intptr_t, nullptr) == 0); // ref-error {{not an integral constant expression}} \
// ref-note {{indeterminate value can only initialize an object}}

constexpr int test_from_nullptr_pass = (__builtin_bit_cast(unsigned char[sizeof(nullptr)], nullptr), 0);
constexpr unsigned char NPData[sizeof(nullptr)] = {1,2,3,4};
constexpr nullptr_t NP = __builtin_bit_cast(nullptr_t, NPData);
static_assert(NP == nullptr);
}

namespace Fail {
Expand Down
7 changes: 7 additions & 0 deletions clang/test/AST/ByteCode/builtin-functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,13 @@ namespace BuiltinMemcpy {
}
static_assert(simple() == 12);

// Copies an entire char array (three characters plus the terminating NUL,
// 4 bytes in total) with __builtin_memcpy and checks every destination
// element; the static_assert forces this to happen during constant evaluation.
constexpr bool arrayMemcpy() {
char src[] = "abc";
char dst[4] = {};
__builtin_memcpy(dst, src, 4);
return dst[0] == 'a' && dst[1] == 'b' && dst[2] == 'c' && dst[3] == '\0';
}
static_assert(arrayMemcpy());

extern struct Incomplete incomplete;
constexpr struct Incomplete *null_incomplete = 0;
Expand Down
56 changes: 28 additions & 28 deletions clang/test/CodeGen/attr-counted-by.c

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions clang/test/CodeGen/ms-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,41 @@ void *test_InterlockedExchangePointer(void * volatile *Target, void *Value) {
// CHECK: ret ptr %[[RESULT]]
// CHECK: }

#if defined(__arm__) || defined(__aarch64__)
// Wrapper around the _acq pointer exchange intrinsic. The ARM/ARM64
// expectations that follow require an `atomicrmw xchg` with acquire ordering,
// bracketed by ptrtoint/inttoptr conversions.
void *test_InterlockedExchangePointer_acq(void * volatile *Target, void *Value) {
return _InterlockedExchangePointer_acq(Target, Value);
}

// CHECK-ARM-ARM64: define{{.*}}ptr @test_InterlockedExchangePointer_acq(ptr {{[a-z_ ]*}}%Target, ptr {{[a-z_ ]*}}%Value){{.*}}{
// CHECK-ARM-ARM64: %[[VALUE:[0-9]+]] = ptrtoint ptr %Value to [[iPTR:i[0-9]+]]
// CHECK-ARM-ARM64: %[[EXCHANGE:[0-9]+]] = atomicrmw xchg ptr %Target, [[iPTR]] %[[VALUE]] acquire, align {{4|8}}
// CHECK-ARM-ARM64: %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXCHANGE]] to ptr
// CHECK-ARM-ARM64: ret ptr %[[RESULT]]
// CHECK-ARM-ARM64: }

// Wrapper around the _nf pointer exchange intrinsic. The ARM/ARM64
// expectations that follow require an `atomicrmw xchg` with monotonic
// ordering, bracketed by ptrtoint/inttoptr conversions.
void *test_InterlockedExchangePointer_nf(void * volatile *Target, void *Value) {
return _InterlockedExchangePointer_nf(Target, Value);
}

// CHECK-ARM-ARM64: define{{.*}}ptr @test_InterlockedExchangePointer_nf(ptr {{[a-z_ ]*}}%Target, ptr {{[a-z_ ]*}}%Value){{.*}}{
// CHECK-ARM-ARM64: %[[VALUE:[0-9]+]] = ptrtoint ptr %Value to [[iPTR]]
// CHECK-ARM-ARM64: %[[EXCHANGE:[0-9]+]] = atomicrmw xchg ptr %Target, [[iPTR]] %[[VALUE]] monotonic, align {{4|8}}
// CHECK-ARM-ARM64: %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXCHANGE]] to ptr
// CHECK-ARM-ARM64: ret ptr %[[RESULT]]
// CHECK-ARM-ARM64: }

// Wrapper around the _rel pointer exchange intrinsic. The ARM/ARM64
// expectations that follow require an `atomicrmw xchg` with release ordering,
// bracketed by ptrtoint/inttoptr conversions.
void *test_InterlockedExchangePointer_rel(void * volatile *Target, void *Value) {
return _InterlockedExchangePointer_rel(Target, Value);
}

// CHECK-ARM-ARM64: define{{.*}}ptr @test_InterlockedExchangePointer_rel(ptr {{[a-z_ ]*}}%Target, ptr {{[a-z_ ]*}}%Value){{.*}}{
// CHECK-ARM-ARM64: %[[VALUE:[0-9]+]] = ptrtoint ptr %Value to [[iPTR]]
// CHECK-ARM-ARM64: %[[EXCHANGE:[0-9]+]] = atomicrmw xchg ptr %Target, [[iPTR]] %[[VALUE]] release, align {{4|8}}
// CHECK-ARM-ARM64: %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXCHANGE]] to ptr
// CHECK-ARM-ARM64: ret ptr %[[RESULT]]
// CHECK-ARM-ARM64: }
#endif

void *test_InterlockedCompareExchangePointer(void * volatile *Destination,
void *Exchange, void *Comparand) {
return _InterlockedCompareExchangePointer(Destination, Exchange, Comparand);
Expand Down Expand Up @@ -249,6 +284,37 @@ void *test_InterlockedCompareExchangePointer_nf(void * volatile *Destination,
// CHECK: ret ptr %[[RESULT:[0-9]+]]
// CHECK: }

#if defined(__arm__) || defined(__aarch64__)
// Wrapper around the _acq pointer compare-exchange intrinsic. The ARM/ARM64
// expectations that follow require a volatile `cmpxchg` on integer-converted
// operands with acquire success and acquire failure ordering, and the
// extracted old value converted back to a pointer.
void *test_InterlockedCompareExchangePointer_acq(void * volatile *Destination,
void *Exchange, void *Comparand) {
return _InterlockedCompareExchangePointer_acq(Destination, Exchange, Comparand);
}

// CHECK-ARM-ARM64: define{{.*}}ptr @test_InterlockedCompareExchangePointer_acq(ptr {{[a-z_ ]*}}%Destination, ptr {{[a-z_ ]*}}%Exchange, ptr {{[a-z_ ]*}}%Comparand){{.*}}{
// CHECK-ARM-ARM64: %[[EXCHANGE:[0-9]+]] = ptrtoint ptr %Exchange to [[iPTR]]
// CHECK-ARM-ARM64: %[[COMPARAND:[0-9]+]] = ptrtoint ptr %Comparand to [[iPTR]]
// CHECK-ARM-ARM64: %[[XCHG:[0-9]+]] = cmpxchg volatile ptr %[[DEST:.+]], [[iPTR]] %[[COMPARAND:[0-9]+]], [[iPTR]] %[[EXCHANGE:[0-9]+]] acquire acquire, align {{4|8}}
// CHECK-ARM-ARM64: %[[EXTRACT:[0-9]+]] = extractvalue { [[iPTR]], i1 } %[[XCHG]], 0
// CHECK-ARM-ARM64: %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXTRACT]] to ptr
// CHECK-ARM-ARM64: ret ptr %[[RESULT:[0-9]+]]
// CHECK-ARM-ARM64: }


// Test input for _InterlockedCompareExchangePointer_rel: forwards all three
// arguments to the intrinsic and returns its result.
//
// The expectations below capture the two ptrtoint results and then *reuse*
// those captures on the cmpxchg line (comparand first, then exchange) and on
// the ret line. Re-defining a variable at its use site would accept any
// numbered value there and leave the operand wiring unverified.
void *test_InterlockedCompareExchangePointer_rel(void * volatile *Destination,
                                                 void *Exchange, void *Comparand) {
  return _InterlockedCompareExchangePointer_rel(Destination, Exchange, Comparand);
}

// CHECK-ARM-ARM64: define{{.*}}ptr @test_InterlockedCompareExchangePointer_rel(ptr {{[a-z_ ]*}}%Destination, ptr {{[a-z_ ]*}}%Exchange, ptr {{[a-z_ ]*}}%Comparand){{.*}}{
// CHECK-ARM-ARM64: %[[EXCHANGE:[0-9]+]] = ptrtoint ptr %Exchange to [[iPTR]]
// CHECK-ARM-ARM64: %[[COMPARAND:[0-9]+]] = ptrtoint ptr %Comparand to [[iPTR]]
// CHECK-ARM-ARM64: %[[XCHG:[0-9]+]] = cmpxchg volatile ptr %[[DEST:.+]], [[iPTR]] %[[COMPARAND]], [[iPTR]] %[[EXCHANGE]] release monotonic, align {{4|8}}
// CHECK-ARM-ARM64: %[[EXTRACT:[0-9]+]] = extractvalue { [[iPTR]], i1 } %[[XCHG]], 0
// CHECK-ARM-ARM64: %[[RESULT:[0-9]+]] = inttoptr [[iPTR]] %[[EXTRACT]] to ptr
// CHECK-ARM-ARM64: ret ptr %[[RESULT]]
// CHECK-ARM-ARM64: }
#endif

// Test input for _InterlockedExchange8: forwards `value` and `mask` to the
// intrinsic and returns its result.
// NOTE(review): the IR expectations for this function are presumably the
// lines that follow it in the full file; they are not visible in this hunk.
char test_InterlockedExchange8(char volatile *value, char mask) {
return _InterlockedExchange8(value, mask);
}
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGen/ptrauth-function-attributes.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-indirect-gotos -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,GOTOS

// RUN: %clang_cc1 -triple arm64e-apple-ios -faarch64-jump-table-hardening -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,JMPTBL
// RUN: %clang_cc1 -triple aarch64-linux-gnu -faarch64-jump-table-hardening -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,JMPTBL

// ALL: define {{(dso_local )?}}void @test() #0
// Deliberately empty: only the attribute group attached to this definition
// matters. The shared expectation above requires the definition to reference
// attribute group #0, and the per-configuration expectations further down
// inspect the contents of that group.
void test() {
}
Expand All @@ -26,4 +29,6 @@ void test() {

// GOTOS: attributes #0 = {{{.*}} "ptrauth-indirect-gotos" {{.*}}}

// JMPTBL: attributes #0 = {{{.*}} "aarch64-jump-table-hardening" {{.*}}}

// OFF-NOT: attributes {{.*}} "ptrauth-
204 changes: 102 additions & 102 deletions clang/test/CodeGen/tbaa-pointers.c

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions clang/test/CodeGen/tbaa-reference.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,OLD-PATH
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes -pointer-tbaa %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,OLD-PATH-POINTER
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -emit-llvm -new-struct-path-tbaa -o - | FileCheck %s -check-prefixes=CHECK,NEW-PATH
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -pointer-tbaa -emit-llvm -new-struct-path-tbaa -o - | FileCheck %s -check-prefixes=CHECK,NEW-PATH-POINTER
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes -no-pointer-tbaa %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,OLD-PATH
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,OLD-PATH-POINTER
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -no-pointer-tbaa -emit-llvm -new-struct-path-tbaa -o - | FileCheck %s -check-prefixes=CHECK,NEW-PATH
// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s -emit-llvm -new-struct-path-tbaa -o - | FileCheck %s -check-prefixes=CHECK,NEW-PATH-POINTER
//
// Check that we generate correct TBAA information for reference accesses.

Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGenCXX/template-instantiation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

// CHECK2-NOT: _ZTVN5test31SIiEE
// CHECK2-NOT: _ZTSN5test31SIiEE
// CHECK2: !{!"p1 _ZTSN5test31SIiEE",

// CHECK-LABEL: define linkonce_odr void @_ZN5test21CIiEC1Ev(ptr {{[^,]*}} %this) unnamed_addr
// CHECK-LABEL: define linkonce_odr void @_ZN5test21CIiE6foobarIdEEvT_(
Expand Down
153 changes: 80 additions & 73 deletions clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,7 @@ void test_cvt_scalef32_sr_fp8_f32(global unsigned *out, float src, uint seed, fl
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i8 1)
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT: ret void
Expand All @@ -1696,7 +1696,7 @@ void test_bitop3_b32(global uint* out, uint a, uint b, uint c)
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(5) [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr addrspace(5) [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[C_ADDR]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i8 1)
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[TMP4]], align 2
// CHECK-NEXT: ret void
Expand Down
6 changes: 4 additions & 2 deletions clang/test/Driver/aarch64-ptrauth.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
// RUN: -fno-ptrauth-indirect-gotos -fptrauth-indirect-gotos \
// RUN: -fno-ptrauth-init-fini -fptrauth-init-fini \
// RUN: -fno-ptrauth-init-fini-address-discrimination -fptrauth-init-fini-address-discrimination \
// RUN: -fno-aarch64-jump-table-hardening -faarch64-jump-table-hardening \
// RUN: %s 2>&1 | FileCheck %s --check-prefix=ALL
// ALL: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-indirect-gotos" "-fptrauth-init-fini" "-fptrauth-init-fini-address-discrimination"
// ALL: "-cc1"{{.*}} "-fptrauth-intrinsics" "-fptrauth-calls" "-fptrauth-returns" "-fptrauth-auth-traps" "-fptrauth-vtable-pointer-address-discrimination" "-fptrauth-vtable-pointer-type-discrimination" "-fptrauth-type-info-vtable-pointer-discrimination" "-fptrauth-indirect-gotos" "-fptrauth-init-fini" "-fptrauth-init-fini-address-discrimination" "-faarch64-jump-table-hardening"

// RUN: %clang -### -c --target=aarch64-linux -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
// RUN: %clang -### -c --target=aarch64-linux-pauthtest %s 2>&1 | FileCheck %s --check-prefix=PAUTHABI1
Expand All @@ -38,7 +39,7 @@
// RUN: not %clang -### -c --target=x86_64 -fptrauth-intrinsics -fptrauth-calls -fptrauth-returns -fptrauth-auth-traps \
// RUN: -fptrauth-vtable-pointer-address-discrimination -fptrauth-vtable-pointer-type-discrimination \
// RUN: -fptrauth-type-info-vtable-pointer-discrimination -fptrauth-indirect-gotos -fptrauth-init-fini \
// RUN: -fptrauth-init-fini-address-discrimination %s 2>&1 | FileCheck %s --check-prefix=ERR1
// RUN: -fptrauth-init-fini-address-discrimination -faarch64-jump-table-hardening %s 2>&1 | FileCheck %s --check-prefix=ERR1
// ERR1: error: unsupported option '-fptrauth-intrinsics' for target '{{.*}}'
// ERR1-NEXT: error: unsupported option '-fptrauth-calls' for target '{{.*}}'
// ERR1-NEXT: error: unsupported option '-fptrauth-returns' for target '{{.*}}'
Expand All @@ -49,6 +50,7 @@
// ERR1-NEXT: error: unsupported option '-fptrauth-indirect-gotos' for target '{{.*}}'
// ERR1-NEXT: error: unsupported option '-fptrauth-init-fini' for target '{{.*}}'
// ERR1-NEXT: error: unsupported option '-fptrauth-init-fini-address-discrimination' for target '{{.*}}'
// ERR1-NEXT: error: unsupported option '-faarch64-jump-table-hardening' for target '{{.*}}'

//// Only support PAuth ABI for Linux as for now.
// RUN: not %clang -o /dev/null -c --target=aarch64-unknown -mabi=pauthtest %s 2>&1 | FileCheck %s --check-prefix=ERR2
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/fuchsia.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,9 @@
// RUN: %clang --target=riscv64-unknown-fuchsia -mno-relax -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=RISCV64-FLAGS %s
// RISCV64-FLAGS: "-X" "--no-relax"

// RUN: %clang -### %s --target=x86_64-unknown-fuchsia 2>&1 \
// RUN: -nostdlib -nolibc \
// RUN: | FileCheck %s -check-prefix=CHECK-NOSTDLIB-NOLIBC
// CHECK-NOSTDLIB-NOLIBC-NOT: "warning:"
// CHECK-NOSTDLIB-NOLIBC-NOT: "error:"
30 changes: 30 additions & 0 deletions clang/test/Driver/linux-ld.c
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,36 @@
// CHECK-ANDROID-HASH-STYLE-M: "{{.*}}ld{{(.exe)?}}"
// CHECK-ANDROID-HASH-STYLE-M: "--hash-style=gnu"

// Check that we pass --no-rosegment for pre-29 Android versions and do not for
// 29+.
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=armv7-linux-android28 \
// RUN: | FileCheck --check-prefix=CHECK-ANDROID-ROSEGMENT-28 %s
// CHECK-ANDROID-ROSEGMENT-28: "{{.*}}ld{{(.exe)?}}"
// CHECK-ANDROID-ROSEGMENT-28: "--no-rosegment"
//
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=armv7-linux-android29 \
// RUN: | FileCheck --check-prefix=CHECK-ANDROID-ROSEGMENT-29 %s
// CHECK-ANDROID-ROSEGMENT-29: "{{.*}}ld{{(.exe)?}}"
// CHECK-ANDROID-ROSEGMENT-29-NOT: "--no-rosegment"

// Check that we pass --pack-dyn-relocs=relr for API 28+ and not before.
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=armv7-linux-android27 \
// RUN: | FileCheck --check-prefix=CHECK-ANDROID-RELR-27 %s
// CHECK-ANDROID-RELR-27: "{{.*}}ld{{(.exe)?}}"
// CHECK-ANDROID-RELR-27-NOT: "--pack-dyn-relocs=relr"
// CHECK-ANDROID-RELR-27-NOT: "--pack-dyn-relocs=android+relr"
//
// RUN: %clang %s -### -o %t.o 2>&1 \
// RUN: --target=armv7-linux-android28 \
// RUN: | FileCheck --check-prefix=CHECK-ANDROID-RELR-28 %s
// CHECK-ANDROID-RELR-28: "{{.*}}ld{{(.exe)?}}"
// CHECK-ANDROID-RELR-28: "--use-android-relr-tags"
// CHECK-ANDROID-RELR-28: "--pack-dyn-relocs=relr"
// CHECK-ANDROID-RELR-28-NOT: "--pack-dyn-relocs=android+relr"

// RUN: %clang -### %s -no-pie 2>&1 --target=mips64-linux-gnuabin32 \
// RUN: | FileCheck --check-prefix=CHECK-MIPS64EL-GNUABIN32 %s
// CHECK-MIPS64EL-GNUABIN32: "{{.*}}ld{{(.exe)?}}"
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Format/docs_updated.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// RUN: %python %S/../../docs/tools/dump_format_style.py %t
// RUN: diff %t %S/../../docs/ClangFormatStyleOptions.rst
1 change: 1 addition & 0 deletions clang/test/Format/lit.local.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ config.suffixes = [
".textpb",
".asciipb",
".td",
".test"
]

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct S {
// CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr nonnull @.omp_task_entry..2)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA9:![0-9]+]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA10:![0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
Expand All @@ -54,11 +54,11 @@ struct S {
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..4)
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[TBAA13:![0-9]+]]
// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[TBAA15:![0-9]+]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 48
// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
Expand All @@ -75,28 +75,28 @@ struct S {
// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA17:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA15:![0-9]+]]
// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA19:![0-9]+]]
// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32
// CHECK-NEXT: [[SUB12:%.*]] = sub i32 [[CONV]], [[TMP14]]
// CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[SUB12]] to i64
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV15]], [[IDXPROM]]
// CHECK-NEXT: [[SUB16:%.*]] = add nsw i64 [[MUL]], -1
// CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6)
// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA16:![0-9]+]]
// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA20:![0-9]+]]
// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA23:![0-9]+]]
// CHECK-NEXT: [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 8
// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA25:![0-9]+]]
// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK-NEXT: [[TMP21:%.*]] = sext i1 [[TOBOOL]] to i32
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 48
// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP25]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP18]], i32 [[TMP21]], ptr nonnull [[TMP22]], ptr nonnull [[TMP23]], i64 1, i32 1, i32 2, i64 4, i32 1, ptr null) #[[ATTR1]]
Expand All @@ -111,11 +111,11 @@ struct S {
// CHECK-NEXT: call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP27:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..8)
// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 48
// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP31]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP27]], i32 1, ptr nonnull [[TMP28]], ptr nonnull [[TMP29]], i64 1, i32 1, i32 0, i64 0, ptr null)
Expand All @@ -137,22 +137,22 @@ struct S {
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
// CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
// CHECK: [[OMP_IF_THEN]]:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[TBAA27:![0-9]+]]
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[TBAA35:![0-9]+]]
// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[TMP3]], -1
// CHECK-NEXT: [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10)
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA16]]
// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA20]]
// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA37:![0-9]+]]
// CHECK-NEXT: [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 8
// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA23]]
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 48
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SUB4]] to i64
// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA15]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
Expand Down
13 changes: 9 additions & 4 deletions clang/unittests/CodeGen/TBAAMetadataTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ auto OmnipotentCharC = MMTuple(
MConstInt(0, 64)
);

// Matcher for the "any pointer" metadata tuple: the name string, the
// OmnipotentCharC matcher as the second operand, and MConstInt(0, 64) as the
// third. The BasicTypes test uses it as the second operand of the "p1 void"
// and "p1 int" tuples.
auto AnyPtr = MMTuple(
MMString("any pointer"),
OmnipotentCharC,
MConstInt(0, 64)
);

auto OmnipotentCharCXX = MMTuple(
MMString("omnipotent char"),
Expand Down Expand Up @@ -116,8 +121,8 @@ TEST(TBAAMetadataTest, BasicTypes) {
MValType(PointerType::getUnqual(Compiler.Context)),
MMTuple(
MMTuple(
MMString("any pointer"),
OmnipotentCharC,
MMString("p1 void"),
AnyPtr,
MConstInt(0)),
MSameAs(0),
MConstInt(0))));
Expand All @@ -128,8 +133,8 @@ TEST(TBAAMetadataTest, BasicTypes) {
MValType(PointerType::getUnqual(Compiler.Context)),
MMTuple(
MMTuple(
MMString("any pointer"),
OmnipotentCharC,
MMString("p1 int"),
AnyPtr,
MConstInt(0)),
MSameAs(0),
MConstInt(0))));
Expand Down
19 changes: 19 additions & 0 deletions compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,22 @@ INTERCEPTOR(int, mkfifo, const char *pathname, mode_t mode) {
return REAL(mkfifo)(pathname, mode);
}

// Interceptor for fork(): reports the call through
// __rtsan_notify_intercepted_call("fork") and then forwards to the real
// fork(), returning its result unchanged.
INTERCEPTOR(pid_t, fork, void) {
__rtsan_notify_intercepted_call("fork");
return REAL(fork)();
}

// Interceptor for execve(): reports the call through
// __rtsan_notify_intercepted_call("execve") and then forwards filename, argv
// and envp to the real execve(), returning its result unchanged.
INTERCEPTOR(int, execve, const char *filename, char *const argv[],
char *const envp[]) {
__rtsan_notify_intercepted_call("execve");
return REAL(execve)(filename, argv, envp);
}

// TODO: the `wait` family of functions is an oddity. In testing, when they are
// intercepted, Darwin seemingly ignores the interceptors and Linux never
// returns from the test. Revisit this in the future; hopefully intercepting
// fork/exec is enough to dissuade usage of wait by proxy.

#if SANITIZER_APPLE
#define INT_TYPE_SYSCALL int
#else
Expand Down Expand Up @@ -956,6 +972,9 @@ void __rtsan::InitializeInterceptors() {
INTERCEPT_FUNCTION(pipe);
INTERCEPT_FUNCTION(mkfifo);

INTERCEPT_FUNCTION(fork);
INTERCEPT_FUNCTION(execve);

INTERCEPT_FUNCTION(syscall);
}

Expand Down
59 changes: 59 additions & 0 deletions compiler-rt/test/rtsan/fork_exec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: %clangxx -fsanitize=realtime -DIS_NONBLOCKING=1 %s -o %t
// RUN: %env_rtsan_opts="halt_on_error=true" not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-HALT
// RUN: %env_rtsan_opts="halt_on_error=false" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOHALT

// RUN: %clangxx -fsanitize=realtime -DIS_NONBLOCKING=0 %s -o %t
// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-OK
// RUN: %env_rtsan_opts="halt_on_error=false" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-OK

// UNSUPPORTED: ios

// Intent: Ensure fork/exec is reported as an error when called from a realtime
// ([[clang::nonblocking]]) context, and runs normally otherwise.
// This behavior is difficult to test in a gtest, because the process image is
// replaced by exec.

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// IS_NONBLOCKING is supplied with -D on the compile lines at the top of this
// file. When it is nonzero, MAYBE_NONBLOCKING expands to the
// [[clang::nonblocking]] attribute applied to main(); otherwise it expands to
// nothing.
#if IS_NONBLOCKING
# define MAYBE_NONBLOCKING [[clang::nonblocking]]
#else
# define MAYBE_NONBLOCKING
#endif

// Forks a child that replaces itself with /bin/ls via execve. The parent waits
// for the child and then calls usleep, so that one more intercepted call is
// made in the parent after the fork. Returns 0 after printing the success
// line, or 1 if fork or execve fails.
int main() MAYBE_NONBLOCKING {
  const pid_t pid = fork();

  if (pid == 0) {
    // Child. execve takes `char *const argv[]`, and ISO C++ does not allow a
    // string literal to convert to `char *`, so the path is kept in a writable
    // array rather than used directly as a literal.
    char path[] = "/bin/ls";
    char *args[] = {path, nullptr};
    execve(args[0], args, nullptr);
    // Only reached when execve did not replace the process image.
    perror("execve failed");
    return 1;
  } else if (pid > 0) {
    // Parent: reap the child, then make one more call in this process.
    int status = 0;
    waitpid(pid, &status, 0);
    usleep(1);
  } else {
    perror("fork failed");
    return 1;
  }

  printf("fork/exec succeeded\n");
  return 0;
}

// CHECK-NOHALT: Intercepted call to {{.*}} `fork` {{.*}}
// CHECK-NOHALT: Intercepted call to {{.*}} `execve` {{.*}}

// usleep checks that rtsan is still enabled in the parent process
// See note in our interceptors file for why we don't look for `wait`
// CHECK-NOHALT: Intercepted call to {{.*}} `usleep` {{.*}}

// CHECK-NOHALT: fork/exec succeeded

// CHECK-HALT: ==ERROR: RealtimeSanitizer: unsafe-library-call
// CHECK-HALT-NEXT: Intercepted call to {{.*}} `fork` {{.*}}

// CHECK-OK: fork/exec succeeded
44 changes: 44 additions & 0 deletions flang/include/flang/Common/Fortran-consts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===-- include/flang/Common/Fortran-consts.h -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_COMMON_FORTRAN_CONSTS_H_
#define FORTRAN_COMMON_FORTRAN_CONSTS_H_

#include "flang/Common/enum-class.h"
#include <cstdint>

namespace Fortran::common {

// Fortran has five kinds of intrinsic data types, plus the derived types.
ENUM_CLASS(TypeCategory, Integer, Real, Complex, Character, Logical, Derived)
ENUM_CLASS(VectorElementCategory, Integer, Unsigned, Real)

ENUM_CLASS(IoStmtKind, None, Backspace, Close, Endfile, Flush, Inquire, Open,
Print, Read, Rewind, Wait, Write)

// Defined I/O variants
ENUM_CLASS(
DefinedIo, ReadFormatted, ReadUnformatted, WriteFormatted, WriteUnformatted)

// Fortran arrays may have up to 15 dimensions (See Fortran 2018 section 5.4.6).
static constexpr int maxRank{15};

// Floating-point rounding modes; these are packed into a byte to save
// room in the runtime's format processing context structure. These
// enumerators are defined with the corresponding values returned from
// llvm.get.rounding.
enum class RoundingMode : std::uint8_t {
  // ROUND=ZERO, RZ: truncation
  ToZero = 0,
  // ROUND=NEAREST, RN: the default IEEE rounding
  TiesToEven = 1,
  // ROUND=UP, RU
  Up = 2,
  // ROUND=DOWN, RD
  Down = 3,
  // ROUND=COMPATIBLE, RC: ties round away from zero
  TiesAwayFromZero = 4,
};

} // namespace Fortran::common
#endif /* FORTRAN_COMMON_FORTRAN_CONSTS_H_ */
26 changes: 1 addition & 25 deletions flang/include/flang/Common/Fortran.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@

#include "enum-set.h"
#include "idioms.h"
#include "flang/Common/Fortran-consts.h"
#include <cinttypes>
#include <optional>
#include <string>

namespace Fortran::common {
class LanguageFeatureControl;

// Fortran has five kinds of intrinsic data types, plus the derived types.
ENUM_CLASS(TypeCategory, Integer, Real, Complex, Character, Logical, Derived)
ENUM_CLASS(VectorElementCategory, Integer, Unsigned, Real)

constexpr bool IsNumericTypeCategory(TypeCategory category) {
return category == TypeCategory::Integer || category == TypeCategory::Real ||
category == TypeCategory::Complex;
Expand All @@ -47,9 +44,6 @@ const char *AsFortran(RelationalOperator);

ENUM_CLASS(Intent, Default, In, Out, InOut)

ENUM_CLASS(IoStmtKind, None, Backspace, Close, Endfile, Flush, Inquire, Open,
Print, Read, Rewind, Wait, Write)

// Union of specifiers for all I/O statements.
ENUM_CLASS(IoSpecKind, Access, Action, Advance, Asynchronous, Blank, Decimal,
Delim, Direct, Encoding, End, Eor, Err, Exist, File, Fmt, Form, Formatted,
Expand All @@ -61,29 +55,11 @@ ENUM_CLASS(IoSpecKind, Access, Action, Advance, Asynchronous, Blank, Decimal,
Dispose, // nonstandard
)

// Defined I/O variants
ENUM_CLASS(
DefinedIo, ReadFormatted, ReadUnformatted, WriteFormatted, WriteUnformatted)
const char *AsFortran(DefinedIo);

// Floating-point rounding modes; these are packed into a byte to save
// room in the runtime's format processing context structure. These
// enumerators are defined with the corresponding values returned from
// llvm.get.rounding.
enum class RoundingMode : std::uint8_t {
ToZero, // ROUND=ZERO, RZ - truncation
TiesToEven, // ROUND=NEAREST, RN - default IEEE rounding
Up, // ROUND=UP, RU
Down, // ROUND=DOWN, RD
TiesAwayFromZero, // ROUND=COMPATIBLE, RC - ties round away from zero
};

// Fortran label. Must be in [1..99999].
using Label = std::uint64_t;

// Fortran arrays may have up to 15 dimensions (See Fortran 2018 section 5.4.6).
static constexpr int maxRank{15};

// CUDA subprogram attribute combinations
ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, Global, Grid_Global)

Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Common/format.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#define FORTRAN_COMMON_FORMAT_H_

#include "enum-set.h"
#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include <cstring>

// Define a FormatValidator class template to validate a format expression
Expand Down
37 changes: 37 additions & 0 deletions flang/include/flang/Common/target-rounding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- include/flang/Common/target-rounding.h ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_COMMON_TARGET_ROUNDING_H_
#define FORTRAN_COMMON_TARGET_ROUNDING_H_

#include "flang/Common/Fortran-consts.h"
#include "flang/Common/enum-set.h"

namespace Fortran::common {

// Floating-point rounding control
// Bundles the floating-point rounding mode with a flag selecting x86-style
// status-flag emulation.
struct Rounding {
// Rounding mode; defaults to round-to-nearest, ties-to-even.
common::RoundingMode mode{common::RoundingMode::TiesToEven};
// When set, emulate status flag behavior peculiar to x86
// (viz., fail to set the Underflow flag when an inexact product of a
// multiplication is rounded up to a normal number from a subnormal
// in some rounding modes).
// The default is chosen by the preprocessor when this header is compiled:
// true if any of __x86_64__, __riscv or __loongarch__ is nonzero, false
// otherwise.
#if __x86_64__ || __riscv || __loongarch__
bool x86CompatibleBehavior{true};
#else
bool x86CompatibleBehavior{false};
#endif
};

// These are ordered like the bits in a common fenv.h header file.
// Floating-point exception flags.
// These are ordered like the bits in a common fenv.h header file.
ENUM_CLASS(RealFlag, InvalidArgument, Denorm, DivideByZero, Overflow, Underflow,
Inexact)
// A set of RealFlag values, sized by RealFlag_enumSize (presumably generated by
// the ENUM_CLASS invocation above -- confirm in enum-class.h).
using RealFlags = common::EnumSet<RealFlag, RealFlag_enumSize>;

} // namespace Fortran::common
#endif /* FORTRAN_COMMON_TARGET_ROUNDING_H_ */
8 changes: 3 additions & 5 deletions flang/include/flang/Evaluate/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "flang/Common/idioms.h"
#include "flang/Common/indirection.h"
#include "flang/Common/restorer.h"
#include "flang/Common/target-rounding.h"
#include "flang/Parser/char-block.h"
#include "flang/Parser/message.h"
#include <cinttypes>
Expand All @@ -32,6 +33,8 @@ class IntrinsicProcTable;
class TargetCharacteristics;

using common::ConstantSubscript;
using common::RealFlag;
using common::RealFlags;
using common::RelationalOperator;

// Integers are always ordered; reals may not be.
Expand Down Expand Up @@ -128,11 +131,6 @@ static constexpr bool Satisfies(RelationalOperator op, Relation relation) {
return false; // silence g++ warning
}

// These are ordered like the bits in a common fenv.h header file.
ENUM_CLASS(RealFlag, InvalidArgument, Denorm, DivideByZero, Overflow, Underflow,
Inexact)
using RealFlags = common::EnumSet<RealFlag, RealFlag_enumSize>;

template <typename A> struct ValueWithRealFlags {
A AccumulateFlags(RealFlags &f) {
f |= flags;
Expand Down
28 changes: 12 additions & 16 deletions flang/include/flang/Evaluate/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,13 @@
#include "flang/Common/Fortran.h"
#include "flang/Common/enum-class.h"
#include "flang/Common/enum-set.h"
#include "flang/Common/target-rounding.h"
#include "flang/Evaluate/common.h"
#include <cstdint>

namespace Fortran::evaluate {

// Floating-point rounding control: a rounding mode plus a switch that
// selects x86-style status flag emulation.
struct Rounding {
// Rounding mode; defaults to TiesToEven.
common::RoundingMode mode{common::RoundingMode::TiesToEven};
// When set, emulate status flag behavior peculiar to x86
// (viz., fail to set the Underflow flag when an inexact product of a
// multiplication is rounded up to a normal number from a subnormal
// in some rounding modes)
// The default is picked by the predefined macros of the compiler that
// builds this code: true when __x86_64__, __riscv, or __loongarch__ is
// nonzero, false otherwise.
#if __x86_64__ || __riscv || __loongarch__
bool x86CompatibleBehavior{true};
#else
bool x86CompatibleBehavior{false};
#endif
};
using common::Rounding;

ENUM_CLASS(IeeeFeature, Denormal, Divide, Flags, Halting, Inf, Io, NaN,
Rounding, Sqrt, Standard, Subnormal, UnderflowControl)
Expand All @@ -52,6 +41,12 @@ class TargetCharacteristics {
}
void set_areSubnormalsFlushedToZero(bool yes = true);

// Check if a given real kind has flushing control.
bool hasSubnormalFlushingControl(int kind) const;
// Check if any or all real kinds have flushing control.
bool hasSubnormalFlushingControl(bool any = false) const;
void set_hasSubnormalFlushingControl(int kind, bool yes = true);

Rounding roundingMode() const { return roundingMode_; }
void set_roundingMode(Rounding);

Expand Down Expand Up @@ -111,13 +106,14 @@ class TargetCharacteristics {
const IeeeFeatures &ieeeFeatures() const { return ieeeFeatures_; }

private:
static constexpr int maxKind{32};
std::uint8_t byteSize_[common::TypeCategory_enumSize][maxKind]{};
std::uint8_t align_[common::TypeCategory_enumSize][maxKind]{};
static constexpr int maxKind{16};
std::uint8_t byteSize_[common::TypeCategory_enumSize][maxKind + 1]{};
std::uint8_t align_[common::TypeCategory_enumSize][maxKind + 1]{};
bool isBigEndian_{false};
bool isPPC_{false};
bool isOSWindows_{false};
bool areSubnormalsFlushedToZero_{false};
bool hasSubnormalFlushingControl_[maxKind + 1]{};
Rounding roundingMode_{defaultRounding};
std::size_t procedurePointerByteSize_{8};
std::size_t procedurePointerAlignment_{8};
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Lower/PFTBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ struct FunctionLikeUnit : public ProgramUnit {
bool hasIeeeAccess{false};
bool mayModifyHaltingMode{false};
bool mayModifyRoundingMode{false};
bool mayModifyUnderflowMode{false};
/// Terminal basic block (if any)
mlir::Block *finalBlock{};
HostAssociations hostAssociations;
Expand Down
7 changes: 2 additions & 5 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,6 @@ struct IntrinsicLibrary {
getRuntimeCallGenerator(llvm::StringRef name,
mlir::FunctionType soughtFuncType);

/// Helper to generate TODOs for module procedures that must be intercepted in
/// lowering and are not yet implemented.
template <const char *intrinsicName>
void genModuleProcTODO(llvm::ArrayRef<fir::ExtendedValue>);

void genAbort(llvm::ArrayRef<fir::ExtendedValue>);
/// Lowering for the ABS intrinsic. The ABS intrinsic expects one argument in
/// the llvm::ArrayRef. The ABS intrinsic is lowered into MLIR/FIR operation
Expand Down Expand Up @@ -278,6 +273,7 @@ struct IntrinsicLibrary {
template <bool isGet>
void genIeeeGetOrSetStatus(llvm::ArrayRef<fir::ExtendedValue>);
void genIeeeGetRoundingMode(llvm::ArrayRef<fir::ExtendedValue>);
void genIeeeGetUnderflowMode(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genIeeeInt(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genIeeeIsFinite(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genIeeeIsNan(mlir::Type, llvm::ArrayRef<mlir::Value>);
Expand All @@ -295,6 +291,7 @@ struct IntrinsicLibrary {
template <bool isFlag>
void genIeeeSetFlagOrHaltingMode(llvm::ArrayRef<fir::ExtendedValue>);
void genIeeeSetRoundingMode(llvm::ArrayRef<fir::ExtendedValue>);
void genIeeeSetUnderflowMode(llvm::ArrayRef<fir::ExtendedValue>);
template <mlir::arith::CmpFPredicate pred>
mlir::Value genIeeeSignalingCompare(mlir::Type resultType,
llvm::ArrayRef<mlir::Value>);
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/Builder/Runtime/Exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,9 @@ namespace fir::runtime {
mlir::Value genMapExcept(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value excepts);

mlir::Value genGetUnderflowMode(fir::FirOpBuilder &builder, mlir::Location loc);
void genSetUnderflowMode(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value bit);

} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_EXCEPTIONS_H
5 changes: 5 additions & 0 deletions flang/include/flang/Optimizer/Transforms/CUFOpConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@ class SymbolTable;

namespace cuf {

/// Patterns that convert CUF operations to runtime calls.
void populateCUFToFIRConversionPatterns(const fir::LLVMTypeConverter &converter,
mlir::DataLayout &dl,
const mlir::SymbolTable &symtab,
mlir::RewritePatternSet &patterns);

/// Patterns that update FIR operations in the presence of CUF.
void populateFIRCUFConversionPatterns(const mlir::SymbolTable &symtab,
mlir::RewritePatternSet &patterns);

} // namespace cuf

#endif // FORTRAN_OPTIMIZER_TRANSFORMS_CUFOPCONVERSION_H_
8 changes: 4 additions & 4 deletions flang/include/flang/Runtime/CUDA/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ extern "C" {
void RTDECL(CUFRegisterAllocator)();
}

void *CUFAllocPinned(std::size_t);
void *CUFAllocPinned(std::size_t, std::int64_t);
void CUFFreePinned(void *);

void *CUFAllocDevice(std::size_t);
void *CUFAllocDevice(std::size_t, std::int64_t);
void CUFFreeDevice(void *);

void *CUFAllocManaged(std::size_t);
void *CUFAllocManaged(std::size_t, std::int64_t);
void CUFFreeManaged(void *);

void *CUFAllocUnified(std::size_t);
void *CUFAllocUnified(std::size_t, std::int64_t);
void CUFFreeUnified(void *);

} // namespace Fortran::runtime::cuda
Expand Down
3 changes: 3 additions & 0 deletions flang/include/flang/Runtime/CUDA/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ static constexpr unsigned kHostToDevice = 0;
static constexpr unsigned kDeviceToHost = 1;
static constexpr unsigned kDeviceToDevice = 2;

/// Value used for asyncId when no specific stream is specified.
static constexpr std::int64_t kCudaNoStream = -1;

#define CUDA_REPORT_IF_ERROR(expr) \
[](cudaError_t err) { \
if (err == cudaSuccess) \
Expand Down
6 changes: 3 additions & 3 deletions flang/include/flang/Runtime/allocatable.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ int RTDECL(AllocatableCheckLengthParameter)(Descriptor &,
// Successfully allocated memory is initialized if the allocatable has a
// derived type, and is always initialized by AllocatableAllocateSource().
// Performs all necessary coarray synchronization and validation actions.
int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false,
const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
int sourceLine = 0);
int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
bool hasStat = false, const Descriptor *errMsg = nullptr,
const char *sourceFile = nullptr, int sourceLine = 0);
Expand Down
10 changes: 6 additions & 4 deletions flang/include/flang/Runtime/allocator-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define FORTRAN_RUNTIME_ALLOCATOR_REGISTRY_H_

#include "flang/Common/api-attrs.h"
#include <cstdint>
#include <cstdlib>
#include <vector>

Expand All @@ -25,18 +26,19 @@ static constexpr unsigned kUnifiedAllocatorPos = 4;

namespace Fortran::runtime {

using AllocFct = void *(*)(std::size_t);
using AllocFct = void *(*)(std::size_t, std::int64_t);
using FreeFct = void (*)(void *);

// Holds one allocation function pointer and one free function pointer.
// Both members default to nullptr.
typedef struct Allocator_t {
AllocFct alloc{nullptr};
FreeFct free{nullptr};
} Allocator_t;

#ifdef RT_DEVICE_COMPILATION
static RT_API_ATTRS void *MallocWrapper(std::size_t size) {
static RT_API_ATTRS void *MallocWrapper(
std::size_t size, [[maybe_unused]] std::int64_t) {
return std::malloc(size);
}
#ifdef RT_DEVICE_COMPILATION
static RT_API_ATTRS void FreeWrapper(void *p) { return std::free(p); }
#endif

Expand All @@ -46,7 +48,7 @@ struct AllocatorRegistry {
: allocators{{&MallocWrapper, &FreeWrapper}} {}
#else
constexpr AllocatorRegistry() {
allocators[kDefaultAllocator] = {&std::malloc, &std::free};
allocators[kDefaultAllocator] = {&MallocWrapper, &std::free};
};
#endif
RT_API_ATTRS void Register(int, Allocator_t);
Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Runtime/cpp-type.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#ifndef FORTRAN_RUNTIME_CPP_TYPE_H_
#define FORTRAN_RUNTIME_CPP_TYPE_H_

#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Common/float128.h"
#include "flang/Common/float80.h"
#include "flang/Common/uint128.h"
Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Runtime/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ class Descriptor {
// before calling. It (re)computes the byte strides after
// allocation. Does not allocate automatic components or
// perform default component initialization.
RT_API_ATTRS int Allocate();
RT_API_ATTRS int Allocate(std::int64_t asyncId = -1);
RT_API_ATTRS void SetByteStrides();

// Deallocates storage; does not call FINAL subroutines or
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Runtime/exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ extern "C" {
// This mapping is done at runtime to support cross compilation.
std::uint32_t RTNAME(MapException)(std::uint32_t excepts);

// Get and set the IEEE underflow mode if supported; otherwise these are no-ops.
bool RTNAME(GetUnderflowMode)(void);
void RTNAME(SetUnderflowMode)(bool flag);

} // extern "C"
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_EXCEPTIONS_H_
2 changes: 1 addition & 1 deletion flang/include/flang/Runtime/type-code.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef FORTRAN_RUNTIME_TYPE_CODE_H_
#define FORTRAN_RUNTIME_TYPE_CODE_H_

#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Common/optional.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
#include <utility>
Expand Down
5 changes: 5 additions & 0 deletions flang/include/flang/Tools/TargetSetup.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ namespace Fortran::tools {
targetCharacteristics.DisableType(
Fortran::common::TypeCategory::Real, /*kind=*/10);
}
if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) {
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/3);
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/4);
targetCharacteristics.set_hasSubnormalFlushingControl(/*kind=*/8);
}

// Figure out if we can support F128: see
// flang/runtime/Float128Math/math-entries.h
Expand Down
12 changes: 10 additions & 2 deletions flang/lib/Evaluate/fold-logical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,16 @@ Expr<Type<TypeCategory::Logical, KIND>> FoldIntrinsicFunction(
return Expr<T>{context.targetCharacteristics().ieeeFeatures().test(
IeeeFeature::Subnormal)};
} else if (name == "__builtin_ieee_support_underflow_control") {
return Expr<T>{context.targetCharacteristics().ieeeFeatures().test(
IeeeFeature::UnderflowControl)};
// Setting kind=0 checks subnormal flushing control across all type kinds.
if (args[0]) {
return Expr<T>{
context.targetCharacteristics().hasSubnormalFlushingControl(
args[0]->GetType().value().kind())};
} else {
return Expr<T>{
context.targetCharacteristics().hasSubnormalFlushingControl(
/*any=*/false)};
}
}
return Expr<T>{std::move(funcRef)};
}
Expand Down
32 changes: 28 additions & 4 deletions flang/lib/Evaluate/target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Rounding TargetCharacteristics::defaultRounding;

TargetCharacteristics::TargetCharacteristics() {
auto enableCategoryKinds{[this](TypeCategory category) {
for (int kind{0}; kind < maxKind; ++kind) {
for (int kind{1}; kind <= maxKind; ++kind) {
if (CanSupportType(category, kind)) {
auto byteSize{static_cast<std::size_t>(kind)};
if (category == TypeCategory::Real ||
Expand Down Expand Up @@ -70,14 +70,14 @@ bool TargetCharacteristics::EnableType(common::TypeCategory category,

void TargetCharacteristics::DisableType(
common::TypeCategory category, std::int64_t kind) {
if (kind >= 0 && kind < maxKind) {
if (kind > 0 && kind <= maxKind) {
align_[static_cast<int>(category)][kind] = 0;
}
}

std::size_t TargetCharacteristics::GetByteSize(
common::TypeCategory category, std::int64_t kind) const {
if (kind >= 0 && kind < maxKind) {
if (kind > 0 && kind <= maxKind) {
return byteSize_[static_cast<int>(category)][kind];
} else {
return 0;
Expand All @@ -86,7 +86,7 @@ std::size_t TargetCharacteristics::GetByteSize(

std::size_t TargetCharacteristics::GetAlignment(
common::TypeCategory category, std::int64_t kind) const {
if (kind >= 0 && kind < maxKind) {
if (kind > 0 && kind <= maxKind) {
return align_[static_cast<int>(category)][kind];
} else {
return 0;
Expand All @@ -108,6 +108,30 @@ void TargetCharacteristics::set_areSubnormalsFlushedToZero(bool yes) {
areSubnormalsFlushedToZero_ = yes;
}

// Check if a given real kind has flushing control.
// The kind must lie in 1..maxKind and must be a REAL kind accepted by
// CanSupportType(); both preconditions are enforced with CHECK.
// Returns the per-kind entry of hasSubnormalFlushingControl_.
bool TargetCharacteristics::hasSubnormalFlushingControl(int kind) const {
CHECK(kind > 0 && kind <= maxKind);
CHECK(CanSupportType(TypeCategory::Real, kind));
return hasSubnormalFlushingControl_[kind];
}

// Query flushing control across the REAL kinds that CanSupportType() accepts.
// With any == true the result is true as soon as one such kind has flushing
// control; with any == false the result is true only when none of them lacks
// it (so it is also true when no REAL kind is supported).
bool TargetCharacteristics::hasSubnormalFlushingControl(bool any) const {
  for (int realKind{1}; realKind <= maxKind; ++realKind) {
    if (!CanSupportType(TypeCategory::Real, realKind)) {
      continue; // unsupported kinds do not take part in the answer
    }
    if (hasSubnormalFlushingControl_[realKind] == any) {
      return any; // this kind alone settles the question
    }
  }
  return !any;
}

// Record whether the given real kind has flushing control.
// The kind must lie in 1..maxKind; this is enforced with CHECK.
void TargetCharacteristics::set_hasSubnormalFlushingControl(
int kind, bool yes) {
CHECK(kind > 0 && kind <= maxKind);
hasSubnormalFlushingControl_[kind] = yes;
}

// Replace the rounding control stored in roundingMode_.
void TargetCharacteristics::set_roundingMode(Rounding rounding) {
roundingMode_ = rounding;
}
Expand Down
11 changes: 8 additions & 3 deletions flang/lib/Lower/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,14 @@ static mlir::Value genRuntimeAllocate(fir::FirOpBuilder &builder,
? fir::runtime::getRuntimeFunc<mkRTKey(PointerAllocate)>(loc, builder)
: fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc,
builder);
llvm::SmallVector<mlir::Value> args{
box.getAddr(), errorManager.hasStat, errorManager.errMsgAddr,
errorManager.sourceFile, errorManager.sourceLine};
llvm::SmallVector<mlir::Value> args{box.getAddr()};
if (!box.isPointer())
args.push_back(
builder.createIntegerConstant(loc, builder.getI64Type(), -1));
args.push_back(errorManager.hasStat);
args.push_back(errorManager.errMsgAddr);
args.push_back(errorManager.sourceFile);
args.push_back(errorManager.sourceLine);
llvm::SmallVector<mlir::Value> operands;
for (auto [fst, snd] : llvm::zip(args, callee.getFunctionType().getInputs()))
operands.emplace_back(builder.createConvert(loc, snd, fst));
Expand Down
19 changes: 16 additions & 3 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "flang/Optimizer/Builder/Runtime/Character.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h"
#include "flang/Optimizer/Builder/Runtime/Exceptions.h"
#include "flang/Optimizer/Builder/Runtime/Main.h"
#include "flang/Optimizer/Builder/Runtime/Ragged.h"
#include "flang/Optimizer/Builder/Runtime/Stop.h"
Expand Down Expand Up @@ -5181,8 +5182,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
genOpenMPSymbolProperties(*this, var);
}

/// Where applicable, save the exception state and halting and rounding
/// modes at function entry and restore them at function exits.
/// Where applicable, save the exception state and halting, rounding, and
/// underflow modes at function entry, and restore them at function exits.
void manageFPEnvironment(Fortran::lower::pft::FunctionLikeUnit &funit) {
mlir::Location loc = toLocation();
mlir::Location endLoc =
Expand Down Expand Up @@ -5224,7 +5225,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
});
}
if (funit.mayModifyRoundingMode) {
// F18 Clause 17.4.5: In a procedure [...], the processor shall not
// F18 Clause 17.4p5: In a procedure [...], the processor shall not
// change the rounding modes on entry, and on return shall ensure that
// the rounding modes are the same as they were on entry.
mlir::func::FuncOp getRounding =
Expand All @@ -5237,6 +5238,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
builder->create<fir::CallOp>(endLoc, setRounding, roundingMode);
});
}
if ((funit.mayModifyUnderflowMode) &&
(bridge.getTargetCharacteristics().hasSubnormalFlushingControl(
/*any=*/true))) {
// F18 Clause 17.5p2: In a procedure [...], the processor shall not
// change the underflow mode on entry, and on return shall ensure that
// the underflow mode is the same as it was on entry.
mlir::Value underflowMode =
fir::runtime::genGetUnderflowMode(*builder, loc);
bridge.fctCtx().attachCleanup([=]() {
fir::runtime::genSetUnderflowMode(*builder, loc, {underflowMode});
});
}
}

/// Start translation of a function.
Expand Down
5 changes: 4 additions & 1 deletion flang/lib/Lower/PFTBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,14 @@ class PFTBuilder {
return;
if (procName.starts_with("ieee_set_modes_") ||
procName.starts_with("ieee_set_status_"))
proc->mayModifyHaltingMode = proc->mayModifyRoundingMode = true;
proc->mayModifyHaltingMode = proc->mayModifyRoundingMode =
proc->mayModifyUnderflowMode = true;
else if (procName.starts_with("ieee_set_halting_mode_"))
proc->mayModifyHaltingMode = true;
else if (procName.starts_with("ieee_set_rounding_mode_"))
proc->mayModifyRoundingMode = true;
else if (procName.starts_with("ieee_set_underflow_mode_"))
proc->mayModifyUnderflowMode = true;
}

/// Convert an IfStmt into an IfConstruct, retaining the IfStmt as the
Expand Down
34 changes: 22 additions & 12 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@ static bool isStaticallyPresent(const fir::ExtendedValue &exv) {
return !isStaticallyAbsent(exv);
}

/// IEEE module procedure names not yet implemented for genModuleProcTODO.
static constexpr char ieee_get_underflow_mode[] = "ieee_get_underflow_mode";
static constexpr char ieee_set_underflow_mode[] = "ieee_set_underflow_mode";

using I = IntrinsicLibrary;

/// Flag to indicate that an intrinsic argument has to be handled as
Expand Down Expand Up @@ -328,7 +324,10 @@ static constexpr IntrinsicHandler handlers[]{
{"radix", asValue, handleDynamicOptional}}},
/*isElemental=*/false},
{"ieee_get_status", &I::genIeeeGetOrSetStatus</*isGet=*/true>},
{"ieee_get_underflow_mode", &I::genModuleProcTODO<ieee_get_underflow_mode>},
{"ieee_get_underflow_mode",
&I::genIeeeGetUnderflowMode,
{{{"gradual", asAddr}}},
/*isElemental=*/false},
{"ieee_int", &I::genIeeeInt},
{"ieee_is_finite", &I::genIeeeIsFinite},
{"ieee_is_nan", &I::genIeeeIsNan},
Expand Down Expand Up @@ -375,7 +374,7 @@ static constexpr IntrinsicHandler handlers[]{
{"radix", asValue, handleDynamicOptional}}},
/*isElemental=*/false},
{"ieee_set_status", &I::genIeeeGetOrSetStatus</*isGet=*/false>},
{"ieee_set_underflow_mode", &I::genModuleProcTODO<ieee_set_underflow_mode>},
{"ieee_set_underflow_mode", &I::genIeeeSetUnderflowMode},
{"ieee_signaling_eq",
&I::genIeeeSignalingCompare<mlir::arith::CmpFPredicate::OEQ>},
{"ieee_signaling_ge",
Expand Down Expand Up @@ -2295,12 +2294,6 @@ mlir::Value IntrinsicLibrary::genConversion(mlir::Type resultType,
return builder.convertWithSemantics(loc, resultType, args[0]);
}

template <const char *intrinsicName>
void IntrinsicLibrary::genModuleProcTODO(
llvm::ArrayRef<fir::ExtendedValue> args) {
crashOnMissingIntrinsic(loc, intrinsicName);
}

// ABORT
void IntrinsicLibrary::genAbort(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 0);
Expand Down Expand Up @@ -4471,6 +4464,14 @@ void IntrinsicLibrary::genIeeeGetOrSetStatus(
genRuntimeCall(isGet ? "fegetenv" : "fesetenv", i32Ty, addr);
}

// IEEE_GET_UNDERFLOW_MODE
// Emits the runtime query for the current underflow mode and stores the
// result, with conversion, through the base of the single argument.
void IntrinsicLibrary::genIeeeGetUnderflowMode(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  mlir::Value underflowMode = fir::runtime::genGetUnderflowMode(builder, loc);
  mlir::Value destination = fir::getBase(args[0]);
  builder.createStoreWithConvert(loc, underflowMode, destination);
}

// IEEE_INT
mlir::Value IntrinsicLibrary::genIeeeInt(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
Expand Down Expand Up @@ -5135,6 +5136,15 @@ void IntrinsicLibrary::genIeeeSetRoundingMode(
builder.create<fir::CallOp>(loc, setRound, mode);
}

// IEEE_SET_UNDERFLOW_MODE
// Converts the base of the single argument to i1 and passes it to the
// runtime call that sets the underflow mode.
void IntrinsicLibrary::genIeeeSetUnderflowMode(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  mlir::Type bitType = builder.getI1Type();
  mlir::Value gradualBit =
      builder.create<fir::ConvertOp>(loc, bitType, getBase(args[0]));
  fir::runtime::genSetUnderflowMode(builder, loc, gradualBit);
}

// IEEE_SIGNALING_EQ, IEEE_SIGNALING_GE, IEEE_SIGNALING_GT,
// IEEE_SIGNALING_LE, IEEE_SIGNALING_LT, IEEE_SIGNALING_NE
template <mlir::arith::CmpFPredicate pred>
Expand Down
9 changes: 6 additions & 3 deletions flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,19 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder,
mlir::func::FuncOp func{
fir::runtime::getRuntimeFunc<mkRTKey(AllocatableAllocate)>(loc, builder)};
mlir::FunctionType fTy{func.getFunctionType()};
mlir::Value asyncId =
builder.createIntegerConstant(loc, builder.getI64Type(), -1);
mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)};
mlir::Value sourceLine{
fir::factory::locationToLineNo(builder, loc, fTy.getInput(4))};
fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))};
if (!hasStat)
hasStat = builder.createBool(loc, false);
if (!errMsg) {
mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType());
errMsg = builder.create<fir::AbsentOp>(loc, boxNoneTy).getResult();
}
llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
builder, loc, fTy, desc, hasStat, errMsg, sourceFile, sourceLine)};
llvm::SmallVector<mlir::Value> args{
fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat,
errMsg, sourceFile, sourceLine)};
builder.create<fir::CallOp>(loc, func, args);
}
14 changes: 14 additions & 0 deletions flang/lib/Optimizer/Builder/Runtime/Exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,17 @@ mlir::Value fir::runtime::genMapExcept(fir::FirOpBuilder &builder,
fir::runtime::getRuntimeFunc<mkRTKey(MapException)>(loc, builder)};
return builder.create<fir::CallOp>(loc, func, excepts).getResult(0);
}

/// Emit a call to the GetUnderflowMode runtime entry point and return the
/// call's first result.
mlir::Value fir::runtime::genGetUnderflowMode(fir::FirOpBuilder &builder,
                                              mlir::Location loc) {
  mlir::func::FuncOp callee =
      fir::runtime::getRuntimeFunc<mkRTKey(GetUnderflowMode)>(loc, builder);
  auto call = builder.create<fir::CallOp>(loc, callee);
  return call.getResult(0);
}

/// Emit a call to the SetUnderflowMode runtime entry point, passing `flag`
/// as its only argument.
void fir::runtime::genSetUnderflowMode(fir::FirOpBuilder &builder,
                                       mlir::Location loc, mlir::Value flag) {
  mlir::func::FuncOp callee =
      fir::runtime::getRuntimeFunc<mkRTKey(SetUnderflowMode)>(loc, builder);
  builder.create<fir::CallOp>(loc, callee, flag);
}
150 changes: 90 additions & 60 deletions flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ static bool hasDoubleDescriptors(OpTy op) {
return false;
}

/// Returns true when the global has a data attribute and that attribute is
/// Device, Managed, or Constant; returns false otherwise.
bool isDeviceGlobal(fir::GlobalOp op) {
  auto dataAttr = op.getDataAttr();
  if (!dataAttr)
    return false; // no CUF data attribute at all
  return *dataAttr == cuf::DataAttribute::Device ||
         *dataAttr == cuf::DataAttribute::Managed ||
         *dataAttr == cuf::DataAttribute::Constant;
}

static mlir::Value createConvertOp(mlir::PatternRewriter &rewriter,
mlir::Location loc, mlir::Type toTy,
mlir::Value val) {
Expand All @@ -89,62 +98,6 @@ static mlir::Value createConvertOp(mlir::PatternRewriter &rewriter,
return val;
}

/// Resolve the address to use for `operand`.
///
/// When the operand is produced by a fir.declare whose memref comes from a
/// fir.address_of naming a global with a Device, Managed, or Constant data
/// attribute, a call to the CUFGetDeviceAddress runtime function is inserted
/// before the operand's owner and its result, converted back to the declare's
/// memref type, is returned. In every other case the operand's current value
/// is returned unchanged.
mlir::Value getDeviceAddress(mlir::PatternRewriter &rewriter,
                             mlir::OpOperand &operand,
                             const mlir::SymbolTable &symtab) {
  mlir::Value original = operand.get();

  auto declare = original.getDefiningOp<fir::DeclareOp>();
  if (!declare)
    return original;

  auto addrOf = declare.getMemref().getDefiningOp<fir::AddrOfOp>();
  if (!addrOf)
    return original;

  auto global = symtab.lookup<fir::GlobalOp>(
      addrOf.getSymbol().getRootReference().getValue());
  if (!global)
    return original;

  // Only globals tagged Device, Managed, or Constant need the runtime lookup.
  auto dataAttr = global.getDataAttrAttr();
  if (!dataAttr)
    return original;
  const auto dataKind = dataAttr.getValue();
  const bool isDevGlobal = dataKind == cuf::DataAttribute::Device ||
                           dataKind == cuf::DataAttribute::Managed ||
                           dataKind == cuf::DataAttribute::Constant;
  if (!isDevGlobal)
    return original;

  // Emit the runtime call right before the operation that uses the operand;
  // the guard restores the rewriter's insertion point on exit.
  mlir::OpBuilder::InsertionGuard guard(rewriter);
  rewriter.setInsertionPoint(operand.getOwner());
  mlir::Location loc = declare.getLoc();
  auto mod = declare->getParentOfType<mlir::ModuleOp>();
  fir::FirOpBuilder builder(rewriter, mod);

  mlir::func::FuncOp callee =
      fir::runtime::getRuntimeFunc<mkRTKey(CUFGetDeviceAddress)>(loc, builder);
  auto calleeTy = callee.getFunctionType();
  mlir::Value hostAddr =
      createConvertOp(rewriter, loc, calleeTy.getInput(0), declare.getResult());
  mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
  mlir::Value sourceLine =
      fir::factory::locationToLineNo(builder, loc, calleeTy.getInput(2));
  llvm::SmallVector<mlir::Value> callArgs{fir::runtime::createArguments(
      builder, loc, calleeTy, hostAddr, sourceFile, sourceLine)};
  auto call = rewriter.create<fir::CallOp>(loc, callee, callArgs);

  // Convert the runtime result back to the type of the declare's memref.
  return createConvertOp(rewriter, loc, declare.getMemref().getType(),
                         call->getResult(0));
}

template <typename OpTy>
static mlir::LogicalResult convertOpToCall(OpTy op,
mlir::PatternRewriter &rewriter,
Expand Down Expand Up @@ -422,6 +375,54 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
const fir::LLVMTypeConverter *typeConverter;
};

/// Rewrite pattern for fir.declare operations whose memref is produced by a
/// fir.address_of that names a global accepted by isDeviceGlobal().
///
/// For such a declare, a call to the CUFGetDeviceAddress runtime function is
/// inserted right after the fir.address_of, and the declare's memref operand
/// is replaced in place by the call result converted to the original memref
/// type. Any other fir.declare fails to match.
struct DeclareOpConversion : public mlir::OpRewritePattern<fir::DeclareOp> {
  using OpRewritePattern::OpRewritePattern;

  DeclareOpConversion(mlir::MLIRContext *context,
                      const mlir::SymbolTable &symtab)
      : OpRewritePattern(context), symTab{symtab} {}

  mlir::LogicalResult
  matchAndRewrite(fir::DeclareOp op,
                  mlir::PatternRewriter &rewriter) const override {
    auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>();
    if (!addrOfOp)
      return failure();

    auto global = symTab.lookup<fir::GlobalOp>(
        addrOfOp.getSymbol().getRootReference().getValue());
    if (!global)
      return failure();
    if (!isDeviceGlobal(global))
      return failure();

    // Build the runtime call immediately after the address_of operation.
    rewriter.setInsertionPointAfter(addrOfOp);
    auto mod = op->getParentOfType<mlir::ModuleOp>();
    fir::FirOpBuilder builder(rewriter, mod);
    mlir::Location loc = op.getLoc();
    mlir::func::FuncOp callee =
        fir::runtime::getRuntimeFunc<mkRTKey(CUFGetDeviceAddress)>(loc,
                                                                   builder);
    auto calleeTy = callee.getFunctionType();
    mlir::Type hostArgTy = calleeTy.getInput(0);
    mlir::Value hostAddr =
        createConvertOp(rewriter, loc, hostArgTy, addrOfOp.getResult());
    mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
    mlir::Value sourceLine =
        fir::factory::locationToLineNo(builder, loc, calleeTy.getInput(2));
    llvm::SmallVector<mlir::Value> callArgs{fir::runtime::createArguments(
        builder, loc, calleeTy, hostAddr, sourceFile, sourceLine)};
    auto call = rewriter.create<fir::CallOp>(loc, callee, callArgs);
    mlir::Value deviceAddr = createConvertOp(
        rewriter, loc, op.getMemref().getType(), call->getResult(0));

    // Swap the memref operand in place, notifying the rewriter around it.
    rewriter.startOpModification(op);
    op.getMemrefMutable().assign(deviceAddr);
    rewriter.finalizeOpModification(op);
    return success();
  }

private:
  const mlir::SymbolTable &symTab;
};

struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
using OpRewritePattern::OpRewritePattern;

Expand Down Expand Up @@ -511,7 +512,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
builder.create<fir::StoreOp>(loc, src, alloc);
addr = alloc;
} else {
addr = getDeviceAddress(rewriter, op.getSrcMutable(), symtab);
addr = op.getSrc();
}
llvm::SmallVector<mlir::Value> lenParams;
mlir::Type boxTy = fir::BoxType::get(srcTy);
Expand All @@ -531,7 +532,7 @@ static mlir::Value emboxDst(mlir::PatternRewriter &rewriter,
mlir::Location loc = op.getLoc();
fir::FirOpBuilder builder(rewriter, mod);
mlir::Type dstTy = fir::unwrapRefType(op.getDst().getType());
mlir::Value dstAddr = getDeviceAddress(rewriter, op.getDstMutable(), symtab);
mlir::Value dstAddr = op.getDst();
mlir::Type dstBoxTy = fir::BoxType::get(dstTy);
llvm::SmallVector<mlir::Value> lenParams;
mlir::Value dstBox =
Expand Down Expand Up @@ -652,8 +653,8 @@ struct CUFDataTransferOpConversion
mlir::Value sourceLine =
fir::factory::locationToLineNo(builder, loc, fTy.getInput(5));

mlir::Value dst = getDeviceAddress(rewriter, op.getDstMutable(), symtab);
mlir::Value src = getDeviceAddress(rewriter, op.getSrcMutable(), symtab);
mlir::Value dst = op.getDst();
mlir::Value src = op.getSrc();
// Materialize the src if constant.
if (matchPattern(src.getDefiningOp(), mlir::m_Constant())) {
mlir::Value temp = builder.createTemporary(loc, srcTy);
Expand Down Expand Up @@ -823,6 +824,30 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
"error in CUF op conversion\n");
signalPassFailure();
}

target.addDynamicallyLegalOp<fir::DeclareOp>([&](fir::DeclareOp op) {
if (inDeviceContext(op))
return true;
if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) {
if (auto global = symtab.lookup<fir::GlobalOp>(
addrOfOp.getSymbol().getRootReference().getValue())) {
if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(global.getType())))
return true;
if (isDeviceGlobal(global))
return false;
}
}
return true;
});

patterns.clear();
cuf::populateFIRCUFConversionPatterns(symtab, patterns);
if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,
std::move(patterns)))) {
mlir::emitError(mlir::UnknownLoc::get(ctx),
"error in CUF op conversion\n");
signalPassFailure();
}
}
};
} // namespace
Expand All @@ -837,3 +862,8 @@ void cuf::populateCUFToFIRConversionPatterns(
&dl, &converter);
patterns.insert<CUFLaunchOpConversion>(patterns.getContext(), symtab);
}

/// Add DeclareOpConversion to `patterns`, constructed with the pattern set's
/// context and `symtab`.
void cuf::populateFIRCUFConversionPatterns(const mlir::SymbolTable &symtab,
                                           mlir::RewritePatternSet &patterns) {
  mlir::MLIRContext *context = patterns.getContext();
  patterns.insert<DeclareOpConversion>(context, symtab);
}
2 changes: 1 addition & 1 deletion flang/runtime/CUDA/allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream,
}
// Perform the standard allocation.
int stat{RTNAME(AllocatableAllocate)(
desc, hasStat, errMsg, sourceFile, sourceLine)};
desc, stream, hasStat, errMsg, sourceFile, sourceLine)};
return stat;
}

Expand Down
14 changes: 9 additions & 5 deletions flang/runtime/CUDA/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,26 @@ void RTDEF(CUFRegisterAllocator)() {
}
}

void *CUFAllocPinned(std::size_t sizeInBytes) {
void *CUFAllocPinned(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
return p;
}

// Releases `p` with cudaFreeHost; the CUDA status is handed to
// CUDA_REPORT_IF_ERROR.
void CUFFreePinned(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFreeHost(p));
}

void *CUFAllocDevice(std::size_t sizeInBytes) {
void *CUFAllocDevice(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
void *p;
CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
return p;
}

// Releases `p` with cudaFree; the CUDA status is handed to
// CUDA_REPORT_IF_ERROR.
void CUFFreeDevice(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFree(p));
}

void *CUFAllocManaged(std::size_t sizeInBytes) {
void *CUFAllocManaged(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
void *p;
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
Expand All @@ -58,9 +61,10 @@ void *CUFAllocManaged(std::size_t sizeInBytes) {

// Releases `p` with cudaFree; the CUDA status is handed to
// CUDA_REPORT_IF_ERROR.
void CUFFreeManaged(void *p) {
  CUDA_REPORT_IF_ERROR(cudaFree(p));
}

void *CUFAllocUnified(std::size_t sizeInBytes) {
void *CUFAllocUnified(
std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
// Call alloc managed for the time being.
return CUFAllocManaged(sizeInBytes);
return CUFAllocManaged(sizeInBytes, asyncId);
}

void CUFFreeUnified(void *p) {
Expand Down
3 changes: 2 additions & 1 deletion flang/runtime/CUDA/descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ RT_EXT_API_GROUP_BEGIN

// Allocates `sizeInBytes` bytes through CUFAllocManaged, passing
// kCudaNoStream as the async id, and returns the storage typed as a
// Descriptor. `sourceFile` and `sourceLine` are not used in this body.
Descriptor *RTDEF(CUFAllocDesciptor)(
    std::size_t sizeInBytes, const char *sourceFile, int sourceLine) {
  return reinterpret_cast<Descriptor *>(
      CUFAllocManaged(sizeInBytes, kCudaNoStream));
}

void RTDEF(CUFFreeDesciptor)(
Expand Down
10 changes: 6 additions & 4 deletions flang/runtime/allocatable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,17 @@ void RTDEF(AllocatableApplyMold)(
}
}

int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId,
bool hasStat, const Descriptor *errMsg, const char *sourceFile,
int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
if (!descriptor.IsAllocatable()) {
return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat);
} else if (descriptor.IsAllocated()) {
return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat);
} else {
int stat{ReturnError(terminator, descriptor.Allocate(), errMsg, hasStat)};
int stat{
ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)};
if (stat == StatOk) {
if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
if (const auto *derived{addendum->derivedType()}) {
Expand All @@ -160,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
const Descriptor &source, bool hasStat, const Descriptor *errMsg,
const char *sourceFile, int sourceLine) {
int stat{RTNAME(AllocatableAllocate)(
alloc, hasStat, errMsg, sourceFile, sourceLine)};
alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)};
if (stat == StatOk) {
Terminator terminator{sourceFile, sourceLine};
DoFromSourceAssign(alloc, source, terminator);
Expand Down
8 changes: 4 additions & 4 deletions flang/runtime/array-constructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded(
initialAllocationSize(fromElements, to.ElementBytes())};
to.GetDimension(0).SetBounds(1, allocationSize);
RTNAME(AllocatableAllocate)
(to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
vector.sourceLine);
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
to.GetDimension(0).SetBounds(1, fromElements);
vector.actualAllocationSize = allocationSize;
} else {
// Do not over-allocate if the final extent was known before pushing the
// first value: there should be no reallocation.
RUNTIME_CHECK(terminator, previousToElements >= fromElements);
RTNAME(AllocatableAllocate)
(to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile,
vector.sourceLine);
(to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr,
vector.sourceFile, vector.sourceLine);
vector.actualAllocationSize = previousToElements;
}
} else {
Expand Down
4 changes: 2 additions & 2 deletions flang/runtime/descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
#endif
}

RT_API_ATTRS int Descriptor::Allocate() {
RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
// F'2023 7.4.4.2 p5: "If the character length parameter value evaluates
Expand All @@ -175,7 +175,7 @@ RT_API_ATTRS int Descriptor::Allocate() {
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
void *p{alloc(byteSize ? byteSize : 1)};
void *p{alloc(byteSize ? byteSize : 1, asyncId)};
if (!p) {
return CFI_ERROR_MEM_ALLOCATION;
}
Expand Down
20 changes: 20 additions & 0 deletions flang/runtime/exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include "flang/Runtime/exceptions.h"
#include "terminator.h"
#include <cfenv>
#if __x86_64__
#include <xmmintrin.h>
#endif

// When not supported, these macros are undefined in cfenv.h;
// set them to zero in that case.
Expand Down Expand Up @@ -78,5 +81,22 @@ uint32_t RTNAME(MapException)(uint32_t excepts) {
// on some systems, e.g. Solaris, so omit object size comparison for now.
// TODO: consider femode_t object size comparison once it's more mature.

// Returns true when gradual underflow is in effect.
// Only x86-64 is queried; every other target reports false.
bool RTNAME(GetUnderflowMode)(void) {
#if __x86_64__
  // The MXCSR Flush to Zero flag is the negation of the ieee_get_underflow_mode
  // GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
  const bool flushToZero{_MM_GET_FLUSH_ZERO_MODE() != _MM_FLUSH_ZERO_OFF};
  return !flushToZero;
#else
  return false;
#endif
}
// Selects gradual underflow when `flag` is true and flush-to-zero otherwise.
// Only x86-64 is handled; on every other target this does nothing.
void RTNAME(SetUnderflowMode)(bool flag) {
#if __x86_64__
  // The MXCSR Flush to Zero flag is the negation of the ieee_set_underflow_mode
  // GRADUAL argument. It affects real computations of kinds 3, 4, and 8.
  if (flag) {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
  } else {
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  }
#endif
}

} // extern "C"
} // namespace Fortran::runtime
2 changes: 1 addition & 1 deletion flang/runtime/format.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "environment.h"
#include "io-error.h"
#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Common/optional.h"
#include "flang/Decimal/decimal.h"
#include "flang/Runtime/freestanding-tools.h"
Expand Down
3 changes: 2 additions & 1 deletion flang/runtime/non-tbp-dio.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
#ifndef FORTRAN_RUNTIME_NON_TBP_DIO_H_
#define FORTRAN_RUNTIME_NON_TBP_DIO_H_

#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Common/api-attrs.h"
#include <cstddef>

namespace Fortran::runtime::typeInfo {
Expand Down
2 changes: 1 addition & 1 deletion flang/runtime/type-info.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// flang/module/__fortran_type_info.f90.

#include "terminator.h"
#include "flang/Common/Fortran.h"
#include "flang/Common/Fortran-consts.h"
#include "flang/Common/bit-population-count.h"
#include "flang/Common/optional.h"
#include "flang/Runtime/descriptor.h"
Expand Down
6 changes: 3 additions & 3 deletions flang/test/Evaluate/fold-ieee.f90
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ module m
logical, parameter :: test_sn_all = ieee_support_subnormal()
logical, parameter :: test_sn_4 = ieee_support_subnormal(1.)
logical, parameter :: test_sn_8 = ieee_support_subnormal(1.d0)
logical, parameter :: test_uc_all = ieee_support_underflow_control()
logical, parameter :: test_uc_4 = ieee_support_underflow_control(1.)
logical, parameter :: test_uc_8 = ieee_support_underflow_control(1.d0)
! logical, parameter :: test_uc_all = .not. ieee_support_underflow_control()
! logical, parameter :: test_uc_4 = ieee_support_underflow_control(1.)
! logical, parameter :: test_uc_8 = ieee_support_underflow_control(1.d0)
end
15 changes: 8 additions & 7 deletions flang/test/Evaluate/folding18.f90
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ module m
.and. ieee_support_subnormal(1.0_8) &
.and. ieee_support_subnormal(1.0_10) &
.and. ieee_support_subnormal(1.0_16)
logical, parameter :: test_ieee_support_underflow_control = ieee_support_underflow_control() &
.and. ieee_support_underflow_control(1.0_2) &
.and. ieee_support_underflow_control(1.0_3) &
.and. ieee_support_underflow_control(1.0_4) &
.and. ieee_support_underflow_control(1.0_8) &
.and. ieee_support_underflow_control(1.0_10) &
.and. ieee_support_underflow_control(1.0_16)
! varies by architecture
! logical, parameter :: test_ieee_support_underflow_control = .not. ieee_support_underflow_control() &
! .and. .not. ieee_support_underflow_control(1.0_2) &
! .and. ieee_support_underflow_control(1.0_3) &
! .and. ieee_support_underflow_control(1.0_4) &
! .and. ieee_support_underflow_control(1.0_8) &
! .and. .not. ieee_support_underflow_control(1.0_10) &
! .and. .not. ieee_support_underflow_control(1.0_16)
end module
Loading