87 changes: 72 additions & 15 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
Expand All @@ -34,11 +35,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
Expand Down Expand Up @@ -1432,9 +1436,12 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
*this, D.getBeginLoc(),
isOpenMPWorksharingDirective(D.getDirectiveKind()));
}
bool TeamsLoopCanBeParallel = false;
if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
bool SimpleReduction = ReductionKind == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
Expand Down Expand Up @@ -7928,11 +7935,9 @@ void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
const OMPTeamsGenericLoopDirective &S) {
// To be consistent with current behavior of 'target teams loop', emit
// 'teams loop' as if its constituent constructs are 'distribute,
// 'parallel, and 'for'.
// 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
S.getDistInc());
CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};

// Emit teams region as a standalone region.
Expand All @@ -7946,15 +7951,33 @@ void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
CodeGenDistribute);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
emitPostUpdateForReductionClause(*this, S,
[](CodeGenFunction &) { return nullptr; });
}

static void
emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
const OMPTargetTeamsGenericLoopDirective &S,
PrePostActionTy &Action) {
/// Debug-only helper: prints to llvm::dbgs() how a 'target teams loop'
/// directive was lowered.
///
/// \param CGF       Code generation context; supplies the language options and
///                  the source manager.
/// \param StatusMsg Message prefix; ": DEVICE" or ": HOST" is appended to it
///                  depending on LangOpts.OpenMPIsTargetDevice.
/// \param D         Directive whose begin location is reported as
///                  "<file>: <line>".
///
/// The body is compiled out when NDEBUG is defined. The callers in this file
/// are wrapped in DEBUG_WITH_TYPE, so the function is marked [[maybe_unused]]
/// to avoid an unused-function warning in builds where those call sites
/// expand to nothing.
[[maybe_unused]] static void
emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, std::string StatusMsg,
                                 const OMPExecutableDirective &D) {
#ifndef NDEBUG
  StatusMsg +=
      CGF.CGM.getLangOpts().OpenMPIsTargetDevice ? ": DEVICE" : ": HOST";

  const SourceLocation Loc = D.getBeginLoc();
  const SourceManager &SM = CGF.getContext().getSourceManager();
  const PresumedLoc PLoc = SM.getPresumedLoc(Loc);

  // Fall back to an empty name / the expansion line when no presumed location
  // is available, rather than handing a null pointer to the stream.
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : "";
  const unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(Loc);

  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
#endif
}

static void emitTargetTeamsGenericLoopRegionAsParallel(
CodeGenFunction &CGF, PrePostActionTy &Action,
const OMPTargetTeamsGenericLoopDirective &S) {
Action.Enter(CGF);
// Emit 'teams loop' as if its constituent constructs are 'distribute',
// 'parallel', and 'for'.
Expand All @@ -7974,19 +7997,50 @@ emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF,
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};

DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
emitTargetTeamsLoopCodegenStatus(
CGF, TTL_CODEGEN_TYPE " as parallel for", S));
emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
CodeGenTeams);
emitPostUpdateForReductionClause(CGF, S,
[](CodeGenFunction &) { return nullptr; });
}

/// Emit the region of a 'target teams loop' directive as a 'teams' region
/// that contains a 'distribute' loop (no inner 'parallel for').
static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);

  // Generator for the 'distribute' loop itself: each iteration emits the
  // loop body directly, advancing with the directive's increment.
  auto &&DistributeLoopGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Generator for the enclosing teams region: set up the reduction
  // privates, emit the inlined 'distribute', then finalize the reductions.
  auto &&TeamsRegionGen = [&S, &DistributeLoopGen](
                              CodeGenFunction &CGF,
                              PrePostActionTy &TeamsAction) {
    TeamsAction.Enter(CGF);
    CodeGenFunction::OMPPrivateScope ReductionScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, ReductionScope);
    (void)ReductionScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, DistributeLoopGen, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };

  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, TeamsRegionGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

/// Emit the combined 'target teams loop' directive.
///
/// The target region is lowered either through the 'distribute parallel for'
/// path or through the plain 'distribute' path, depending on what
/// S.canBeParallelFor() reports for the directive.
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the region exactly once, through one of the two specialized
    // helpers; the former single-path emitTargetTeamsGenericLoopRegion call
    // must not run in addition to them.
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}
Expand All @@ -7996,7 +8050,10 @@ void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
const OMPTargetTeamsGenericLoopDirective &S) {
// Emit SPMD target parallel loop region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitTargetTeamsGenericLoopRegion(CGF, S, Action);
if (S.canBeParallelFor())
emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
else
emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
};
llvm::Function *Fn;
llvm::Constant *Addr;
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
options::OPT_mno_wavefrontsize64, false))
Features.push_back("+wavefrontsize64");

if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
options::OPT_mno_amdgpu_precise_memory_op, false))
Features.push_back("+precise-memory");

handleTargetFeaturesGroup(D, Triple, Args, Features,
options::OPT_m_amdgpu_Features_Group);
}
Expand Down
9 changes: 6 additions & 3 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ static void addVSDefines(const ToolChain &TC, const ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("-D_MSC_FULL_VER=" + Twine(ver)));
CmdArgs.push_back(Args.MakeArgString("-D_WIN32"));

llvm::Triple triple = TC.getTriple();
const llvm::Triple &triple = TC.getTriple();
if (triple.isAArch64()) {
CmdArgs.push_back("-D_M_ARM64=1");
} else if (triple.isX86() && triple.isArch32Bit()) {
Expand Down Expand Up @@ -589,7 +589,7 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,

if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath &&
ApproxFunc && !SignedZeros &&
(FPContract == "fast" || FPContract == "")) {
(FPContract == "fast" || FPContract.empty())) {
CmdArgs.push_back("-ffast-math");
return;
}
Expand Down Expand Up @@ -679,7 +679,10 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(TripleStr));

if (isa<PreprocessJobAction>(JA)) {
CmdArgs.push_back("-E");
CmdArgs.push_back("-E");
if (Args.getLastArg(options::OPT_dM)) {
CmdArgs.push_back("-dM");
}
} else if (isa<CompileJobAction>(JA) || isa<BackendJobAction>(JA)) {
if (JA.getType() == types::TY_Nothing) {
CmdArgs.push_back("-fsyntax-only");
Expand Down
6 changes: 2 additions & 4 deletions clang/lib/Parse/ParseOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,8 +843,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
}
case OpenACCClauseKind::If: {
ExprResult CondExpr = ParseOpenACCConditionalExpr(*this);
// An invalid expression can be just about anything, so just give up on
// this clause list.

if (CondExpr.isInvalid()) {
Parens.skipToEnd();
return OpenACCCanContinue();
Expand Down Expand Up @@ -966,8 +965,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
case OpenACCClauseKind::Self: {
assert(DirKind != OpenACCDirectiveKind::Update);
ExprResult CondExpr = ParseOpenACCConditionalExpr(*this);
// An invalid expression can be just about anything, so just give up on
// this clause list.

if (CondExpr.isInvalid()) {
Parens.skipToEnd();
return OpenACCCanContinue();
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
});

if (Itr != ExistingClauses.end()) {
SemaRef.Diag(Clause.getBeginLoc(),
Diag(Clause.getBeginLoc(),
diag::err_acc_duplicate_clause_disallowed)
<< Clause.getDirectiveKind() << Clause.getClauseKind();
SemaRef.Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
Diag((*Itr)->getBeginLoc(), diag::note_acc_previous_clause_here);
return nullptr;
}

Expand Down
86 changes: 83 additions & 3 deletions clang/lib/Sema/SemaOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4478,6 +4478,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
// For 'target teams loop', collect all captured regions so codegen can
// later decide the best IR to emit given the associated loop-nest.
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: {
Expand Down Expand Up @@ -6135,6 +6137,79 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack,
}
}

namespace {
/// Decides whether a 'teams loop' may be emitted as a 'parallel for' by
/// walking its associated loop-nest.
///
/// A 'teams loop' with a nested 'loop bind(parallel)' or generic function
/// call in the associated loop-nest cannot be a 'parallel for'. The verdict
/// is sticky: once a disqualifying construct has been seen the flag is never
/// set back to true, and the remaining statements are no longer visited.
class TeamsLoopChecker final : public ConstStmtVisitor<TeamsLoopChecker> {
  Sema &SemaRef;

public:
  explicit TeamsLoopChecker(Sema &SemaRef)
      : SemaRef(SemaRef), TeamsLoopCanBeParallelFor(true) {}

  /// Result of the traversal; true until a disqualifying construct is found.
  bool teamsLoopCanBeParallelFor() const { return TeamsLoopCanBeParallelFor; }

  // Is there a nested OpenMP loop bind(parallel)
  void VisitOMPExecutableDirective(const OMPExecutableDirective *D) {
    if (!D)
      return;
    if (D->getDirectiveKind() == llvm::omp::Directive::OMPD_loop) {
      if (const auto *C = D->getSingleClause<OMPBindClause>())
        if (C->getBindKind() == OMPC_BIND_parallel) {
          TeamsLoopCanBeParallelFor = false;
          // No need to continue visiting any more
          return;
        }
    }
    visitChildren(D);
  }

  void VisitCallExpr(const CallExpr *C) {
    if (!C)
      return;
    // Function calls inhibit parallel loop translation of 'target teams loop'
    // unless the assume-no-nested-parallelism flag has been specified.
    // OpenMP API runtime library calls do not inhibit parallel loop
    // translation, regardless of the assume-no-nested-parallelism.
    bool IsOpenMPAPI = false;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(C->getCalleeDecl())) {
      std::string Name = FD->getNameInfo().getAsString();
      // Prefix test only; rfind(.., 0) does not scan past position 0.
      IsOpenMPAPI = Name.rfind("omp_", 0) == 0;
    }
    if (!IsOpenMPAPI && !SemaRef.getLangOpts().OpenMPNoNestedParallelism) {
      // Only ever clear the flag here. Assigning the computed value would let
      // a later permitted call overwrite an earlier negative verdict.
      TeamsLoopCanBeParallelFor = false;
      return;
    }
    visitChildren(C);
  }

  void VisitCapturedStmt(const CapturedStmt *S) {
    if (!S)
      return;
    // Look through the capture and inspect the captured body.
    if (const Stmt *Body = S->getCapturedDecl()->getBody())
      Visit(Body);
  }

  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    visitChildren(S);
  }

private:
  /// Visit every non-null child of \p S, stopping as soon as the verdict has
  /// become negative.
  void visitChildren(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!TeamsLoopCanBeParallelFor)
        return;
      if (Child)
        Visit(Child);
    }
  }

  bool TeamsLoopCanBeParallelFor;
};
} // namespace

/// Run a TeamsLoopChecker over \p AStmt and return its verdict on whether the
/// associated 'teams loop' can be treated as a 'parallel for'.
static bool teamsLoopCanBeParallelFor(Stmt *AStmt, Sema &SemaRef) {
  TeamsLoopChecker LoopNestChecker(SemaRef);
  LoopNestChecker.Visit(AStmt);
  const bool CanBeParallelFor = LoopNestChecker.teamsLoopCanBeParallelFor();
  return CanBeParallelFor;
}

bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
ArrayRef<OMPClause *> Clauses,
OpenMPBindClauseKind &BindKind,
Expand Down Expand Up @@ -10895,7 +10970,8 @@ StmtResult Sema::ActOnOpenMPTargetTeamsGenericLoopDirective(
setFunctionHasBranchProtectedScope();

return OMPTargetTeamsGenericLoopDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
teamsLoopCanBeParallelFor(AStmt, *this));
}

StmtResult Sema::ActOnOpenMPParallelGenericLoopDirective(
Expand Down Expand Up @@ -15645,14 +15721,19 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
if (NameModifier == OMPD_unknown || NameModifier == OMPD_parallel)
CaptureRegion = OMPD_target;
break;
case OMPD_teams_loop:
case OMPD_target_teams_loop:
// For [target] teams loop, assume capture region is 'teams' so it's
// available for codegen later to use if/when necessary.
CaptureRegion = OMPD_teams;
break;
case OMPD_target_teams_distribute_parallel_for_simd:
if (OpenMPVersion >= 50 &&
(NameModifier == OMPD_unknown || NameModifier == OMPD_simd)) {
CaptureRegion = OMPD_parallel;
break;
}
[[fallthrough]];
case OMPD_target_teams_loop:
case OMPD_target_teams_distribute_parallel_for:
// If this clause applies to the nested 'parallel' region, capture within
// the 'teams' region, otherwise do not capture.
Expand Down Expand Up @@ -15775,7 +15856,6 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_loop:
case OMPD_teams_loop:
case OMPD_teams:
case OMPD_tile:
case OMPD_unroll:
Expand Down
9 changes: 2 additions & 7 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6626,8 +6626,6 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) {
"Invalid data, missing pragma diagnostic states");
FileID FID = ReadFileID(F, Record, Idx);
assert(FID.isValid() && "invalid FileID for transition");
// FIXME: Remove this once we don't need the side-effects.
(void)SourceMgr.getSLocEntryOrNull(FID);
unsigned Transitions = Record[Idx++];

// Note that we don't need to set up Parent/ParentOffset here, because
Expand Down Expand Up @@ -11756,11 +11754,8 @@ void ASTRecordReader::readOMPChildren(OMPChildren *Data) {

OpenACCClause *ASTRecordReader::readOpenACCClause() {
OpenACCClauseKind ClauseKind = readEnum<OpenACCClauseKind>();
// TODO OpenACC: We don't have these used anywhere, but eventually we should
// be constructing the Clauses with them, so these attributes can go away at
// that point.
[[maybe_unused]] SourceLocation BeginLoc = readSourceLocation();
[[maybe_unused]] SourceLocation EndLoc = readSourceLocation();
SourceLocation BeginLoc = readSourceLocation();
SourceLocation EndLoc = readSourceLocation();

switch (ClauseKind) {
case OpenACCClauseKind::Default: {
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Serialization/ASTReaderStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2769,6 +2769,7 @@ void ASTStmtReader::VisitOMPTeamsGenericLoopDirective(
/// Deserialize an OMPTargetTeamsGenericLoopDirective: the common loop
/// directive data first, then the boolean 'can be parallel for' flag that
/// follows it in the record.
void ASTStmtReader::VisitOMPTargetTeamsGenericLoopDirective(
    OMPTargetTeamsGenericLoopDirective *D) {
  VisitOMPLoopDirective(D);
  const bool CanBeParallelFor = Record.readBool();
  D->setCanBeParallelFor(CanBeParallelFor);
}

void ASTStmtReader::VisitOMPParallelGenericLoopDirective(
Expand Down
21 changes: 17 additions & 4 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,13 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {

namespace {

std::set<const FileEntry *> GetAffectingModuleMaps(const Preprocessor &PP,
Module *RootModule) {
std::optional<std::set<const FileEntry *>>
GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) {
// Without implicit module map search, there's no good reason to know about
// any module maps that are not affecting.
if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ImplicitModuleMaps)
return std::nullopt;

SmallVector<const Module *> ModulesToProcess{RootModule};

const HeaderSearch &HS = PP.getHeaderSearchInfo();
Expand Down Expand Up @@ -4735,8 +4740,16 @@ void ASTWriter::computeNonAffectingInputFiles() {
if (!Cache->OrigEntry)
continue;

if (!isModuleMap(File.getFileCharacteristic()) ||
llvm::is_contained(AffectingModuleMaps, *Cache->OrigEntry))
// Don't prune anything other than module maps.
if (!isModuleMap(File.getFileCharacteristic()))
continue;

// Don't prune module maps if all are guaranteed to be affecting.
if (!AffectingModuleMaps)
continue;

// Don't prune module maps that are affecting.
if (llvm::is_contained(*AffectingModuleMaps, *Cache->OrigEntry))
continue;

IsSLocAffecting[I] = false;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Serialization/ASTWriterStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2821,6 +2821,7 @@ void ASTStmtWriter::VisitOMPTeamsGenericLoopDirective(
/// Serialize an OMPTargetTeamsGenericLoopDirective: the common loop directive
/// data first, then the boolean 'can be parallel for' flag, and finally
/// select the statement code for this directive kind.
void ASTStmtWriter::VisitOMPTargetTeamsGenericLoopDirective(
    OMPTargetTeamsGenericLoopDirective *D) {
  VisitOMPLoopDirective(D);
  const bool CanBeParallelFor = D->canBeParallelFor();
  Record.writeBool(CanBeParallelFor);
  Code = serialization::STMT_OMP_TARGET_TEAMS_GENERIC_LOOP_DIRECTIVE;
}

Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/amdgpu-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-cumode %s 2>&1 | FileCheck --check-prefix=NO-CUMODE %s
// NO-CUMODE: "-target-feature" "-cumode"

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=PREC-MEM %s
// PREC-MEM: "-target-feature" "+precise-memory"

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s
// NO-PREC-MEM-NOT: {{".*precise-memory"}}
8 changes: 4 additions & 4 deletions clang/test/Driver/lld-repro.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
// RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \
// RUN: | sed -e 's/\\/\\\\/g' > %t.rsp

// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all 2>&1 \
// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all -o /dev/null 2>&1 \
// RUN: | FileCheck %s

// Test that the reproducer can still be created even when the input source cannot be preprocessed
// again, like when reading from stdin.
// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all 2>&1 < %s \
// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all -o /dev/null 2>&1 < %s \
// RUN: | FileCheck %s

// check that we still get lld's output
Expand All @@ -20,9 +20,9 @@
// CHECK-NEXT: note: diagnostic msg:
// CHECK: ********************

// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler 2>&1 \
// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-LINKER
// RUN: not %clang %s @%t.rsp 2>&1 \
// RUN: not %clang %s @%t.rsp -o /dev/null 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-LINKER

// NO-LINKER-NOT: Preprocessed source(s) and associated run script(s) are located at:
Expand Down
34 changes: 34 additions & 0 deletions clang/test/Modules/home-is-cwd-search-paths.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This test demonstrates how -fmodule-map-file-home-is-cwd, combined with
// -fmodules-embed-all-files, extends the importer search paths by relying on the
// side effects of pragma diagnostic mappings deserialization.

// RUN: rm -rf %t
// RUN: split-file %s %t

//--- dir1/a.modulemap
module a { header "a.h" }
//--- dir1/a.h
#include "search.h"
// The first compilation is configured such that -I search does contain the search.h header.
//--- dir1/search/search.h
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wparentheses"
#pragma clang diagnostic pop
// RUN: cd %t/dir1 && %clang_cc1 -fmodules -I search \
// RUN: -emit-module -fmodule-name=a a.modulemap -o %t/a.pcm \
// RUN: -fmodules-embed-all-files -fmodule-map-file-home-is-cwd

//--- dir2/b.modulemap
module b { header "b.h" }
//--- dir2/b.h
#include "search.h" // expected-error{{'search.h' file not found}}
// The second compilation is configured such that -I search is an empty directory.
// However, since b.pcm simply embeds the headers as "search/search.h", this compilation
// ends up seeing it too. This relies solely on ASTReader::ReadPragmaDiagnosticMappings()
// eagerly reading the corresponding INPUT_FILE record before header search happens.
// Removing the eager deserialization makes this header invisible and so does removing
// the pragma directives.
// RUN: mkdir %t/dir2/search
// RUN: cd %t/dir2 && %clang_cc1 -fmodules -I search \
// RUN: -emit-module -fmodule-name=b b.modulemap -o %t/b.pcm \
// RUN: -fmodule-file=%t/a.pcm -verify
48 changes: 39 additions & 9 deletions clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,35 +320,44 @@ int bar(int n){
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
Expand All @@ -360,6 +369,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
Expand Down Expand Up @@ -1566,35 +1576,44 @@ int bar(int n){
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
Expand All @@ -1606,6 +1625,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
Expand Down Expand Up @@ -2801,35 +2821,44 @@ int bar(int n){
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
Expand All @@ -2841,6 +2870,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
Expand Down

Large diffs are not rendered by default.

1,132 changes: 8 additions & 1,124 deletions clang/test/OpenMP/target_teams_generic_loop_codegen.cpp

Large diffs are not rendered by default.

587 changes: 587 additions & 0 deletions clang/test/OpenMP/target_teams_generic_loop_codegen_as_distribute.cpp

Large diffs are not rendered by default.

3,998 changes: 3,998 additions & 0 deletions clang/test/OpenMP/target_teams_generic_loop_codegen_as_parallel_for.cpp

Large diffs are not rendered by default.

710 changes: 114 additions & 596 deletions clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp

Large diffs are not rendered by default.

1,438 changes: 240 additions & 1,198 deletions clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp

Large diffs are not rendered by default.

1,360 changes: 148 additions & 1,212 deletions clang/test/OpenMP/teams_generic_loop_codegen-1.cpp

Large diffs are not rendered by default.

630 changes: 138 additions & 492 deletions clang/test/OpenMP/teams_generic_loop_codegen.cpp

Large diffs are not rendered by default.

808 changes: 136 additions & 672 deletions clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp

Large diffs are not rendered by default.

685 changes: 103 additions & 582 deletions clang/test/OpenMP/teams_generic_loop_private_codegen.cpp

Large diffs are not rendered by default.

785 changes: 109 additions & 676 deletions clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions flang/include/flang/Frontend/PreprocessorOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ struct PreprocessorOptions {
// -fno-reformat: Emit cooked character stream as -E output
bool noReformat{false};

// -dM: Show macro definitions with -dM -E
bool showMacros{false};

// Appends an entry for `name` to `macros`. The name is copied into an owning
// std::string and paired with the flag value `false`.
// NOTE(review): the flag presumably distinguishes a definition from an
// undefinition -- confirm against the declaration of `macros`.
void addMacroDef(llvm::StringRef name) {
macros.emplace_back(std::string(name), false);
}
Expand Down
3 changes: 3 additions & 0 deletions flang/include/flang/Parser/parsing.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "parse-tree.h"
#include "provenance.h"
#include "flang/Common/Fortran-features.h"
#include "flang/Parser/preprocessor.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <string>
Expand Down Expand Up @@ -59,6 +60,7 @@ class Parsing {
const SourceFile *Prescan(const std::string &path, Options);
void EmitPreprocessedSource(
llvm::raw_ostream &, bool lineDirectives = true) const;
void EmitPreprocessorMacros(llvm::raw_ostream &) const;
void DumpCookedChars(llvm::raw_ostream &) const;
void DumpProvenance(llvm::raw_ostream &) const;
void DumpParsingLog(llvm::raw_ostream &) const;
Expand All @@ -83,6 +85,7 @@ class Parsing {
const char *finalRestingPlace_{nullptr};
std::optional<Program> parseTree_;
ParsingLog log_;
Preprocessor preprocessor_{allCooked_.allSources()};
};
} // namespace Fortran::parser
#endif // FORTRAN_PARSER_PARSING_H_
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
// performed, so that special compiler command options &/or source file name
// extensions for preprocessing will not be necessary.

#include "token-sequence.h"
#include "flang/Parser/char-block.h"
#include "flang/Parser/provenance.h"
#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <list>
#include <stack>
Expand All @@ -39,7 +40,7 @@ class Definition {
Definition(const std::string &predefined, AllSources &);

bool isFunctionLike() const { return isFunctionLike_; }
std::size_t argumentCount() const { return argumentCount_; }
std::size_t argumentCount() const { return argNames_.size(); }
bool isVariadic() const { return isVariadic_; }
bool isDisabled() const { return isDisabled_; }
bool isPredefined() const { return isPredefined_; }
Expand All @@ -49,15 +50,21 @@ class Definition {

TokenSequence Apply(const std::vector<TokenSequence> &args, Prescanner &);

void Print(llvm::raw_ostream &out, const char *macroName = "") const;

private:
static TokenSequence Tokenize(const std::vector<std::string> &argNames,
const TokenSequence &token, std::size_t firstToken, std::size_t tokens);
// For a given token, return the index of the argument to which the token
// corresponds, or `argumentCount` if the token does not correspond to any
// argument.
std::size_t GetArgumentIndex(const CharBlock &token) const;

bool isFunctionLike_{false};
std::size_t argumentCount_{0};
bool isVariadic_{false};
bool isDisabled_{false};
bool isPredefined_{false};
std::vector<std::string> argNames_;
TokenSequence replacement_;
};

Expand Down Expand Up @@ -89,6 +96,8 @@ class Preprocessor {
// Implements a preprocessor directive.
void Directive(const TokenSequence &, Prescanner &);

void PrintMacros(llvm::raw_ostream &out) const;

private:
enum class IsElseActive { No, Yes };
enum class CanDeadElseAppear { No, Yes };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ class TokenSequence {
}
TokenSequence(TokenSequence &&that)
: start_{std::move(that.start_)}, nextStart_{that.nextStart_},
char_{std::move(that.char_)}, provenances_{
std::move(that.provenances_)} {}
char_{std::move(that.char_)},
provenances_{std::move(that.provenances_)} {}
TokenSequence(const std::string &s, Provenance p) { Put(s, p); }

TokenSequence &operator=(const TokenSequence &that) {
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,7 @@ static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts,

opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat);
opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P);
opts.showMacros = args.hasArg(clang::driver::options::OPT_dM);
}

/// Parses all semantic related arguments and populates the variables
Expand Down
4 changes: 3 additions & 1 deletion flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,9 @@ void PrintPreprocessedAction::executeAction() {

// Format or dump the prescanner's output
CompilerInstance &ci = this->getInstance();
if (ci.getInvocation().getPreprocessorOpts().noReformat) {
if (ci.getInvocation().getPreprocessorOpts().showMacros) {
ci.getParsing().EmitPreprocessorMacros(outForPP);
} else if (ci.getInvocation().getPreprocessorOpts().noReformat) {
ci.getParsing().DumpCookedChars(outForPP);
} else {
ci.getParsing().EmitPreprocessedSource(
Expand Down
17 changes: 10 additions & 7 deletions flang/lib/Parser/parsing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
//===----------------------------------------------------------------------===//

#include "flang/Parser/parsing.h"
#include "preprocessor.h"
#include "prescan.h"
#include "type-parsers.h"
#include "flang/Parser/message.h"
#include "flang/Parser/preprocessor.h"
#include "flang/Parser/provenance.h"
#include "flang/Parser/source.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -60,20 +60,19 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
}
}

Preprocessor preprocessor{allSources};
if (!options.predefinitions.empty()) {
preprocessor.DefineStandardMacros();
preprocessor_.DefineStandardMacros();
for (const auto &predef : options.predefinitions) {
if (predef.second) {
preprocessor.Define(predef.first, *predef.second);
preprocessor_.Define(predef.first, *predef.second);
} else {
preprocessor.Undefine(predef.first);
preprocessor_.Undefine(predef.first);
}
}
}
currentCooked_ = &allCooked_.NewCookedSource();
Prescanner prescanner{
messages_, *currentCooked_, preprocessor, options.features};
messages_, *currentCooked_, preprocessor_, options.features};
prescanner.set_fixedForm(options.isFixedForm)
.set_fixedFormColumnLimit(options.fixedFormColumns)
.AddCompilerDirectiveSentinel("dir$");
Expand All @@ -87,7 +86,7 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
if (options.features.IsEnabled(LanguageFeature::CUDA)) {
prescanner.AddCompilerDirectiveSentinel("$cuf");
prescanner.AddCompilerDirectiveSentinel("@cuf");
preprocessor.Define("_CUDA", "1");
preprocessor_.Define("_CUDA", "1");
}
ProvenanceRange range{allSources.AddIncludedFile(
*sourceFile, ProvenanceRange{}, options.isModuleFile)};
Expand All @@ -107,6 +106,10 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
return sourceFile;
}

// Writes the macro definitions held by this object's preprocessor_ member to
// `out`. All formatting is delegated to Preprocessor::PrintMacros; this
// function adds no output of its own.
void Parsing::EmitPreprocessorMacros(llvm::raw_ostream &out) const {
preprocessor_.PrintMacros(out);
}

void Parsing::EmitPreprocessedSource(
llvm::raw_ostream &out, bool lineDirectives) const {
const std::string *sourcePath{nullptr};
Expand Down
68 changes: 61 additions & 7 deletions flang/lib/Parser/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "preprocessor.h"
#include "flang/Parser/preprocessor.h"

#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
Expand All @@ -21,6 +22,7 @@
#include <optional>
#include <set>
#include <utility>
#include <vector>

namespace Fortran::parser {

Expand All @@ -31,8 +33,7 @@ Definition::Definition(
Definition::Definition(const std::vector<std::string> &argNames,
const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
bool isVariadic)
: isFunctionLike_{true},
argumentCount_(argNames.size()), isVariadic_{isVariadic},
: isFunctionLike_{true}, isVariadic_{isVariadic}, argNames_{argNames},
replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}

Definition::Definition(const std::string &predefined, AllSources &sources)
Expand All @@ -46,6 +47,37 @@ bool Definition::set_isDisabled(bool disable) {
return was;
}

// Prints the part of a "#define" line that follows the macro name.
//  - Object-like macro: a single space followed by the replacement text.
//  - Function-like macro: the parenthesized parameter list (with a trailing
//    "..." when the macro is variadic), a space, and then the replacement
//    tokens, in which references to parameters are printed using the
//    parameter names.
// `macroName` is accepted for the caller's convenience but is not used here;
// the caller is expected to have printed the name already.
void Definition::Print(llvm::raw_ostream &out, const char *macroName) const {
  if (!isFunctionLike_) {
    // If it's not a function-like macro, then just print the replacement.
    out << ' ' << replacement_.ToString();
    return;
  }

  size_t argCount{argumentCount()};

  out << '(';
  for (size_t i{0}; i != argCount; ++i) {
    if (i != 0) {
      out << ", ";
    }
    out << argNames_[i];
  }
  if (isVariadic_) {
    // Emit the separating comma only when named parameters precede the
    // ellipsis; otherwise "#define F(...)" would be printed as "F(, ...)".
    out << (argCount != 0 ? ", ..." : "...");
  }
  out << ") ";

  for (size_t i{0}, e{replacement_.SizeInTokens()}; i != e; ++i) {
    std::string tok{replacement_.TokenAt(i).ToString()};
    // Tokens that refer to a parameter are mapped back to the parameter's
    // name; every other token is printed verbatim.
    if (size_t idx{GetArgumentIndex(tok)}; idx < argCount) {
      out << argNames_[idx];
    } else {
      out << tok;
    }
  }
}

static bool IsLegalIdentifierStart(const CharBlock &cpl) {
return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}
Expand Down Expand Up @@ -73,6 +105,13 @@ TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
return result;
}

std::size_t Definition::GetArgumentIndex(const CharBlock &token) const {
  // A token that starts with '~' marks an argument substitution; its second
  // character encodes the argument index as an offset from 'A'.
  const bool isArgumentMarker{token.size() >= 2 && token[0] == '~'};
  if (!isArgumentMarker) {
    // Not an argument reference: report the "no argument" value.
    return argumentCount();
  }
  return static_cast<size_t>(token[1] - 'A');
}

static TokenSequence Stringify(
const TokenSequence &tokens, AllSources &allSources) {
TokenSequence result;
Expand Down Expand Up @@ -159,7 +198,7 @@ TokenSequence Definition::Apply(
continue;
}
if (bytes == 2 && token[0] == '~') { // argument substitution
std::size_t index = token[1] - 'A';
std::size_t index{GetArgumentIndex(token)};
if (index >= args.size()) {
continue;
}
Expand Down Expand Up @@ -202,8 +241,8 @@ TokenSequence Definition::Apply(
Provenance commaProvenance{
prescanner.preprocessor().allSources().CompilerInsertionProvenance(
',')};
for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
if (k > argumentCount_) {
for (std::size_t k{argumentCount()}; k < args.size(); ++k) {
if (k > argumentCount()) {
result.Put(","s, commaProvenance);
}
result.Put(args[k]);
Expand All @@ -212,7 +251,7 @@ TokenSequence Definition::Apply(
j + 2 < tokens && replacement_.TokenAt(j + 1).OnlyNonBlank() == '(' &&
parenthesesNesting == 0) {
parenthesesNesting = 1;
skipping = args.size() == argumentCount_;
skipping = args.size() == argumentCount();
++j;
} else {
if (parenthesesNesting > 0) {
Expand Down Expand Up @@ -713,6 +752,21 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner &prescanner) {
}
}

void Preprocessor::PrintMacros(llvm::raw_ostream &out) const {
  // Gather the macro names into a std::set first: it keeps its elements
  // sorted, so the definitions come out in lexicographic order of name.
  std::set<std::string> sortedNames;
  for (const auto &entry : definitions_) {
    sortedNames.emplace(entry.first.ToString());
  }

  // One "#define NAME<definition>" line per macro.
  for (const std::string &macro : sortedNames) {
    out << "#define " << macro;
    const Definition &definition{definitions_.at(macro)};
    definition.Print(out, macro.c_str());
    out << '\n';
  }
}

CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
names_.push_back(t.ToString());
return {names_.back().data(), names_.back().size()};
Expand Down
4 changes: 2 additions & 2 deletions flang/lib/Parser/prescan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===//

#include "prescan.h"
#include "preprocessor.h"
#include "token-sequence.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/preprocessor.h"
#include "flang/Parser/source.h"
#include "flang/Parser/token-sequence.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstring>
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Parser/prescan.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
// fixed form character literals on truncated card images, file
// inclusion, and driving the Fortran source preprocessor.

#include "token-sequence.h"
#include "flang/Common/Fortran-features.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/provenance.h"
#include "flang/Parser/token-sequence.h"
#include <bitset>
#include <optional>
#include <string>
Expand Down
3 changes: 2 additions & 1 deletion flang/lib/Parser/token-sequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "token-sequence.h"
#include "flang/Parser/token-sequence.h"

#include "prescan.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
Expand Down
1 change: 1 addition & 0 deletions flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
! CHECK-NEXT: -ccc-print-phases Dump list of actions to perform
! CHECK-NEXT: -cpp Enable predefined and command line preprocessor macros
! CHECK-NEXT: -c Only run preprocess, compile, and assemble steps
! CHECK-NEXT: -dM Print macro definitions in -E mode instead of normal output
! CHECK-NEXT: -dumpmachine Display the compiler's target processor
! CHECK-NEXT: -dumpversion Display the version of the compiler
! CHECK-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
! HELP-NEXT: -### Print (but do not run) the commands to run for this compilation
! HELP-NEXT: -cpp Enable predefined and command line preprocessor macros
! HELP-NEXT: -c Only run preprocess, compile, and assemble steps
! HELP-NEXT: -dM Print macro definitions in -E mode instead of normal output
! HELP-NEXT: -dumpmachine Display the compiler's target processor
! HELP-NEXT: -dumpversion Display the version of the compiler
! HELP-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
Expand Down Expand Up @@ -155,6 +156,7 @@
! HELP-FC1-NEXT:OPTIONS:
! HELP-FC1-NEXT: -cpp Enable predefined and command line preprocessor macros
! HELP-FC1-NEXT: --dependent-lib=<value> Add dependent library
! HELP-FC1-NEXT: -dM Print macro definitions in -E mode instead of normal output
! HELP-FC1-NEXT: -D <macro>=<value> Define <macro> to <value> (or 1 if <value> omitted)
! HELP-FC1-NEXT: -emit-fir Build the parse tree, then lower it to FIR
! HELP-FC1-NEXT: -emit-hlfir Build the parse tree, then lower it to HLFIR
Expand Down
14 changes: 14 additions & 0 deletions flang/test/Preprocessing/show-macros1.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
! RUN: %flang -dM -E -o - %s | FileCheck %s

! Check the default macros. Omit certain ones such as __LINE__
! or __FILE__, or target-specific ones, like __x86_64__.

! Macros are printed in alphabetical order.

! CHECK: #define __DATE__
! CHECK: #define __TIME__
! CHECK: #define __flang__
! CHECK: #define __flang_major__
! CHECK: #define __flang_minor__
! CHECK: #define __flang_patchlevel__

6 changes: 6 additions & 0 deletions flang/test/Preprocessing/show-macros2.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
! RUN: %flang -DFOO -DBAR=FOO -dM -E -o - %s | FileCheck %s

! Check command line definitions

! CHECK: #define BAR FOO
! CHECK: #define FOO 1
9 changes: 9 additions & 0 deletions flang/test/Preprocessing/show-macros3.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
! RUN: %flang -dM -E -o - %s | FileCheck %s

! Variadic macro
#define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__)
! CHECK: #define FOO1(X, Y, ...) bar(bar(X, Y), __VA_ARGS__)

! Macro with an unused parameter
#define FOO2(X, Y, Z) (X + Z)
! CHECK: #define FOO2(X, Y, Z) (X + Z)
7 changes: 6 additions & 1 deletion libc/docs/gpu/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ The simplest way to build the GPU libc is to use the existing LLVM runtimes
support. This will automatically handle bootstrapping an up-to-date ``clang``
compiler and using it to build the C library. The following CMake invocation
will instruct it to build the ``libc`` runtime targeting both AMD and NVIDIA
GPUs.
GPUs. The ``LIBC_GPU_BUILD`` option can also be enabled to add the relevant
arguments automatically.

.. code-block:: sh

Expand Down Expand Up @@ -234,6 +235,10 @@ standard runtime build.
This flag controls whether or not the libc build will generate its own
headers. This must always be on when targeting the GPU.

**LIBC_GPU_BUILD**:BOOL
Shorthand for enabling GPU support. Equivalent to enabling support for both
AMDGPU and NVPTX builds for ``libc``.

**LIBC_GPU_TEST_ARCHITECTURE**:STRING
Sets the architecture used to build the GPU tests for, such as ``gfx90a`` or
``sm_80`` for AMD and NVIDIA GPUs respectively. The default behavior is to
Expand Down
2 changes: 1 addition & 1 deletion libc/hdr/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ add_proxy_header_library(
fenv_macros.h
FULL_BUILD_DEPENDS
libc.include.llvm-libc-macros.fenv_macros
libc.incude.fenv
libc.include.fenv
)
1 change: 1 addition & 0 deletions libcxx/include/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,7 @@ set(files
__utility/pair.h
__utility/piecewise_construct.h
__utility/priority_tag.h
__utility/private_constructor_tag.h
__utility/rel_ops.h
__utility/small_buffer.h
__utility/swap.h
Expand Down
2 changes: 1 addition & 1 deletion libcxx/include/__algorithm/simd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ _LIBCPP_PUSH_MACROS
#include <__undef_macros>

// TODO: Find out how altivec changes things and allow vectorizations there too.
#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1700 && !defined(__ALTIVEC__)
#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__)
# define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1
#else
# define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0
Expand Down
4 changes: 2 additions & 2 deletions libcxx/include/__chrono/leap_second.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include <__compare/ordering.h>
# include <__compare/three_way_comparable.h>
# include <__config>
# include <__utility/private_constructor_tag.h>

# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand All @@ -35,9 +36,8 @@ namespace chrono {

class leap_second {
public:
struct __constructor_tag;
[[nodiscard]]
_LIBCPP_HIDE_FROM_ABI explicit constexpr leap_second(__constructor_tag&&, sys_seconds __date, seconds __value)
_LIBCPP_HIDE_FROM_ABI explicit constexpr leap_second(__private_constructor_tag, sys_seconds __date, seconds __value)
: __date_(__date), __value_(__value) {}

_LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default;
Expand Down
4 changes: 2 additions & 2 deletions libcxx/include/__chrono/time_zone_link.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

# include <__compare/strong_order.h>
# include <__config>
# include <__utility/private_constructor_tag.h>
# include <string>
# include <string_view>

Expand All @@ -37,9 +38,8 @@ namespace chrono {

class time_zone_link {
public:
struct __constructor_tag;
_LIBCPP_NODISCARD_EXT
_LIBCPP_HIDE_FROM_ABI explicit time_zone_link(__constructor_tag&&, string_view __name, string_view __target)
_LIBCPP_HIDE_FROM_ABI explicit time_zone_link(__private_constructor_tag, string_view __name, string_view __target)
: __name_{__name}, __target_{__target} {}

_LIBCPP_HIDE_FROM_ABI time_zone_link(time_zone_link&&) = default;
Expand Down
4 changes: 2 additions & 2 deletions libcxx/include/__config
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
// Warn if a compiler version is used that is not supported anymore
// LLVM RELEASE Update the minimum compiler versions
# if defined(_LIBCPP_CLANG_VER)
# if _LIBCPP_CLANG_VER < 1600
# warning "Libc++ only supports Clang 16 and later"
# if _LIBCPP_CLANG_VER < 1700
# warning "Libc++ only supports Clang 17 and later"
# endif
# elif defined(_LIBCPP_APPLE_CLANG_VER)
# if _LIBCPP_APPLE_CLANG_VER < 1500
Expand Down
12 changes: 6 additions & 6 deletions libcxx/include/__locale
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <__mutex/once_flag.h>
#include <__type_traits/make_unsigned.h>
#include <__utility/no_destroy.h>
#include <__utility/private_constructor_tag.h>
#include <cctype>
#include <clocale>
#include <cstdint>
Expand Down Expand Up @@ -97,8 +98,7 @@ private:

template <class>
friend struct __no_destroy;
struct __private_tag {};
_LIBCPP_HIDE_FROM_ABI explicit locale(__private_tag, __imp* __loc) : __locale_(__loc) {}
_LIBCPP_HIDE_FROM_ABI explicit locale(__private_constructor_tag, __imp* __loc) : __locale_(__loc) {}

void __install_ctor(const locale&, facet*, long);
static locale& __global();
Expand Down Expand Up @@ -1248,10 +1248,10 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char, char
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<wchar_t, char, mbstate_t>;
#endif
extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
codecvt_byname<char16_t, char, mbstate_t>; // deprecated in C++20
extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
codecvt_byname<char32_t, char, mbstate_t>; // deprecated in C++20
extern template class _LIBCPP_DEPRECATED_IN_CXX20
_LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char16_t, char, mbstate_t>; // deprecated in C++20
extern template class _LIBCPP_DEPRECATED_IN_CXX20
_LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char32_t, char, mbstate_t>; // deprecated in C++20
#ifndef _LIBCPP_HAS_NO_CHAR8_T
extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char16_t, char8_t, mbstate_t>; // C++20
extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char32_t, char8_t, mbstate_t>; // C++20
Expand Down
9 changes: 4 additions & 5 deletions libcxx/include/__stop_token/stop_callback.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <__type_traits/is_nothrow_constructible.h>
#include <__utility/forward.h>
#include <__utility/move.h>
#include <__utility/private_constructor_tag.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand Down Expand Up @@ -49,13 +50,13 @@ class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base {
requires constructible_from<_Callback, _Cb>
_LIBCPP_HIDE_FROM_ABI explicit stop_callback(const stop_token& __st,
_Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>)
: stop_callback(__private_tag{}, __st.__state_, std::forward<_Cb>(__cb)) {}
: stop_callback(__private_constructor_tag{}, __st.__state_, std::forward<_Cb>(__cb)) {}

template <class _Cb>
requires constructible_from<_Callback, _Cb>
_LIBCPP_HIDE_FROM_ABI explicit stop_callback(stop_token&& __st,
_Cb&& __cb) noexcept(is_nothrow_constructible_v<_Callback, _Cb>)
: stop_callback(__private_tag{}, std::move(__st.__state_), std::forward<_Cb>(__cb)) {}
: stop_callback(__private_constructor_tag{}, std::move(__st.__state_), std::forward<_Cb>(__cb)) {}

_LIBCPP_HIDE_FROM_ABI ~stop_callback() {
if (__state_) {
Expand All @@ -74,10 +75,8 @@ class _LIBCPP_AVAILABILITY_SYNC stop_callback : private __stop_callback_base {

friend __stop_callback_base;

struct __private_tag {};

template <class _StatePtr, class _Cb>
_LIBCPP_HIDE_FROM_ABI explicit stop_callback(__private_tag, _StatePtr&& __state, _Cb&& __cb) noexcept(
_LIBCPP_HIDE_FROM_ABI explicit stop_callback(__private_constructor_tag, _StatePtr&& __state, _Cb&& __cb) noexcept(
is_nothrow_constructible_v<_Callback, _Cb>)
: __stop_callback_base([](__stop_callback_base* __cb_base) noexcept {
// stop callback is supposed to only be called once
Expand Down
28 changes: 28 additions & 0 deletions libcxx/include/__utility/private_constructor_tag.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H
#define _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H

#include <__config>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

// This tag allows defining non-standard constructors that are meant for the
// library's internal use only. Such a constructor takes a
// __private_constructor_tag as a parameter, so it can only be selected by code
// that spells this reserved name; user code is not allowed to use reserved
// names and therefore cannot call it.
struct __private_constructor_tag {};

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP__UTILITY_PRIVATE_CONSTRUCTOR_TAG_H
21 changes: 11 additions & 10 deletions libcxx/include/module.modulemap
Original file line number Diff line number Diff line change
Expand Up @@ -2090,18 +2090,19 @@ module std_private_utility_pair [system] {
export std_private_type_traits_is_nothrow_move_assignable
export std_private_utility_pair_fwd
}
module std_private_utility_pair_fwd [system] { header "__fwd/pair.h" }
module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" }
module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" }
module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" }
module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" }
module std_private_utility_swap [system] {
module std_private_utility_pair_fwd [system] { header "__fwd/pair.h" }
module std_private_utility_piecewise_construct [system] { header "__utility/piecewise_construct.h" }
module std_private_utility_priority_tag [system] { header "__utility/priority_tag.h" }
module std_private_utility_private_constructor_tag [system] { header "__utility/private_constructor_tag.h" }
module std_private_utility_rel_ops [system] { header "__utility/rel_ops.h" }
module std_private_utility_small_buffer [system] { header "__utility/small_buffer.h" }
module std_private_utility_swap [system] {
header "__utility/swap.h"
export std_private_type_traits_is_swappable
}
module std_private_utility_to_underlying [system] { header "__utility/to_underlying.h" }
module std_private_utility_unreachable [system] { header "__utility/unreachable.h" }
module std_private_utility_to_underlying [system] { header "__utility/to_underlying.h" }
module std_private_utility_unreachable [system] { header "__utility/unreachable.h" }

module std_private_variant_monostate [system] { header "__variant/monostate.h" }
module std_private_variant_monostate [system] { header "__variant/monostate.h" }

module std_private_vector_fwd [system] { header "__fwd/vector.h" }
module std_private_vector_fwd [system] { header "__fwd/vector.h" }
2 changes: 0 additions & 2 deletions libcxx/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,6 @@ endif()

if (LIBCXX_ENABLE_LOCALIZATION AND LIBCXX_ENABLE_FILESYSTEM AND LIBCXX_ENABLE_TIME_ZONE_DATABASE)
list(APPEND LIBCXX_EXPERIMENTAL_SOURCES
include/tzdb/leap_second_private.h
include/tzdb/time_zone_link_private.h
include/tzdb/time_zone_private.h
include/tzdb/types_private.h
include/tzdb/tzdb_list_private.h
Expand Down
27 changes: 0 additions & 27 deletions libcxx/src/include/tzdb/leap_second_private.h

This file was deleted.

27 changes: 0 additions & 27 deletions libcxx/src/include/tzdb/time_zone_link_private.h

This file was deleted.

2 changes: 1 addition & 1 deletion libcxx/src/locale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ constinit __no_destroy<locale::__imp>
locale::__imp::classic_locale_imp_(__uninitialized_tag{}); // initialized below in classic()

const locale& locale::classic() {
static const __no_destroy<locale> classic_locale(__private_tag{}, [] {
static const __no_destroy<locale> classic_locale(__private_constructor_tag{}, [] {
// executed exactly once on first initialization of `classic_locale`
locale::__imp::classic_locale_imp_.__emplace(1u);
return &locale::__imp::classic_locale_imp_.__get();
Expand Down
6 changes: 2 additions & 4 deletions libcxx/src/tzdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#include <stdexcept>
#include <string>

#include "include/tzdb/leap_second_private.h"
#include "include/tzdb/time_zone_link_private.h"
#include "include/tzdb/time_zone_private.h"
#include "include/tzdb/types_private.h"
#include "include/tzdb/tzdb_list_private.h"
Expand Down Expand Up @@ -582,7 +580,7 @@ static void __parse_link(tzdb& __tzdb, istream& __input) {
string __name = chrono::__parse_string(__input);
chrono::__skip_line(__input);

__tzdb.links.emplace_back(time_zone_link::__constructor_tag{}, std::move(__name), std::move(__target));
__tzdb.links.emplace_back(std::__private_constructor_tag{}, std::move(__name), std::move(__target));
}

static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) {
Expand Down Expand Up @@ -649,7 +647,7 @@ static void __parse_leap_seconds(vector<leap_second>& __leap_seconds, istream&&
seconds __value{chrono::__parse_integral(__input, false)};
chrono::__skip_line(__input);

__leap_seconds.emplace_back(leap_second::__constructor_tag{}, __date, __value);
__leap_seconds.emplace_back(std::__private_constructor_tag{}, __date, __value);
}
}

Expand Down
2 changes: 1 addition & 1 deletion libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// UNSUPPORTED: c++03

// TODO: Investigate these failures which break the CI.
// UNSUPPORTED: clang-16, clang-17, clang-18, clang-19
// UNSUPPORTED: clang-17, clang-18, clang-19

// TODO: Investigate this failure on GCC 13 (in Ubuntu Jammy)
// UNSUPPORTED: gcc-13
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// struct __private_constructor_tag{};

// The private constructor tag is intended to be a trivial type that can easily
// be used to mark a constructor exposition-only.
//
// Tests whether the type is trivial.

#include <__utility/private_constructor_tag.h>
#include <type_traits>

static_assert(std::is_trivial<std::__private_constructor_tag>::value, "");
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
// There is a bug in older versions of Clang that causes trouble with constraints in classes like
// `ContainerWithDirectCtr`.
// XFAIL: clang-16, apple-clang-15
// XFAIL: apple-clang-15

// template<template<class...> class C, input_range R, class... Args>
// constexpr auto to(R&& r, Args&&... args); // Since C++23
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
#include <type_traits>
#include <cassert>

// Add the include path required by test_chrono_leap_second.h when using libc++.
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
#include "test_chrono_leap_second.h"

constexpr bool test() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
#include <concepts>
#include <cassert>

// Add the include path required by test_chrono_leap_second.h when using libc++.
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
#include "test_chrono_leap_second.h"

constexpr bool test() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@

#include "test_macros.h"

// Add the include path required by test_chrono_leap_second.h when using libc++.
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
#include "test_chrono_leap_second.h"

constexpr void test(const std::chrono::leap_second leap_second, std::chrono::sys_seconds expected) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@

#include "test_macros.h"

// Add the include path required by test_chrono_leap_second.h when using libc++.
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
#include "test_chrono_leap_second.h"

constexpr void test(const std::chrono::leap_second leap_second, std::chrono::seconds expected) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@
#include "test_macros.h"
#include "test_comparisons.h"

// Add the include path required by test_chrono_leap_second.h when using libc++.
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
#include "test_chrono_leap_second.h"

constexpr void test_comparison(const std::chrono::leap_second lhs, const std::chrono::leap_second rhs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
// The tested functionality needs deducing this.
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <format>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
// The tested functionality needs deducing this.
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <format>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <format>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// The tested functionality needs deducing this.
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <variant>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// The tested functionality needs deducing this.
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <variant>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
// The tested functionality needs deducing this.
// UNSUPPORTED: clang-16 || clang-17
// UNSUPPORTED: clang-17
// XFAIL: apple-clang

// <variant>
Expand Down
9 changes: 2 additions & 7 deletions libcxx/test/support/test_chrono_leap_second.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,11 @@

#ifdef _LIBCPP_VERSION

// In order to find this include the calling test needs to provide this path in
// the search path. Typically this looks like:
// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include
// where the number of `../` sequences depends on the subdirectory level of the
// test.
# include "tzdb/leap_second_private.h" // Header in the dylib
# include <__utility/private_constructor_tag.h>

inline constexpr std::chrono::leap_second
test_leap_second_create(const std::chrono::sys_seconds& date, const std::chrono::seconds& value) {
return std::chrono::leap_second{std::chrono::leap_second::__constructor_tag{}, date, value};
return std::chrono::leap_second{std::__private_constructor_tag{}, date, value};
}

#else // _LIBCPP_VERSION
Expand Down
1 change: 1 addition & 0 deletions libcxx/utils/generate_iwyu_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def IWYU_mapping(header: str) -> typing.Optional[typing.List[str]]:
"__debug_utils/.+",
"__fwd/get[.]h",
"__support/.+",
"__utility/private_constructor_tag.h",
]
if any(re.match(pattern, header) for pattern in ignore):
return None
Expand Down
375 changes: 318 additions & 57 deletions lld/ELF/SyntheticSections.cpp

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions lld/ELF/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -1284,11 +1284,15 @@ class MemtagGlobalDescriptors final : public SyntheticSection {
SmallVector<const Symbol *, 0> symbols;
};

template <class ELFT> void createSyntheticSections();
InputSection *createInterpSection();
MergeInputSection *createCommentSection();
template <class ELFT> void splitSections();
void combineEhSections();

bool hasMemtag();
bool canHaveMemtagGlobals();

template <typename ELFT> void writeEhdr(uint8_t *buf, Partition &part);
template <typename ELFT> void writePhdrs(uint8_t *buf, Partition &part);

Expand Down
292 changes: 0 additions & 292 deletions lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,6 @@ template <class ELFT> class Writer {
};
} // anonymous namespace

// Returns true when an .interp section should be emitted: the output is
// neither relocatable nor shared, a dynamic linker path has been configured,
// and the linker script reports that it needs the section.
static bool needsInterpSection() {
  if (config->relocatable || config->shared)
    return false;
  if (config->dynamicLinker.empty())
    return false;
  return script->needsInterpSection();
}

template <class ELFT> void elf::writeResult() {
Writer<ELFT>().run();
}
Expand Down Expand Up @@ -297,22 +292,6 @@ static void demoteSymbolsAndComputeIsPreemptible() {
}
}

// Returns true when targeting AArch64 with an Android memtag mode other than
// NT_MEMTAG_LEVEL_NONE.
bool elf::hasMemtag() {
  if (config->emachine != EM_AARCH64)
    return false;
  return config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE;
}

// MTE globals are currently unsupported in fully static executables because
// the implementation depends on:
// - a dynamic loader that processes relocations, and
// - dynamic entries.
// The restriction could be lifted later by borrowing some of the techniques
// ifuncs use in fully static executables.
//
// Returns true when memtag is enabled and the output is relocatable, shared,
// or needs an .interp section.
bool elf::canHaveMemtagGlobals() {
  if (!hasMemtag())
    return false;
  return config->relocatable || config->shared || needsInterpSection();
}

static OutputSection *findSection(StringRef name, unsigned partition = 1) {
for (SectionCommand *cmd : script->sectionCommands)
if (auto *osd = dyn_cast<OutputDesc>(cmd))
Expand All @@ -321,272 +300,6 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) {
return nullptr;
}

// Creates the linker-synthesized sections (stored in `in`, `Out` and in each
// Partition) and appends them to ctx.inputSections. Which sections are
// created depends on the target machine (config->emachine) and on the options
// recorded in `config`; the order of the add() calls below is the order in
// which the sections are appended.
template <class ELFT> void elf::createSyntheticSections() {
// Initialize all pointers with NULL. This is needed because
// you can call lld::elf::main more than once as a library.
Out::tlsPhdr = nullptr;
Out::preinitArray = nullptr;
Out::initArray = nullptr;
Out::finiArray = nullptr;

// Add the .interp section first because it is not a SyntheticSection.
// The removeUnusedSyntheticSections() function relies on the
// SyntheticSections coming last.
if (needsInterpSection()) {
// One .interp input section per partition; partition numbers start at 1.
for (size_t i = 1; i <= partitions.size(); ++i) {
InputSection *sec = createInterpSection();
sec->partition = i;
ctx.inputSections.push_back(sec);
}
}

// Helper that registers a synthetic section as an input section.
auto add = [](SyntheticSection &sec) { ctx.inputSections.push_back(&sec); };

in.shStrTab = std::make_unique<StringTableSection>(".shstrtab", false);

Out::programHeaders = make<OutputSection>("", 0, SHF_ALLOC);
Out::programHeaders->addralign = config->wordsize;

// .strtab, .symtab and the section-index table are only created when the
// strip policy is not StripPolicy::All. They are appended at the very end of
// this function.
if (config->strip != StripPolicy::All) {
in.strTab = std::make_unique<StringTableSection>(".strtab", false);
in.symTab = std::make_unique<SymbolTableSection<ELFT>>(*in.strTab);
in.symTabShndx = std::make_unique<SymtabShndxSection>();
}

in.bss = std::make_unique<BssSection>(".bss", 0, 1);
add(*in.bss);

// If there is a SECTIONS command and a .data.rel.ro section name use name
// .data.rel.ro.bss so that we match in the .data.rel.ro output section.
// This makes sure our relro is contiguous.
bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro");
in.bssRelRo = std::make_unique<BssSection>(
hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1);
add(*in.bssRelRo);

// Add MIPS-specific sections.
if (config->emachine == EM_MIPS) {
if (!config->shared && config->hasDynSymTab) {
in.mipsRldMap = std::make_unique<MipsRldMapSection>();
add(*in.mipsRldMap);
}
// Each create() result is stored and only added when it is non-null.
if ((in.mipsAbiFlags = MipsAbiFlagsSection<ELFT>::create()))
add(*in.mipsAbiFlags);
if ((in.mipsOptions = MipsOptionsSection<ELFT>::create()))
add(*in.mipsOptions);
if ((in.mipsReginfo = MipsReginfoSection<ELFT>::create()))
add(*in.mipsReginfo);
}

StringRef relaDynName = config->isRela ? ".rela.dyn" : ".rel.dyn";

const unsigned threadCount = config->threadCount;
// Per-partition sections. The `add` declared inside the loop shadows the
// outer helper and additionally tags each section with the partition number.
for (Partition &part : partitions) {
auto add = [&](SyntheticSection &sec) {
sec.partition = part.getNumber();
ctx.inputSections.push_back(&sec);
};

// Only partitions with a non-empty name get their own ELF header and program
// header sections.
if (!part.name.empty()) {
part.elfHeader = std::make_unique<PartitionElfHeaderSection<ELFT>>();
part.elfHeader->name = part.name;
add(*part.elfHeader);

part.programHeaders =
std::make_unique<PartitionProgramHeadersSection<ELFT>>();
add(*part.programHeaders);
}

if (config->buildId != BuildIdKind::None) {
part.buildId = std::make_unique<BuildIdSection>();
add(*part.buildId);
}

// These three are always constructed, but only added to the input sections
// below when config->hasDynSymTab is set.
part.dynStrTab = std::make_unique<StringTableSection>(".dynstr", true);
part.dynSymTab =
std::make_unique<SymbolTableSection<ELFT>>(*part.dynStrTab);
part.dynamic = std::make_unique<DynamicSection<ELFT>>();

if (hasMemtag()) {
part.memtagAndroidNote = std::make_unique<MemtagAndroidNote>();
add(*part.memtagAndroidNote);
if (canHaveMemtagGlobals()) {
part.memtagGlobalDescriptors =
std::make_unique<MemtagGlobalDescriptors>();
add(*part.memtagGlobalDescriptors);
}
}

// Pick the dynamic relocation section implementation: Android packed format
// or the regular one. It is added after the dynamic symbol table group.
if (config->androidPackDynRelocs)
part.relaDyn = std::make_unique<AndroidPackedRelocationSection<ELFT>>(
relaDynName, threadCount);
else
part.relaDyn = std::make_unique<RelocationSection<ELFT>>(
relaDynName, config->zCombreloc, threadCount);

// Dynamic symbol table and the sections that accompany it.
if (config->hasDynSymTab) {
add(*part.dynSymTab);

part.verSym = std::make_unique<VersionTableSection>();
add(*part.verSym);

if (!namedVersionDefs().empty()) {
part.verDef = std::make_unique<VersionDefinitionSection>();
add(*part.verDef);
}

part.verNeed = std::make_unique<VersionNeedSection<ELFT>>();
add(*part.verNeed);

if (config->gnuHash) {
part.gnuHashTab = std::make_unique<GnuHashTableSection>();
add(*part.gnuHashTab);
}

if (config->sysvHash) {
part.hashTab = std::make_unique<HashTableSection>();
add(*part.hashTab);
}

add(*part.dynamic);
add(*part.dynStrTab);
}
add(*part.relaDyn);

if (config->relrPackDynRelocs) {
part.relrDyn = std::make_unique<RelrSection<ELFT>>(threadCount);
add(*part.relrDyn);
}

// Exception-handling sections are not created for relocatable output.
if (!config->relocatable) {
if (config->ehFrameHdr) {
part.ehFrameHdr = std::make_unique<EhFrameHeader>();
add(*part.ehFrameHdr);
}
part.ehFrame = std::make_unique<EhFrameSection>();
add(*part.ehFrame);

if (config->emachine == EM_ARM) {
// This section replaces all the individual .ARM.exidx InputSections.
part.armExidx = std::make_unique<ARMExidxSyntheticSection>();
add(*part.armExidx);
}
}

if (!config->packageMetadata.empty()) {
part.packageMetadataNote = std::make_unique<PackageMetadataNote>();
add(*part.packageMetadataNote);
}
}

if (partitions.size() != 1) {
// Create the partition end marker. This needs to be in partition number 255
// so that it is sorted after all other partitions. It also has other
// special handling (see createPhdrs() and combineEhSections()).
in.partEnd =
std::make_unique<BssSection>(".part.end", config->maxPageSize, 1);
in.partEnd->partition = 255;
add(*in.partEnd);

// The partition index is bracketed by the optional symbols
// __part_index_begin (offset 0) and __part_index_end (offset getSize()).
in.partIndex = std::make_unique<PartitionIndexSection>();
addOptionalRegular("__part_index_begin", in.partIndex.get(), 0);
addOptionalRegular("__part_index_end", in.partIndex.get(),
in.partIndex->getSize());
add(*in.partIndex);
}

// Add .got. MIPS' .got is so different from the other archs,
// it has its own class.
if (config->emachine == EM_MIPS) {
in.mipsGot = std::make_unique<MipsGotSection>();
add(*in.mipsGot);
} else {
in.got = std::make_unique<GotSection>();
add(*in.got);
}

if (config->emachine == EM_PPC) {
in.ppc32Got2 = std::make_unique<PPC32Got2Section>();
add(*in.ppc32Got2);
}

if (config->emachine == EM_PPC64) {
in.ppc64LongBranchTarget = std::make_unique<PPC64LongBranchTargetSection>();
add(*in.ppc64LongBranchTarget);
}

in.gotPlt = std::make_unique<GotPltSection>();
add(*in.gotPlt);
in.igotPlt = std::make_unique<IgotPltSection>();
add(*in.igotPlt);
// Add .relro_padding if DATA_SEGMENT_RELRO_END is used; otherwise, add the
// section in the absence of PHDRS/SECTIONS commands.
if (config->zRelro && ((script->phdrsCommands.empty() &&
!script->hasSectionsCommand) || script->seenRelroEnd)) {
in.relroPadding = std::make_unique<RelroPaddingSection>();
add(*in.relroPadding);
}

if (config->emachine == EM_ARM) {
in.armCmseSGSection = std::make_unique<ArmCmseSGSection>();
add(*in.armCmseSGSection);
}

// _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat
// it as a relocation and ensure the referenced section is created.
if (ElfSym::globalOffsetTable && config->emachine != EM_MIPS) {
if (target->gotBaseSymInGotPlt)
in.gotPlt->hasGotPltOffRel = true;
else
in.got->hasGotOffRel = true;
}

// We always need to add rel[a].plt to output if it has entries.
// Even for static linking it can contain R_[*]_IRELATIVE relocations.
in.relaPlt = std::make_unique<RelocationSection<ELFT>>(
config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false,
/*threadCount=*/1);
add(*in.relaPlt);

// The IBT PLT is only created for x86/x86-64 when the IBT feature bit is set
// in config->andFeatures.
if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
(config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) {
in.ibtPlt = std::make_unique<IBTPltSection>();
add(*in.ibtPlt);
}

// 32-bit PowerPC uses PPC32GlinkSection as its PLT; every other target uses
// PltSection.
if (config->emachine == EM_PPC)
in.plt = std::make_unique<PPC32GlinkSection>();
else
in.plt = std::make_unique<PltSection>();
add(*in.plt);
in.iplt = std::make_unique<IpltSection>();
add(*in.iplt);

if (config->andFeatures || !ctx.aarch64PauthAbiCoreInfo.empty())
add(*make<GnuPropertySection>());

if (config->gdbIndex) {
in.gdbIndex = GdbIndexSection::create<ELFT>();
add(*in.gdbIndex);
}

// .note.GNU-stack is always added when we are creating a re-linkable
// object file. Other linkers are using the presence of this marker
// section to control the executable-ness of the stack area, but that
// is irrelevant these days. Stack area should always be non-executable
// by default. So we emit this section unconditionally.
if (config->relocatable)
add(*make<GnuStackSection>());

// The symbol and string tables created at the top of the function are
// appended last; symTab, symTabShndx and strTab are null when stripping all.
if (in.symTab)
add(*in.symTab);
if (in.symTabShndx)
add(*in.symTabShndx);
add(*in.shStrTab);
if (in.strTab)
add(*in.strTab);
}

// The main function of the writer.
template <class ELFT> void Writer<ELFT>::run() {
// Now that we have a complete set of output sections. This function
Expand Down Expand Up @@ -3114,11 +2827,6 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
part.buildId->writeBuildId(output);
}

template void elf::createSyntheticSections<ELF32LE>();
template void elf::createSyntheticSections<ELF32BE>();
template void elf::createSyntheticSections<ELF64LE>();
template void elf::createSyntheticSections<ELF64BE>();

template void elf::writeResult<ELF32LE>();
template void elf::writeResult<ELF32BE>();
template void elf::writeResult<ELF64LE>();
Expand Down
3 changes: 0 additions & 3 deletions lld/ELF/Writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ namespace lld::elf {
class InputFile;
class OutputSection;
void copySectionsIntoPartitions();
template <class ELFT> void createSyntheticSections();
template <class ELFT> void writeResult();

// This describes a program header entry.
Expand Down Expand Up @@ -57,8 +56,6 @@ bool isMipsN32Abi(const InputFile *f);
bool isMicroMips();
bool isMipsR6();

bool hasMemtag();
bool canHaveMemtagGlobals();
} // namespace lld::elf

#endif
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/MachineCombinerPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ enum class MachineCombinerPattern {
FMADD_XA,
FMSUB,
FNMSUB,
SHXADD_ADD_SLLI_OP1,
SHXADD_ADD_SLLI_OP2,

// X86 VNNI
DPWSSD,
Expand Down
228 changes: 178 additions & 50 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,14 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
Known = KnownBits::ssub_sat(Known, Known2);
break;
// for min/max reduce, any bit common to each element in the input vec
// is set in the output.
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
Expand Down Expand Up @@ -2463,6 +2471,34 @@ static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
return ::isKnownNonEqual(X, Y, Depth, Q);
}

static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
const SimplifyQuery &Q, unsigned BitWidth, Value *X,
Value *Y, bool NSW, bool NUW) {
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if (NSW || NUW)
return isKnownNonZero(X, DemandedElts, Depth, Q) &&
isKnownNonZero(Y, DemandedElts, Depth, Q);

// If either X or Y is odd, then if the other is non-zero the result can't
// be zero.
KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
if (XKnown.One[0])
return isKnownNonZero(Y, DemandedElts, Depth, Q);

KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
if (YKnown.One[0])
return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Depth, Q);

// If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
// non-zero, then X * Y is non-zero. We can find sX and sY by just taking
// the lowest known One of X and Y. If they are non-zero, the result
// must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
// X.CountLeadingZeros + Y.CountLeadingZeros < BitWidth.
return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
BitWidth;
}

static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
unsigned Depth, const SimplifyQuery &Q,
const KnownBits &KnownVal) {
Expand Down Expand Up @@ -2658,33 +2694,10 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
Q.IIQ.hasNoUnsignedWrap(BO));
}
case Instruction::Mul: {
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO))
return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);

// If either X or Y is odd, then if the other is non-zero the result can't
// be zero.
KnownBits XKnown =
computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
if (XKnown.One[0])
return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);

KnownBits YKnown =
computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
if (YKnown.One[0])
return XKnown.isNonZero() ||
isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);

// If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
// non-zero, then X * Y is non-zero. We can find sX and sY by just taking
// the lowest known One of X and Y. If they are non-zero, the result
// must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
// X.countMaxTrailingZeros + Y.countMaxTrailingZeros < BitWidth.
return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
BitWidth;
return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
Q.IIQ.hasNoUnsignedWrap(BO));
}
case Instruction::Select: {
// (C ? X : Y) != 0 if X != 0 and Y != 0.
Expand Down Expand Up @@ -2750,6 +2763,29 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
});
}
case Instruction::InsertElement: {
if (isa<ScalableVectorType>(I->getType()))
break;

const Value *Vec = I->getOperand(0);
const Value *Elt = I->getOperand(1);
auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));

unsigned NumElts = DemandedElts.getBitWidth();
APInt DemandedVecElts = DemandedElts;
bool SkipElt = false;
// If we know the index we are inserting to, clear it from the Vec check.
if (CIdx && CIdx->getValue().ult(NumElts)) {
DemandedVecElts.clearBit(CIdx->getZExtValue());
SkipElt = !DemandedElts[CIdx->getZExtValue()];
}

// Result is non-zero if Elt is non-zero and the rest of the demanded elts in
// Vec are non-zero.
return (SkipElt || isKnownNonZero(Elt, Depth, Q)) &&
(DemandedVecElts.isZero() ||
isKnownNonZero(Vec, DemandedVecElts, Depth, Q));
}
case Instruction::ExtractElement:
if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
const Value *Vec = EEI->getVectorOperand();
Expand All @@ -2764,6 +2800,21 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
}
}
break;
case Instruction::ShuffleVector: {
auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
if (!Shuf)
break;
APInt DemandedLHS, DemandedRHS;
// For undef elements, we don't know anything about the common state of
// the shuffle result.
if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
break;
// If demanded elements for both vecs are non-zero, the shuffle is non-zero.
return (DemandedRHS.isZero() ||
isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Depth, Q)) &&
(DemandedLHS.isZero() ||
isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Depth, Q));
}
case Instruction::Freeze:
return isKnownNonZero(I->getOperand(0), Depth, Q) &&
isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
Expand All @@ -2785,6 +2836,29 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// handled in isKnownNonZero.
return false;
}
case Instruction::ExtractValue: {
const WithOverflowInst *WO;
if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
switch (WO->getBinaryOp()) {
default:
break;
case Instruction::Add:
return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
WO->getArgOperand(0), WO->getArgOperand(1),
/*NSW=*/false,
/*NUW=*/false);
case Instruction::Sub:
return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
WO->getArgOperand(0), WO->getArgOperand(1));
case Instruction::Mul:
return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
WO->getArgOperand(0), WO->getArgOperand(1),
/*NSW=*/false, /*NUW=*/false);
break;
}
}
break;
}
case Instruction::Call:
case Instruction::Invoke: {
const auto *Call = cast<CallBase>(I);
Expand Down Expand Up @@ -2824,27 +2898,54 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
II->getArgOperand(0), II->getArgOperand(1),
/*NSW=*/true, /* NUW=*/false);
// umax/umin/smax/smin of all non-zero elements is always non-zero.
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
return isKnownNonZero(II->getArgOperand(0), Depth, Q);
case Intrinsic::umax:
case Intrinsic::uadd_sat:
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
case Intrinsic::smin:
case Intrinsic::smax: {
auto KnownOpImpliesNonZero = [&](const KnownBits &K) {
return II->getIntrinsicID() == Intrinsic::smin
? K.isNegative()
: K.isStrictlyPositive();
// If either arg is strictly positive the result is non-zero. Otherwise
// the result is non-zero if both ops are non-zero.
auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
const KnownBits &OpKnown) {
if (!OpNonZero.has_value())
OpNonZero = OpKnown.isNonZero() ||
isKnownNonZero(Op, DemandedElts, Depth, Q);
return *OpNonZero;
};
KnownBits XKnown =
// Avoid re-computing isKnownNonZero.
std::optional<bool> Op0NonZero, Op1NonZero;
KnownBits Op1Known =
computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
if (Op1Known.isNonNegative() &&
IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
return true;
KnownBits Op0Known =
computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
if (KnownOpImpliesNonZero(XKnown))
if (Op0Known.isNonNegative() &&
IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
return true;
KnownBits YKnown =
return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
}
case Intrinsic::smin: {
// If either arg is negative the result is non-zero. Otherwise
// the result is non-zero if both ops are non-zero.
KnownBits Op1Known =
computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
if (KnownOpImpliesNonZero(YKnown))
if (Op1Known.isNegative())
return true;
KnownBits Op0Known =
computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
if (Op0Known.isNegative())
return true;

if (XKnown.isNonZero() && YKnown.isNonZero())
if (Op1Known.isNonZero() && Op0Known.isNonZero())
return true;
}
[[fallthrough]];
Expand Down Expand Up @@ -3005,7 +3106,20 @@ getInvertibleOperands(const Operator *Op1,
switch (Op1->getOpcode()) {
default:
break;
case Instruction::Add:
case Instruction::Or:
if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
!cast<PossiblyDisjointInst>(Op2)->isDisjoint())
break;
[[fallthrough]];
case Instruction::Xor:
case Instruction::Add: {
Value *Other;
if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
return std::make_pair(Op1->getOperand(1), Other);
if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
return std::make_pair(Op1->getOperand(0), Other);
break;
}
case Instruction::Sub:
if (Op1->getOperand(0) == Op2->getOperand(0))
return getOperands(1);
Expand Down Expand Up @@ -3093,20 +3207,33 @@ getInvertibleOperands(const Operator *Op1,
return std::nullopt;
}

/// Return true if V2 == V1 + X, where X is known non-zero.
static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
const SimplifyQuery &Q) {
/// Return true if V1 == (binop V2, X), where X is known non-zero.
/// Only handle a small subset of binops where (binop V2, X) with non-zero X
/// implies V2 != V1.
static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
unsigned Depth, const SimplifyQuery &Q) {
const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
if (!BO)
return false;
Value *Op = nullptr;
if (V2 == BO->getOperand(0))
Op = BO->getOperand(1);
else if (V2 == BO->getOperand(1))
Op = BO->getOperand(0);
else
return false;
return isKnownNonZero(Op, Depth + 1, Q);
switch (BO->getOpcode()) {
default:
break;
case Instruction::Or:
if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
break;
[[fallthrough]];
case Instruction::Xor:
case Instruction::Add:
Value *Op = nullptr;
if (V2 == BO->getOperand(0))
Op = BO->getOperand(1);
else if (V2 == BO->getOperand(1))
Op = BO->getOperand(0);
else
return false;
return isKnownNonZero(Op, Depth + 1, Q);
}
return false;
}

/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
Expand Down Expand Up @@ -3266,7 +3393,8 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
};
}

if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
if (isModifyingBinopOfNonZero(V1, V2, Depth, Q) ||
isModifyingBinopOfNonZero(V2, V1, Depth, Q))
return true;

if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
Expand Down
19 changes: 11 additions & 8 deletions llvm/lib/Support/APInt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2585,11 +2585,13 @@ int APInt::tcMultiply(WordType *dst, const WordType *lhs,
assert(dst != lhs && dst != rhs);

int overflow = 0;
tcSet(dst, 0, parts);

for (unsigned i = 0; i < parts; i++)
overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
parts - i, true);
for (unsigned i = 0; i < parts; i++) {
// Don't accumulate on the first iteration so we don't need to initialize
// dst to 0.
overflow |=
tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, parts - i, i != 0);
}

return overflow;
}
Expand All @@ -2605,10 +2607,11 @@ void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,

assert(dst != lhs && dst != rhs);

tcSet(dst, 0, rhsParts);

for (unsigned i = 0; i < lhsParts; i++)
tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
for (unsigned i = 0; i < lhsParts; i++) {
// Don't accumulate on the first iteration so we don't need to initialize
// dst to 0.
tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, i != 0);
}
}

// If RHS is zero LHS and REMAINDER are left unchanged, return one.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;

def FeaturePreciseMemory
: SubtargetFeature<"precise-memory", "EnablePreciseMemory",
"true", "Enable precise memory mode">;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;
bool EnablePreciseMemory = false;

// Used as options.
bool EnableLoadStoreOpt = false;
Expand Down Expand Up @@ -599,6 +600,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return EnableCuMode;
}

bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }

bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2305,6 +2305,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif

if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst));
ScoreBrackets.simplifyWaitcnt(Wait);
Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block,
ScoreBrackets, /*OldWaitcntInstr=*/nullptr);
}

LLVM_DEBUG({
Inst.print(dbgs());
ScoreBrackets.dump();
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2087,7 +2087,7 @@ def : GCNPat <
def : GCNPat <
(DivergentUnaryFrag<fneg> (v2f32 VReg_64:$src)),
(V_PK_ADD_F32 11 /* OP_SEL_1 | NEG_LO | NEG_HI */, VReg_64:$src,
11 /* OP_SEL_1 | NEG_LO | NEG_HI */, 0,
11 /* OP_SEL_1 | NEG_LO | NEG_HI */, (i64 0),
0, 0, 0, 0, 0)
> {
let SubtargetPredicate = HasPackedFP32Ops;
Expand Down Expand Up @@ -2999,15 +2999,15 @@ def : GCNPat<
let SubtargetPredicate = HasPackedFP32Ops in {
def : GCNPat<
(fcanonicalize (v2f32 (VOP3PMods v2f32:$src, i32:$src_mods))),
(V_PK_MUL_F32 0, CONST.FP32_ONE, $src_mods, $src)
(V_PK_MUL_F32 0, (i64 CONST.FP32_ONE), $src_mods, $src)
>;
}

// TODO: Handle fneg like other types.
let SubtargetPredicate = isNotGFX12Plus in {
def : GCNPat<
(fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
(V_MUL_F64_e64 0, CONST.FP64_ONE, $src_mods, $src)
(V_MUL_F64_e64 0, (i64 CONST.FP64_ONE), $src_mods, $src)
>;
}
} // End AddedComplexity = -5
Expand Down Expand Up @@ -3369,7 +3369,7 @@ def : GCNPat <
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x),
SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff)),
(V_MOV_B64_PSEUDO (i64 0x3fefffffffffffff))),
$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))))
>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,7 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
let HasSGPR = 1;
}

def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
Expand Down
40 changes: 0 additions & 40 deletions llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,46 +1001,6 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Mark BP as used if function has dedicated base pointer.
if (hasBP(MF))
SavedRegs.set(RISCVABI::getBPReg());

// If interrupt is enabled and there are calls in the handler,
// unconditionally save all Caller-saved registers and
// all FP registers, regardless whether they are used.
MachineFrameInfo &MFI = MF.getFrameInfo();
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();

if (MF.getFunction().hasFnAttribute("interrupt") && MFI.hasCalls()) {

static const MCPhysReg CSRegs[] = { RISCV::X1, /* ra */
RISCV::X5, RISCV::X6, RISCV::X7, /* t0-t2 */
RISCV::X10, RISCV::X11, /* a0-a1, a2-a7 */
RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17,
RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 /* t3-t6 */
};

for (auto Reg : CSRegs)
SavedRegs.set(Reg);

// According to psABI, if ilp32e/lp64e ABIs are used with an ISA that
// has any of the registers x16-x31 and f0-f31, then these registers are
// considered temporaries, so we should also save x16-x31 here.
if (STI.getTargetABI() == RISCVABI::ABI_ILP32E ||
STI.getTargetABI() == RISCVABI::ABI_LP64E) {
for (MCPhysReg Reg = RISCV::X16; Reg <= RISCV::X31; Reg++)
SavedRegs.set(Reg);
}

if (Subtarget.hasStdExtF()) {

// If interrupt is enabled, this list contains all FP registers.
const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs();

for (unsigned i = 0; Regs[i]; ++i)
if (RISCV::FPR16RegClass.contains(Regs[i]) ||
RISCV::FPR32RegClass.contains(Regs[i]) ||
RISCV::FPR64RegClass.contains(Regs[i]))
SavedRegs.set(Regs[i]);
}
}
}

std::pair<int64_t, Align>
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4638,8 +4638,17 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
Subtarget.getXLenVT()));
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
OffsetVec, Passthru, Mask, VL);
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
Interleaved, EvenV, Passthru, Mask, VL);
if (!EvenV.isUndef())
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
Interleaved, EvenV, Passthru, Mask, VL);
} else if (EvenV.isUndef()) {
Interleaved =
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);

SDValue OffsetVec =
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
Interleaved, OffsetVec, Passthru, Mask, VL);
} else {
// FIXME: We should freeze the odd vector here. We already handled the case
// of provably undef/poison above.
Expand Down
Loading