24 changes: 24 additions & 0 deletions clang/include/clang/Basic/DarwinSDKInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,30 @@ class DarwinSDKInfo {
map(const VersionTuple &Key, const VersionTuple &MinimumValue,
std::optional<VersionTuple> MaximumValue) const;

/// Remap the 'introduced' availability version.
///
/// \param Key The 'introduced' version to translate.
/// \return The remapped version. If std::nullopt is returned, the
/// 'unavailable' availability should be used instead.
std::optional<VersionTuple>
mapIntroducedAvailabilityVersion(const VersionTuple &Key) const {
  // 100000 is the API_TO_BE_DEPRECATED sentinel; any other major version goes
  // through the regular mapping.
  if (Key.getMajor() != 100000) {
    // std::nullopt is passed as the maximum: per the contract above, a
    // std::nullopt result means 'unavailable' should be used (presumably for
    // keys beyond the mapped range -- confirm against map()).
    return map(Key, MinimumValue, std::nullopt);
  }
  // The sentinel is returned as VersionTuple(100000) without consulting the
  // mapping.
  return VersionTuple(100000);
}

/// Remap the 'deprecated' and 'obsoleted' availability version.
///
/// \param Key The 'deprecated' or 'obsoleted' version to translate.
/// \return The remapped version. If std::nullopt is returned for 'obsoleted',
/// the 'unavailable' availability should be used instead. If std::nullopt is
/// returned for 'deprecated', the 'deprecated' version should be dropped.
std::optional<VersionTuple>
mapDeprecatedObsoletedAvailabilityVersion(const VersionTuple &Key) const {
  // 100000 is the API_TO_BE_DEPRECATED sentinel; any other major version goes
  // through the regular mapping, bounded by both MinimumValue and
  // MaximumValue.
  if (Key.getMajor() != 100000) {
    return map(Key, MinimumValue, MaximumValue);
  }
  // The sentinel is returned as VersionTuple(100000) without consulting the
  // mapping.
  return VersionTuple(100000);
}

static std::optional<RelatedTargetVersionMapping>
parseJSON(const llvm::json::Object &Obj,
VersionTuple MaximumDeploymentTarget);
Expand Down
5 changes: 4 additions & 1 deletion clang/include/clang/InstallAPI/Frontend.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,15 @@ class FrontendRecordsSlice : public llvm::MachO::RecordsSlice {
/// \param D The pointer to the declaration from traversing AST.
/// \param Access The intended access level of symbol.
/// \param Flags The flags that describe attributes of the symbol.
/// \param Inlined Whether declaration is inlined, only applicable to
/// functions.
/// \return The non-owning pointer to added record in slice.
GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage,
GlobalRecord::Kind GV,
const clang::AvailabilityInfo Avail, const Decl *D,
const HeaderType Access,
SymbolFlags Flags = SymbolFlags::None);
SymbolFlags Flags = SymbolFlags::None,
bool Inlined = false);

/// Add ObjC Class record with attributes from AST.
///
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/InstallAPI/Visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class InstallAPIVisitor final : public ASTConsumer,
/// Collect global variables.
bool VisitVarDecl(const VarDecl *D);

/// Collect global functions.
bool VisitFunctionDecl(const FunctionDecl *D);

/// Collect Objective-C Interface declarations.
/// Every Objective-C class has an interface declaration that lists all the
/// ivars, properties, and methods of the class.
Expand Down
5 changes: 4 additions & 1 deletion clang/include/clang/Interpreter/Interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class IncrementalCompilerBuilder {
UserArgs = Args;
}

/// Set the target triple handed to the driver when a CompilerInstance is
/// created. When no triple has been set, the Create* functions fall back to
/// llvm::sys::getProcessTriple().
///
/// \param TT The target triple string; taken by value and moved into the
/// builder to avoid a second copy.
void SetTargetTriple(std::string TT) { TargetTriple = std::move(TT); }

// General C++
llvm::Expected<std::unique_ptr<CompilerInstance>> CreateCpp();

Expand All @@ -62,11 +64,12 @@ class IncrementalCompilerBuilder {

private:
static llvm::Expected<std::unique_ptr<CompilerInstance>>
create(std::vector<const char *> &ClangArgv);
create(std::string TT, std::vector<const char *> &ClangArgv);

llvm::Expected<std::unique_ptr<CompilerInstance>> createCuda(bool device);

std::vector<const char *> UserArgs;
std::optional<std::string> TargetTriple;

llvm::StringRef OffloadArch;
llvm::StringRef CudaSDKPath;
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4625,7 +4625,15 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
OffloadAction::DeviceDependences DDep;
DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);

// Compiling CUDA in non-RDC mode uses the PTX output if available.
for (Action *Input : A->getInputs())
if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object &&
!Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind);
OffloadActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType()));

++TCAndArch;
}
}
Expand Down
22 changes: 12 additions & 10 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,18 +503,20 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
Exec, CmdArgs, Inputs, Output));
}

static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
bool includePTX = true;
for (Arg *A : Args) {
if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
continue;
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
// The new driver does not include PTX by default to avoid overhead.
bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false);
for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
options::OPT_no_cuda_include_ptx_EQ)) {
A->claim();
const StringRef ArchStr = A->getValue();
if (ArchStr == "all" || ArchStr == gpu_arch) {
includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
continue;
}
if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
(ArchStr == "all" || ArchStr == InputArch))
includePTX = true;
else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
(ArchStr == "all" || ArchStr == InputArch))
includePTX = false;
}
return includePTX;
}
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/InstallAPI/Frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ namespace clang::installapi {
GlobalRecord *FrontendRecordsSlice::addGlobal(
StringRef Name, RecordLinkage Linkage, GlobalRecord::Kind GV,
const clang::AvailabilityInfo Avail, const Decl *D, const HeaderType Access,
SymbolFlags Flags) {
SymbolFlags Flags, bool Inlined) {

auto *GR = llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags);
auto *GR =
llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags, Inlined);
FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}});
return GR;
}
Expand Down
78 changes: 78 additions & 0 deletions clang/lib/InstallAPI/Visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "clang/InstallAPI/Visitor.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/Basic/Linkage.h"
#include "clang/InstallAPI/Frontend.h"
#include "llvm/ADT/SmallString.h"
Expand All @@ -27,6 +28,31 @@ static bool isExported(const NamedDecl *D) {
(LV.getVisibility() == DefaultVisibility);
}

/// Decide whether \p D should be treated as an inlined function.
///
/// Returns true when at least one redeclaration of \p D is inline and no
/// inline redeclaration that is subject to the extra check below carries a
/// body that is externally visible. Returns false otherwise.
static bool isInlined(const FunctionDecl *D) {
  const auto &Context = D->getASTContext();
  // True only for non-C++ code on a non-Microsoft C++ ABI where the
  // declaration is not marked dllexport.
  const bool NoCXXAttr = !Context.getLangOpts().CPlusPlus &&
                         !Context.getTargetInfo().getCXXABI().isMicrosoft() &&
                         !D->hasAttr<DLLExportAttr>();

  // Walk every redeclaration looking for an inline attribute or keyword.
  bool SawInlineRedecl = false;
  for (const auto *Redecl : D->redecls()) {
    if (!Redecl->isInlined())
      continue;
    SawInlineRedecl = true;

    // The body-visibility check only applies when NoCXXAttr holds or this
    // redeclaration carries the gnu_inline attribute. An inline definition
    // that is externally visible means the function is not reported as
    // inlined.
    if ((NoCXXAttr || Redecl->hasAttr<GNUInlineAttr>()) &&
        Redecl->doesThisDeclarationHaveABody() &&
        Redecl->isInlineDefinitionExternallyVisible())
      return false;
  }

  return SawInlineRedecl;
}

static SymbolFlags getFlags(bool WeakDef, bool ThreadLocal) {
SymbolFlags Result = SymbolFlags::None;
if (WeakDef)
Expand Down Expand Up @@ -204,4 +230,56 @@ bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) {
return true;
}

bool InstallAPIVisitor::VisitFunctionDecl(const FunctionDecl *D) {
if (const CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(D)) {
// Skip member function in class templates.
if (M->getParent()->getDescribedClassTemplate() != nullptr)
return true;

// Skip methods in CXX RecordDecls.
for (auto P : D->getASTContext().getParents(*M)) {
if (P.get<CXXRecordDecl>())
return true;
}

// Skip CXX ConstructorDecls and DestructorDecls.
if (isa<CXXConstructorDecl>(M) || isa<CXXDestructorDecl>(M))
return true;
}

// Skip templated functions.
switch (D->getTemplatedKind()) {
case FunctionDecl::TK_NonTemplate:
case FunctionDecl::TK_DependentNonTemplate:
break;
case FunctionDecl::TK_MemberSpecialization:
case FunctionDecl::TK_FunctionTemplateSpecialization:
if (auto *TempInfo = D->getTemplateSpecializationInfo()) {
if (!TempInfo->isExplicitInstantiationOrSpecialization())
return true;
}
break;
case FunctionDecl::TK_FunctionTemplate:
case FunctionDecl::TK_DependentFunctionTemplateSpecialization:
return true;
}

auto Access = getAccessForDecl(D);
if (!Access)
return true;
auto Name = getMangledName(D);
const AvailabilityInfo Avail = AvailabilityInfo::createFromDecl(D);
const bool ExplicitInstantiation = D->getTemplateSpecializationKind() ==
TSK_ExplicitInstantiationDeclaration;
const bool WeakDef = ExplicitInstantiation || D->hasAttr<WeakAttr>();
const bool Inlined = isInlined(D);
const RecordLinkage Linkage = (Inlined || !isExported(D))
? RecordLinkage::Internal
: RecordLinkage::Exported;
Ctx.Slice->addGlobal(Name, Linkage, GlobalRecord::Kind::Function, Avail, D,
*Access, getFlags(WeakDef, /*ThreadLocal=*/false),
Inlined);
return true;
}

} // namespace clang::installapi
12 changes: 7 additions & 5 deletions clang/lib/Interpreter/Interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ CreateCI(const llvm::opt::ArgStringList &Argv) {
} // anonymous namespace

llvm::Expected<std::unique_ptr<CompilerInstance>>
IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) {
IncrementalCompilerBuilder::create(std::string TT,
std::vector<const char *> &ClangArgv) {

// If we don't know ClangArgv0 or the address of main() at this point, try
// to guess it anyway (it's possible on some platforms).
Expand Down Expand Up @@ -162,8 +163,7 @@ IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) {
TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer;
DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer);

driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0],
llvm::sys::getProcessTriple(), Diags);
driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], TT, Diags);
Driver.setCheckInputsExist(false); // the input comes from mem buffers
llvm::ArrayRef<const char *> RF = llvm::ArrayRef(ClangArgv);
std::unique_ptr<driver::Compilation> Compilation(Driver.BuildCompilation(RF));
Expand All @@ -185,7 +185,8 @@ IncrementalCompilerBuilder::CreateCpp() {
Argv.push_back("-xc++");
Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());

return IncrementalCompilerBuilder::create(Argv);
std::string TT = TargetTriple ? *TargetTriple : llvm::sys::getProcessTriple();
return IncrementalCompilerBuilder::create(TT, Argv);
}

llvm::Expected<std::unique_ptr<CompilerInstance>>
Expand Down Expand Up @@ -213,7 +214,8 @@ IncrementalCompilerBuilder::createCuda(bool device) {

Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());

return IncrementalCompilerBuilder::create(Argv);
std::string TT = TargetTriple ? *TargetTriple : llvm::sys::getProcessTriple();
return IncrementalCompilerBuilder::create(TT, Argv);
}

llvm::Expected<std::unique_ptr<CompilerInstance>>
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Parse/ParseDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,8 +1234,11 @@ void Parser::ParseAvailabilityAttribute(
}
IdentifierLoc *Platform = ParseIdentifierLoc();
if (const IdentifierInfo *const Ident = Platform->Ident) {
// Disallow xrOS for availability attributes.
if (Ident->getName().contains("xrOS") || Ident->getName().contains("xros"))
Diag(Platform->Loc, diag::warn_availability_unknown_platform) << Ident;
// Canonicalize platform name from "macosx" to "macos".
if (Ident->getName() == "macosx")
else if (Ident->getName() == "macosx")
Platform->Ident = PP.getIdentifierInfo("macos");
// Canonicalize platform name from "macosx_app_extension" to
// "macos_app_extension".
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3863,7 +3863,8 @@ std::optional<AvailabilitySpec> Parser::ParseAvailabilitySpec() {
StringRef Platform =
AvailabilityAttr::canonicalizePlatformName(GivenPlatform);

if (AvailabilityAttr::getPrettyPlatformName(Platform).empty()) {
if (AvailabilityAttr::getPrettyPlatformName(Platform).empty() ||
(GivenPlatform.contains("xros") || GivenPlatform.contains("xrOS"))) {
Diag(PlatformIdentifier->Loc,
diag::err_avail_query_unrecognized_platform_name)
<< GivenPlatform;
Expand Down
10 changes: 10 additions & 0 deletions clang/test/CodeGen/attr-availability-visionos.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple arm64-apple-xros1 -emit-llvm -o - %s 2>&1 | FileCheck %s

__attribute__((availability(visionOS, introduced=1.1)))
void introduced_1_1();

// Guards a call with a visionOS 1.2 availability query. The pattern inside
// the body expects that query to be emitted as a call to
// __isPlatformVersionAtLeast with the arguments (11, 1, 2, 0).
void use() {
if (__builtin_available(visionOS 1.2, *))
introduced_1_1();
// CHECK: call i32 @__isPlatformVersionAtLeast(i32 11, i32 1, i32 2, i32 0)
}
26 changes: 26 additions & 0 deletions clang/test/CodeGen/tbaa-struct-relaxed-aliasing-with-tsan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \
// RUN: FileCheck %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -new-struct-path-tbaa \
// RUN: -emit-llvm -o - -O1 -relaxed-aliasing -fsanitize=thread -disable-llvm-optzns %s | \
// RUN: FileCheck %s
//
// Check that we do not create tbaa for instructions generated for copies.
// FIXME: !tbaa.struct is generated with null node as tag.

// CHECK: !tbaa.struct
// CHECK-NOT: !tbaa

// Aggregate with fields of different widths (short, int, char, int); it is
// the type copied by copyStruct() below.
struct A {
short s;
int i;
char c;
int j;
};

// Whole-struct assignment. Per the header of this file, the copy may carry
// !tbaa.struct metadata but must not get a !tbaa tag.
void copyStruct(A *a1, A *a2) {
*a1 = *a2;
}

// Scalar copy through pointers. The directives at the top of the file forbid
// any !tbaa metadata after the !tbaa.struct match -- presumably that covers
// the load/store emitted here as well (TODO confirm output order).
void copyInt(int *a, int *b) {
*a = *b;
}
25 changes: 13 additions & 12 deletions clang/test/Driver/cuda-phases.cu
Original file line number Diff line number Diff line change
Expand Up @@ -244,31 +244,32 @@
// NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
// NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)

// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
// NEW-DRIVER: 0: input, "[[INPUT:.+]]", cuda
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir
// NEW-DRIVER-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
// NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
// NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
// NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda)
// NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
// NEW-DRIVER-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
// NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
// NEW-DRIVER-NEXT: 15: clang-offload-packager, {8, 14}, image
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
// NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
// NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
// NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
// NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda)
// NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda)
// NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)

// RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s

// NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
// NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
// NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda)
Expand All @@ -277,13 +278,13 @@
// NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
// NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
// NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
// NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
// NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
// NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
// NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)
Expand Down
78 changes: 78 additions & 0 deletions clang/test/InstallAPI/functions.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json

// RUN: clang-installapi -target arm64-apple-macos13.1 \
// RUN: -I%t/usr/include -I%t/usr/local/include \
// RUN: -install_name @rpath/lib/libfunctions.dylib \
// RUN: %t/inputs.json -o %t/outputs.tbd 2>&1 | FileCheck %s --allow-empty
// RUN: llvm-readtapi -compare %t/outputs.tbd %t/expected.tbd 2>&1 | FileCheck %s --allow-empty

// CHECK-NOT: error:
// CHECK-NOT: warning:

//--- usr/include/functions.h
inline int inlined_func(void) { return 1;}
int public(int a);

//--- usr/local/include/private_functions.h
__attribute__((visibility("hidden")))
void hidden(void);

//--- inputs.json.in
{
"headers": [ {
"path" : "DSTROOT/usr/include/functions.h",
"type" : "public"
},
{
"path" : "DSTROOT/usr/local/include/private_functions.h",
"type" : "private"
}
],
"version": "3"
}

//--- expected.tbd
{
"main_library": {
"compatibility_versions": [
{
"version": "0"
}
],
"current_versions": [
{
"version": "0"
}
],
"exported_symbols": [
{
"text": {
"global": [
"_public"
]
}
}
],
"flags": [
{
"attributes": [
"not_app_extension_safe"
]
}
],
"install_names": [
{
"name": "@rpath/lib/libfunctions.dylib"
}
],
"target_info": [
{
"min_deployment": "13.1",
"target": "arm64-macos"
}
]
},
"tapi_tbd_version": 5
}
39 changes: 39 additions & 0 deletions clang/test/Sema/attr-availability-visionos.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %clang_cc1 -triple arm64-apple-xros1 -fapplication-extension -verify=visionos %s 2>&1

__attribute__((availability(xros, unavailable))) // visionos-warning {{unknown platform 'xros' in availability macro}}
void xros_unavail(); // visionos-note {{}}

__attribute__((availability(xros_app_extension, unavailable))) // visionos-warning {{unknown platform 'xros_app_extension' in availability macro}}
void xros_ext_unavail(); // visionos-note {{}}

__attribute__((availability(visionOSApplicationExtension, unavailable)))
void visionos_ext_unavail(); // visionos-note {{}}

// Calls each of the three functions declared unavailable above; every call is
// expected to be diagnosed as not available on visionOS, including the two
// whose attributes used the rejected xros platform spellings.
void use() {
xros_unavail(); // visionos-error {{'xros_unavail' is unavailable: not available on visionOS}}
xros_ext_unavail(); // visionos-error {{'xros_ext_unavail' is unavailable: not available on visionOS}}
visionos_ext_unavail(); // visionos-error {{'visionos_ext_unavail' is unavailable: not available on visionOS}}
}

__attribute__((availability(visionOS, introduced=1.0)))
void visionos_introduced_1();

__attribute__((availability(visionos, introduced=1.1)))
void visionos_introduced_1_1(); // visionos-note 4 {{'visionos_introduced_1_1' has been marked as being introduced in visionOS 1.1 here, but the deployment target is visionOS 1}}

// Exercises __builtin_available guards around a function introduced in
// visionOS 1.1 while the deployment target is visionOS 1:
//  - an iOS 16.1 guard does not silence the availability diagnostic;
//  - xrOS / xros_app_extension are rejected as unrecognized platform names
//    and the guarded call is still diagnosed;
//  - a visionOS 1.1 guard silences the diagnostic;
//  - an unguarded call is diagnosed, while the function introduced in
//    visionOS 1.0 is accepted without a guard.
void use2() {
if (__builtin_available(iOS 16.1, *))
visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}}

if (__builtin_available(xrOS 1.1, *)) // visionos-error {{unrecognized platform name xrOS}}
visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}}

if (__builtin_available(xros_app_extension 1, *)) // visionos-error {{unrecognized platform name xros_app_extension}}
visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}}

if (__builtin_available(visionOS 1.1, *))
visionos_introduced_1_1();

visionos_introduced_1();
visionos_introduced_1_1(); // visionos-warning {{'visionos_introduced_1_1' is only available on visionOS 1.1 or newer}} visionos-note {{enclose}}
}
4 changes: 3 additions & 1 deletion clang/tools/clang-installapi/Options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
for (const Arg *A : ArgList) {
if (A->isClaimed())
continue;
FrontendArgs.emplace_back(A->getAsString(ArgList));

FrontendArgs.emplace_back(A->getSpelling());
llvm::copy(A->getValues(), std::back_inserter(FrontendArgs));
}
FrontendArgs.push_back("-fsyntax-only");
}
Expand Down
10 changes: 10 additions & 0 deletions clang/unittests/Basic/DarwinSDKInfoTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,16 @@ TEST(DarwinSDKInfoTest, ParseAndTestMappingIOSDerived) {
EXPECT_EQ(
*Mapping->map(VersionTuple(13, 0), VersionTuple(), VersionTuple(99, 99)),
VersionTuple(99, 99));

// Verify introduced, deprecated, and obsoleted mappings.
EXPECT_EQ(Mapping->mapIntroducedAvailabilityVersion(VersionTuple(10, 1)),
VersionTuple(10.0));
EXPECT_EQ(Mapping->mapDeprecatedObsoletedAvailabilityVersion(
VersionTuple(100000, 0)),
VersionTuple(100000));
EXPECT_EQ(
Mapping->mapDeprecatedObsoletedAvailabilityVersion(VersionTuple(13.0)),
VersionTuple(15, 0, 99));
}

TEST(DarwinSDKInfoTest, MissingKeys) {
Expand Down
1 change: 1 addition & 0 deletions clang/unittests/Interpreter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
)

add_clang_unittest(ClangReplInterpreterTests
IncrementalCompilerBuilderTest.cpp
IncrementalProcessingTest.cpp
InterpreterTest.cpp
CodeCompletionTest.cpp
Expand Down
47 changes: 47 additions & 0 deletions clang/unittests/Interpreter/IncrementalCompilerBuilderTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//=== unittests/Interpreter/IncrementalCompilerBuilderTest.cpp ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Interpreter/Interpreter.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/Support/Error.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace clang;

namespace {

// Release the remapped file buffers held by \p CI's preprocessor options and
// then clear the remapping list. FrontendAction usually takes these raw
// pointers and wraps them back into unique_ptrs in InitializeFileRemapping();
// the tests here delete them by hand instead.
static void cleanupRemappedFileBuffers(CompilerInstance &CI) {
  auto &PPOpts = CI.getPreprocessorOpts();
  for (const auto &Remapped : PPOpts.RemappedFileBuffers)
    delete Remapped.second;
  PPOpts.clearRemappedFiles();
}

// Arguments handed to the builder via SetCompilerArgs() must be reflected in
// the frontend options of the created compiler instance.
TEST(IncrementalCompilerBuilder, SetCompilerArgs) {
  clang::IncrementalCompilerBuilder Builder{};
  std::vector<const char *> Args = {"-Xclang", "-ast-dump-all"};
  Builder.SetCompilerArgs(Args);

  auto Instance = cantFail(Builder.CreateCpp());
  EXPECT_TRUE(Instance->getFrontendOpts().ASTDumpAll);

  cleanupRemappedFileBuffers(*Instance);
}

// A triple set on the builder must end up in the target options of the
// created compiler instance; the expected string is "armv6-none-unknown-eabi"
// for the input "armv6-none-eabi".
TEST(IncrementalCompilerBuilder, SetTargetTriple) {
  clang::IncrementalCompilerBuilder Builder{};
  Builder.SetTargetTriple("armv6-none-eabi");

  auto Instance = cantFail(Builder.CreateCpp());
  EXPECT_EQ(Instance->getTargetOpts().Triple, "armv6-none-unknown-eabi");

  cleanupRemappedFileBuffers(*Instance);
}

} // end anonymous namespace
25 changes: 21 additions & 4 deletions compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
// clang-format off
#include <windows.h>

// This must be included after windows.h.
// These must be included after windows.h.
// The architecture needs to be set before including
// libloaderapi.h.
#include <libloaderapi.h>
#include <stringapiset.h>
#include <psapi.h>
// clang-format on

namespace fuzzer {

Expand Down Expand Up @@ -234,8 +239,20 @@ size_t PageSize() {
}

void SetThreadName(std::thread &thread, const std::string &name) {
// TODO ?
// to UTF-8 then SetThreadDescription ?
typedef HRESULT(WINAPI * proc)(HANDLE, PCWSTR);
HMODULE kbase = GetModuleHandleA("KernelBase.dll");
proc ThreadNameProc =
reinterpret_cast<proc>(GetProcAddress(kbase, "SetThreadDescription"));
if (proc) {
std::wstring buf;
auto sz = MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, nullptr, 0);
if (sz > 0) {
buf.resize(sz);
if (MultiByteToWideChar(CP_UTF8, 0, name.data(), -1, &buf[0], sz) > 0) {
(void)ThreadNameProc(thread.native_handle(), buf.c_str());
}
}
}
}

} // namespace fuzzer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2808,6 +2808,15 @@ PRE_SYSCALL(fchownat)
POST_SYSCALL(fchownat)
(long res, long dfd, const void *filename, long user, long group, long flag) {}

// Pre-syscall hook for fchmodat2: when a path pointer is supplied, hand the
// string to PRE_READ. The length is strlen + 1 so that the terminating NUL is
// part of the reported range. dfd, mode and flag are not inspected.
PRE_SYSCALL(fchmodat2)(long dfd, const void *filename, long mode, long flag) {
if (filename)
PRE_READ(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
}

// Post-syscall hook for fchmodat2: intentionally empty, nothing is done with
// the result or the arguments after the call.
POST_SYSCALL(fchmodat2)
(long res, long dfd, const void *filename, long mode, long flag) {}

PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) {
if (filename)
PRE_READ(filename,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out
// RUN: %test_debuginfo %s %t.out
// XFAIL: gdb-clang-incompatibility
// XFAIL: system-darwin && target-aarch64
// XFAIL: system-darwin

// DEBUGGER: delete breakpoints
// DEBUGGER: break static-member.cpp:33
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: %clangxx %target_itanium_abi_host_triple %t -o %t.out
// RUN: %test_debuginfo %s %t.out
// XFAIL: !system-darwin && gdb-clang-incompatibility
// XFAIL: system-darwin && target-aarch64
// XFAIL: system-darwin
// DEBUGGER: delete breakpoints
// DEBUGGER: break static-member.cpp:33
// DEBUGGER: r
Expand Down
2 changes: 2 additions & 0 deletions libc/config/baremetal/arm/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
libc.src.stdfix.absr
libc.src.stdfix.abslk
libc.src.stdfix.abslr
libc.src.stdfix.exphk
libc.src.stdfix.expk
libc.src.stdfix.roundhk
libc.src.stdfix.roundhr
libc.src.stdfix.roundk
Expand Down
2 changes: 2 additions & 0 deletions libc/config/baremetal/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
libc.src.stdfix.absr
libc.src.stdfix.abslk
libc.src.stdfix.abslr
libc.src.stdfix.exphk
libc.src.stdfix.expk
libc.src.stdfix.roundhk
libc.src.stdfix.roundhr
libc.src.stdfix.roundk
Expand Down
2 changes: 2 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
libc.src.stdfix.absr
libc.src.stdfix.abslk
libc.src.stdfix.abslr
libc.src.stdfix.exphk
libc.src.stdfix.expk
libc.src.stdfix.roundhk
libc.src.stdfix.roundhr
libc.src.stdfix.roundk
Expand Down
112 changes: 79 additions & 33 deletions libc/docs/c23.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,15 @@ Implementation Status

(It's helpful to review 'Annex B (Informative) Library Summary' for these.)

New headers:

* stdbit.h
* stdckdint.h (|check|, macros are only defined with `__GNUC__` builtins)

Additions:

* uchar.h

* mbrtoc8
* c8rtomb
* char*_t

* string.h

* memset_explicit
* memccpy
* strdup
* strndup

* time.h

* gmtime_r
* localtime_r
* timegm
* timespec_getres
* strftime conversion specifiers
* fenv.h

* 0b
* 0B
* fesetexcept
* fetestexceptflag
* fegetmode
* fesetmode
* math.h

* acospi*
Expand Down Expand Up @@ -96,20 +75,87 @@ Additions:
* dfmal
* fsqrt*
* dsqrtl
* fenv.h

* fesetexcept
* fetestexceptflag
* fegetmode
* fesetmode
* stdbit.h (New header)
* stdckdint.h (New header) |check|
* stddef.h

* unreachable
* stdlib.h

* strfromd
* strfromf
* strfroml
* free_sized
* free_aligned_sized
* memalignment
* string.h

* memset_explicit |check|
* memccpy
* strdup
* strndup
* tgmath.h

* <TODO>
* acospi
* asinpi
* atan2pi
* atanpi
* compoundn
* cospi
* erf
* exp10m1
* exp10
* exp2m1
* fmaximum
* fmaximum_mag
* fmaximum_num
* fmaximum_mag_num
* fminimum
* fminimum_mag
* fminimum_num
* fminimum_mag_num
* fromfpx
* fromfp
* llogb
* log10p1
* log2p1
* logp1
* nextdown
* nextup
* pown
* powr
* rootn
* roundeven
* rsqrt
* scalbn
* sinpi
* tanpi
* ufromfpx
* ufromfp
* fadd
* dadd
* fsub
* dsub
* fmul
* dmul
* fdiv
* ddiv
* ffma
* dfma
* fsqrt
* dsqrt
* time.h

* gmtime_r
* localtime_r
* timegm
* timespec_getres
* strftime conversion specifiers

* 0b
* 0B
* uchar.h

* mbrtoc8
* c8rtomb
* char*_t
1 change: 1 addition & 0 deletions libc/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ stages there is no ABI stability in any form.
dev/index.rst
porting
contributing
talks

.. toctree::
:hidden:
Expand Down
2 changes: 1 addition & 1 deletion libc/docs/math/stdfix.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ floating point types, but are not part of the ISO/IEC TR 18037:2008 spec.
+===============+================+=============+===============+============+================+=============+================+=============+===============+============+================+=============+
| cos | | | | | | | | | | | | |
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
| exp | | | | | | | | | | | | |
| exp | | | | | | | | |check| | | |check| | | |
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
| log | | | | | | | | | | | | |
+---------------+----------------+-------------+---------------+------------+----------------+-------------+----------------+-------------+---------------+------------+----------------+-------------+
Expand Down
29 changes: 29 additions & 0 deletions libc/docs/talks.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
=====
Talks
=====
----
2023
----
* Math functions in LLVM libc or yet another correctly rounded libm - Tue Ly

* `video <https://www.youtube.com/watch?v=kBSJqVWNQLY>`__
* The LLVM C Library for GPUs - Joseph Huber

* `slides <https://llvm.org/devmtg/2023-10/slides/techtalks/Huber-LibCforGPUs.pdf>`__
* `video <https://www.youtube.com/watch?v=_LLGc48GYHc>`__

----
2022
----
* Using LLVM's libc - Sivachandra Reddy, Michael Jones, Tue Ly

* `slides <https://llvm.org/devmtg/2022-11/slides/Tutorial1-UsingLLVM-libc.pdf>`__
* `video <https://www.youtube.com/watch?v=OpY4lnpnbq4>`__
* Using modern CPU instructions to improve LLVM's libc math library - Tue Ly

* `slides <https://llvm.org/devmtg/2022-11/slides/QuickTalk7-UsingModernCPUInstructionsToImproveLLVM-libcMathLib>`__
* `video <https://www.youtube.com/watch?v=9bvdbdn0nMA>`__
* Approximating at Scale: How strto float in LLVM’s libc is faster - Michael Jones

* `slides <https://llvm.org/devmtg/2022-11/slides/QuickTalk3-ApproximatingatScale-StringToFloat.pdf>`__
* `video <https://www.youtube.com/watch?v=s-UjbTV8p6g>`__
3 changes: 3 additions & 0 deletions libc/spec/llvm_libc_stdfix_ext.td
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ def LLVMLibcStdfixExt : StandardSpec<"llvm_libc_stdfix_ext"> {
[], // types
[], // enums
[ // functions
GuardedFunctionSpec<"exphk", RetValSpec<ShortAccumType>, [ArgSpec<ShortAccumType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
GuardedFunctionSpec<"expk", RetValSpec<AccumType>, [ArgSpec<AccumType>], "LIBC_COMPILER_HAS_FIXED_POINT">,

GuardedFunctionSpec<"sqrtuhr", RetValSpec<UnsignedShortFractType>, [ArgSpec<UnsignedShortFractType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
GuardedFunctionSpec<"sqrtur", RetValSpec<UnsignedFractType>, [ArgSpec<UnsignedFractType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
GuardedFunctionSpec<"sqrtulr", RetValSpec<UnsignedLongFractType>, [ArgSpec<UnsignedLongFractType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
Expand Down
10 changes: 4 additions & 6 deletions libc/src/__support/CPP/bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,14 +269,12 @@ first_trailing_one(T value) {
return value == cpp::numeric_limits<T>::max() ? 0 : countr_zero(value) + 1;
}

/// Count number of 1's aka population count or hamming weight.
/// Count number of 1's aka population count or Hamming weight.
///
/// Only unsigned integral types are allowed.
// TODO: rename as 'popcount' to follow the standard
// https://en.cppreference.com/w/cpp/numeric/popcount
template <typename T>
[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
count_ones(T value) {
popcount(T value) {
int count = 0;
for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
if ((value >> i) & 0x1)
Expand All @@ -285,7 +283,7 @@ count_ones(T value) {
}
#define ADD_SPECIALIZATION(TYPE, BUILTIN) \
template <> \
[[nodiscard]] LIBC_INLINE constexpr int count_ones<TYPE>(TYPE value) { \
[[nodiscard]] LIBC_INLINE constexpr int popcount<TYPE>(TYPE value) { \
return BUILTIN(value); \
}
ADD_SPECIALIZATION(unsigned char, __builtin_popcount)
Expand All @@ -300,7 +298,7 @@ ADD_SPECIALIZATION(unsigned long long, __builtin_popcountll)
template <typename T>
[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
count_zeros(T value) {
return count_ones<T>(static_cast<T>(~value));
return popcount<T>(static_cast<T>(~value));
}

} // namespace LIBC_NAMESPACE::cpp
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/UInt.h
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ has_single_bit(T value) {
for (auto word : value.val) {
if (word == 0)
continue;
bits += count_ones(word);
bits += popcount(word);
if (bits > 1)
return false;
}
Expand Down
24 changes: 12 additions & 12 deletions libc/src/__support/fixed_point/fx_rep.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ template <> struct FXRep<short fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return SFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return SFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return SFRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; }
LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HR; }
Expand All @@ -65,7 +65,7 @@ template <> struct FXRep<unsigned short fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return USFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return USFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return USFRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; }
LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHR; }
Expand All @@ -85,7 +85,7 @@ template <> struct FXRep<fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return FRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return FRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return FRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0R; }
LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5R; }
Expand All @@ -105,7 +105,7 @@ template <> struct FXRep<unsigned fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return UFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return UFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return UFRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; }
LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UR; }
Expand All @@ -125,7 +125,7 @@ template <> struct FXRep<long fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return LFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return LFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return LFRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; }
LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LR; }
Expand All @@ -145,7 +145,7 @@ template <> struct FXRep<unsigned long fract> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return ULFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; }
LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULR; }
Expand All @@ -165,7 +165,7 @@ template <> struct FXRep<short accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return SACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return SACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return SACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; }
LIBC_INLINE static constexpr Type EPS() { return SACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HK; }
Expand All @@ -185,7 +185,7 @@ template <> struct FXRep<unsigned short accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return USACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return USACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return USACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; }
LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHK; }
Expand All @@ -205,7 +205,7 @@ template <> struct FXRep<accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return ACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0K; }
LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5K; }
Expand All @@ -225,7 +225,7 @@ template <> struct FXRep<unsigned accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return UACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return UACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return UACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; }
LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UK; }
Expand All @@ -245,7 +245,7 @@ template <> struct FXRep<long accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return LACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return LACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return LACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; }
LIBC_INLINE static constexpr Type EPS() { return LACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LK; }
Expand All @@ -265,7 +265,7 @@ template <> struct FXRep<unsigned long accum> {
SIGN_LEN + INTEGRAL_LEN + FRACTION_LEN;

LIBC_INLINE static constexpr Type MIN() { return ULACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MIN; }
LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MAX; }
LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; }
LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; }
LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULK; }
Expand Down
2 changes: 1 addition & 1 deletion libc/src/stdbit/stdc_count_ones_uc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_uc, (unsigned char value)) {
return static_cast<unsigned>(cpp::count_ones(value));
return static_cast<unsigned>(cpp::popcount(value));
}

} // namespace LIBC_NAMESPACE
2 changes: 1 addition & 1 deletion libc/src/stdbit/stdc_count_ones_ui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ui, (unsigned value)) {
return static_cast<unsigned>(cpp::count_ones(value));
return static_cast<unsigned>(cpp::popcount(value));
}

} // namespace LIBC_NAMESPACE
2 changes: 1 addition & 1 deletion libc/src/stdbit/stdc_count_ones_ul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ul, (unsigned long value)) {
return static_cast<unsigned>(cpp::count_ones(value));
return static_cast<unsigned>(cpp::popcount(value));
}

} // namespace LIBC_NAMESPACE
2 changes: 1 addition & 1 deletion libc/src/stdbit/stdc_count_ones_ull.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_ull, (unsigned long long value)) {
return static_cast<unsigned>(cpp::count_ones(value));
return static_cast<unsigned>(cpp::popcount(value));
}

} // namespace LIBC_NAMESPACE
2 changes: 1 addition & 1 deletion libc/src/stdbit/stdc_count_ones_us.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(unsigned, stdc_count_ones_us, (unsigned short value)) {
return static_cast<unsigned>(cpp::count_ones(value));
return static_cast<unsigned>(cpp::popcount(value));
}

} // namespace LIBC_NAMESPACE
26 changes: 26 additions & 0 deletions libc/src/stdfix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,29 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.fixed_point.sqrt
)

add_entrypoint_object(
exphk
HDRS
exphk.h
SRCS
exphk.cpp
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.fixed_point.fx_rep
libc.src.__support.CPP.bit
)

add_entrypoint_object(
expk
HDRS
expk.h
SRCS
expk.cpp
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.fixed_point.fx_rep
libc.src.__support.CPP.bit
)
92 changes: 92 additions & 0 deletions libc/src/stdfix/exphk.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
//===-- Implementation of exphk function ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "exphk.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/common.h"
#include "src/__support/fixed_point/fx_bits.h"

namespace LIBC_NAMESPACE {

namespace {

// Lookup tables used by exphk for exp(hi) and exp(mid).
//
// The values were produced with Sollya:
// > for i from 0 to 89 do {
//     hi = floor(i/8) - 5;
//     m = i/8 - floor(i/8) - 0.5;
//     e_hi = nearestint(exp(hi) * 2^7) * 2^-7;
//     e_mid = nearestint(exp(m) * 2^7) * 2^-7;
//     print(hi, e_hi, m, e_mid);
//   };
// For i = 88 and 89 (hi = 6) e_hi does not fit in short accum, so the last
// EXP_HI entry is SACCUM_MAX instead.

// EXP_HI[k] ~ exp(k - 5), rounded to 7 fractional bits.
static constexpr short accum EXP_HI[12] = {
    0x1.0p-7hk,   // hi = -5
    0x1.0p-6hk,   // hi = -4
    0x1.8p-5hk,   // hi = -3
    0x1.1p-3hk,   // hi = -2
    0x1.78p-2hk,  // hi = -1
    0x1.0p0hk,    // hi =  0
    0x1.5cp1hk,   // hi =  1
    0x1.d9p2hk,   // hi =  2
    0x1.416p4hk,  // hi =  3
    0x1.b4dp5hk,  // hi =  4
    0x1.28d4p7hk, // hi =  5
    SACCUM_MAX,   // hi =  6, exp(6) overflows short accum
};

// EXP_MID[j] ~ exp(j/8 - 0.5), rounded to 7 fractional bits.
static constexpr short accum EXP_MID[8] = {
    0x1.38p-1hk, // m = -4/8
    0x1.6p-1hk,  // m = -3/8
    0x1.9p-1hk,  // m = -2/8
    0x1.c4p-1hk, // m = -1/8
    0x1.0p0hk,   // m =  0
    0x1.22p0hk,  // m =  1/8
    0x1.48p0hk,  // m =  2/8
    0x1.74p0hk,  // m =  3/8
};

} // anonymous namespace

LLVM_LIBC_FUNCTION(short accum, exphk, (short accum x)) {
using FXRep = fixed_point::FXRep<short accum>;
using StorageType = typename FXRep::StorageType;
// Output overflow
if (LIBC_UNLIKELY(x >= 0x1.64p2hk))
return FXRep::MAX();
// Lower bound where exp(x) -> 0:
// floor(log(2^-8) * 2^7) * 2^-7
if (LIBC_UNLIKELY(x <= -0x1.63p2hk))
return FXRep::ZERO();

// Current range of x:
// -0x1.628p2 <= x <= 0x1.638p2
// Range reduction:
// x = hi + mid + lo,
// where:
// hi is an integer
// mid * 2^3 is an integer
// |lo| <= 2^-4.
// Then exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo)
// ~ exp(hi) * exp(mid) * (1 + lo)
// with relative errors < |lo|^2 <= 2^-8.
// exp(hi) and exp(mid) are extracted from small lookup tables.

// Round-to-nearest 1/8, tie-to-(+Int):
constexpr short accum ONE_SIXTEENTH = 0x1.0p-4hk;
// x_rounded = floor(x + 1/16).
short accum x_rounded = ((x + ONE_SIXTEENTH) >> (FXRep::FRACTION_LEN - 3))
<< (FXRep::FRACTION_LEN - 3);
short accum lo = x - x_rounded;

// Range of x_rounded:
// x_rounded >= floor((-0x1.628p2 + 0x1.0p-4) * 2^3) * 2^-3
// = -0x1.6p2 = -5.5
// To get the indices, we shift the values so that it start with 0.
// Range of indices: 0 <= indices <= 89
StorageType indices = cpp::bit_cast<StorageType>((x_rounded + 0x1.6p2hk) >>
(FXRep::FRACTION_LEN - 3));
// So we have the following relation:
// indices = (hi + mid + 44/8) * 8
// That implies:
// hi + mid = indices/8 - 5.5
// So for lookup tables, we can use the upper 4 bits to get:
// exp( floor(indices / 8) - 5 )
// and lower 3 bits for:
// exp( (indices - floor(indices)) - 0.5 )
short accum exp_hi = EXP_HI[indices >> 3];
short accum exp_mid = EXP_MID[indices & 0x7];
// exp(x) ~ exp(hi) * exp(mid) * (1 + lo);
return (exp_hi * (exp_mid * (0x1.0p0hk + lo)));
}

} // namespace LIBC_NAMESPACE
20 changes: 20 additions & 0 deletions libc/src/stdfix/exphk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- Implementation header for exphk -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDFIX_EXPHK_H
#define LLVM_LIBC_SRC_STDFIX_EXPHK_H

#include "include/llvm-libc-macros/stdfix-macros.h"

namespace LIBC_NAMESPACE {

short accum exphk(short accum x);

} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC_STDFIX_EXPHK_H
104 changes: 104 additions & 0 deletions libc/src/stdfix/expk.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//===-- Implementation of expk function ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "expk.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/common.h"
#include "src/__support/fixed_point/fx_bits.h"

namespace LIBC_NAMESPACE {

namespace {

// Lookup tables used by expk for exp(hi) and exp(mid).

// EXP_HI[i] ~ exp(i - 11), rounded to 15 fractional bits.
// Generated with Sollya:
// > for i from 0 to 23 do {
//     hi = i - 11;
//     e_hi = nearestint(exp(hi) * 2^15) * 2^-15;
//     print(e_hi, "k,");
//   };
static constexpr accum EXP_HI[24] = {
    0x1p-15k,         // hi = -11
    0x1p-15k,         // hi = -10
    0x1p-13k,         // hi =  -9
    0x1.6p-12k,       // hi =  -8
    0x1.ep-11k,       // hi =  -7
    0x1.44p-9k,       // hi =  -6
    0x1.bap-8k,       // hi =  -5
    0x1.2cp-6k,       // hi =  -4
    0x1.97cp-5k,      // hi =  -3
    0x1.153p-3k,      // hi =  -2
    0x1.78b8p-2k,     // hi =  -1
    0x1p0k,           // hi =   0
    0x1.5bf1p1k,      // hi =   1
    0x1.d8e68p2k,     // hi =   2
    0x1.415e6p4k,     // hi =   3
    0x1.b4c9p5k,      // hi =   4
    0x1.28d388p7k,    // hi =   5
    0x1.936dc6p8k,    // hi =   6
    0x1.1228858p10k,  // hi =   7
    0x1.749ea7cp11k,  // hi =   8
    0x1.fa7157cp12k,  // hi =   9
    0x1.5829dcf8p14k, // hi =  10
    0x1.d3c4489p15k,  // hi =  11
    ACCUM_MAX,        // hi =  12
};

// EXP_MID[i] ~ exp(i/16 - 0.0625), rounded to 15 fractional bits.
// Generated with Sollya:
// > for i from 0 to 15 do {
//     m = i/16 - 0.0625;
//     e_m = nearestint(exp(m) * 2^15) * 2^-15;
//     print(e_m, "k,");
//   };
static constexpr accum EXP_MID[16] = {
    0x1.e0fcp-1k, // m = -1/16
    0x1p0k,       // m =  0
    0x1.1082p0k,  // m =  1/16
    0x1.2216p0k,  // m =  2/16
    0x1.34ccp0k,  // m =  3/16
    0x1.48b6p0k,  // m =  4/16
    0x1.5deap0k,  // m =  5/16
    0x1.747ap0k,  // m =  6/16
    0x1.8c8p0k,   // m =  7/16
    0x1.a612p0k,  // m =  8/16
    0x1.c14cp0k,  // m =  9/16
    0x1.de46p0k,  // m = 10/16
    0x1.fd1ep0k,  // m = 11/16
    0x1.0efap1k,  // m = 12/16
    0x1.2074p1k,  // m = 13/16
    0x1.330ep1k,  // m = 14/16
};

} // anonymous namespace

// Computes exp(x) for accum inputs using two table lookups and a second-order
// polynomial:
//   x = hi + mid + lo, with hi an integer, mid * 2^4 an integer, |lo| <= 2^-5,
//   exp(x) = exp(hi) * exp(mid) * exp(lo)
//          ~ exp(hi) * exp(mid) * (1 + lo + lo^2 / 2),
// with relative error < |lo|^3 / 2 <= 2^-16.
// Returns FXRep::MAX() once x reaches the overflow bound and FXRep::ZERO()
// once x is small enough that exp(x) rounds to 0.
LLVM_LIBC_FUNCTION(accum, expk, (accum x)) {
  using FXRep = fixed_point::FXRep<accum>;
  using StorageType = typename FXRep::StorageType;

  // Output overflow:
  //   floor(log(2^16) * 2^15) * 2^-15
  if (LIBC_UNLIKELY(x >= 0x1.62e4p3k))
    return FXRep::MAX();
  // Lower bound where exp(x) -> 0:
  //   floor(log(2^-16) * 2^15) * 2^-15
  if (LIBC_UNLIKELY(x <= -0x1.62e44p3k))
    return FXRep::ZERO();

  // From here on:
  //   -0x1.62e4p3 <= x <= 0x1.62e3cp3

  // Number of low bits dropped to land on the 1/16 grid.
  constexpr auto GRID_SHIFT = FXRep::FRACTION_LEN - 4;
  // Half of the grid step; adding it before truncation gives
  // round-to-nearest 1/16 with ties going towards +infinity.
  constexpr accum HALF_GRID_STEP = 0x1.0p-5k;
  // Offset (11.0625) that moves the smallest possible grid point to index 0.
  constexpr accum INDEX_BIAS = 0x1.62p3k;

  // x_grid = floor((x + 1/32) * 16) / 16, i.e. hi + mid.
  const accum biased = x + HALF_GRID_STEP;
  const accum x_grid = (biased >> GRID_SHIFT) << GRID_SHIFT;
  const accum lo = x - x_grid;

  // Range of x_grid:
  //   x_grid >= floor((-0x1.62e4p3 + 0x1.0p-5) * 2^4) * 2^-4
  //           = -0x1.62p3 = -11.0625
  // Shifting by the bias makes the table index start at 0:
  //   idx = (hi + mid + 177/16) * 16,  0 <= idx <= 355
  // which implies
  //   hi + mid = idx/16 - 11.0625
  const accum shifted = (x_grid + INDEX_BIAS) >> GRID_SHIFT;
  const StorageType idx = cpp::bit_cast<StorageType>(shifted);

  // The bits above the low nibble select exp( floor(idx / 16) - 11 ),
  // the low 4 bits select exp( (idx/16 - floor(idx/16)) - 0.0625 ).
  const accum exp_hi = EXP_HI[idx >> 4];
  const accum exp_mid = EXP_MID[idx & 0xf];

  // exp(lo) ~ 1 + lo + lo^2/2, evaluated in Horner form.
  const accum half_poly = 0x1.0p0k + (lo >> 1); // = 1 + lo / 2
  const accum exp_lo = 0x1.0p0k + lo * half_poly; // = 1 + lo * (1 + lo / 2)

  // exp(x) ~ exp(hi) * exp(mid) * (1 + lo + lo^2/2).
  return exp_hi * (exp_mid * exp_lo);
}

} // namespace LIBC_NAMESPACE
20 changes: 20 additions & 0 deletions libc/src/stdfix/expk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- Implementation header for expk --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDFIX_EXPK_H
#define LLVM_LIBC_SRC_STDFIX_EXPK_H

#include "include/llvm-libc-macros/stdfix-macros.h"

namespace LIBC_NAMESPACE {

accum expk(accum x);

} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC_STDFIX_EXPK_H
4 changes: 2 additions & 2 deletions libc/test/src/__support/CPP/bit_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) {
}

TYPED_TEST(LlvmLibcBitTest, CountOnes, UnsignedTypesNoBigInt) {
EXPECT_EQ(count_ones(T(0)), 0);
EXPECT_EQ(popcount(T(0)), 0);
for (int i = 0; i != cpp::numeric_limits<T>::digits; ++i)
EXPECT_EQ(count_ones<T>(cpp::numeric_limits<T>::max() >> i),
EXPECT_EQ(popcount<T>(cpp::numeric_limits<T>::max() >> i),
cpp::numeric_limits<T>::digits - i);
}

Expand Down
36 changes: 36 additions & 0 deletions libc/test/src/stdfix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,39 @@ add_libc_test(
libc.src.__support.FPUtil.basic_operations
libc.src.__support.FPUtil.sqrt
)

add_libc_test(
exphk_test
SUITE
libc-stdfix-tests
HDRS
ExpTest.h
SRCS
exphk_test.cpp
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.stdfix.exphk
libc.src.math.exp
libc.src.__support.CPP.bit
libc.src.__support.fixed_point.fx_rep
libc.src.__support.FPUtil.basic_operations
)

add_libc_test(
expk_test
SUITE
libc-stdfix-tests
HDRS
ExpTest.h
SRCS
expk_test.cpp
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.stdfix.expk
libc.src.math.exp
libc.src.__support.CPP.bit
libc.src.__support.fixed_point.fx_rep
libc.src.__support.FPUtil.basic_operations
)
77 changes: 77 additions & 0 deletions libc/test/src/stdfix/ExpTest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
//===-- Utility class to test fixed-point exp -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"

#include "src/__support/CPP/bit.h"
#include "src/__support/FPUtil/BasicOperations.h"
#include "src/__support/fixed_point/fx_rep.h"
#include "src/__support/fixed_point/sqrt.h"

#include "src/math/exp.h"

// Test fixture for fixed-point exp implementations operating on type T.
// Results of the function under test are compared against
// LIBC_NAMESPACE::exp evaluated in double precision.
template <typename T> class ExpTest : public LIBC_NAMESPACE::testing::Test {

  using FXRep = LIBC_NAMESPACE::fixed_point::FXRep<T>;
  static constexpr T zero = FXRep::ZERO();
  static constexpr T one = static_cast<T>(1);
  static constexpr T eps = FXRep::EPS();

public:
  // Signature of the function under test.
  using ExpFunc = T (*)(T);

  // Checks exp(0) == 1 and that large positive / negative inputs produce
  // FXRep::MAX() and zero respectively.
  void test_special_numbers(ExpFunc func) {
    EXPECT_EQ(one, func(T(0)));
    EXPECT_EQ(FXRep::MAX(), func(T(30)));
    EXPECT_EQ(zero, func(T(-30)));
  }

  // Evaluates func on COUNT inputs spaced `step` apart and compares each
  // result with the double-precision reference. x starts at `step` and is
  // advanced before every evaluation, so the sampled inputs are
  // 2 * step, 3 * step, ..., (COUNT + 1) * step.
  // When rel_error is true the relative error is checked, otherwise the
  // absolute error; the tolerance is 3 * eps in both cases.
  void test_range_with_step(ExpFunc func, T step, bool rel_error) {
    constexpr int COUNT = 255;
    constexpr double ERR = 3.0 * static_cast<double>(eps);

    T x = step;
    for (int remaining = COUNT; remaining > 0; --remaining) {
      x += step;

      const double x_as_double = static_cast<double>(x);
      const double actual = static_cast<double>(func(x));
      const double expected = LIBC_NAMESPACE::exp(x_as_double);

      double errors;
      if (rel_error)
        errors = LIBC_NAMESPACE::fputil::abs((actual / expected) - 1.0);
      else
        errors = LIBC_NAMESPACE::fputil::abs(actual - expected);

      if (errors > ERR) {
        // These comparisons are only here so that the offending input and
        // output get printed.
        EXPECT_EQ(x, T(0));
        EXPECT_EQ(func(x), zero);
      }
      ASSERT_TRUE(errors <= ERR);
    }
  }

  // Positive inputs: relative error check with a step of 2^-6.
  void test_positive_range(ExpFunc func) {
    test_range_with_step(func, T(0x1.0p-6), /*rel_error*/ true);
  }

  // Negative inputs: absolute error check with a step of -2^-6.
  void test_negative_range(ExpFunc func) {
    test_range_with_step(func, T(-0x1.0p-6), /*rel_error*/ false);
  }
};

// Instantiates the ExpTest fixture for one fixed-point exp function.
//   Name - suffix pasted into the fixture alias LlvmLibcExp<Name>Test.
//   T    - fixed-point type the fixture is instantiated with.
//   func - function under test; its address is passed to the fixture methods.
// Three tests are registered: SpecialNumbers, PositiveRange and NegativeRange.
// The trailing static_assert makes the invocation require a terminating
// semicolon.
#define LIST_EXP_TESTS(Name, T, func) \
using LlvmLibcExp##Name##Test = ExpTest<T>; \
TEST_F(LlvmLibcExp##Name##Test, SpecialNumbers) { \
test_special_numbers(&func); \
} \
TEST_F(LlvmLibcExp##Name##Test, PositiveRange) { \
test_positive_range(&func); \
} \
TEST_F(LlvmLibcExp##Name##Test, NegativeRange) { \
test_negative_range(&func); \
} \
static_assert(true, "Require semicolon.")
13 changes: 13 additions & 0 deletions libc/test/src/stdfix/exphk_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//===-- Unittests for exphk -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ExpTest.h"

#include "src/stdfix/exphk.h"

LIST_EXP_TESTS(hk, short accum, LIBC_NAMESPACE::exphk);
13 changes: 13 additions & 0 deletions libc/test/src/stdfix/expk_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//===-- Unittests for expk ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ExpTest.h"

#include "src/stdfix/expk.h"

LIST_EXP_TESTS(k, accum, LIBC_NAMESPACE::expk);
27 changes: 27 additions & 0 deletions lldb/packages/Python/lldbsuite/test/lldbplatformutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

# System modules
import itertools
import json
import re
import subprocess
import sys
Expand All @@ -16,6 +17,7 @@
from . import lldbtest_config
import lldbsuite.test.lldbplatform as lldbplatform
from lldbsuite.test.builders import get_builder
from lldbsuite.test.lldbutil import is_exe


def check_first_register_readable(test_case):
Expand Down Expand Up @@ -333,3 +335,28 @@ def expectedCompiler(compilers):
return True

return False


# This is a helper function to determine if a specific version of Xcode's linker
# contains a TLS bug. We want to skip TLS tests if the linker contains this bug,
# but adding linker/linker_version conditions to a decorator is challenging due
# to the number of ways linkers can enter the build process.
def xcode15LinkerBug():
    """Returns true iff a test is running on a darwin platform and the host linker is between versions 1000 and 1109.

    The linker version is read from the JSON printed by
    `xcrun ld -version_details`. Versions are compared as tuples of integers,
    so a version with extra components after 1109 (e.g. "1109.1") compares
    greater than (1109,) and is not reported as affected.

    Detection is best effort: if the linker cannot be queried or its output
    cannot be parsed, False is returned.
    """
    darwin_platforms = lldbplatform.translate(lldbplatform.darwin_all)
    if getPlatform() not in darwin_platforms:
        return False

    try:
        raw_version_details = subprocess.check_output(
            ("xcrun", "ld", "-version_details")
        )
        version_details = json.loads(raw_version_details)
        version = version_details.get("version", "0")
        version_tuple = tuple(int(x) for x in version.split("."))
        return (1000,) <= version_tuple <= (1109,)
    except Exception:
        # A missing xcrun/ld, a non-zero exit status, malformed JSON or a
        # non-numeric version all mean "cannot tell"; treat that as unaffected.
        # Unlike a bare `except:`, this does not swallow KeyboardInterrupt or
        # SystemExit.
        return False
1 change: 1 addition & 0 deletions lldb/test/API/lang/c/tls_globals/TestTlsGlobals.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def setUp(self):
@skipIfWindows
@skipIf(oslist=["linux"], archs=["arm", "aarch64"])
@skipIf(oslist=no_match([lldbplatformutil.getDarwinOSTriples(), "linux"]))
@expectedFailureIf(lldbplatformutil.xcode15LinkerBug())
def test(self):
"""Test thread-local storage."""
self.build()
Expand Down
2 changes: 1 addition & 1 deletion lldb/test/Shell/Unwind/eh-frame-dwarf-unwind.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Test handling of dwarf expressions specifying the location of registers, if
# those expressions refer to the frame's CFA value.

# UNSUPPORTED: system-windows
# UNSUPPORTED: system-windows, ld_new-bug
# REQUIRES: target-x86_64, native

# RUN: %clang_host %p/Inputs/call-asm.c %p/Inputs/eh-frame-dwarf-unwind.s -o %t
Expand Down
2 changes: 1 addition & 1 deletion lldb/test/Shell/Unwind/thread-step-out-ret-addr-check.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# points to non-executable memory.

# REQUIRES: target-x86_64
# UNSUPPORTED: system-windows
# UNSUPPORTED: system-windows, ld_new-bug

# RUN: %clang_host %p/Inputs/call-asm.c -x assembler-with-cpp %p/Inputs/thread-step-out-ret-addr-check.s -o %t
# RUN: not %lldb %t -s %s -b 2>&1 | FileCheck %s
Expand Down
16 changes: 16 additions & 0 deletions lldb/test/Shell/lit.cfg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- Python -*-

import json
import os
import platform
import re
Expand Down Expand Up @@ -179,3 +180,18 @@ def calculate_arch_features(arch_string):

if "LD_PRELOAD" in os.environ:
config.available_features.add("ld_preload-present")

# Determine if a specific version of Xcode's linker contains a bug. We want to
# skip affected tests if the host linker contains this bug. The version is read
# from the JSON printed by `xcrun ld -version_details` and compared as a tuple
# of integers.
if platform.system() == "Darwin":
    try:
        raw_version_details = subprocess.check_output(
            ("xcrun", "ld", "-version_details")
        )
        version_details = json.loads(raw_version_details)
        version = version_details.get("version", "0")
        version_tuple = tuple(int(x) for x in version.split("."))
        if (1000,) <= version_tuple <= (1109,):
            config.available_features.add("ld_new-bug")
    except Exception:
        # Best effort: if the linker cannot be queried or its output cannot be
        # parsed, leave the feature unset. Unlike a bare `except:`, this does
        # not swallow KeyboardInterrupt or SystemExit.
        pass
18 changes: 18 additions & 0 deletions llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ class LLJITBuilderState {

using PlatformSetupFunction = unique_function<Expected<JITDylibSP>(LLJIT &J)>;

using NotifyCreatedFunction = std::function<Error(LLJIT &)>;

std::unique_ptr<ExecutorProcessControl> EPC;
std::unique_ptr<ExecutionSession> ES;
std::optional<JITTargetMachineBuilder> JTMB;
Expand All @@ -321,6 +323,7 @@ class LLJITBuilderState {
CompileFunctionCreator CreateCompileFunction;
unique_function<Error(LLJIT &)> PrePlatformSetup;
PlatformSetupFunction SetUpPlatform;
NotifyCreatedFunction NotifyCreated;
unsigned NumCompileThreads = 0;

/// Called prior to JIT class construction to fix up defaults.
Expand Down Expand Up @@ -441,6 +444,16 @@ class LLJITBuilderSetters {
return impl();
}

/// Register a callback to be run after the JIT has been constructed
/// successfully.
///
/// Typical uses are attaching generators to JITDylibs or injecting initial
/// symbol definitions.
SetterImpl &
setNotifyCreatedCallback(LLJITBuilderState::NotifyCreatedFunction Callback) {
  auto &Builder = impl();
  Builder.NotifyCreated = std::move(Callback);
  return Builder;
}

/// Set the number of compile threads to use.
///
/// If set to zero, compilation will be performed on the execution thread when
Expand Down Expand Up @@ -474,6 +487,11 @@ class LLJITBuilderSetters {
std::unique_ptr<JITType> J(new JITType(impl(), Err));
if (Err)
return std::move(Err);

if (impl().NotifyCreated)
if (Error Err = impl().NotifyCreated(*J))
return std::move(Err);

return std::move(J);
}

Expand Down
6 changes: 4 additions & 2 deletions llvm/include/llvm/TextAPI/Record.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,20 @@ class GlobalRecord : public Record {
};

GlobalRecord(StringRef Name, RecordLinkage Linkage, SymbolFlags Flags,
Kind GV)
: Record({Name, Linkage, Flags}), GV(GV) {}
Kind GV, bool Inlined)
: Record({Name, Linkage, Flags}), GV(GV), Inlined(Inlined) {}

bool isFunction() const { return GV == Kind::Function; }
bool isVariable() const { return GV == Kind::Variable; }
void setKind(const Kind &V) {
if (GV == Kind::Unknown)
GV = V;
}
bool isInlined() const { return Inlined; }

private:
Kind GV;
bool Inlined = false;
};

// Define Objective-C instance variable records.
Expand Down
5 changes: 4 additions & 1 deletion llvm/include/llvm/TextAPI/RecordsSlice.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ class RecordsSlice {
/// \param Linkage The linkage of symbol.
/// \param GV The kind of global.
/// \param Flags The flags that describe attributes of the symbol.
/// \param Inlined Whether declaration is inlined, only applicable to
/// functions.
/// \return The non-owning pointer to added record in slice.
GlobalRecord *addGlobal(StringRef Name, RecordLinkage Linkage,
GlobalRecord::Kind GV,
SymbolFlags Flags = SymbolFlags::None);
SymbolFlags Flags = SymbolFlags::None,
bool Inlined = false);

/// Add ObjC Class record.
///
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6369,6 +6369,9 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
if (CondTy != LLT::scalar(1))
return false;

if (TrueTy.isPointer())
return false;

// Both are scalars.
std::optional<ValueAndVReg> TrueOpt =
getIConstantVRegValWithLookThrough(True, MRI);
Expand Down Expand Up @@ -6713,6 +6716,9 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
LLT CmpTy = MRI.getType(Cmp1->getReg(0));
LLT CmpOperandTy = MRI.getType(R1);

if (CmpOperandTy.isPointer())
return false;

// We build ands, adds, and constants of type CmpOperandTy.
// They must be legal to build.
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -720,14 +720,22 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
for (auto &P : Plugins)
Err = joinErrors(std::move(Err), P->notifyEmitted(MR));

if (Err)
if (Err) {
if (FA)
Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA)));
return Err;
}

if (!FA)
return Error::success();

return MR.withResourceKeyDo(
Err = MR.withResourceKeyDo(
[&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); });

if (Err)
Err = joinErrors(std::move(Err), MemMgr.deallocate(std::move(FA)));

return Err;
}

Error ObjectLinkingLayer::handleRemoveResources(JITDylib &JD, ResourceKey K) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
.moreElementsToNextPow2(0)
.widenVectorEltsToVectorMinSize(0, 64);

getActionDefinitionsBuilder(G_BUILD_VECTOR)
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
VOP2_Real <ps, Gen.Subtarget, real_name> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
Expand Down Expand Up @@ -1272,7 +1272,7 @@ class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
Expand Down Expand Up @@ -1301,7 +1301,7 @@ class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
VOPProfile p = ps.Pfl> :
VOP2_DPP8<op, ps, p> {
let AssemblerPredicate = Gen.AssemblerPredicate;
let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, -1) -> (c-1) | y
if (isAllOnesConstant(FalseV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}

// (select c, 0, y) -> (c-1) & y
if (isNullConstant(TrueV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
if (isNullConstant(FalseV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
}

Expand All @@ -7290,13 +7290,13 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select !x, x, y) -> x & y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
FalseV);
DAG.getFreeze(FalseV));
}
// (select x, y, x) -> x & y
// (select !x, y, x) -> x | y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
FalseV);
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
DAG.getFreeze(TrueV), FalseV);
}
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/RISCV/RISCVInstrInfoM.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def DIV : ALU_rr<0b0000001, 0b100, "div">,
def DIVU : ALU_rr<0b0000001, 0b101, "divu">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
def REM : ALU_rr<0b0000001, 0b110, "rem">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
Sched<[WriteIRem, ReadIRem, ReadIRem]>;
def REMU : ALU_rr<0b0000001, 0b111, "remu">,
Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>;
Sched<[WriteIRem, ReadIRem, ReadIRem]>;
} // Predicates = [HasStdExtM]

let Predicates = [HasStdExtMOrZmmul, IsRV64], IsSignExtendingOpW = 1 in {
Expand All @@ -57,9 +57,9 @@ def DIVW : ALUW_rr<0b0000001, 0b100, "divw">,
def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">,
Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
def REMW : ALUW_rr<0b0000001, 0b110, "remw">,
Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>;
def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">,
Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>;
Sched<[WriteIRem32, ReadIRem32, ReadIRem32]>;
} // Predicates = [HasStdExtM, IsRV64]

//===----------------------------------------------------------------------===//
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSchedRocket.td
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,16 @@ def : WriteRes<WriteIDiv, [RocketUnitIDiv]> {
let ReleaseAtCycles = [33];
}

// Integer remainder
def : WriteRes<WriteIRem32, [RocketUnitIDiv]> {
let Latency = 34;
let ReleaseAtCycles = [34];
}
def : WriteRes<WriteIRem, [RocketUnitIDiv]> {
let Latency = 33;
let ReleaseAtCycles = [33];
}

// Memory
def : WriteRes<WriteSTB, [RocketUnitMem]>;
def : WriteRes<WriteSTH, [RocketUnitMem]>;
Expand Down Expand Up @@ -189,6 +199,8 @@ def : ReadAdvance<ReadShiftReg, 0>;
def : ReadAdvance<ReadShiftReg32, 0>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
WriteREV8, WriteORCB, WriteSFB,
WriteIMul, WriteIMul32,
WriteIDiv, WriteIDiv32,
WriteIRem, WriteIRem32,
WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;

// SiFive7 machine model for scheduling and other instruction cost heuristics.
Expand Down Expand Up @@ -273,6 +274,16 @@ def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
let ReleaseAtCycles = [1, 33];
}

// Integer remainder
def : WriteRes<WriteIRem, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 66;
let ReleaseAtCycles = [1, 65];
}
def : WriteRes<WriteIRem32, [SiFive7PipeB, SiFive7IDiv]> {
let Latency = 34;
let ReleaseAtCycles = [1, 33];
}

// Bitmanip
let Latency = 3 in {
// Rotates are in the late-B ALU.
Expand Down Expand Up @@ -946,6 +957,8 @@ def : SiFive7AnyToGPRBypass<ReadShiftReg>;
def : SiFive7AnyToGPRBypass<ReadShiftReg32>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ def : WriteRes<WriteIDiv32, [SiFiveP400MulDiv, SiFiveP400Div]> {
let ReleaseAtCycles = [1, 19];
}

// Integer remainder
def : WriteRes<WriteIRem, [SiFiveP400MulDiv, SiFiveP400Div]> {
let Latency = 35;
let ReleaseAtCycles = [1, 34];
}
def : WriteRes<WriteIRem32, [SiFiveP400MulDiv, SiFiveP400Div]> {
let Latency = 20;
let ReleaseAtCycles = [1, 19];
}

let Latency = 1 in {
// Bitmanip
def : WriteRes<WriteRotateImm, [SiFiveP400IntArith]>;
Expand Down Expand Up @@ -258,6 +268,8 @@ def : ReadAdvance<ReadShiftReg, 0>;
def : ReadAdvance<ReadShiftReg32, 0>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ def : WriteRes<WriteShiftReg, [SCR1_ALU]>;
def : WriteRes<WriteIMul, [SCR1_MUL]>;
def : WriteRes<WriteIMul32, [SCR1_MUL]>;

// Integer division: latency 33, inverse throughput 33
// Integer division/remainder: latency 33, inverse throughput 33
let Latency = 33, ReleaseAtCycles = [33] in {
def : WriteRes<WriteIDiv32, [SCR1_DIV]>;
def : WriteRes<WriteIDiv, [SCR1_DIV]>;
def : WriteRes<WriteIRem32, [SCR1_DIV]>;
def : WriteRes<WriteIRem, [SCR1_DIV]>;
}

// Load/store instructions on SCR1 have latency 2 and inverse throughput 2
Expand Down Expand Up @@ -147,6 +149,8 @@ def : ReadAdvance<ReadShiftReg, 0>;
def : ReadAdvance<ReadShiftReg32, 0>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,13 @@ def : WriteRes<WriteIMul, [XS2MDU]>;
def : WriteRes<WriteIMul32, [XS2MDU]>;
}

// Integer division
// Integer division/remainder
// SRT16 algorithm
let Latency = 20, ReleaseAtCycles = [20] in {
def : WriteRes<WriteIDiv32, [XS2MDU]>;
def : WriteRes<WriteIDiv, [XS2MDU]>;
def : WriteRes<WriteIRem32, [XS2MDU]>;
def : WriteRes<WriteIRem, [XS2MDU]>;
}

// Zb*
Expand Down Expand Up @@ -221,6 +223,8 @@ def : XS2LoadToALUBypass<ReadShiftReg>;
def : XS2LoadToALUBypass<ReadShiftReg32>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIRem, 0>;
def : ReadAdvance<ReadIRem32, 0>;
def : ReadAdvance<ReadIMul, 0>;
def : ReadAdvance<ReadIMul32, 0>;
def : ReadAdvance<ReadAtomicWA, 0>;
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/RISCV/RISCVSchedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ def WriteShiftImm : SchedWrite; // 32 or 64-bit shift by immediate operatio
def WriteShiftImm32 : SchedWrite; // 32-bit shift by immediate operations on RV64Ix
def WriteShiftReg : SchedWrite; // 32 or 64-bit shift by register operations
def WriteShiftReg32 : SchedWrite; // 32-bit shift by register operations on RV64Ix
def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder
def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I
def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide
def WriteIDiv32 : SchedWrite; // 32-bit divide on RV64I
def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder
def WriteIRem32 : SchedWrite; // 32-bit remainder on RV64I
def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply
def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I
def WriteJmp : SchedWrite; // Jump
Expand Down Expand Up @@ -135,6 +137,8 @@ def ReadShiftReg : SchedRead;
def ReadShiftReg32 : SchedRead; // 32-bit shift by register operations on RV64Ix
def ReadIDiv : SchedRead;
def ReadIDiv32 : SchedRead;
def ReadIRem : SchedRead;
def ReadIRem32 : SchedRead;
def ReadIMul : SchedRead;
def ReadIMul32 : SchedRead;
def ReadAtomicBA : SchedRead;
Expand Down
9 changes: 3 additions & 6 deletions llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1778,12 +1778,9 @@ bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
.addImm(8);

// Now reference the 8-bit subreg of the result.
BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(DstReg)
.addImm(0)
.addReg(ResultSuperReg)
.addImm(X86::sub_8bit);
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
DstReg)
.addReg(ResultSuperReg, 0, X86::sub_8bit);
} else {
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
DstReg)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
return typeInSet(0, {s8, s16, s32})(Query) ||
(Is64Bit && typeInSet(0, {s64})(Query));
})
.libcallFor({s64})
.clampScalar(0, s8, sMaxScalar);

// integer shifts
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/TextAPI/RecordsSlice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ ObjCIVarRecord *RecordsSlice::findObjCIVar(bool IsScopedName,
}

GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage,
GlobalRecord::Kind GV,
SymbolFlags Flags) {
GlobalRecord::Kind GV, SymbolFlags Flags,
bool Inlined) {
if (GV == GlobalRecord::Kind::Function)
Flags |= SymbolFlags::Text;
else if (GV == GlobalRecord::Kind::Variable)
Expand All @@ -182,7 +182,7 @@ GlobalRecord *RecordsSlice::addGlobal(StringRef Name, RecordLinkage Linkage,
auto Result = Globals.insert({Name, nullptr});
if (Result.second)
Result.first->second =
std::make_unique<GlobalRecord>(Name, Linkage, Flags, GV);
std::make_unique<GlobalRecord>(Name, Linkage, Flags, GV, Inlined);
else {
updateLinkage(Result.first->second.get(), Linkage);
updateFlags(Result.first->second.get(), Flags);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ class RuntimeCallInserter {
ArrayRef<Value *> Args = {},
const Twine &Name = "") {
assert(IRB.GetInsertBlock()->getParent() == OwnerFn);
(void)OwnerFn;
return IRB.CreateCall(Callee, Args, Name, nullptr);
}
};
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,9 +805,12 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
Value *FrozenX = X;
if (!isGuaranteedNotToBeUndef(X))
FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem");
auto *Cmp =
B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp");
Value *FrozenY = Y;
if (!isGuaranteedNotToBeUndef(Y))
FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen");
auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem");
auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY,
Instr->getName() + ".cmp");
ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX);
} else {
auto *Cmp =
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir
Original file line number Diff line number Diff line change
Expand Up @@ -406,3 +406,66 @@ body: |
%zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>)
$q0 = COPY %zext
...
---
name: test_dont_combine_pointers
body: |
; CHECK-LABEL: name: test_dont_combine_pointers
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -8
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
; CHECK-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[C]](p0) :: (load (p0))
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](p0), [[INTTOPTR1]]
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: G_BRCOND [[AND]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.1(0x55555555), %bb.3(0x2aaaaaab)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: G_BRCOND [[C3]](s1), %bb.1
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: G_BR %bb.1
bb.1:
%1:_(p0) = G_CONSTANT i64 0
%3:_(s64) = G_CONSTANT i64 -8
%2:_(p0) = G_INTTOPTR %3(s64)
%6:_(s64) = G_CONSTANT i64 -16
%5:_(p0) = G_INTTOPTR %6(s64)
%10:_(s1) = G_CONSTANT i1 false
bb.2:
successors: %bb.4(0x60000000), %bb.3(0x20000000)
%0:_(p0) = G_LOAD %1(p0) :: (load (p0))
%4:_(s1) = G_ICMP intpred(eq), %0(p0), %2
%7:_(s1) = G_ICMP intpred(eq), %0(p0), %5
%8:_(s1) = G_OR %4, %7
%9:_(s1) = G_SELECT %8(s1), %10, %10
G_BRCOND %8(s1), %bb.4
G_BR %bb.3
bb.4:
successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
G_BRCOND %10(s1), %bb.2
G_BR %bb.3
bb.3:
G_BR %bb.2
...
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
Original file line number Diff line number Diff line change
Expand Up @@ -859,3 +859,40 @@ body: |
RET_ReallyLR implicit $x0
...
---
name: dont_combine_pointer_type_select_of_constant
alignment: 4
liveins:
- { reg: '$w0' }
- { reg: '$x1' }
body: |
bb.1:
liveins: $w0, $x1
; CHECK-LABEL: name: dont_combine_pointer_type_select_of_constant
; CHECK: liveins: $w0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C1]](s64)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[TRUNC1]](s1), [[C]], [[INTTOPTR]]
; CHECK-NEXT: G_STORE [[SELECT]](p0), [[COPY1]](p0) :: (store (p0))
; CHECK-NEXT: RET_ReallyLR
%3:_(s32) = COPY $w0
%2:_(s8) = G_TRUNC %3(s32)
%1:_(p0) = COPY $x1
%4:_(s8) = G_ASSERT_ZEXT %2, 1
%0:_(s1) = G_TRUNC %4(s8)
%6:_(p0) = G_CONSTANT i64 0
%8:_(s64) = G_CONSTANT i64 -1
%7:_(p0) = G_INTTOPTR %8(s64)
%5:_(p0) = G_SELECT %0(s1), %6, %7
G_STORE %5(p0), %1(p0) :: (store (p0))
RET_ReallyLR
...
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,73 @@ body: |
$q0 = COPY %2(<2 x s64>)
RET_ReallyLR
...
---
name: v3s8_crash
body: |
; CHECK-LABEL: name: v3s8_crash
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $w2, $w3, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C2]](s16), [[C1]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>)
; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY]](p0) :: (store (s32), align 16)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8)
; CHECK-NEXT: G_BR %bb.1
bb.1:
liveins: $w1, $w2, $w3, $x0
%0:_(p0) = COPY $x0
%2:_(s32) = COPY $w1
%3:_(s32) = COPY $w2
%4:_(s32) = COPY $w3
%5:_(<3 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32)
%1:_(<3 x s8>) = G_TRUNC %5(<3 x s32>)
%8:_(s64) = G_CONSTANT i64 0
%11:_(s8) = G_IMPLICIT_DEF
%7:_(s8) = G_CONSTANT i8 0
%10:_(<3 x s8>) = G_BUILD_VECTOR %7(s8), %11(s8), %11(s8)
bb.2:
%14:_(s64) = G_CONSTANT i64 0
%15:_(s8) = G_CONSTANT i8 0
%6:_(<3 x s8>) = G_INSERT_VECTOR_ELT %1, %15(s8), %14(s64)
%9:_(<12 x s8>) = G_SHUFFLE_VECTOR %6(<3 x s8>), %10, shufflemask(0, 3, 3, 3, 1, 3, 3, 3, 2, 3, 3, 3)
%12:_(<3 x s32>) = G_BITCAST %9(<12 x s8>)
%13:_(<3 x s32>) = G_UITOFP %12(<3 x s32>)
G_STORE %13(<3 x s32>), %0(p0) :: (store (<3 x s32>))
G_BR %bb.2
...
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/RISCV/alu64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: sltiu a0, a0, 3
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: addi a2, a2, 1
; RV32IA-NEXT: sltu a4, a3, a1
; RV32IA-NEXT: neg a4, a4
; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: addi a4, a2, 1
; RV32IA-NEXT: sltu a2, a2, a1
; RV32IA-NEXT: neg a2, a2
; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down Expand Up @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: addi a2, a2, 1
; RV64IA-NEXT: sltu a4, a3, a1
; RV64IA-NEXT: neg a4, a4
; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: addi a4, a2, 1
; RV64IA-NEXT: sltu a2, a2, a1
; RV64IA-NEXT: neg a2, a2
; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down
169 changes: 96 additions & 73 deletions llvm/test/CodeGen/RISCV/bfloat-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
; RV32IZFBFMIN-NEXT: and a0, s3, a0
; RV32IZFBFMIN-NEXT: or a0, s1, a0
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
; RV32IZFBFMIN-NEXT: lui a2, 524288
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
; RV32IZFBFMIN-NEXT: li a5, 1
; RV32IZFBFMIN-NEXT: lui a3, 524288
; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
; RV32IZFBFMIN-NEXT: and a0, a2, a0
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a4, a1
; RV32IZFBFMIN-NEXT: and a1, a4, a2
; RV32IZFBFMIN-NEXT: neg a2, a3
; RV32IZFBFMIN-NEXT: neg a3, s0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: or a0, a2, a0
; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
; RV32IZFBFMIN-NEXT: and a1, a2, a3
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
; R32IDZFBFMIN-NEXT: and a0, s3, a0
; R32IDZFBFMIN-NEXT: or a0, s1, a0
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
; R32IDZFBFMIN-NEXT: lui a2, 524288
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
; R32IDZFBFMIN-NEXT: li a5, 1
; R32IDZFBFMIN-NEXT: lui a3, 524288
; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
; R32IDZFBFMIN-NEXT: and a0, a2, a0
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a4, a1
; R32IDZFBFMIN-NEXT: and a1, a4, a2
; R32IDZFBFMIN-NEXT: neg a2, a3
; R32IDZFBFMIN-NEXT: neg a3, s0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: or a0, a2, a0
; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: and a1, a2, a3
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: addi sp, sp, -32
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fs0, a0
; RV32ID-NEXT: flt.s s0, fa5, fs0
; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
; RV32ID-NEXT: fle.s s0, fa5, fs0
; RV32ID-NEXT: fle.s s2, fa5, fs0
; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fmv.s fa0, fs0
; RV32ID-NEXT: call __fixsfdi
; RV32ID-NEXT: and a0, s3, a0
; RV32ID-NEXT: or a0, s1, a0
; RV32ID-NEXT: feq.s a2, fs0, fs0
; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
; RV32ID-NEXT: lui a2, 524288
; RV32ID-NEXT: beqz s0, .LBB10_2
; RV32ID-NEXT: li a5, 1
; RV32ID-NEXT: lui a3, 524288
; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
; RV32ID-NEXT: mv a2, a1
; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32ID-NEXT: flt.s a3, fa5, fs0
; RV32ID-NEXT: beqz a3, .LBB10_4
; RV32ID-NEXT: and a0, a2, a0
; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
; RV32ID-NEXT: addi a2, a4, -1
; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
; RV32ID-NEXT: feq.s a1, fs0, fs0
; RV32ID-NEXT: neg a4, a1
; RV32ID-NEXT: and a1, a4, a2
; RV32ID-NEXT: neg a2, a3
; RV32ID-NEXT: neg a3, s0
; RV32ID-NEXT: and a0, a3, a0
; RV32ID-NEXT: or a0, a2, a0
; RV32ID-NEXT: and a0, a4, a0
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-NEXT: and a1, a2, a3
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32ID-NEXT: addi sp, sp, 16
; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
Expand Down Expand Up @@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-NEXT: neg s0, a0
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
; CHECK32ZFBFMIN-NEXT: neg s1, a0
; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
Expand All @@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
; RV32ID-NEXT: neg s1, a0
; RV32ID-NEXT: xori a0, a0, 1
; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
Expand Down
Loading