Skip to content

Commit

Permalink
[Support] Move getHostNumPhysicalCores to Threading.h
Browse files Browse the repository at this point in the history
This change is focussed on simplifying `Support/Host.h` to only do
target detection. In this case, this function is close in usage to
existing functions in `Support/Threading.h`, so I moved it into there.
The function is also renamed to `llvm::get_physical_cores()` to match
the style of threading's functions.

Differential Revision: https://reviews.llvm.org/D137836
  • Loading branch information
lenary committed Nov 25, 2022
1 parent b32931c commit 5577207
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 171 deletions.
6 changes: 1 addition & 5 deletions clang-tools-extra/clangd/test/Inputs/BenchmarkHeader.h
Expand Up @@ -7,11 +7,7 @@ class Dex;
} // namespace clang

namespace llvm {
namespace sys {

int getHostNumPhysicalCores();

} // namespace sys
int get_physical_cores();
} // namespace llvm

namespace {
Expand Down
5 changes: 0 additions & 5 deletions llvm/include/llvm/Support/Host.h
Expand Up @@ -54,11 +54,6 @@ namespace sys {
/// \return - True on success.
bool getHostCPUFeatures(StringMap<bool, MallocAllocator> &Features);

/// Get the number of physical cores (as opposed to logical cores returned
/// from thread::hardware_concurrency(), which includes hyperthreads).
/// Returns -1 if unknown for the current host system.
int getHostNumPhysicalCores();

namespace detail {
/// Helper functions to extract HostCPUName from /proc/cpuinfo on linux.
StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent);
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Support/Threading.h
Expand Up @@ -231,6 +231,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
/// Returns how many physical CPUs or NUMA groups the system has.
unsigned get_cpus();

/// Returns how many physical cores the host system has (as opposed to logical
/// cores returned from thread::hardware_concurrency(), which includes
/// hyperthreads). Returns -1 if unknown for the current host system.
int get_physical_cores();

enum class ThreadPriority {
/// Lower the current thread's priority as much as possible. Can be used
/// for long-running tasks that are not time critical; more energy-
Expand Down
122 changes: 0 additions & 122 deletions llvm/lib/Support/Host.cpp
Expand Up @@ -1576,128 +1576,6 @@ VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
} // namespace llvm
#endif

#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
// Enabled represents the number of physical id/core id pairs with at least
// one processor id enabled by the CPU affinity mask.
cpu_set_t Affinity, Enabled;
if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
return -1;
CPU_ZERO(&Enabled);

// Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
// mmapped because it appears to have 0 size.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Can't read "
<< "/proc/cpuinfo: " << EC.message() << "\n";
return -1;
}
SmallVector<StringRef, 8> strs;
(*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
int CurProcessor = -1;
int CurPhysicalId = -1;
int CurSiblings = -1;
int CurCoreId = -1;
for (StringRef Line : strs) {
std::pair<StringRef, StringRef> Data = Line.split(':');
auto Name = Data.first.trim();
auto Val = Data.second.trim();
// These fields are available if the kernel is configured with CONFIG_SMP.
if (Name == "processor")
Val.getAsInteger(10, CurProcessor);
else if (Name == "physical id")
Val.getAsInteger(10, CurPhysicalId);
else if (Name == "siblings")
Val.getAsInteger(10, CurSiblings);
else if (Name == "core id") {
Val.getAsInteger(10, CurCoreId);
// The processor id corresponds to an index into cpu_set_t.
if (CPU_ISSET(CurProcessor, &Affinity))
CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
}
}
return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
// s390x Linux: report the value sysconf() gives for _SC_NPROCESSORS_ONLN,
// narrowed to int.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
#elif defined(__linux__) && !defined(__ANDROID__)
// Returns the number of CPUs in the calling process's affinity mask, or -1 if
// the mask cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxDynCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxDynCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynSize = CPU_ALLOC_SIZE(MaxDynCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only inspects the first sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);
  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
// Queries "hw.physicalcpu" first and falls back to CTL_HW/HW_AVAILCPU when
// that query fails or reports no cores. Returns -1 if neither query yields a
// positive count.
static int computeHostNumPhysicalCores() {
  // Zero-initialised so a failed query can never leave an indeterminate value.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // Reset both in/out arguments: the first call may have modified them.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return static_cast<int>(count);
}
#elif defined(__MVS__)
// z/OS: returns the number of online standard CPs by following fixed byte
// offsets through the control blocks named in the enum comments below
// (PSA -> CVT -> CSD).
static int computeHostNumPhysicalCores() {
enum {
// Byte offset of the pointer to the Communications Vector Table (CVT) in
// the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
// will be zero-extended to uintptr_t.
FLCCVT = 16,
// Byte offset of the pointer to the Common System Data Area (CSD) in the
// CVT. The table entry is a 31-bit pointer and will be zero-extended to
// uintptr_t.
CVTCSD = 660,
// Byte offset to the number of live CPs in the LPAR, stored as a signed
// 32-bit value in the table.
CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
};
// The PSA base is the literal address 0; everything below is a raw read at a
// fixed offset from a control-block base.
// NOTE(review): presumably valid only because z/OS maps the PSA at address 0
// -- confirm against the platform documentation.
char *PSA = 0;
// Read the unsigned int stored at PSA + FLCCVT and widen it to a CVT pointer.
char *CVT = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
// Read the unsigned int stored at CVT + CVTCSD and widen it to a CSD pointer.
char *CSD = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
// The count is read as an int directly out of the CSD.
return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Defined in llvm/lib/Support/Windows/Threading.inc
int computeHostNumPhysicalCores();
#else
// On other systems the physical core count is unknown, which is reported
// as -1.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
#endif

// Returns the host's physical core count. computeHostNumPhysicalCores() is
// evaluated on the first call only; later calls reuse the stored result.
int sys::getHostNumPhysicalCores() {
  static const int CachedNumCores = computeHostNumPhysicalCores();
  return CachedNumCores;
}

#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Expand Down
132 changes: 129 additions & 3 deletions llvm/lib/Support/Threading.cpp
Expand Up @@ -13,8 +13,12 @@

#include "llvm/Support/Threading.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Host.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <cassert>
#include <errno.h>
Expand Down Expand Up @@ -50,8 +54,8 @@ unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
static int computeHostNumHardwareThreads();

unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
int MaxThreadCount = UseHyperThreads ? computeHostNumHardwareThreads()
: sys::getHostNumPhysicalCores();
int MaxThreadCount =
UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores();
if (MaxThreadCount <= 0)
MaxThreadCount = 1;
if (ThreadsRequested == 0)
Expand Down Expand Up @@ -106,3 +110,125 @@ llvm::get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default) {
S.ThreadsRequested = V;
return S;
}

#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
static int computeHostNumPhysicalCores() {
// Enabled represents the number of physical id/core id pairs with at least
// one processor id enabled by the CPU affinity mask.
cpu_set_t Affinity, Enabled;
if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
return -1;
CPU_ZERO(&Enabled);

// Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
// mmapped because it appears to have 0 size.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Can't read "
<< "/proc/cpuinfo: " << EC.message() << "\n";
return -1;
}
SmallVector<StringRef, 8> strs;
(*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
int CurProcessor = -1;
int CurPhysicalId = -1;
int CurSiblings = -1;
int CurCoreId = -1;
for (StringRef Line : strs) {
std::pair<StringRef, StringRef> Data = Line.split(':');
auto Name = Data.first.trim();
auto Val = Data.second.trim();
// These fields are available if the kernel is configured with CONFIG_SMP.
if (Name == "processor")
Val.getAsInteger(10, CurProcessor);
else if (Name == "physical id")
Val.getAsInteger(10, CurPhysicalId);
else if (Name == "siblings")
Val.getAsInteger(10, CurSiblings);
else if (Name == "core id") {
Val.getAsInteger(10, CurCoreId);
// The processor id corresponds to an index into cpu_set_t.
if (CPU_ISSET(CurProcessor, &Affinity))
CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
}
}
return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
// s390x Linux: report the value sysconf() gives for _SC_NPROCESSORS_ONLN,
// narrowed to int.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
#elif defined(__linux__) && !defined(__ANDROID__)
// Returns the number of CPUs in the calling process's affinity mask, or -1 if
// the mask cannot be obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxDynCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxDynCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynSize = CPU_ALLOC_SIZE(MaxDynCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only inspects the first sizeof(cpu_set_t) bytes.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);
  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
// Queries "hw.physicalcpu" first and falls back to CTL_HW/HW_AVAILCPU when
// that query fails or reports no cores. Returns -1 if neither query yields a
// positive count.
static int computeHostNumPhysicalCores() {
  // Zero-initialised so a failed query can never leave an indeterminate value.
  uint32_t count = 0;
  size_t len = sizeof(count);
  if (sysctlbyname("hw.physicalcpu", &count, &len, nullptr, 0) != 0 ||
      count < 1) {
    int nm[2] = {CTL_HW, HW_AVAILCPU};
    // Reset both in/out arguments: the first call may have modified them.
    count = 0;
    len = sizeof(count);
    if (sysctl(nm, 2, &count, &len, nullptr, 0) != 0 || count < 1)
      return -1;
  }
  return static_cast<int>(count);
}
#elif defined(__MVS__)
// z/OS: returns the number of online standard CPs by following fixed byte
// offsets through the control blocks named in the enum comments below
// (PSA -> CVT -> CSD).
static int computeHostNumPhysicalCores() {
enum {
// Byte offset of the pointer to the Communications Vector Table (CVT) in
// the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
// will be zero-extended to uintptr_t.
FLCCVT = 16,
// Byte offset of the pointer to the Common System Data Area (CSD) in the
// CVT. The table entry is a 31-bit pointer and will be zero-extended to
// uintptr_t.
CVTCSD = 660,
// Byte offset to the number of live CPs in the LPAR, stored as a signed
// 32-bit value in the table.
CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
};
// The PSA base is the literal address 0; everything below is a raw read at a
// fixed offset from a control-block base.
// NOTE(review): presumably valid only because z/OS maps the PSA at address 0
// -- confirm against the platform documentation.
char *PSA = 0;
// Read the unsigned int stored at PSA + FLCCVT and widen it to a CVT pointer.
char *CVT = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
// Read the unsigned int stored at CVT + CVTCSD and widen it to a CSD pointer.
char *CSD = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
// The count is read as an int directly out of the CSD.
return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Defined in llvm/lib/Support/Windows/Threading.inc
int computeHostNumPhysicalCores();
#else
// On other systems the physical core count is unknown, which is reported
// as -1.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
#endif

// Returns the host's physical core count. computeHostNumPhysicalCores() is
// evaluated on the first call only; later calls reuse the stored result.
int llvm::get_physical_cores() {
  static const int CachedNumCores = computeHostNumPhysicalCores();
  return CachedNumCores;
}
41 changes: 5 additions & 36 deletions llvm/unittests/Support/Host.cpp
Expand Up @@ -30,37 +30,6 @@

using namespace llvm;

class HostTest : public testing::Test {
Triple Host;

protected:
bool isSupportedArchAndOS() {
// Initially this is only testing detection of the number of
// physical cores, which is currently only supported/tested on
// some systems.
return (Host.isOSWindows() && llvm_is_multithreaded()) ||
Host.isOSDarwin() || (Host.isX86() && Host.isOSLinux()) ||
(Host.isOSLinux() && !Host.isAndroid()) ||
(Host.isSystemZ() && Host.isOSzOS());
}

HostTest() : Host(Triple::normalize(sys::getProcessTriple())) {}
};

// On hosts that isSupportedArchAndOS() accepts, the reported physical core
// count must be positive; the test is skipped everywhere else.
TEST_F(HostTest, NumPhysicalCoresSupported) {
if (!isSupportedArchAndOS())
GTEST_SKIP();
int Num = sys::getHostNumPhysicalCores();
ASSERT_GT(Num, 0);
}

// On hosts that isSupportedArchAndOS() rejects, the query must report -1
// (unknown); the test is skipped on supported hosts.
TEST_F(HostTest, NumPhysicalCoresUnsupported) {
if (isSupportedArchAndOS())
GTEST_SKIP();
int Num = sys::getHostNumPhysicalCores();
ASSERT_EQ(Num, -1);
}

TEST(getLinuxHostCPUName, ARM) {
StringRef CortexA9ProcCpuinfo = R"(
processor : 0
Expand Down Expand Up @@ -439,13 +408,13 @@ static bool runAndGetCommandOutput(
return Success;
}

TEST_F(HostTest, DummyRunAndGetCommandOutputUse) {
TEST(HostTest, DummyRunAndGetCommandOutputUse) {
// Suppress defined-but-not-used warnings when the tests using the helper are
// disabled.
(void)&runAndGetCommandOutput;
}

TEST_F(HostTest, getMacOSHostVersion) {
TEST(HostTest, getMacOSHostVersion) {
llvm::Triple HostTriple(llvm::sys::getProcessTriple());
if (!HostTriple.isMacOSX())
GTEST_SKIP();
Expand Down Expand Up @@ -491,7 +460,7 @@ static void getAIXSystemVersion(VersionTuple &SystemVersion) {
.getOSVersion();
}

TEST_F(HostTest, AIXHostVersionDetect) {
TEST(HostTest, AIXHostVersionDetect) {
llvm::Triple HostTriple(llvm::sys::getProcessTriple());
if (HostTriple.getOS() != Triple::AIX)
GTEST_SKIP();
Expand All @@ -517,7 +486,7 @@ TEST_F(HostTest, AIXHostVersionDetect) {
ASSERT_EQ(SysMinor, HostVersion.getMinor());
}

TEST_F(HostTest, AIXTargetVersionDetect) {
TEST(HostTest, AIXTargetVersionDetect) {
llvm::Triple TargetTriple(llvm::sys::getDefaultTargetTriple());
if (TargetTriple.getOS() != Triple::AIX)
GTEST_SKIP();
Expand All @@ -535,7 +504,7 @@ TEST_F(HostTest, AIXTargetVersionDetect) {
ASSERT_EQ(SystemVersion.getMinor(), TargetVersion.getMinor());
}

TEST_F(HostTest, AIXHostCPUDetect) {
TEST(HostTest, AIXHostCPUDetect) {
llvm::Triple HostTriple(llvm::sys::getProcessTriple());
if (HostTriple.getOS() != Triple::AIX)
GTEST_SKIP();
Expand Down

0 comments on commit 5577207

Please sign in to comment.