Skip to content

Commit

Permalink
fix: num_possible_cpus() with hot-unplugged CPUs
Browse files Browse the repository at this point in the history
We rely on sysconf(_SC_NPROCESSORS_CONF) to get the maximum possible
number of CPUs that can be attached to the system for the lifetime of an
application. We use this value to allocate an array of per-CPU buffers
that is indexed by the numerical id of the CPUs.

As such, we expect that the highest possible CPU id would be one less
than the number returned by sysconf(_SC_NPROCESSORS_CONF), which is
unfortunately not always the case and can vary across libc
implementations and versions.

Glibc up to 2.35 will count the number of "cpuX" directories in
"/sys/devices/system/cpu", which doesn't include CPUs that were
hot-unplugged.

This information is however provided by the kernel in
"/sys/devices/system/cpu/possible" in the form of a mask listing all the
CPUs that could possibly be hot-plugged in the system.

This patch changes the implementation of num_possible_cpus() to first
try parsing the possible CPU mask to extract the highest possible value
and, if this fails, fall back to the previous behavior.

Change-Id: I1a3cb1a446154ec443a391d6689cb7d4165726fd
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  • Loading branch information
mjeanson authored and compudj committed Jul 25, 2022
1 parent fa184a4 commit 66dbdc3
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 18 deletions.
147 changes: 131 additions & 16 deletions src/common/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,25 @@
*/

#define _LGPL_SOURCE
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <pthread.h>
#include <stdlib.h>

#include <urcu/compiler.h>

#include "common/align.h"
#include "common/logging.h"
#include "common/smp.h"

static int num_possible_cpus_cache;

#if (defined(__GLIBC__) || defined( __UCLIBC__))
static void _get_num_possible_cpus(void)
int get_num_possible_cpus_fallback(void)
{
int result;

/* On Linux, when some processors are offline
* _SC_NPROCESSORS_CONF counts the offline
* processors, whereas _SC_NPROCESSORS_ONLN
Expand All @@ -28,10 +33,7 @@ static void _get_num_possible_cpus(void)
* this sysconf, in which case the arrays
* indexed by processor would overflow.
*/
result = sysconf(_SC_NPROCESSORS_CONF);
if (result == -1)
return;
num_possible_cpus_cache = result;
return sysconf(_SC_NPROCESSORS_CONF);
}

#else
Expand All @@ -54,9 +56,9 @@ static void _get_num_possible_cpus(void)

#define __max(a,b) ((a)>(b)?(a):(b))

static void _get_num_possible_cpus(void)
int get_num_possible_cpus_fallback(void)
{
int result, count = 0;
int count = 0;
DIR *cpudir;
struct dirent *entry;

Expand Down Expand Up @@ -87,22 +89,135 @@ static void _get_num_possible_cpus(void)
/*
* Get the sysconf value as a fallback. Keep the highest number.
*/
result = __max(sysconf(_SC_NPROCESSORS_CONF), count);
return __max(sysconf(_SC_NPROCESSORS_CONF), count);
}
#endif

/*
* Get the CPU possible mask string from sysfs.
*
* buf: the buffer where the mask will be read.
* max_bytes: the maximum number of bytes to write in the buffer.
*
* Returns the number of bytes read or -1 on error.
*/
int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes)
{
	size_t total_bytes_read = 0;
	int fd;
	int ret = -1;

	/*
	 * A zero-sized buffer cannot even hold the terminating '\0', reject
	 * it rather than writing before the start of the buffer.
	 */
	if (buf == NULL || max_bytes == 0)
		return -1;

	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
	if (fd < 0)
		return -1;

	/* Read until the buffer is full or end-of-file is reached. */
	while (total_bytes_read < max_bytes) {
		ssize_t bytes_read = read(fd, buf + total_bytes_read,
				max_bytes - total_bytes_read);

		if (bytes_read < 0) {
			/*
			 * In a while loop 'continue' re-evaluates only the
			 * buffer-space condition, so an interrupted read is
			 * really retried.
			 */
			if (errno == EINTR)
				continue;
			goto end;	/* Hard error: still close the fd. */
		}

		if (bytes_read == 0)
			break;		/* End of file. */

		total_bytes_read += (size_t) bytes_read;
		assert(total_bytes_read <= max_bytes);
	}

	/*
	 * Make sure the mask read is a null terminated string. When the
	 * buffer was filled completely, the last byte read is overwritten.
	 */
	if (total_bytes_read < max_bytes)
		buf[total_bytes_read] = '\0';
	else
		buf[max_bytes - 1] = '\0';

	/* The byte count must be representable in the 'int' return type. */
	if (total_bytes_read > INT_MAX)
		goto end;

	ret = (int) total_bytes_read;

end:
	if (close(fd))
		PERROR("close");

	return ret;
}

/*
* Get the number of CPUs from the possible cpu mask.
*
* pmask: the mask to parse.
* len: the len of the mask excluding '\0'.
*
* Returns the number of possible CPUs from the mask or 0 on error.
*/
int get_num_possible_cpus_from_mask(const char *pmask, size_t len)
{
	size_t start;
	unsigned long cpu_index;
	char *endptr;

	/* We need a mask and at least one char to read. */
	if (pmask == NULL || len < 1)
		return 0;

	/*
	 * Walk backward from the last character to locate the start of the
	 * last CPU index, i.e. the character following the last ',' or '-'
	 * separator. Index 0 is never treated as a separator position: a
	 * mask cannot legitimately start with one.
	 */
	start = len - 1;
	while (start > 0) {
		if (pmask[start] == ',' || pmask[start] == '-') {
			start++;
			break;
		}
		start--;
	}

	/*
	 * The last token must begin with a digit. strtoul() would otherwise
	 * silently skip leading white space and accept a sign, turning
	 * malformed input such as "-1" into a bogus huge or small index.
	 * This also rejects an empty token (mask ending with a separator).
	 */
	if (start >= len || !isdigit((unsigned char) pmask[start]))
		return 0;

	/* Relies on 'pmask' being null terminated to stop the conversion. */
	cpu_index = strtoul(&pmask[start], &endptr, 10);

	/*
	 * If we read a CPU index, increment it by one to return a number of
	 * CPUs. Indexes that would overflow an 'int' once incremented
	 * (including the ULONG_MAX returned on range error) are rejected.
	 */
	if (endptr != &pmask[start] && cpu_index < INT_MAX)
		return (int) cpu_index + 1;

	return 0;
}

/*
 * Compute the number of possible CPUs and store it in
 * 'num_possible_cpus_cache'.
 *
 * The possible CPU mask from sysfs is tried first; if it can't be read or
 * parsed, get_num_possible_cpus_fallback() is used instead. If every method
 * fails (result lower than 1), the cache is left untouched.
 */
static void _get_num_possible_cpus(void)
{
	int ret;
	int buf_len = LTTNG_UST_PAGE_SIZE;
	char buf[buf_len];

	/* Get the possible cpu mask from sysfs and parse it. */
	ret = get_possible_cpu_mask_from_sysfs(buf, buf_len);
	if (ret > 0)
		ret = get_num_possible_cpus_from_mask(buf, ret);

	/* Reading or parsing the mask failed, fallback to sysconf. */
	if (ret <= 0)
		ret = get_num_possible_cpus_fallback();

	/* If all methods failed, don't store the value. */
	if (ret < 1)
		return;

	num_possible_cpus_cache = ret;
}
#endif

/*
* Returns the total number of CPUs in the system. If the cache is not yet
* initialized, get the value from "/sys/devices/system/cpu/possible" or
* fallback to sysconf and cache it.
*
* If all methods fail, don't populate the cache and return 0.
*/
int num_possible_cpus(void)
{
Expand Down
36 changes: 34 additions & 2 deletions src/common/smp.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,43 @@
#ifndef _UST_COMMON_SMP_H
#define _UST_COMMON_SMP_H

/*
* Get the CPU possible mask string from sysfs
* ("/sys/devices/system/cpu/possible").
*
* buf: the buffer where the mask will be read.
* max_bytes: the maximum number of bytes to write in the buffer.
*
* On success the buffer is always null terminated; if the content fills the
* whole buffer, its last byte is replaced by the terminating '\0'.
*
* Returns the number of bytes read or -1 on error.
*/
int get_possible_cpu_mask_from_sysfs(char *buf, size_t max_bytes)
__attribute__((visibility("hidden")));

/*
* Get the number of possible CPUs in the system from either
* sysconf(_SC_NPROCESSORS_CONF) or some other mechanism depending on the libc.
*
* Returns the number of possible CPUs in the system. On failure the result is
* lower than 1: the glibc/uClibc variant returns the sysconf() value as-is,
* which is -1 on error, so callers must treat any value < 1 as an error.
*/
int get_num_possible_cpus_fallback(void)
__attribute__((visibility("hidden")));

/*
* Get the number of CPUs from the possible cpu mask.
*
* pmask: the mask to parse, must be a null terminated string since the last
*        CPU index is converted with strtoul().
* len: the len of the mask excluding '\0'.
*
* Only the last CPU index of the mask (after the last ',' or '-') is parsed.
*
* Returns the number of possible CPUs from the mask or 0 on error.
*/
int get_num_possible_cpus_from_mask(const char *pmask, size_t len)
__attribute__((visibility("hidden")));

/*
* Returns the total number of CPUs in the system. If the cache is not yet
* initialized, get the value from "/sys/devices/system/cpu/possible" or
* fallback to sysconf and cache it.
*
* If all methods fail, don't populate the cache and return 0.
*/
int num_possible_cpus(void)
__attribute__((visibility("hidden")));
Expand Down

0 comments on commit 66dbdc3

Please sign in to comment.