Skip to content

Commit 048be0f

Browse files
authored
DEV: use number of physical cores in dev.py build by default (scipy#18451)
1 parent b542a2c commit 048be0f

File tree

4 files changed

+236
-9
lines changed

4 files changed

+236
-9
lines changed

.github/workflows/linux_meson.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ jobs:
9393
9494
- name: Setup build and install scipy
9595
run: |
96-
python dev.py build -j 2 --werror
96+
python dev.py build --werror
9797
9898
- name: Ccache performance
9999
shell: bash -l {0}
@@ -119,7 +119,7 @@ jobs:
119119
run: |
120120
export OMP_NUM_THREADS=2
121121
export SCIPY_USE_PROPACK=1
122-
python dev.py --no-build test -j 2 -- --durations 10 --timeout=60
122+
python dev.py --no-build test -j2 -- --durations 10 --timeout=60
123123
124124
#################################################################################
125125
test_venv_install:
@@ -317,7 +317,7 @@ jobs:
317317
318318
- name: Build and install SciPy
319319
run: |
320-
python dev.py build -j2 --gcov
320+
python dev.py build --gcov
321321
322322
- name: Ccache performance
323323
shell: bash -l {0}

.github/workflows/macos_meson.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,15 @@ jobs:
113113
# https://conda-forge.org/docs/maintainer/knowledge_base.html#requiring-newer-macos-sdks
114114
export MACOSX_DEPLOYMENT_TARGET=10.9
115115
export MACOSX_SDK_VERSION=10.9
116-
CC="ccache $CC" python dev.py build -j 2
116+
CC="ccache $CC" python dev.py build
117117
118118
- name: Test SciPy
119119
shell: bash -l {0}
120120
run: |
121121
conda activate scipy-dev
122122
export OMP_NUM_THREADS=2
123123
export SCIPY_USE_PROPACK=1
124-
python dev.py -n test -j 2
124+
python dev.py -n test -j2
125125
126126
- name: Ccache statistics
127127
shell: bash -l {0}

.github/workflows/windows.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
- name: Build
5858
run: |
5959
echo "SCIPY_USE_PROPACK=1" >> $env:GITHUB_ENV
60-
python dev.py build -j 2 --win-cp-openblas
60+
python dev.py build --win-cp-openblas
6161
# Necessary because GitHub Actions checks out the repo to D:\ while OpenBLAS
6262
# got installed to C:\ higher up. The copying with `--win-cp-openblas` fails
6363
# when things are split over drives.
@@ -105,7 +105,7 @@ jobs:
105105
106106
- name: Build
107107
run: |
108-
python dev.py build -j 2 --win-cp-openblas
108+
python dev.py build --win-cp-openblas
109109
# Copy OpenBLAS DLL, write distributor-init (see first job in this file for why)
110110
cp C:\opt\64\bin\*.dll $pwd\build-install\Lib\site-packages\scipy\.libs\
111111
python tools\openblas_support.py --write-init $PWD\build-install\Lib\site-packages\scipy\

dev.py

Lines changed: 229 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ def task_meta(cls, **kwargs):
107107
import errno
108108
import contextlib
109109
from sysconfig import get_path
110+
import math
111+
import traceback
112+
from concurrent.futures.process import _MAX_WINDOWS_WORKERS
110113

111114
# distutils is required to infer meson install path
112115
# if this needs to be replaced for Python 3.12 support and there's no
@@ -414,7 +417,7 @@ class Build(Task):
414417
parallel = Option(
415418
['--parallel', '-j'], default=None, metavar='N_JOBS',
416419
help=("Number of parallel jobs for building. "
417-
"This defaults to 2 * n_cpus + 2."))
420+
"This defaults to the number of available physical CPU cores"))
418421
setup_args = Option(
419422
['--setup-args', '-C'], default=[], multiple=True,
420423
help=("Pass along one or more arguments to `meson setup` "
@@ -499,7 +502,12 @@ def build_project(cls, dirs, args, env):
499502
Build a dev version of the project.
500503
"""
501504
cmd = ["ninja", "-C", str(dirs.build)]
502-
if args.parallel is not None:
505+
if args.parallel is None:
506+
# Use number of physical cores rather than ninja's default of 2N+2,
507+
# to avoid out of memory issues (see gh-17941 and gh-18443)
508+
n_cores = cpu_count(only_physical_cores=True)
509+
cmd += [f"-j{n_cores}"]
510+
else:
503511
cmd += ["-j", str(args.parallel)]
504512

505513
# Building with ninja-backend
@@ -1226,5 +1234,224 @@ def authors(ctx_obj, revision_args):
12261234
print('Error caught: Incorrect revision start or revision end')
12271235

12281236

1237+
# The following CPU core count functions were taken from loky/backend/context.py
1238+
# See https://github.com/joblib/loky
1239+
1240+
# Cache for the number of physical cores to avoid repeating subprocess calls.
1241+
# It should not change during the lifetime of the program.
1242+
# Holds None until the first lookup; afterwards _count_physical_cores() stores
# either the integer core count or the string "not found" (detection failed).
physical_cores_cache = None
1243+
1244+
1245+
def cpu_count(only_physical_cores=False):
    """Return how many CPUs the current process may use (never less than 1).

    The starting point is ``os.cpu_count()`` (1 when it is unknown; on Windows
    capped at ``_MAX_WINDOWS_WORKERS``, see
    https://bugs.python.org/issue26903). That value is then combined with the
    user-imposed limit computed by ``_cpu_count_user``, which looks at:

    * the CPU affinity of the current process;
    * the Cgroup CPU bandwidth limit (Linux, typically set by docker and
      similar container orchestration systems);
    * the ``LOKY_MAX_CPU_COUNT`` environment variable, if defined.

    The smallest of these constraints wins.

    Parameters
    ----------
    only_physical_cores : bool, optional
        When True, report physical cores rather than logical ones
        (hyperthreading / SMT). This is not enforced when the user limit is
        already below the OS count: in that case the user limit is returned.
        If the physical core count cannot be determined, the logical count is
        returned instead.

    Returns
    -------
    int
        Number of usable CPUs, always >= 1.
    """
    # os.cpu_count() is documented as possibly returning None.
    n_logical = os.cpu_count() or 1
    if sys.platform == "win32":
        # Going beyond this limit on Windows triggers an OS-level error, see
        # https://bugs.python.org/issue26903 and
        # https://learn.microsoft.com/en-us/windows/win32/procthread/processor-groups
        n_logical = min(n_logical, _MAX_WINDOWS_WORKERS)

    n_user = _cpu_count_user(n_logical)
    n_usable = max(min(n_logical, n_user), 1)

    if only_physical_cores:
        if n_user < n_logical:
            # An explicit user restriction takes precedence over the
            # physical core count.
            return max(n_user, 1)

        n_physical, error = _count_physical_cores()
        if n_physical != "not found":
            return n_physical

        # Detection failed. The exception is only reported by the first
        # (uncached) lookup, so this warning is emitted once.
        if error is not None:
            message = (
                "Could not find the number of physical cores for the "
                f"following reason:\n{error}\n"
                "Returning the number of logical cores instead. You can "
                "silence this warning by setting LOKY_MAX_CPU_COUNT to "
                "the number of cores you want to use."
            )
            warnings.warn(message)
            traceback.print_tb(error.__traceback__)

    return n_usable
1308+
1309+
1310+
def _cpu_count_cgroup(os_cpu_count):
    """Return the CPU count allowed by the Cgroup CPU bandwidth limit.

    Reads the cgroup v2 file ``/sys/fs/cgroup/cpu.max`` if it exists,
    otherwise the cgroup v1 pair ``cpu.cfs_quota_us`` / ``cpu.cfs_period_us``.
    When no limit file is present, the quota is ``"max"``, or the quota or
    period is not positive, ``os_cpu_count`` is returned unchanged. Otherwise
    the result is ``ceil(quota / period)``.
    """
    v2_limit_path = "/sys/fs/cgroup/cpu.max"
    v1_quota_path = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
    v1_period_path = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"

    if os.path.exists(v2_limit_path):
        # cgroup v2: a single file holding "<quota> <period>"
        # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
        with open(v2_limit_path) as stream:
            quota_text, period_text = stream.read().strip().split()
    elif os.path.exists(v1_quota_path) and os.path.exists(v1_period_path):
        # cgroup v1: quota and period live in separate files
        # https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management
        with open(v1_quota_path) as stream:
            quota_text = stream.read().strip()
        with open(v1_period_path) as stream:
            period_text = stream.read().strip()
    else:
        # No Cgroup CPU bandwidth limit (e.g. non-Linux platform)
        return os_cpu_count

    if quota_text == "max":
        # No active Cgroup quota on a Cgroup-capable platform
        return os_cpu_count

    quota_us = int(quota_text)
    period_us = int(period_text)
    if quota_us <= 0 or period_us <= 0:  # pragma: no cover
        # Setting a negative quota is a valid way to disable cgroup CPU
        # bandwidth limits
        return os_cpu_count
    return math.ceil(quota_us / period_us)
1344+
1345+
1346+
def _cpu_count_affinity(os_cpu_count):
    """Return the number of CPUs permitted by the process affinity settings.

    Tries ``os.sched_getaffinity`` first, then ``psutil`` if it is installed.
    When neither source yields an answer, ``os_cpu_count`` is returned as-is.
    """
    get_affinity = getattr(os, "sched_getaffinity", None)
    if get_affinity is not None:
        try:
            return len(get_affinity(0))
        except NotImplementedError:
            pass

    # On PyPy and possibly other platforms, os.sched_getaffinity is missing or
    # raises NotImplementedError; fall back to psutil when it is available.
    try:
        import psutil

        process = psutil.Process()
        if hasattr(process, "cpu_affinity"):
            return len(process.cpu_affinity())

    except ImportError:  # pragma: no cover
        if sys.platform == "linux":
            if "LOKY_MAX_CPU_COUNT" not in os.environ:
                # PyPy does not implement os.sched_getaffinity on Linux, which
                # can cause severe oversubscription problems. Warn the user in
                # this particularly pathological case, which can wreak havoc,
                # typically on CI workers.
                warnings.warn(
                    "Failed to inspect CPU affinity constraints on this system. "
                    "Please install psutil or explictly set LOKY_MAX_CPU_COUNT."
                )

    # Reached on platforms that do not implement any kind of CPU affinity,
    # such as macOS-based platforms.
    return os_cpu_count
1380+
1381+
1382+
def _cpu_count_user(os_cpu_count):
    """Return the smallest of the user-defined CPU limits.

    The candidates are the CPU affinity count, the Cgroup bandwidth limit and
    the ``LOKY_MAX_CPU_COUNT`` environment variable (a loky-specific soft
    limit, defaulting to ``os_cpu_count`` when unset).
    """
    limits = (
        _cpu_count_affinity(os_cpu_count),
        _cpu_count_cgroup(os_cpu_count),
        int(os.environ.get("LOKY_MAX_CPU_COUNT", os_cpu_count)),
    )
    return min(limits)
1392+
1393+
1394+
def _count_physical_cores():
    """Return a tuple ``(number of physical cores, exception)``.

    On success the exception slot is None. On failure the first element is
    the string ``"not found"`` and the second is the exception that was
    raised.

    The outcome (count or ``"not found"``) is stored in the module-level
    ``physical_cores_cache`` so the subprocess call is not repeated; a cached
    answer is always returned with None as the exception.
    """
    global physical_cores_cache

    # Serve from the cache when a previous call already resolved the value.
    if physical_cores_cache is not None:
        return physical_cores_cache, None

    failure = None
    try:
        platform_name = sys.platform
        if platform_name == "linux":
            completed = subprocess.run(
                "lscpu --parse=core".split(), capture_output=True, text=True
            )
            # One entry per distinct non-comment output line.
            distinct_rows = {
                row
                for row in completed.stdout.splitlines()
                if not row.startswith("#")
            }
            n_physical = len(distinct_rows)
        elif platform_name == "win32":
            completed = subprocess.run(
                "wmic CPU Get NumberOfCores /Format:csv".split(),
                capture_output=True,
                text=True,
            )
            # Skip blank lines and the CSV header; keep the second column.
            core_fields = [
                row.split(",")[1]
                for row in completed.stdout.splitlines()
                if (row and row != "Node,NumberOfCores")
            ]
            n_physical = sum(int(field) for field in core_fields)
        elif platform_name == "darwin":
            completed = subprocess.run(
                "sysctl -n hw.physicalcpu".split(),
                capture_output=True,
                text=True,
            )
            n_physical = int(completed.stdout)
        else:
            raise NotImplementedError(f"unsupported platform: {sys.platform}")

        # A count below 1 means no valid value was found.
        if n_physical < 1:
            raise ValueError(f"found {n_physical} physical cores < 1")

    except Exception as exc:
        failure = exc
        n_physical = "not found"

    # Remember the result (including a failure) for subsequent calls.
    physical_cores_cache = n_physical

    return n_physical, failure
1454+
1455+
12291456
if __name__ == '__main__':
12301457
cli()

0 commit comments

Comments
 (0)