-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathconfig.py
592 lines (462 loc) · 22.1 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
import platform
import sys
import os
import re
import shutil
import warnings
import traceback
# YAML needed to use file based Numba config
try:
import yaml
_HAVE_YAML = True
except ImportError:
_HAVE_YAML = False
import llvmlite.binding as ll
# Platform identification flags.
IS_WIN32 = sys.platform.startswith('win32')
IS_OSX = sys.platform.startswith('darwin')
# Native word size in bits: tuple.__itemsize__ is the per-item (pointer)
# size in bytes, so multiplying by 8 yields 32 or 64.
MACHINE_BITS = tuple.__itemsize__ * 8
IS_32BITS = MACHINE_BITS == 32
# Python version in (major, minor) tuple
PYVERSION = sys.version_info[:2]
# this is the name of the user supplied configuration file
_config_fname = '.numba_config.yaml'
def _parse_cc(text):
"""
Parse CUDA compute capability version string.
"""
if not text:
return None
else:
m = re.match(r'(\d+)\.(\d+)', text)
if not m:
raise ValueError("Compute capability must be specified as a "
"string of \"major.minor\" where major "
"and minor are decimals")
grp = m.groups()
return int(grp[0]), int(grp[1])
def _os_supports_avx():
"""
Whether the current OS supports AVX, regardless of the CPU.
This is necessary because the user may be running a very old Linux
kernel (e.g. CentOS 5) on a recent CPU.
"""
if (not sys.platform.startswith('linux')
or platform.machine() not in ('i386', 'i586', 'i686', 'x86_64')):
return True
# Executing the CPUID instruction may report AVX available even though
# the kernel doesn't support it, so parse /proc/cpuinfo instead.
try:
f = open('/proc/cpuinfo', 'r')
except OSError:
# If /proc isn't available, assume yes
return True
with f:
for line in f:
head, _, body = line.partition(':')
if head.strip() == 'flags' and 'avx' in body.split():
return True
else:
return False
class _OptLevel(int):
"""This class holds the "optimisation level" set in `NUMBA_OPT`. As this env
var can be an int or a string, but is almost always interpreted as an int,
this class subclasses int so as to get the common behaviour but stores the
actual value as a `_raw_value` member. The value "max" is a special case
and the property `is_opt_max` can be queried to find if the optimisation
level (supplied value at construction time) is "max"."""
def __new__(cls, *args, **kwargs):
assert len(args) == 1
(value,) = args
_int_value = 3 if value == 'max' else int(value)
# the int ctor is always called with an appropriate integer value
new = super().__new__(cls, _int_value, **kwargs)
# raw value is max or int
new._raw_value = value if value == 'max' else _int_value
return new
@property
def is_opt_max(self):
"""Returns True if the the optimisation level is "max" False
otherwise."""
return self._raw_value == "max"
def __repr__(self):
if isinstance(self._raw_value, str):
arg = f"'{self._raw_value}'"
else:
arg = self._raw_value
return f"_OptLevel({arg})"
def _process_opt_level(opt_level):
    """Validate the raw `NUMBA_OPT` string and wrap it in an ``_OptLevel``.

    Raises ``ValueError`` for anything other than '0'-'3' or 'max'.
    """
    if opt_level in ('0', '1', '2', '3', 'max'):
        return _OptLevel(opt_level)
    msg = ("Environment variable `NUMBA_OPT` is set to an unsupported "
           f"value '{opt_level}', supported values are 0, 1, 2, 3, and "
           "'max'")
    raise ValueError(msg)
class _EnvReloader(object):
    """Loads Numba configuration from an optional ``.numba_config.yaml``
    file and from ``NUMBA_*`` environment variables, then injects every
    derived upper-case setting into this module's globals (see the loop at
    the end of ``process_environ``).

    ``__init__`` calls ``reset()``, so merely instantiating this class
    populates the configuration.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        # Forget any previously seen environment so the next update() is
        # applied unconditionally.
        self.old_environ = {}
        self.update(force=True)

    def update(self, force=False):
        """Recompute the configuration; reprocess it only when *force* is
        set or the NUMBA-relevant environment changed since last call."""
        new_environ = {}
        # first check if there's a .numba_config.yaml and use values from that
        if os.path.exists(_config_fname) and os.path.isfile(_config_fname):
            if not _HAVE_YAML:
                msg = ("A Numba config file is found but YAML parsing "
                       "capabilities appear to be missing. "
                       "To use this feature please install `pyyaml`. e.g. "
                       "`conda install pyyaml`.")
                warnings.warn(msg)
            else:
                with open(_config_fname, 'rt') as f:
                    y_conf = yaml.safe_load(f)
                if y_conf is not None:
                    # Config-file keys are written without the NUMBA_ prefix;
                    # normalise them to environment-variable form.
                    for k, v in y_conf.items():
                        new_environ['NUMBA_' + k.upper()] = v
        # clobber file based config with any locally defined env vars
        for name, value in os.environ.items():
            if name.startswith('NUMBA_'):
                new_environ[name] = value
        # We update the config variables if at least one NUMBA environment
        # variable was modified. This lets the user modify values
        # directly in the config module without having them overwritten when
        # reload_config() is called by the compiler.
        if force or self.old_environ != new_environ:
            self.process_environ(new_environ)
            # Store a copy
            self.old_environ = dict(new_environ)

        self.validate()

    def validate(self):
        """Post-process cross-cutting settings; may warn and switch
        ``CUDA_USE_NVIDIA_BINDING`` off if the bindings are unusable."""
        global CUDA_USE_NVIDIA_BINDING
        # The names referenced here are injected into globals() by
        # process_environ(), hence the F821 (undefined name) suppressions.
        if CUDA_USE_NVIDIA_BINDING:  # noqa: F821
            try:
                import cuda  # noqa: F401
            except ImportError as ie:
                msg = ("CUDA Python bindings requested (the environment "
                       "variable NUMBA_CUDA_USE_NVIDIA_BINDING is set), "
                       f"but they are not importable: {ie.msg}.")
                warnings.warn(msg)
                CUDA_USE_NVIDIA_BINDING = False
            if CUDA_PER_THREAD_DEFAULT_STREAM:  # noqa: F821
                warnings.warn("PTDS support is handled by CUDA Python when "
                              "using the NVIDIA binding. Please set the "
                              "environment variable "
                              "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 "
                              "instead.")

    def process_environ(self, environ):
        """Parse *environ* (a mapping of ``NUMBA_*`` names to raw values)
        into typed settings. Every upper-case local defined in this method
        body is published as a module-level global by the injection loop at
        the bottom."""

        def _readenv(name, ctor, default):
            # Look up *name* in the captured environ; when unset return
            # *default* (calling it if it is callable), otherwise parse the
            # raw value with *ctor*, warning and falling back on failure.
            value = environ.get(name)
            if value is None:
                return default() if callable(default) else default
            try:
                return ctor(value)
            except Exception:
                warnings.warn(f"Environment variable '{name}' is defined but "
                              f"its associated value '{value}' could not be "
                              "parsed.\nThe parse failed with exception:\n"
                              f"{traceback.format_exc()}",
                              RuntimeWarning)
                # NOTE(review): unlike the unset path above, a callable
                # default is returned here *without* being called — confirm
                # this asymmetry is intended.
                return default

        def optional_str(x):
            # str() conversion that passes None through unchanged.
            return str(x) if x is not None else None

        # Type casting rules selection
        USE_LEGACY_TYPE_SYSTEM = _readenv(
            "NUMBA_USE_LEGACY_TYPE_SYSTEM", int, 1
        )

        # developer mode produces full tracebacks, disables help instructions
        DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0)

        # disable performance warnings, will switch off the generation of
        # warnings of the class NumbaPerformanceWarning
        DISABLE_PERFORMANCE_WARNINGS = _readenv(
            "NUMBA_DISABLE_PERFORMANCE_WARNINGS", int, 0)

        # Flag to enable full exception reporting
        FULL_TRACEBACKS = _readenv(
            "NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE)

        # Show help text when an error occurs
        SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, 0)

        # The color scheme to use for error messages, default is no color
        # just bold fonts in use.
        COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color")

        # Whether to globally enable bounds checking. The default None means
        # to use the value of the flag to @njit. 0 or 1 overrides the flag
        # globally.
        BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None)

        # Whether to always warn about potential uninitialized variables
        # because static controlflow analysis cannot find a definition
        # in one or more of the incoming paths.
        ALWAYS_WARN_UNINIT_VAR = _readenv(
            "NUMBA_ALWAYS_WARN_UNINIT_VAR", int, 0,
        )

        # Whether to warn about kernel launches where the grid size will
        # under utilize the GPU due to low occupancy. On by default.
        CUDA_LOW_OCCUPANCY_WARNINGS = _readenv(
            "NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS", int, 1)

        # Whether to use the official CUDA Python API Bindings
        CUDA_USE_NVIDIA_BINDING = _readenv(
            "NUMBA_CUDA_USE_NVIDIA_BINDING", int, 0)

        # Debug flag to control compiler debug print
        DEBUG = _readenv("NUMBA_DEBUG", int, 0)

        # DEBUG print IR after pass names
        DEBUG_PRINT_AFTER = _readenv("NUMBA_DEBUG_PRINT_AFTER", str, "none")

        # DEBUG print IR before pass names
        DEBUG_PRINT_BEFORE = _readenv("NUMBA_DEBUG_PRINT_BEFORE", str, "none")

        # DEBUG print IR before and after pass names
        DEBUG_PRINT_WRAP = _readenv("NUMBA_DEBUG_PRINT_WRAP", str, "none")

        # Highlighting in intermediate dumps
        HIGHLIGHT_DUMPS = _readenv("NUMBA_HIGHLIGHT_DUMPS", int, 0)

        # JIT Debug flag to trigger IR instruction print
        DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0)

        # Enable debugging of front-end operation
        # (up to and including IR generation)
        DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0)

        # Enable debug prints in nrtdynmod and use of "safe" API functions
        DEBUG_NRT = _readenv("NUMBA_DEBUG_NRT", int, 0)

        # Enable NRT statistics counters
        NRT_STATS = _readenv("NUMBA_NRT_STATS", int, 0)

        # How many recently deserialized functions to retain regardless
        # of external references
        FUNCTION_CACHE_SIZE = _readenv("NUMBA_FUNCTION_CACHE_SIZE", int, 128)

        # Maximum tuple size that parfors will unpack and pass to
        # internal gufunc.
        PARFOR_MAX_TUPLE_SIZE = _readenv("NUMBA_PARFOR_MAX_TUPLE_SIZE",
                                         int, 100)

        # Enable logging of cache operation
        DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG)

        # Redirect cache directory
        # Contains path to the directory
        CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "")

        # Enable tracing support
        TRACE = _readenv("NUMBA_TRACE", int, 0)

        # Enable chrome tracing support
        CHROME_TRACE = _readenv("NUMBA_CHROME_TRACE", str, "")

        # Enable debugging of type inference
        DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0)

        # Configure compilation target to use the specified CPU name
        # and CPU feature as the host information.
        # Note: this overrides "host" option for AOT compilation.
        CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None)
        CPU_FEATURES = _readenv("NUMBA_CPU_FEATURES", optional_str,
                                ("" if str(CPU_NAME).lower() == 'generic'
                                 else None))

        # Optimization level
        OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3))

        # Force dump of Python bytecode
        DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)

        # Force dump of control flow graph
        DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND)

        # Force dump of Numba IR
        DUMP_IR = _readenv("NUMBA_DUMP_IR", int,
                           DEBUG_FRONTEND)

        # Force dump of Numba IR in SSA form
        DUMP_SSA = _readenv("NUMBA_DUMP_SSA", int,
                            DEBUG_FRONTEND or DEBUG_TYPEINFER)

        # print debug info of analysis and optimization on array operations
        DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0)

        # insert debug stmts to print information at runtime
        DEBUG_ARRAY_OPT_RUNTIME = _readenv(
            "NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0)

        # print stats about parallel for-loops
        DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0)

        # prints user friendly information about parallel
        PARALLEL_DIAGNOSTICS = _readenv("NUMBA_PARALLEL_DIAGNOSTICS", int, 0)

        # print debug info of inline closure pass
        DEBUG_INLINE_CLOSURE = _readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0)

        # Force dump of LLVM IR
        DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG)

        # Force dump of Function optimized LLVM IR
        DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG)

        # Force dump of Optimized LLVM IR
        DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG)

        # Force disable loop vectorize
        LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int, 1)

        # Enable superword-level parallelism vectorization, default is off
        # since #8705 (miscompilation).
        SLP_VECTORIZE = _readenv("NUMBA_SLP_VECTORIZE", int, 0)

        # Force dump of generated assembly
        DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG)

        # Force dump of type annotation
        ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0)

        # Dump IR in such as way as to aid in "diff"ing.
        DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0)

        # Dump type annotation in html format
        def fmt_html_path(path):
            # Normalise a user-supplied dump path to an absolute path;
            # None (unset) passes through.
            if path is None:
                return path
            else:
                return os.path.abspath(path)

        HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None)

        # x86-64 specific
        # Enable AVX on supported platforms where it won't degrade performance.
        def avx_default():
            if not _os_supports_avx():
                return False
            else:
                # There are various performance issues with AVX and LLVM
                # on some CPUs (list at
                # http://llvm.org/bugs/buglist.cgi?quicksearch=avx).
                # For now we'd rather disable it, since it can pessimize code
                cpu_name = CPU_NAME or ll.get_host_cpu_name()
                disabled_cpus = {'corei7-avx', 'core-avx-i',
                                 'sandybridge', 'ivybridge'}
                # Disable known baseline CPU names that virtual machines may
                # incorrectly report as having AVX support.
                # This can cause problems with the SVML-pass's use of AVX512.
                # See https://github.com/numba/numba/issues/9582
                disabled_cpus |= {'nocona'}
                return cpu_name not in disabled_cpus

        ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default)

        # if set and SVML is available, it will be disabled
        # By default, it's disabled on 32-bit platforms.
        DISABLE_INTEL_SVML = _readenv(
            "NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS)

        # Disable jit for debugging
        DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0)

        # choose parallel backend to use
        THREADING_LAYER_PRIORITY = _readenv(
            "NUMBA_THREADING_LAYER_PRIORITY",
            lambda string: string.split(),
            ['tbb', 'omp', 'workqueue'],
        )

        THREADING_LAYER = _readenv("NUMBA_THREADING_LAYER", str, 'default')

        # CUDA Configs

        # Whether to warn about kernel launches where a host array
        # is used as a parameter, forcing a copy to and from the device.
        # On by default.
        CUDA_WARN_ON_IMPLICIT_COPY = _readenv(
            "NUMBA_CUDA_WARN_ON_IMPLICIT_COPY", int, 1)

        # Force CUDA compute capability to a specific version
        FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None)

        # The default compute capability to target when compiling to PTX.
        CUDA_DEFAULT_PTX_CC = _readenv("NUMBA_CUDA_DEFAULT_PTX_CC", _parse_cc,
                                       (5, 0))

        # Disable CUDA support
        DISABLE_CUDA = _readenv("NUMBA_DISABLE_CUDA",
                                int, int(MACHINE_BITS == 32))

        # Enable CUDA simulator
        ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0)

        # CUDA logging level
        # Any level name from the *logging* module.  Case insensitive.
        # Defaults to CRITICAL if not set or invalid.
        # Note: This setting only applies when logging is not configured.
        #       Any existing logging configuration is preserved.
        CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, '')

        # Include argument values in the CUDA Driver API logs
        CUDA_LOG_API_ARGS = _readenv("NUMBA_CUDA_LOG_API_ARGS", int, 0)

        # Maximum number of pending CUDA deallocations (default: 10)
        CUDA_DEALLOCS_COUNT = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT",
                                       int, 10)

        # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2)
        CUDA_DEALLOCS_RATIO = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO",
                                       float, 0.2)

        CUDA_ARRAY_INTERFACE_SYNC = _readenv("NUMBA_CUDA_ARRAY_INTERFACE_SYNC",
                                             int, 1)

        # Path of the directory that the CUDA driver libraries are located
        CUDA_DRIVER = _readenv("NUMBA_CUDA_DRIVER", str, '')

        # Buffer size for logs produced by CUDA driver operations (e.g.
        # linking)
        CUDA_LOG_SIZE = _readenv("NUMBA_CUDA_LOG_SIZE", int, 1024)

        # Whether to generate verbose log messages when JIT linking
        CUDA_VERBOSE_JIT_LOG = _readenv("NUMBA_CUDA_VERBOSE_JIT_LOG", int, 1)

        # Whether the default stream is the per-thread default stream
        CUDA_PER_THREAD_DEFAULT_STREAM = _readenv(
            "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0)

        CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = _readenv(
            "NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY", int, 0)

        # Location of the CUDA include files
        if IS_WIN32:
            cuda_path = os.environ.get('CUDA_PATH')
            if cuda_path:
                default_cuda_include_path = os.path.join(cuda_path, "include")
            else:
                default_cuda_include_path = "cuda_include_not_found"
        else:
            default_cuda_include_path = os.path.join(os.sep, 'usr', 'local',
                                                     'cuda', 'include')
        CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str,
                                     default_cuda_include_path)

        # Threading settings

        # The default number of threads to use.
        def num_threads_default():
            # Prefer the CPU affinity mask (not available on all OSes),
            # falling back to the raw CPU count, then to 1.
            try:
                sched_getaffinity = os.sched_getaffinity
            except AttributeError:
                pass
            else:
                return max(1, len(sched_getaffinity(0)))

            cpu_count = os.cpu_count()
            if cpu_count is not None:
                return max(1, cpu_count)

            return 1

        NUMBA_DEFAULT_NUM_THREADS = num_threads_default()

        # Numba thread pool size (defaults to number of CPUs on the system).
        _NUMBA_NUM_THREADS = _readenv("NUMBA_NUM_THREADS", int,
                                      NUMBA_DEFAULT_NUM_THREADS)
        if ('NUMBA_NUM_THREADS' in globals()
                and globals()['NUMBA_NUM_THREADS'] != _NUMBA_NUM_THREADS):
            # The thread count was already published and differs from the new
            # value; this is only legal before the thread pool has started.
            from numba.np.ufunc import parallel
            if parallel._is_initialized:
                raise RuntimeError("Cannot set NUMBA_NUM_THREADS to a "
                                   "different value once the threads have been "
                                   "launched (currently have %s, "
                                   "trying to set %s)" %
                                   (_NUMBA_NUM_THREADS,
                                    globals()['NUMBA_NUM_THREADS']))

        NUMBA_NUM_THREADS = _NUMBA_NUM_THREADS
        # Deleted so the injection loop below does not see the private name.
        del _NUMBA_NUM_THREADS

        # sys.monitoring support
        ENABLE_SYS_MONITORING = _readenv("NUMBA_ENABLE_SYS_MONITORING",
                                         int, 0)

        # Profiling support

        # Indicates if a profiler detected. Only VTune can be detected for now
        RUNNING_UNDER_PROFILER = 'VS_PROFILER' in os.environ

        # Enables jit events in LLVM to support profiling of dynamic code
        ENABLE_PROFILING = _readenv(
            "NUMBA_ENABLE_PROFILING", int, int(RUNNING_UNDER_PROFILER))

        # Debug Info

        # The default value for the `debug` flag
        DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, ENABLE_PROFILING)
        CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0)
        EXTEND_VARIABLE_LIFETIMES = _readenv("NUMBA_EXTEND_VARIABLE_LIFETIMES",
                                             int, 0)

        # gdb binary location
        def which_gdb(path_or_bin):
            # Resolve a binary name via PATH; keep the raw value when the
            # lookup fails so error messages show what the user supplied.
            gdb = shutil.which(path_or_bin)
            return gdb if gdb is not None else path_or_bin

        GDB_BINARY = _readenv("NUMBA_GDB_BINARY", which_gdb, 'gdb')

        # CUDA Memory management
        CUDA_MEMORY_MANAGER = _readenv("NUMBA_CUDA_MEMORY_MANAGER", str,
                                       'default')

        # Experimental refprune pass
        LLVM_REFPRUNE_PASS = _readenv(
            "NUMBA_LLVM_REFPRUNE_PASS", int, 1,
        )
        LLVM_REFPRUNE_FLAGS = _readenv(
            "NUMBA_LLVM_REFPRUNE_FLAGS", str,
            "all" if LLVM_REFPRUNE_PASS else "",
        )

        # llvmlite memory manager
        USE_LLVMLITE_MEMORY_MANAGER = _readenv(
            "NUMBA_USE_LLVMLITE_MEMORY_MANAGER", int, None
        )

        # Timing support.

        # LLVM_PASS_TIMINGS enables LLVM recording of pass timings.
        LLVM_PASS_TIMINGS = _readenv(
            "NUMBA_LLVM_PASS_TIMINGS", int, 0,
        )

        # Coverage support.

        # JIT_COVERAGE (bool) controls whether the compiler report compiled
        # lines to coverage tools. Defaults to off.
        JIT_COVERAGE = _readenv(
            "NUMBA_JIT_COVERAGE", int, 0,
        )

        # Inject the configuration values into the module globals: every
        # upper-case local defined above becomes a module attribute.
        for name, value in locals().copy().items():
            if name.isupper():
                globals()[name] = value
# Module-level singleton; construction runs reset() -> update(force=True),
# which populates the configuration globals at import time.
_env_reloader = _EnvReloader()
def reload_config():
    """Re-read ``NUMBA_*`` environment variables (and the optional config
    file) and refresh the module's configuration globals, skipping the
    reprocessing when nothing relevant has changed."""
    _env_reloader.update()