/
cpu.py
345 lines (277 loc) · 11 KB
/
cpu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Functionality for detecting the details of the currently available cpu
"""
import json
import os
import platform
import subprocess
import sys
from distutils.version import StrictVersion
from typing import Any, Optional, Tuple
# Names exported by ``from <module> import *``.
__all__ = [
"VALID_VECTOR_EXTENSIONS",
"architecture",
"cpu_architecture",
"cpu_details",
"cpu_vnni_compatible",
"cpu_avx2_compatible",
"cpu_avx512_compatible",
"cpu_neon_compatible",
"cpu_sve_compatible",
"cpu_quantization_compatible",
"print_hardware_capability",
]
# Instruction-set names accepted both as the detected ``isa`` value and as
# values of the NM_ARCH environment override (see cpu_architecture).
VALID_VECTOR_EXTENSIONS = {"avx2", "avx512", "neon", "sve"}
# Minimum version accepted by check_darwin_support; it is compared against the
# version string returned by platform.mac_ver()[0].
MINIMUM_DARWIN_VERSION = "13.0.0"
class _Memoize:
def __init__(self, f):
self.f = f
self.memo = {}
def __call__(self, *args):
if args not in self.memo:
self.memo[args] = self.f(*args)
return self.memo[args]
class architecture(dict):
    """
    A dict holding the architecture details of the current CPU whose keys can
    also be read as attributes. Assigning attributes is rejected; use
    override_isa to change the instruction set.

    Members include (but are not limited to):
        vendor - a string name of the vendor
        isa - a string naming the vector instruction set
            (e.g. avx2, avx512, neon or sve)
        vnni - a boolean indicating VNNI support
        num_sockets - integer number of physical sockets
        available_sockets - integer number of sockets available for use
        cores_per_socket - integer number of physical cores per socket
        available_cores_per_socket - integer number of available cores per socket
        threads_per_core - integer physical number of threads per core
        available_threads_per_core - integer available number of threads per core
        L1_instruction_cache_size - L1 instruction cache size in bytes
        L1_data_cache_size - L1 data cache size in bytes
        L2_cache_size - L2 cache size in bytes
        L3_cache_size - L3 cache size in bytes
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # use the dict itself as the attribute namespace so that every key is
        # readable as an attribute
        self.__dict__ = self

    def __setattr__(self, name: str, value: Any):
        # only the namespace aliasing done in __init__ is allowed through
        if name == "__dict__":
            super().__setattr__(name, value)
            return
        raise AttributeError(
            "Neural Magic: Architecture: can't modify {} to {}".format(name, value)
        )

    def override_isa(self, value: str):
        """
        Replace the stored isa with the given value.

        :param value: the value to update the isa to
        """
        # go through object.__setattr__ to bypass the guard in __setattr__
        object.__setattr__(self, "isa", value)

    @property
    def threads_per_socket(self):
        """
        :return: threads_per_core multiplied by cores_per_socket
        """
        threads = self.threads_per_core
        return threads * self.cores_per_socket

    @property
    def num_threads(self):
        """
        :return: threads_per_socket multiplied by num_sockets
        """
        per_socket = self.threads_per_socket
        return per_socket * self.num_sockets

    @property
    def num_physical_cores(self):
        """
        :return: cores_per_socket multiplied by num_sockets
        """
        per_socket = self.cores_per_socket
        return per_socket * self.num_sockets

    @property
    def num_available_physical_cores(self):
        """
        :return: available_cores_per_socket multiplied by available_sockets
        """
        per_socket = self.available_cores_per_socket
        return per_socket * self.available_sockets
@_Memoize
def _parse_arch_bin() -> architecture:
    """
    Run the ``arch.bin`` executable located next to this module and parse its
    JSON output into an architecture instance. The result is memoized, so the
    executable is only run on the first successful call.

    :return: the architecture built from the JSON printed by ``arch.bin``
    :raises OSError: if ``arch.bin`` cannot be run, exits with a non-zero
        status, or prints output that cannot be parsed
    """
    package_path = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(package_path, "arch.bin")
    error_msg = "Neural Magic: Encountered exception while trying to read arch.bin: {}"

    try:
        info_str = subprocess.check_output(file_path).decode("utf-8")
        return architecture(json.loads(info_str))
    except subprocess.CalledProcessError as ex:
        # arch.bin is expected to report failures as JSON with an "error" key;
        # fall back to the process error itself when that output is missing or
        # malformed so that callers still only ever see an OSError
        try:
            detail = json.loads(ex.stdout)["error"]
        except Exception:
            detail = ex
        raise OSError(error_msg.format(detail)) from ex
    except Exception as ex:
        raise OSError(error_msg.format(ex)) from ex
def get_darwin_version() -> Optional[str]:
    """
    If we are running Darwin, get the current version. Otherwise return None.

    :return: the version string reported by ``platform.mac_ver()`` when
        ``sys.platform`` starts with "darwin", otherwise None
    """
    if not sys.platform.startswith("darwin"):
        return None
    return platform.mac_ver()[0]
def check_darwin_support() -> bool:
    """
    Check if the system is running Darwin and it meets the minimum version
    requirements.

    Versions are compared as tuples of integers rather than with
    ``distutils.version.StrictVersion`` so the check does not depend on the
    deprecated ``distutils`` package.

    :return: True if running on Darwin with a version that is at least
        MINIMUM_DARWIN_VERSION; False on any other platform or when the
        reported version cannot be parsed
    """
    if not sys.platform.startswith("darwin"):
        return False

    def _release_tuple(version):
        # pad to three fields so that "13.0" and "13.0.0" compare equal
        fields = [int(field) for field in version.split(".")]
        return tuple(fields + [0] * (3 - len(fields)))

    try:
        current = _release_tuple(get_darwin_version())
    except (AttributeError, ValueError):
        # empty or non-numeric version string: treat as unsupported
        return False
    return current >= _release_tuple(MINIMUM_DARWIN_VERSION)
def platform_error_msg() -> str:
    """
    Build the error message used when the current platform is unsupported.

    :return: a message naming the supported platforms and the current one
        (the MacOS version when get_darwin_version reports one, otherwise
        ``sys.platform``)
    """
    darwin_ver = get_darwin_version()
    current_os = f"MacOS {darwin_ver}" if darwin_ver else sys.platform
    darwin_str = f" or MacOS >= {MINIMUM_DARWIN_VERSION}"
    return f"Neural Magic: Only Linux{darwin_str} is supported, not '{current_os}'."
def cpu_architecture() -> architecture:
    """
    Detect the CPU details of the current machine.
    Linux and MacOS (when check_darwin_support passes) are accepted;
    if any other OS is used, an exception will be raised.

    Specifically:
    - the number of physical cores available per socket on the system
    - detects the vector instruction set available
      (one of VALID_VECTOR_EXTENSIONS)
    - if vnni is available

    NM_ARCH environment variable can be used to override the instruction
    set detection

    :return: an instance of the architecture class
    :raises OSError: if the platform is unsupported, NM_ARCH names an unknown
        instruction set, or the detected instruction set is not one of
        VALID_VECTOR_EXTENSIONS
    """
    if not (sys.platform.startswith("linux") or check_darwin_support()):
        raise OSError(platform_error_msg())

    arch = _parse_arch_bin()
    # sorted so the options are listed in the same order on every run
    # (iteration order of a set of strings is not stable between processes)
    valid_isas = ",".join(sorted(VALID_VECTOR_EXTENSIONS))

    isa_type_override = os.getenv("NM_ARCH", None)
    if isa_type_override and isa_type_override != arch.isa:
        print(
            "Neural Magic: Using env variable NM_ARCH={} for isa_type".format(
                isa_type_override
            )
        )
        if isa_type_override not in VALID_VECTOR_EXTENSIONS:
            raise OSError(
                "Neural Magic: Invalid instruction set '{}' must be one of {}.".format(
                    isa_type_override, valid_isas
                )
            )
        # NOTE: _parse_arch_bin is memoized, so this updates the shared cached
        # instance and the override stays in effect for later calls
        arch.override_isa(isa_type_override)

    if arch.isa not in VALID_VECTOR_EXTENSIONS:
        raise OSError(
            (
                "Neural Magic: Unable to determine instruction set '{}'. This system "
                "may be unsupported but to try, set NM_ARCH to one of {} to continue."
            ).format(arch.isa, valid_isas)
        )

    return arch
def cpu_vnni_compatible() -> bool:
    """
    :return: the ``vnni`` value reported by cpu_architecture; True if the
        current cpu has the VNNI instruction set, used for running int8
        quantized networks performantly.
    """
    arch = cpu_architecture()
    return arch.vnni
def cpu_avx512_compatible() -> bool:
    """
    :return: True if the isa reported by cpu_architecture is "avx512",
        used for running neural networks performantly
    """
    detected_isa = cpu_architecture().isa
    return detected_isa == "avx512"
def cpu_avx2_compatible() -> bool:
    """
    :return: True if the current cpu has the AVX2 or AVX512 instruction sets,
        used for running neural networks performantly
        (if AVX2 only then less performant compared to strictly AVX512)
    """
    if cpu_architecture().isa == "avx2":
        return True
    # an AVX512 machine also counts as AVX2 compatible
    return cpu_avx512_compatible()
def cpu_neon_compatible() -> bool:
    """
    :return: True if the isa reported by cpu_architecture is "neon",
        used for running neural networks performantly
    """
    detected_isa = cpu_architecture().isa
    return detected_isa == "neon"
def cpu_sve_compatible() -> bool:
    """
    :return: True if the isa reported by cpu_architecture is "sve",
        used for running neural networks performantly
    """
    detected_isa = cpu_architecture().isa
    return detected_isa == "sve"
def cpu_quantization_compatible() -> bool:
    """
    :return: True if the current cpu has the AVX2, AVX512, NEON or SVE
        instruction sets, used for running quantized neural networks
        performantly.
        (AVX2 < AVX512 < VNNI)
    """
    # evaluated lazily and in this order; stops at the first check that passes
    checks = (
        cpu_avx2_compatible,
        cpu_avx512_compatible,
        cpu_neon_compatible,
        cpu_sve_compatible,
    )
    return any(check() for check in checks)
def cpu_details() -> Tuple[int, str, bool]:
    """
    Summarize the result of cpu_architecture as a tuple.
    cpu_architecture raises an exception on unsupported systems.

    Specifically:
    - the number of physical cores available on the system
    - the vector instruction set detected
    - if vnni is available

    NM_ARCH environment variable can be used to override the instruction
    set detection

    :return: a tuple containing the detected cpu information
        (number of physical cores available, instruction set, vnni support)
    """
    arch = cpu_architecture()
    available_cores = arch.num_available_physical_cores
    isa = arch.isa
    vnni = arch.vnni
    return available_cores, isa, vnni
def print_hardware_capability():
    """
    Print a summary of the detected CPU: vendor, available cores and sockets,
    whether FP32 and INT8 (quantized) model performance is supported within
    the DeepSparse Engine, any applicable AVX2 / non-VNNI notes, and the full
    architecture details.
    """
    arch = cpu_architecture()

    # INT8 support is reported as emulated unless VNNI, NEON or SVE is present
    if cpu_quantization_compatible():
        quantized_flag = "TRUE (emulated)"
    else:
        quantized_flag = "FALSE"
    native_int8_checks = (
        cpu_vnni_compatible,
        cpu_neon_compatible,
        cpu_sve_compatible,
    )
    if any(check() for check in native_int8_checks):
        quantized_flag = "TRUE"

    fp32_checks = (
        cpu_avx2_compatible,
        cpu_avx512_compatible,
        cpu_neon_compatible,
        cpu_sve_compatible,
    )
    fp32_flag = any(check() for check in fp32_checks)

    sections = [
        f"{arch.vendor} CPU detected with {arch.num_available_physical_cores} cores. "
        f"({arch.available_sockets} sockets with "
        f"{arch.available_cores_per_socket} cores each)\n"
        f"DeepSparse FP32 model performance supported: {fp32_flag}.\n"
        "DeepSparse INT8 (quantized) model performance supported: "
        f"{quantized_flag}.\n\n"
    ]

    # the AVX2 / VNNI notes are only shown when neither NEON nor SVE is detected
    if not cpu_neon_compatible() and not cpu_sve_compatible():
        if cpu_avx2_compatible() and not cpu_avx512_compatible():
            sections.append(
                "AVX2 instruction set detected. Performance speedups are available, "
                "but inference time will be slower compared with an AVX-512 system.\n\n"
            )
        if cpu_quantization_compatible() and not cpu_vnni_compatible():
            sections.append(
                "Non VNNI system detected. Performance speedups for INT8 (quantized) "
                "models is available, but will be slower compared with a VNNI system. "
                "Set NM_FAST_VNNI_EMULATION=True in the environment to enable faster "
                "emulated inference which may have a minor effect on accuracy.\n\n"
            )

    sections.append(f"Additional CPU info: {arch}")
    print("".join(sections))