Tensorboard's SummaryWriter.add_graph() doesn't work with PackedSequence #125033

Open
daniel-padban opened this issue Apr 26, 2024 · 0 comments
Labels: module: tensorboard, oncall: visualization (Related to visualization in PyTorch, e.g., tensorboard)
🐛 Describe the bug

torch.utils.tensorboard's SummaryWriter.add_graph() throws a RuntimeError when a PackedSequence is passed to an LSTM network.
Exception:

  File "/Users/user/Desktop/projects/NN/LSTM_nn.py", line 46, in <module>
    writer.add_graph(model=model,input_to_model=packed_padded_dummy,verbose=True)
  File "/Users/user/Desktop/projects/NN/LSTM_nn.py", line 42, in forward
    lstm_out, (hn,cn) = lstm(x, (h0,c0))
                        ^^^^^^^^^^^^^^^^
RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors
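
The error itself comes from nn.LSTM's input/state shape check: a 3-D input counts as batched and requires 3-D (h0, c0), while a 2-D input counts as unbatched and requires 2-D states. A minimal standalone sketch (not from the report) that reproduces the same message:

import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=9, hidden_size=4, num_layers=2, batch_first=True)
x = torch.randn(15, 9)     # 2-D input is treated as unbatched: (seq_len, features)
h0 = torch.zeros(2, 1, 4)  # 3-D hidden state, as for a batched input
c0 = torch.zeros(2, 1, 4)  # 3-D cell state

try:
    lstm(x, (h0, c0))
except RuntimeError as e:
    print(e)  # For unbatched 2-D input, hx and cx should also be 2-D ...

That the 3-D states in forward() trip this check suggests the LSTM received a 2-D tensor during graph tracing, even though a PackedSequence was passed to add_graph.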

The code which generated this is:

import torch
from torch.utils.tensorboard import SummaryWriter
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

# DL_batch_size, seq_len, model and writer are defined elsewhere in the script;
# the reshape below suggests DL_batch_size = 20 and seq_len = 15.
dummy_tensor = torch.randn([DL_batch_size, seq_len, 9], requires_grad=True)
padded_dummy = pad_sequence(dummy_tensor, batch_first=True, padding_value=33333)

lens = [len(seq) for seq in dummy_tensor]
packed_padded_dummy = pack_padded_sequence(padded_dummy, lens, batch_first=True, enforce_sorted=False)
packed_padded_dummy.data.reshape([20, 15, 9])  # note: reshape returns a new tensor, so this line has no effect
dummy_result = model(packed_padded_dummy)  # a direct forward pass with the PackedSequence

writer.add_graph(model=model, input_to_model=packed_padded_dummy, verbose=True)  # line which causes the error
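
For context, a PackedSequence is a named tuple of tensors whose data field is 2-D, shaped (total_timesteps, features); if add_graph's tracer decomposes the input into its component tensors, the LSTM would see exactly the unbatched 2-D input the error complains about. A small illustration of that structure (sizes assumed to match the reshape above):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

batch, seq_len, feats = 20, 15, 9  # assumed values
x = torch.randn(batch, seq_len, feats)
packed = pack_padded_sequence(x, [seq_len] * batch, batch_first=True, enforce_sorted=False)

print(type(packed).__name__)  # PackedSequence
print(packed.data.shape)      # torch.Size([300, 9]) -- a 2-D tensor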

The forward method in the network:

import torch
from torch.nn.utils.rnn import pad_packed_sequence

def forward(self, x, *args):
    lstm = self.lstm  # this is nn.LSTM()
    batch_size = self.batch_size
    h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
    c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)

    lstm_out, (hn, cn) = lstm(x, (h0, c0))  # exception is thrown on this line
    padded_lstm_out, _ = pad_packed_sequence(lstm_out, batch_first=True, padding_value=333)

    output = self.fc1(padded_lstm_out[:, -1, :])

    return output
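
Since the model handles the PackedSequence correctly in a direct call, a possible workaround (offered as an untested sketch, not a confirmed fix) is to trace with a plain padded tensor and build the PackedSequence inside a thin wrapper module, so add_graph only ever sees Tensor inputs. The TraceableLSTM wrapper is hypothetical, and model and lens are reused from the snippet above:

import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

class TraceableLSTM(nn.Module):  # hypothetical wrapper used only for graph export
    def __init__(self, model, lengths):
        super().__init__()
        self.model = model
        self.lengths = lengths  # per-sequence lengths, e.g. the `lens` list above

    def forward(self, padded):  # padded: a plain (batch, seq_len, features) tensor
        packed = pack_padded_sequence(padded, self.lengths,
                                      batch_first=True, enforce_sorted=False)
        return self.model(packed)

# writer.add_graph(TraceableLSTM(model, lens), input_to_model=padded_dummy)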

Versions

# Unlike the rest of PyTorch, this file must be python2 compliant.
# This script outputs relevant system environment info
# Run it with `python collect_env.py` or `python -m torch.utils.collect_env`
import datetime
import locale
import re
import subprocess
import sys
import os
from collections import namedtuple

try:
    import torch
    TORCH_AVAILABLE = True
except (ImportError, NameError, AttributeError, OSError):
    TORCH_AVAILABLE = False
# System Environment Information
SystemEnv = namedtuple('SystemEnv', [
    'torch_version',
    'is_debug_build',
    'cuda_compiled_version',
    'gcc_version',
    'clang_version',
    'cmake_version',
    'os',
    'libc_version',
    'python_version',
    'python_platform',
    'is_cuda_available',
    'cuda_runtime_version',
    'cuda_module_loading',
    'nvidia_driver_version',
    'nvidia_gpu_models',
    'cudnn_version',
    'pip_version',  # 'pip' or 'pip3'
    'pip_packages',
    'conda_packages',
    'hip_compiled_version',
    'hip_runtime_version',
    'miopen_runtime_version',
    'caching_allocator_config',
    'is_xnnpack_available',
    'cpu_info',
])

DEFAULT_CONDA_PATTERNS = {
    "torch",
    "numpy",
    "cudatoolkit",
    "soumith",
    "mkl",
    "magma",
    "triton",
    "optree",
}

DEFAULT_PIP_PATTERNS = {
    "torch",
    "numpy",
    "mypy",
    "flake8",
    "triton",
    "optree",
    "onnx",
}

def run(command):
    """Return (return-code, stdout, stderr)."""
    shell = True if type(command) is str else False
    p = subprocess.Popen(command, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, shell=shell)
    raw_output, raw_err = p.communicate()
    rc = p.returncode
    if get_platform() == 'win32':
        enc = 'oem'
    else:
        enc = locale.getpreferredencoding()
    output = raw_output.decode(enc)
    err = raw_err.decode(enc)
    return rc, output.strip(), err.strip()
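
# Example usage of the helper above (illustrative, not part of the original file):
# `run` returns the return code plus the decoded, stripped stdout and stderr,
# so on a typical Unix shell:
#
#     rc, out, err = run('echo hello')
#     assert (rc, out, err) == (0, 'hello', '')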

def run_and_read_all(run_lambda, command):
    """Run command using run_lambda; reads and returns entire output if rc is 0."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out

def run_and_parse_first_match(run_lambda, command, regex):
    """Run command using run_lambda, returns the first regex match if it exists."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    match = re.search(regex, out)
    if match is None:
        return None
    return match.group(1)

def run_and_return_first_line(run_lambda, command):
    """Run command using run_lambda and returns first line if output is not empty."""
    rc, out, _ = run_lambda(command)
    if rc != 0:
        return None
    return out.split('\n')[0]

def get_conda_packages(run_lambda, patterns=None):
    if patterns is None:
        patterns = DEFAULT_CONDA_PATTERNS
    conda = os.environ.get('CONDA_EXE', 'conda')
    out = run_and_read_all(run_lambda, "{} list".format(conda))
    if out is None:
        return out

    return "\n".join(
        line
        for line in out.splitlines()
        if not line.startswith("#")
        and any(name in line for name in patterns)
    )

def get_gcc_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')

def get_clang_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)')

def get_cmake_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)')

def get_nvidia_driver_version(run_lambda):
    if get_platform() == 'darwin':
        cmd = 'kextstat | grep -i cuda'
        return run_and_parse_first_match(run_lambda, cmd,
                                         r'com[.]nvidia[.]CUDA [(](.*?)[)]')
    smi = get_nvidia_smi()
    return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ')

def get_gpu_info(run_lambda):
    if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None):
        if TORCH_AVAILABLE and torch.cuda.is_available():
            if torch.version.hip is not None:
                prop = torch.cuda.get_device_properties(0)
                if hasattr(prop, "gcnArchName"):
                    gcnArch = " ({})".format(prop.gcnArchName)
                else:
                    gcnArch = "NoGCNArchNameOnOldPyTorch"
            else:
                gcnArch = ""
            return torch.cuda.get_device_name(None) + gcnArch
        return None
    smi = get_nvidia_smi()
    uuid_regex = re.compile(r' \(UUID: .+?\)')
    rc, out, _ = run_lambda(smi + ' -L')
    if rc != 0:
        return None
    # Anonymize GPUs by removing their UUID
    return re.sub(uuid_regex, '', out)

def get_running_cuda_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)')

def get_cudnn_version(run_lambda):
    """Return a list of libcudnn.so; it's hard to tell which one is being used."""
    if get_platform() == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, 'System32', 'where')
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif get_platform() == 'darwin':
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'
    rc, out, _ = run_lambda(cudnn_cmd)
    # find will return 1 if there are permission errors or if not found
    if len(out) == 0 or (rc != 1 and rc != 0):
        l = os.environ.get('CUDNN_LIBRARY')
        if l is not None and os.path.isfile(l):
            return os.path.realpath(l)
        return None
    files_set = set()
    for fn in out.split('\n'):
        fn = os.path.realpath(fn)  # eliminate symbolic links
        if os.path.isfile(fn):
            files_set.add(fn)
    if not files_set:
        return None
    # Alphabetize the result because the order is non-deterministic otherwise
    files = sorted(files_set)
    if len(files) == 1:
        return files[0]
    result = '\n'.join(files)
    return 'Probably one of the following:\n{}'.format(result)

def get_nvidia_smi():
    # Note: nvidia-smi is currently available only on Windows and Linux
    smi = 'nvidia-smi'
    if get_platform() == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
        legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi)
        new_path = os.path.join(system_root, 'System32', smi)
        smis = [new_path, legacy_path]
        for candidate_smi in smis:
            if os.path.exists(candidate_smi):
                smi = '"{}"'.format(candidate_smi)
                break
    return smi

# example outputs of CPU infos
#  * linux
#    Architecture:            x86_64
#    CPU op-mode(s):          32-bit, 64-bit
#    Address sizes:           46 bits physical, 48 bits virtual
#    Byte Order:              Little Endian
#    CPU(s):                  128
#    On-line CPU(s) list:     0-127
#    Vendor ID:               GenuineIntel
#    Model name:              Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#    CPU family:              6
#    Model:                   106
#    Thread(s) per core:      2
#    Core(s) per socket:      32
#    Socket(s):               2
#    Stepping:                6
#    BogoMIPS:                5799.78
#    Flags:                   fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
#                             sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
#                             xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
#                             pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
#                             hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
#                             fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
#                             avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
#                             xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
#                             avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
#    Virtualization features:
#      Hypervisor vendor:     KVM
#      Virtualization type:   full
#    Caches (sum of all):
#      L1d:                   3 MiB (64 instances)
#      L1i:                   2 MiB (64 instances)
#      L2:                    80 MiB (64 instances)
#      L3:                    108 MiB (2 instances)
#    NUMA:
#      NUMA node(s):          2
#      NUMA node0 CPU(s):     0-31,64-95
#      NUMA node1 CPU(s):     32-63,96-127
#    Vulnerabilities:
#      Itlb multihit:         Not affected
#      L1tf:                  Not affected
#      Mds:                   Not affected
#      Meltdown:              Not affected
#      Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
#      Retbleed:              Not affected
#      Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
#      Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
#      Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
#      Srbds:                 Not affected
#      Tsx async abort:       Not affected
#  * win32
#    Architecture=9
#    CurrentClockSpeed=2900
#    DeviceID=CPU0
#    Family=179
#    L2CacheSize=40960
#    L2CacheSpeed=
#    Manufacturer=GenuineIntel
#    MaxClockSpeed=2900
#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#    ProcessorType=3
#    Revision=27142
#
#    Architecture=9
#    CurrentClockSpeed=2900
#    DeviceID=CPU1
#    Family=179
#    L2CacheSize=40960
#    L2CacheSpeed=
#    Manufacturer=GenuineIntel
#    MaxClockSpeed=2900
#    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
#    ProcessorType=3
#    Revision=27142

def get_cpu_info(run_lambda):
    rc, out, err = 0, '', ''
    if get_platform() == 'linux':
        rc, out, err = run_lambda('lscpu')
    elif get_platform() == 'win32':
        rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \
CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE')
    elif get_platform() == 'darwin':
        rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string")
    cpu_info = 'None'
    if rc == 0:
        cpu_info = out
    else:
        cpu_info = err
    return cpu_info

def get_platform():
    if sys.platform.startswith('linux'):
        return 'linux'
    elif sys.platform.startswith('win32'):
        return 'win32'
    elif sys.platform.startswith('cygwin'):
        return 'cygwin'
    elif sys.platform.startswith('darwin'):
        return 'darwin'
    else:
        return sys.platform

def get_mac_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)')

def get_windows_version(run_lambda):
    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
    wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic')
    findstr_cmd = os.path.join(system_root, 'System32', 'findstr')
    return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd))

def get_lsb_version(run_lambda):
    return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)')

def check_release_file(run_lambda):
    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
                                     r'PRETTY_NAME="(.*)"')

def get_os(run_lambda):
    from platform import machine
    platform = get_platform()

    if platform == 'win32' or platform == 'cygwin':
        return get_windows_version(run_lambda)

    if platform == 'darwin':
        version = get_mac_version(run_lambda)
        if version is None:
            return None
        return 'macOS {} ({})'.format(version, machine())

    if platform == 'linux':
        # Ubuntu/Debian based
        desc = get_lsb_version(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        # Try reading /etc/*-release
        desc = check_release_file(run_lambda)
        if desc is not None:
            return '{} ({})'.format(desc, machine())

        return '{} ({})'.format(platform, machine())

    # Unknown platform
    return platform

def get_python_platform():
    import platform
    return platform.platform()

def get_libc_version():
    import platform
    if get_platform() != 'linux':
        return 'N/A'
    return '-'.join(platform.libc_ver())

def get_pip_packages(run_lambda, patterns=None):
    """Return pip list output. Note: will also find conda-installed pytorch and numpy packages."""
    if patterns is None:
        patterns = DEFAULT_PIP_PATTERNS

    # People generally have `pip` as `pip` or `pip3`
    # But here it is invoked as `python -mpip`
    def run_with_pip(pip):
        out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"])
        return "\n".join(
            line
            for line in out.splitlines()
            if any(name in line for name in patterns)
        )

    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'
    out = run_with_pip([sys.executable, '-mpip'])

    return pip_version, out

def get_cachingallocator_config():
    ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
    return ca_config

def get_cuda_module_loading_config():
    if TORCH_AVAILABLE and torch.cuda.is_available():
        torch.cuda.init()
        config = os.environ.get('CUDA_MODULE_LOADING', '')
        return config
    else:
        return "N/A"

def is_xnnpack_available():
    if TORCH_AVAILABLE:
        import torch.backends.xnnpack
        return str(torch.backends.xnnpack.enabled)  # type: ignore[attr-defined]
    else:
        return "N/A"

def get_env_info():
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    if TORCH_AVAILABLE:
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
        cuda_available_str = str(torch.cuda.is_available())
        cuda_version_str = torch.version.cuda
        if not hasattr(torch.version, 'hip') or torch.version.hip is None:  # cuda version
            hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'
        else:  # HIP version
            def get_version_or_na(cfg, prefix):
                _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s]
                return _lst[0] if _lst else 'N/A'

            cfg = torch._C._show_config().split('\n')
            hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
            miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
            cuda_version_str = 'N/A'
            hip_compiled_version = torch.version.hip
    else:
        version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A'
        hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A'

    sys_version = sys.version.replace("\n", " ")

    conda_packages = get_conda_packages(run_lambda)

    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1),
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        cuda_module_loading=get_cuda_module_loading_config(),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled_version,
        hip_runtime_version=hip_runtime_version,
        miopen_runtime_version=miopen_runtime_version,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=conda_packages,
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
        cpu_info=get_cpu_info(run_lambda),
    )
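
# Since `get_env_info` returns the `SystemEnv` namedtuple defined above,
# individual fields can be read by attribute (illustrative sketch, not part
# of the original file):
#
#     info = get_env_info()
#     print(info.torch_version)      # e.g. '2.2.2'
#     print(info.is_cuda_available)  # 'True' or 'False' as a string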

env_info_fmt = """
PyTorch version: {torch_version}
Is debug build: {is_debug_build}
CUDA used to build PyTorch: {cuda_compiled_version}
ROCM used to build PyTorch: {hip_compiled_version}

OS: {os}
GCC version: {gcc_version}
Clang version: {clang_version}
CMake version: {cmake_version}
Libc version: {libc_version}

Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
CUDA runtime version: {cuda_runtime_version}
CUDA_MODULE_LOADING set to: {cuda_module_loading}
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}

CPU:
{cpu_info}

Versions of relevant libraries:
{pip_packages}
{conda_packages}
""".strip()

def pretty_str(envinfo):
    def replace_nones(dct, replacement='Could not collect'):
        for key in dct.keys():
            if dct[key] is not None:
                continue
            dct[key] = replacement
        return dct

    def replace_bools(dct, true='Yes', false='No'):
        for key in dct.keys():
            if dct[key] is True:
                dct[key] = true
            elif dct[key] is False:
                dct[key] = false
        return dct

    def prepend(text, tag='[prepend]'):
        lines = text.split('\n')
        updated_lines = [tag + line for line in lines]
        return '\n'.join(updated_lines)

    def replace_if_empty(text, replacement='No relevant packages'):
        if text is not None and len(text) == 0:
            return replacement
        return text

    def maybe_start_on_next_line(string):
        # If `string` is multiline, prepend a \n to it.
        if string is not None and len(string.split('\n')) > 1:
            return '\n{}\n'.format(string)
        return string

    mutable_dict = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    mutable_dict['nvidia_gpu_models'] = \
        maybe_start_on_next_line(envinfo.nvidia_gpu_models)

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        'cuda_runtime_version',
        'nvidia_gpu_models',
        'nvidia_driver_version',
    ]
    all_cuda_fields = dynamic_cuda_fields + ['cudnn_version']
    all_dynamic_cuda_fields_missing = all(
        mutable_dict[field] is None for field in dynamic_cuda_fields)
    if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing:
        for field in all_cuda_fields:
            mutable_dict[field] = 'No CUDA'
        if envinfo.cuda_compiled_version is None:
            mutable_dict['cuda_compiled_version'] = 'None'

    # Replace True with Yes, False with No
    mutable_dict = replace_bools(mutable_dict)

    # Replace all None objects with 'Could not collect'
    mutable_dict = replace_nones(mutable_dict)

    # If either of these are '', replace with 'No relevant packages'
    mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages'])
    mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages'])

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as ie '[conda] Could not collect'
    if mutable_dict['pip_packages']:
        mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'],
                                               '[{}] '.format(envinfo.pip_version))
    if mutable_dict['conda_packages']:
        mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'],
                                                 '[conda] ')
    mutable_dict['cpu_info'] = envinfo.cpu_info
    return env_info_fmt.format(**mutable_dict)

def get_pretty_env_info():
    return pretty_str(get_env_info())

def main():
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
            dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)]
            latest = max(dumps, key=os.path.getctime)
            ctime = os.path.getctime(latest)
            creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')
            msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                  "if this is related to your bug please include it when you file a report ***"
            print(msg, file=sys.stderr)

if __name__ == '__main__':
    main()
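
The script above is normally executed rather than imported, as its header comment notes; the same report can also be produced programmatically through the copy that ships with PyTorch. A minimal usage sketch, assuming torch is installed:

from torch.utils.collect_env import get_pretty_env_info

print(get_pretty_env_info())  # equivalent to `python -m torch.utils.collect_env`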

mikaylagawarecki added the module: tensorboard and oncall: visualization labels on Apr 26, 2024