Skip to content

Commit

Permalink
[inductor] optimize isa dry compile time. (#124602)
Browse files Browse the repository at this point in the history
Fixes #100378
The original issue was caused by the startup dry compile, which costs almost 1 second.

This PR adds the compiler version info, the ISA build options and the PyTorch version info to the hash of the test binary path.
So a run with the same compiler, the same ISA and the same PyTorch version can skip the dry compile.

Local test:
First time:
<img width="1588" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/d0b83f5d-849e-4f37-9977-3b0276e5a5a5">
We need to compile all C++ modules, which costs 16.5s.

Second time:
<img width="1589" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/44f07fb0-5a15-4342-b0f6-dfe2c880b5d3">
We skipped the dry compile because the ISA fingerprint is the same. It only costs 0.36s.

Pull Request resolved: #124602
Approved by: https://github.com/jgong5, https://github.com/ezyang
  • Loading branch information
xuhancn authored and pytorchmergebot committed Apr 25, 2024
1 parent db3a2d7 commit c715e76
Showing 1 changed file with 45 additions and 2 deletions.
47 changes: 45 additions & 2 deletions torch/_inductor/codecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def use_global_cache() -> bool:

# NOTE(review): presumably a lock-acquisition timeout in seconds — confirm
# against the code that consumes LOCK_TIMEOUT.
LOCK_TIMEOUT = 600

# True when the interpreter reports the Windows platform ("win32").
_IS_WINDOWS = sys.platform == "win32"

# timing metrics for time spent in the compilation
_cumulative_compile_time = 0.0
_t0: Optional[float] = None
Expand Down Expand Up @@ -1049,6 +1051,40 @@ def is_clang() -> bool:
return bool(re.search(r"(clang|clang\+\+)", cpp_compiler()))


def get_compiler_version_info(compiler: str) -> str:
    """Return the compiler's version banner flattened onto a single line.

    The compiler is first invoked with ``-v``; if that attempt fails for any
    reason (launch error, non-zero exit status, undecodable output), it is
    invoked again with ``--version``. stderr is merged into stdout, and the
    child runs with ``LC_ALL=C`` so the text is not localized.

    Args:
        compiler: Executable name or path of the compiler to query.

    Returns:
        The captured output with every carriage return and newline replaced
        by ``_``, or an empty string if neither invocation succeeded.
    """
    # On Windows the output is decoded with the "oem" codec; elsewhere
    # bytes.decode() is called with its default arguments.
    decode_args = ("oem",) if _IS_WINDOWS else ()
    env = os.environ.copy()
    env["LC_ALL"] = "C"  # Don't localize output

    for flag in ("-v", "--version"):
        try:
            version_string = subprocess.check_output(
                [compiler, flag], stderr=subprocess.STDOUT, env=env
            ).decode(*decode_args)
        except Exception:
            # Deliberately best-effort: any failure just moves on to the next
            # flag, and ultimately to the empty-string fallback below.
            continue
        # Multiple lines to one line string.
        return version_string.replace("\r", "_").replace("\n", "_")

    return ""


def _get_isa_dry_compile_fingerprint(isa_flags: str) -> str:
    """Build the fingerprint string used to key the ISA dry-compile binary.

    The ISA dry compile costs about one second on every startup (see
    https://github.com/pytorch/pytorch/issues/100378). It only checks whether
    the compiler can build for the given ISA, so the compiler version info,
    the ISA flags and the PyTorch version are recorded here and fed into the
    hash of the output binary path. An existing binary with the same
    fingerprint can then be reused instead of being compiled again.

    Args:
        isa_flags: The ISA build options to include in the fingerprint.

    Returns:
        ``"<compiler version info>=<isa_flags>=<torch version>"``.
    """
    torch_version = torch.__version__
    compiler_info = get_compiler_version_info(cpp_compiler())
    return f"{compiler_info}={isa_flags}={torch_version}"


class VecISA:
_bit_width: int
_macro: str
Expand Down Expand Up @@ -1114,7 +1150,11 @@ def __bool__(self) -> bool:
if config.is_fbcode():
return True

key, input_path = write(VecISA._avx_code, "cpp")
key, input_path = write(
VecISA._avx_code,
"cpp",
extra=_get_isa_dry_compile_fingerprint(self._arch_flags),
)
from filelock import FileLock

lock_dir = get_lock_dir()
Expand All @@ -1127,8 +1167,11 @@ def __bool__(self) -> bool:
)
)
try:
# Check if the output file exist, and compile when not.
if not os.path.isfile(output_path):
compile_file(input_path, output_path, build_cmd)

# Check build result
compile_file(input_path, output_path, build_cmd)
subprocess.check_call(
[
sys.executable,
Expand Down

0 comments on commit c715e76

Please sign in to comment.