Skip to content

Commit

Permalink
[lit][clang] Avoid realpath on Windows due to MAX_PATH limitations
Browse files Browse the repository at this point in the history
Running lit tests on Windows can fail because its use of
`os.path.realpath` expands substitute drives, which are used to keep
paths short and avoid hitting MAX_PATH limitations.

Changes lit logic to:

Use `os.path.abspath` on Windows, where `MAX_PATH` is a concern that we
can work around using substitute drives, which `os.path.realpath` would
resolve.

Use `os.path.realpath` on Unix, where the current directory always has
symlinks resolved, so it is impossible to preserve symlinks in the
presence of relative paths, and so we must make sure that all code paths
use real paths.

Also updates clang's `FileManager::getCanonicalName` and `ExtractAPI`
code to avoid resolving substitute drives (i.e. resolving to a path
under a different root).

How tested: built with `-DLLVM_ENABLE_PROJECTS=clang` and built `check-all` on both Windows

Differential Revision: https://reviews.llvm.org/D154130
Reviewed By: @benlangmuir

Patch by Tristan Labelle <tristan@thebrowser.company>!
  • Loading branch information
compnerd committed Aug 1, 2023
1 parent cdb7d57 commit 05d613e
Show file tree
Hide file tree
Showing 16 changed files with 150 additions and 92 deletions.
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/FileManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,13 @@ class FileManager : public RefCountedBase<FileManager> {
/// required, which is (almost) never.
StringRef getCanonicalName(const FileEntry *File);

private:
/// Retrieve the canonical name for a given file or directory.
///
/// The first param is a key in the CanonicalNames array.
StringRef getCanonicalName(const void *Entry, StringRef Name);

public:
void PrintStats() const;
};

Expand Down
55 changes: 35 additions & 20 deletions clang/lib/Basic/FileManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,33 +632,48 @@ void FileManager::GetUniqueIDMapping(
}

StringRef FileManager::getCanonicalName(DirectoryEntryRef Dir) {
auto Known = CanonicalNames.find(Dir);
if (Known != CanonicalNames.end())
return Known->second;

StringRef CanonicalName(Dir.getName());

SmallString<4096> CanonicalNameBuf;
if (!FS->getRealPath(Dir.getName(), CanonicalNameBuf))
CanonicalName = CanonicalNameBuf.str().copy(CanonicalNameStorage);

CanonicalNames.insert({Dir, CanonicalName});
return CanonicalName;
return getCanonicalName(Dir, Dir.getName());
}

StringRef FileManager::getCanonicalName(const FileEntry *File) {
llvm::DenseMap<const void *, llvm::StringRef>::iterator Known
= CanonicalNames.find(File);
return getCanonicalName(File, File->getName());
}

StringRef FileManager::getCanonicalName(const void *Entry, StringRef Name) {
llvm::DenseMap<const void *, llvm::StringRef>::iterator Known =
CanonicalNames.find(Entry);
if (Known != CanonicalNames.end())
return Known->second;

StringRef CanonicalName(File->getName());

SmallString<4096> CanonicalNameBuf;
if (!FS->getRealPath(File->getName(), CanonicalNameBuf))
CanonicalName = CanonicalNameBuf.str().copy(CanonicalNameStorage);
// Name comes from FileEntry/DirectoryEntry::getName(), so it is safe to
// store it in the DenseMap below.
StringRef CanonicalName(Name);

SmallString<256> AbsPathBuf;
SmallString<256> RealPathBuf;
if (!FS->getRealPath(Name, RealPathBuf)) {
if (is_style_windows(llvm::sys::path::Style::native)) {
// For Windows paths, only use the real path if it doesn't resolve
// a substitute drive, as those are used to avoid MAX_PATH issues.
AbsPathBuf = Name;
if (!FS->makeAbsolute(AbsPathBuf)) {
if (llvm::sys::path::root_name(RealPathBuf) ==
llvm::sys::path::root_name(AbsPathBuf)) {
CanonicalName = RealPathBuf.str().copy(CanonicalNameStorage);
} else {
// Fallback to using the absolute path.
// Simplifying /../ is semantically valid on Windows even in the
// presence of symbolic links.
llvm::sys::path::remove_dots(AbsPathBuf, /*remove_dot_dot=*/true);
CanonicalName = AbsPathBuf.str().copy(CanonicalNameStorage);
}
}
} else {
CanonicalName = RealPathBuf.str().copy(CanonicalNameStorage);
}
}

CanonicalNames.insert({File, CanonicalName});
CanonicalNames.insert({Entry, CanonicalName});
return CanonicalName;
}

Expand Down
4 changes: 1 addition & 3 deletions clang/lib/ExtractAPI/ExtractAPIConsumer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ struct LocationFileChecker {
if (ExternalFileEntries.count(File))
return false;

StringRef FileName = File->tryGetRealPathName().empty()
? File->getName()
: File->tryGetRealPathName();
StringRef FileName = SM.getFileManager().getCanonicalName(File);

// Try to reduce the include name the same way we tried to include it.
bool IsQuoted = false;
Expand Down
8 changes: 7 additions & 1 deletion clang/test/Lexer/case-insensitive-include-win.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
// This file should only include code that really needs a Windows host OS to
// run.

// Note: We must use the real path here, because the logic to detect case
// mismatches must resolve the real path to figure out the original casing.
// If we use %t and we are on a substitute drive S: mapping to C:\subst,
// then we will compare "S:\test.dir\FOO.h" to "C:\subst\test.dir\foo.h"
// and avoid emitting the diagnostic because the structure is different.

// REQUIRES: system-windows
// RUN: mkdir -p %t.dir
// RUN: touch %t.dir/foo.h
// RUN: not %clang_cl /FI\\?\%t.dir\FOO.h /WX -fsyntax-only %s 2>&1 | FileCheck %s
// RUN: not %clang_cl /FI \\?\%{t:real}.dir\FOO.h /WX -fsyntax-only %s 2>&1 | FileCheck %s

// CHECK: non-portable path to file '"\\?\
9 changes: 7 additions & 2 deletions llvm/cmake/modules/AddLLVM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1711,10 +1711,15 @@ endfunction()
# use it and can't be in a lit module. Use with make_paths_relative().
string(CONCAT LLVM_LIT_PATH_FUNCTION
"# Allow generated file to be relocatable.\n"
"from pathlib import Path\n"
"import os\n"
"import platform\n"
"def path(p):\n"
" if not p: return ''\n"
" return str((Path(__file__).parent / p).resolve())\n"
" # Follows lit.util.abs_path_preserve_drive, which cannot be imported here.\n"
" if platform.system() == 'Windows':\n"
" return os.path.abspath(os.path.join(os.path.dirname(__file__), p))\n"
" else:\n"
" return os.path.realpath(os.path.join(os.path.dirname(__file__), p))\n"
)

# This function provides an automatic way to 'configure'-like generate a file
Expand Down
10 changes: 10 additions & 0 deletions llvm/docs/CommandGuide/lit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,16 @@ TestRunner.py:
%/p %p but ``\`` is replaced by ``/``
%/t %t but ``\`` is replaced by ``/``
%/T %T but ``\`` is replaced by ``/``
%{s:real} %s after expanding all symbolic links and substitute drives
%{S:real} %S after expanding all symbolic links and substitute drives
%{p:real} %p after expanding all symbolic links and substitute drives
%{t:real} %t after expanding all symbolic links and substitute drives
%{T:real} %T after expanding all symbolic links and substitute drives
%{/s:real} %/s after expanding all symbolic links and substitute drives
%{/S:real} %/S after expanding all symbolic links and substitute drives
%{/p:real} %/p after expanding all symbolic links and substitute drives
%{/t:real} %/t after expanding all symbolic links and substitute drives
%{/T:real} %/T after expanding all symbolic links and substitute drives
%{/s:regex_replacement} %/s but escaped for use in the replacement of a ``s@@@`` command in sed
%{/S:regex_replacement} %/S but escaped for use in the replacement of a ``s@@@`` command in sed
%{/p:regex_replacement} %/p but escaped for use in the replacement of a ``s@@@`` command in sed
Expand Down
14 changes: 12 additions & 2 deletions llvm/docs/TestingGuide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ RUN lines:
``${fs-sep}``
Expands to the file system separator, i.e. ``/`` or ``\`` on Windows.

``%/s, %/S, %/t, %/T:``
``%/s, %/S, %/t, %/T``

Act like the corresponding substitution above but replace any ``\``
character with a ``/``. This is useful to normalize path separators.
Expand All @@ -680,7 +680,17 @@ RUN lines:

Example: ``%/s: C:/Desktop Files/foo_test.s.tmp``

``%:s, %:S, %:t, %:T:``
``%{s:real}, %{S:real}, %{t:real}, %{T:real}``
``%{/s:real}, %{/S:real}, %{/t:real}, %{/T:real}``

Act like the corresponding substitution, including with ``/``, but use
the real path by expanding all symbolic links and substitute drives.

Example: ``%s: S:\foo_test.s.tmp``

Example: ``%{/s:real}: C:/SDrive/foo_test.s.tmp``

``%:s, %:S, %:t, %:T``

Act like the corresponding substitution above but remove colons at
the beginning of Windows paths. This is useful to allow concatenation
Expand Down
90 changes: 40 additions & 50 deletions llvm/utils/lit/lit/TestRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def change_dir(self, newdir):
if os.path.isabs(newdir):
self.cwd = newdir
else:
self.cwd = os.path.realpath(os.path.join(self.cwd, newdir))
self.cwd = lit.util.abs_path_preserve_drive(os.path.join(self.cwd, newdir))


class TimeoutHelper(object):
Expand Down Expand Up @@ -427,7 +427,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
dir = to_unicode(dir) if kIsWindows else to_bytes(dir)
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(dir):
dir = os.path.realpath(os.path.join(cwd, dir))
dir = lit.util.abs_path_preserve_drive(os.path.join(cwd, dir))
if parent:
lit.util.mkdir_p(dir)
else:
Expand Down Expand Up @@ -473,7 +473,7 @@ def on_rm_error(func, path, exc_info):
path = to_unicode(path) if kIsWindows else to_bytes(path)
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(path):
path = os.path.realpath(os.path.join(cwd, path))
path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path))
if force and not os.path.exists(path):
continue
try:
Expand Down Expand Up @@ -1243,18 +1243,10 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
substitutions.extend(test.config.substitutions)
tmpName = tmpBase + ".tmp"
baseName = os.path.basename(tmpBase)
substitutions.extend(
[
("%s", sourcepath),
("%S", sourcedir),
("%p", sourcedir),
("%{pathsep}", os.pathsep),
("%t", tmpName),
("%basename_t", baseName),
("%T", tmpDir),
]
)

substitutions.append(("%{pathsep}", os.pathsep))
substitutions.append(("%basename_t", baseName))

substitutions.extend(
[
("%{fs-src-root}", pathlib.Path(sourcedir).anchor),
Expand All @@ -1263,49 +1255,47 @@ def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
]
)

# "%/[STpst]" should be normalized.
substitutions.extend(
[
("%/s", sourcepath.replace("\\", "/")),
("%/S", sourcedir.replace("\\", "/")),
("%/p", sourcedir.replace("\\", "/")),
("%/t", tmpBase.replace("\\", "/") + ".tmp"),
("%/T", tmpDir.replace("\\", "/")),
("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\")),
]
)
substitutions.append(("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\")))

# "%{/[STpst]:regex_replacement}" should be normalized like "%/[STpst]" but we're
# also in a regex replacement context of a s@@@ regex.
def regex_escape(s):
s = s.replace("@", r"\@")
s = s.replace("&", r"\&")
return s

substitutions.extend(
[
("%{/s:regex_replacement}", regex_escape(sourcepath.replace("\\", "/"))),
("%{/S:regex_replacement}", regex_escape(sourcedir.replace("\\", "/"))),
("%{/p:regex_replacement}", regex_escape(sourcedir.replace("\\", "/"))),
(
"%{/t:regex_replacement}",
regex_escape(tmpBase.replace("\\", "/")) + ".tmp",
),
("%{/T:regex_replacement}", regex_escape(tmpDir.replace("\\", "/"))),
]
)
path_substitutions = [
("s", sourcepath), ("S", sourcedir), ("p", sourcedir),
("t", tmpName), ("T", tmpDir)
]
for path_substitution in path_substitutions:
letter = path_substitution[0]
path = path_substitution[1]

# Original path variant
substitutions.append(("%" + letter, path))

# Normalized path separator variant
substitutions.append(("%/" + letter, path.replace("\\", "/")))

# realpath variants
# Windows paths with substitute drives are not expanded by default
# as they are used to avoid MAX_PATH issues, but sometimes we do
# need the fully expanded path.
real_path = os.path.realpath(path)
substitutions.append(("%{" + letter + ":real}", real_path))
substitutions.append(("%{/" + letter + ":real}",
real_path.replace("\\", "/")))

# "%{/[STpst]:regex_replacement}" should be normalized like
# "%/[STpst]" but we're also in a regex replacement context
# of a s@@@ regex.
substitutions.append(
("%{/" + letter + ":regex_replacement}",
regex_escape(path.replace("\\", "/"))))

# "%:[STpst]" are normalized paths without colons and without
# a leading slash.
substitutions.append(("%:" + letter, colonNormalizePath(path)))

# "%:[STpst]" are normalized paths without colons and without a leading
# slash.
substitutions.extend(
[
("%:s", colonNormalizePath(sourcepath)),
("%:S", colonNormalizePath(sourcedir)),
("%:p", colonNormalizePath(sourcedir)),
("%:t", colonNormalizePath(tmpBase + ".tmp")),
("%:T", colonNormalizePath(tmpDir)),
]
)
return substitutions


Expand Down
2 changes: 1 addition & 1 deletion llvm/utils/lit/lit/builtin_commands/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def main(argv):
try:
for file in args:
if file != "-" and not os.path.isabs(file):
file = os.path.realpath(os.path.join(os.getcwd(), file))
file = util.abs_path_preserve_drive(file)

if flags.recursive_diff:
if file == "-":
Expand Down
12 changes: 6 additions & 6 deletions llvm/utils/lit/lit/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import sys

from lit.TestingConfig import TestingConfig
from lit import LitConfig, Test
from lit import LitConfig, Test, util


def chooseConfigFileFromDir(dir, config_names):
Expand Down Expand Up @@ -56,8 +56,8 @@ def search1(path):
# configuration to load instead.
config_map = litConfig.params.get("config_map")
if config_map:
cfgpath = os.path.realpath(cfgpath)
target = config_map.get(os.path.normcase(cfgpath))
cfgpath = util.abs_path_preserve_drive(cfgpath)
target = config_map.get(cfgpath)
if target:
cfgpath = target

Expand All @@ -67,13 +67,13 @@ def search1(path):

cfg = TestingConfig.fromdefaults(litConfig)
cfg.load_from_path(cfgpath, litConfig)
source_root = os.path.realpath(cfg.test_source_root or path)
exec_root = os.path.realpath(cfg.test_exec_root or path)
source_root = util.abs_path_preserve_drive(cfg.test_source_root or path)
exec_root = util.abs_path_preserve_drive(cfg.test_exec_root or path)
return Test.TestSuite(cfg.name, source_root, exec_root, cfg), ()

def search(path):
# Check for an already instantiated test suite.
real_path = os.path.realpath(path)
real_path = util.abs_path_preserve_drive(path)
res = cache.get(real_path)
if res is None:
cache[real_path] = res = search1(path)
Expand Down
17 changes: 17 additions & 0 deletions llvm/utils/lit/lit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ def usable_core_count():

return n

def abs_path_preserve_drive(path):
"""Return the absolute path without resolving drive mappings on Windows.
"""
if platform.system() == "Windows":
# Windows has limitations on path length (MAX_PATH) that
# can be worked around using substitute drives, which map
# a drive letter to a longer path on another drive.
# Since Python 3.8, os.path.realpath resolves sustitute drives,
# so we should not use it. In Python 3.7, os.path.realpath
# was implemented as os.path.abspath.
return os.path.normpath(os.path.abspath(path))
else:
# On UNIX, the current directory always has symbolic links resolved,
# so any program accepting relative paths cannot preserve symbolic
# links in paths and we should always use os.path.realpath.
return os.path.normpath(os.path.realpath(path))

def mkdir(path):
try:
Expand Down
4 changes: 1 addition & 3 deletions llvm/utils/lit/tests/Inputs/config-map-discovery/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
import os
import sys

main_config = sys.argv[1]
main_config = os.path.realpath(main_config)
main_config = os.path.normcase(main_config)
main_config = lit.util.abs_path_preserve_drive(sys.argv[1])

config_map = {main_config: sys.argv[2]}
builtin_parameters = {"config_map": config_map}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ config.suffixes = ['.txt']
config.test_format = lit.formats.ShTest()

import os
config.test_exec_root = os.path.realpath(os.path.dirname(__file__))
config.test_exec_root = os.path.dirname(lit.util.abs_path_preserve_drive(__file__))
config.test_source_root = os.path.join(config.test_exec_root, "tests")

0 comments on commit 05d613e

Please sign in to comment.