From 13cb36533478e60a2ee139d666b166be21f85101 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 10:32:32 -0700 Subject: [PATCH 01/14] hard link coursier --- src/python/pants/backend/jvm/tasks/coursier_resolve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/backend/jvm/tasks/coursier_resolve.py b/src/python/pants/backend/jvm/tasks/coursier_resolve.py index 11110c131cf..3bb62adac59 100644 --- a/src/python/pants/backend/jvm/tasks/coursier_resolve.py +++ b/src/python/pants/backend/jvm/tasks/coursier_resolve.py @@ -613,7 +613,7 @@ def _map_coord_to_resolved_jars(cls, result, coursier_cache_path, pants_jar_path if not os.path.exists(pants_path): safe_mkdir(os.path.dirname(pants_path)) - os.symlink(jar_path, pants_path) + os.link(jar_path, pants_path) coord = cls.to_m2_coord(coord) resolved_jar = ResolvedJar(coord, From 3bb05d68fdffe14cbbc653e91b1f8a64cccf9aa0 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 14:52:56 -0700 Subject: [PATCH 02/14] ivy_util to use hard link --- src/python/pants/backend/jvm/ivy_utils.py | 126 +++++++++++----------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/src/python/pants/backend/jvm/ivy_utils.py b/src/python/pants/backend/jvm/ivy_utils.py index 955a27db141..f660f7af2c3 100644 --- a/src/python/pants/backend/jvm/ivy_utils.py +++ b/src/python/pants/backend/jvm/ivy_utils.py @@ -86,19 +86,19 @@ def workdir(self): return os.path.join(self.global_ivy_workdir, self.hash_name) @property - def symlink_classpath_filename(self): + def hardlink_classpath_filename(self): return os.path.join(self.workdir, 'classpath') @property def ivy_cache_classpath_filename(self): - return '{}.raw'.format(self.symlink_classpath_filename) + return '{}.raw'.format(self.hardlink_classpath_filename) @property def frozen_resolve_file(self): return os.path.join(self.workdir, 'resolution.json') @property - def symlink_dir(self): + def hardlink_dir(self): return os.path.join(self.global_ivy_workdir, 'jars') @abstractmethod @@ -109,13 +109,13 @@ def ivy_xml_path(self): def resolve_report_path(self, conf): """Location of the resolve report in the workdir.""" - def _construct_and_load_symlink_map(self): - artifact_paths, symlink_map = IvyUtils.construct_and_load_symlink_map( - self.symlink_dir, + def _construct_and_load_hardlink_map(self): + artifact_paths, hardlink_map = IvyUtils.construct_and_load_hardlink_map( + self.hardlink_dir, self.ivy_repository_cache_dir, self.ivy_cache_classpath_filename, - self.symlink_classpath_filename) - return artifact_paths, symlink_map + self.hardlink_classpath_filename) + return artifact_paths, hardlink_map def _call_ivy(self, executor, extra_args, ivyxml, jvm_options, hash_name_for_report, workunit_factory, workunit_name): @@ -179,9 +179,9 @@ def exec_and_load(self, executor, extra_args, targets, jvm_options, workunit_nam return result def _load_from_fetch(self, frozen_resolutions): - artifact_paths, symlink_map = self._construct_and_load_symlink_map() + artifact_paths, hardlink_map = self._construct_and_load_hardlink_map() return IvyFetchResolveResult(artifact_paths, - symlink_map, + hardlink_map, self.hash_name, self.workdir_reports_by_conf, frozen_resolutions) @@ -227,9 +227,9 @@ def ivy_xml_path(self): return os.path.join(self.workdir, 'resolve-ivy.xml') def load(self, targets): - artifact_paths, symlink_map = self._construct_and_load_symlink_map() + artifact_paths, hardlink_map = self._construct_and_load_hardlink_map() return IvyResolveResult(artifact_paths, - symlink_map, + hardlink_map, self.hash_name, self.workdir_reports_by_conf) @@ -381,11 +381,11 @@ class IvyResolveResult(object): and the targets that requested them and the hash name of the resolve. """ - def __init__(self, resolved_artifact_paths, symlink_map, resolve_hash_name, reports_by_conf): + def __init__(self, resolved_artifact_paths, hardlink_map, resolve_hash_name, reports_by_conf): self._reports_by_conf = reports_by_conf self.resolved_artifact_paths = resolved_artifact_paths self.resolve_hash_name = resolve_hash_name - self._symlink_map = symlink_map + self._hardlink_map = hardlink_map @property def has_resolved_artifacts(self): @@ -437,7 +437,7 @@ def resolved_jars_for_each_target(self, conf, targets): ivy_jar_memo = {} for target in jar_library_targets: # Add the artifacts from each dependency module. - resolved_jars = self._resolved_jars_with_symlinks(conf, ivy_info, ivy_jar_memo, + resolved_jars = self._resolved_jars_with_hardlinks(conf, ivy_info, ivy_jar_memo, self._jar_dependencies_for_target(conf, target), target) @@ -450,33 +450,33 @@ def _ivy_info_for(self, conf): report_path = self._reports_by_conf.get(conf) return IvyUtils.parse_xml_report(conf, report_path) - def _new_resolved_jar_with_symlink_path(self, conf, target, resolved_jar_without_symlink): + def _new_resolved_jar_with_hardlink_path(self, conf, target, resolved_jar_without_hardlink): def candidate_cache_paths(): # There is a focus on being lazy here to avoid `os.path.realpath` when we can. - yield resolved_jar_without_symlink.cache_path - yield os.path.realpath(resolved_jar_without_symlink.cache_path) + yield resolved_jar_without_hardlink.cache_path + yield os.path.realpath(resolved_jar_without_hardlink.cache_path) for cache_path in candidate_cache_paths(): - pants_path = self._symlink_map.get(cache_path) + pants_path = self._hardlink_map.get(cache_path) if pants_path: break else: raise IvyResolveMappingError( 'Jar {resolved_jar} in {spec} not resolved to the ivy ' - 'symlink map in conf {conf}.' + 'hardlink map in conf {conf}.' .format(spec=target.address.spec, - resolved_jar=resolved_jar_without_symlink.cache_path, + resolved_jar=resolved_jar_without_hardlink.cache_path, conf=conf)) - return ResolvedJar(coordinate=resolved_jar_without_symlink.coordinate, + return ResolvedJar(coordinate=resolved_jar_without_hardlink.coordinate, pants_path=pants_path, - cache_path=resolved_jar_without_symlink.cache_path) + cache_path=resolved_jar_without_hardlink.cache_path) - def _resolved_jars_with_symlinks(self, conf, ivy_info, ivy_jar_memo, coordinates, target): + def _resolved_jars_with_hardlinks(self, conf, ivy_info, ivy_jar_memo, coordinates, target): raw_resolved_jars = ivy_info.get_resolved_jars_for_coordinates(coordinates, memo=ivy_jar_memo) - resolved_jars = [self._new_resolved_jar_with_symlink_path(conf, target, raw_resolved_jar) + resolved_jars = [self._new_resolved_jar_with_hardlink_path(conf, target, raw_resolved_jar) for raw_resolved_jar in raw_resolved_jars] return resolved_jars @@ -484,9 +484,9 @@ def _resolved_jars_with_symlinks(self, conf, ivy_info, ivy_jar_memo, coordinates class IvyFetchResolveResult(IvyResolveResult): """A resolve result that uses the frozen resolution to look up dependencies.""" - def __init__(self, resolved_artifact_paths, symlink_map, resolve_hash_name, reports_by_conf, + def __init__(self, resolved_artifact_paths, hardlink_map, resolve_hash_name, reports_by_conf, frozen_resolutions): - super(IvyFetchResolveResult, self).__init__(resolved_artifact_paths, symlink_map, + super(IvyFetchResolveResult, self).__init__(resolved_artifact_paths, hardlink_map, resolve_hash_name, reports_by_conf) self._frozen_resolutions = frozen_resolutions @@ -693,8 +693,8 @@ class IvyUtils(object): # Protects ivy executions. _ivy_lock = threading.RLock() - # Protect writes to the global map of jar path -> symlinks to that jar. - _symlink_map_lock = threading.Lock() + # Protect writes to the global map of jar path -> hardlinks to that jar. + _hardlink_map_lock = threading.Lock() INTERNAL_ORG_NAME = 'internal' @@ -795,7 +795,7 @@ def _exec_ivy(cls, ivy, confs, ivyxml, args, jvm_options, executor, ivy_args.extend(args) ivy_jvm_options = list(jvm_options) - # Disable cache in File.getCanonicalPath(), makes Ivy work with -symlink option properly on ng. + # Disable cache in File.getCanonicalPath(), makes Ivy work with -hardlink option properly on ng. ivy_jvm_options.append('-Dsun.io.useCanonCaches=false') runner = ivy.runner(jvm_options=ivy_jvm_options, args=ivy_args, executor=executor) @@ -810,66 +810,66 @@ def _exec_ivy(cls, ivy, confs, ivyxml, args, jvm_options, executor, raise IvyUtils.IvyError(e) @classmethod - def construct_and_load_symlink_map(cls, symlink_dir, ivy_repository_cache_dir, - ivy_cache_classpath_filename, symlink_classpath_filename): - # Make our actual classpath be symlinks, so that the paths are uniform across systems. + def construct_and_load_hardlink_map(cls, hardlink_dir, ivy_repository_cache_dir, + ivy_cache_classpath_filename, hardlink_classpath_filename): + # Make our actual classpath be hardlinks, so that the paths are uniform across systems. # Note that we must do this even if we read the raw_target_classpath_file from the artifact - # cache. If we cache the target_classpath_file we won't know how to create the symlinks. - with IvyUtils._symlink_map_lock: - # A common dir for symlinks into the ivy2 cache. This ensures that paths to jars + # cache. If we cache the target_classpath_file we won't know how to create the hardlinks. + with IvyUtils._hardlink_map_lock: + # A common dir for hardlinks into the ivy2 cache. This ensures that paths to jars # in artifact-cached analysis files are consistent across systems. - # Note that we have one global, well-known symlink dir, again so that paths are + # Note that we have one global, well-known hardlink dir, again so that paths are # consistent across builds. - symlink_map = cls._symlink_cachepath(ivy_repository_cache_dir, + hardlink_map = cls._hardlink_cachepath(ivy_repository_cache_dir, ivy_cache_classpath_filename, - symlink_dir, - symlink_classpath_filename) - classpath = cls._load_classpath_from_cachepath(symlink_classpath_filename) - return classpath, symlink_map + hardlink_dir, + hardlink_classpath_filename) + classpath = cls._load_classpath_from_cachepath(hardlink_classpath_filename) + return classpath, hardlink_map @classmethod - def _symlink_cachepath(cls, ivy_repository_cache_dir, inpath, symlink_dir, outpath): - """Symlinks all paths listed in inpath that are under ivy_repository_cache_dir into symlink_dir. + def _hardlink_cachepath(cls, ivy_repository_cache_dir, inpath, hardlink_dir, outpath): + """hardlinks all paths listed in inpath that are under ivy_repository_cache_dir into hardlink_dir. - If there is an existing symlink for a file under inpath, it is used rather than creating - a new symlink. Preserves all other paths. Writes the resulting paths to outpath. - Returns a map of path -> symlink to that path. + If there is an existing hardlink for a file under inpath, it is used rather than creating + a new hardlink. Preserves all other paths. Writes the resulting paths to outpath. + Returns a map of path -> hardlink to that path. """ - safe_mkdir(symlink_dir) - # The ivy_repository_cache_dir might itself be a symlink. In this case, ivy may return paths that + safe_mkdir(hardlink_dir) + # The ivy_repository_cache_dir might itself be a hardlink. In this case, ivy may return paths that # reference the realpath of the .jar file after it is resolved in the cache dir. To handle - # this case, add both the symlink'ed path and the realpath to the jar to the symlink map. + # this case, add both the hardlink'ed path and the realpath to the jar to the hardlink map. real_ivy_cache_dir = os.path.realpath(ivy_repository_cache_dir) - symlink_map = OrderedDict() + hardlink_map = OrderedDict() inpaths = cls._load_classpath_from_cachepath(inpath) paths = OrderedSet([os.path.realpath(path) for path in inpaths]) for path in paths: if path.startswith(real_ivy_cache_dir): - symlink_map[path] = os.path.join(symlink_dir, os.path.relpath(path, real_ivy_cache_dir)) + hardlink_map[path] = os.path.join(hardlink_dir, os.path.relpath(path, real_ivy_cache_dir)) else: - # This path is outside the cache. We won't symlink it. - symlink_map[path] = path + # This path is outside the cache. We won't hardlink it. + hardlink_map[path] = path - # Create symlinks for paths in the ivy cache dir. - for path, symlink in six.iteritems(symlink_map): - if path == symlink: - # Skip paths that aren't going to be symlinked. + # Create hardlinks for paths in the ivy cache dir. + for path, hardlink in six.iteritems(hardlink_map): + if path == hardlink: + # Skip paths that aren't going to be hardlinked. continue - safe_mkdir(os.path.dirname(symlink)) + safe_mkdir(os.path.dirname(hardlink)) try: - os.symlink(path, symlink) + os.link(path, hardlink) except OSError as e: - # We don't delete and recreate the symlink, as this may break concurrently executing code. + # We don't delete and recreate the hardlink, as this may break concurrently executing code. if e.errno != errno.EEXIST: raise # (re)create the classpath with all of the paths with safe_open(outpath, 'w') as outfile: - outfile.write(':'.join(OrderedSet(symlink_map.values()))) + outfile.write(':'.join(OrderedSet(hardlink_map.values()))) - return dict(symlink_map) + return dict(hardlink_map) @classmethod def xml_report_path(cls, resolution_cache_dir, resolve_hash_name, conf): From 3df6d364a15c0f5f44cbb011793f2d54a6e2bceb Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 14:53:32 -0700 Subject: [PATCH 03/14] doc in coursier_resovle --- src/python/pants/backend/jvm/tasks/coursier_resolve.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/pants/backend/jvm/tasks/coursier_resolve.py b/src/python/pants/backend/jvm/tasks/coursier_resolve.py index 3bb62adac59..e8c91dd0e8a 100644 --- a/src/python/pants/backend/jvm/tasks/coursier_resolve.py +++ b/src/python/pants/backend/jvm/tasks/coursier_resolve.py @@ -191,7 +191,7 @@ def _prepare_vts_results_dir(self, pants_workdir, vts): def _prepare_workdir(self, pants_workdir): """ - Given pants workdir, prepare the location in pants workdir to store all the symlinks + Given pants workdir, prepare the location in pants workdir to store all the hardlinks to coursier cache dir. """ pants_jar_base_dir = os.path.join(pants_workdir, 'coursier', 'cache') @@ -424,7 +424,7 @@ def _load_json_result(self, conf, compile_classpath, coursier_cache_path, invali :param compile_classpath: `ClasspathProducts` that will be modified :param coursier_cache_path: cache location that is managed by coursier :param invalidation_check: InvalidationCheck - :param pants_jar_path_base: location under pants workdir that contains all the symlinks to coursier cache + :param pants_jar_path_base: location under pants workdir that contains all the hardlinks to coursier cache :param result: result dict converted from the json produced by one coursier run :return: n/a """ @@ -592,7 +592,7 @@ def _map_coord_to_resolved_jars(cls, result, coursier_cache_path, pants_jar_path :param result: coursier json output :param coursier_cache_path: coursier cache location - :param pants_jar_path_base: location under pants workdir to store the symlink to the coursier cache + :param pants_jar_path_base: location under pants workdir to store the hardlink to the coursier cache :return: a map from maven coordinate to a resolved jar. """ @@ -632,7 +632,7 @@ def _get_path_to_jar(cls, coursier_cache_path, pants_jar_path_base, jar_path): Create the path to the jar that will live in .pants.d :param coursier_cache_path: coursier cache location - :param pants_jar_path_base: location under pants workdir to store the symlink to the coursier cache + :param pants_jar_path_base: location under pants workdir to store the hardlink to the coursier cache :param jar_path: path of the jar :return: """ From 661a7f2b5c0a2befdc28cdfa7b9e7d09294bc04c Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 16:04:02 -0700 Subject: [PATCH 04/14] change test --- .../backend/jvm/tasks/test_ivy_utils.py | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/python/pants_test/backend/jvm/tasks/test_ivy_utils.py b/tests/python/pants_test/backend/jvm/tasks/test_ivy_utils.py index d9987ca5553..9a113db30e2 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_ivy_utils.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_ivy_utils.py @@ -332,10 +332,10 @@ def find_single(self, elem, xpath): def assert_attributes(self, elem, **kwargs): self.assertEqual(dict(**kwargs), dict(elem.attrib)) - def test_construct_and_load_symlink_map(self): + def test_construct_and_load_hardlink_map(self): self.maxDiff = None with temporary_dir() as mock_cache_dir: - with temporary_dir() as symlink_dir: + with temporary_dir() as hardlink_dir: with temporary_dir() as classpath_dir: input_path = os.path.join(classpath_dir, 'inpath') output_path = os.path.join(classpath_dir, 'classpath') @@ -345,21 +345,21 @@ def test_construct_and_load_symlink_map(self): with open(input_path, 'w') as inpath: inpath.write(foo_path) - result_classpath, result_map = IvyUtils.construct_and_load_symlink_map(symlink_dir, + result_classpath, result_map = IvyUtils.construct_and_load_hardlink_map(hardlink_dir, mock_cache_dir, input_path, output_path) - symlink_foo_path = os.path.join(symlink_dir, 'foo.jar') - self.assertEquals([symlink_foo_path], result_classpath) + hardlink_foo_path = os.path.join(hardlink_dir, 'foo.jar') + self.assertEquals([hardlink_foo_path], result_classpath) self.assertEquals( { - os.path.realpath(foo_path): symlink_foo_path + os.path.realpath(foo_path): hardlink_foo_path }, result_map) with open(output_path, 'r') as outpath: - self.assertEquals(symlink_foo_path, outpath.readline()) - self.assertTrue(os.path.islink(symlink_foo_path)) - self.assertTrue(os.path.exists(symlink_foo_path)) + self.assertEquals(hardlink_foo_path, outpath.readline()) + self.assertFalse(os.path.islink(hardlink_foo_path)) + self.assertTrue(os.path.exists(hardlink_foo_path)) # Now add an additional path to the existing map bar_path = os.path.join(mock_cache_dir, 'bar.jar') @@ -367,35 +367,35 @@ def test_construct_and_load_symlink_map(self): bar.write("test jar contents2") with open(input_path, 'w') as inpath: inpath.write(os.pathsep.join([foo_path, bar_path])) - result_classpath, result_map = IvyUtils.construct_and_load_symlink_map(symlink_dir, + result_classpath, result_map = IvyUtils.construct_and_load_hardlink_map(hardlink_dir, mock_cache_dir, input_path, output_path) - symlink_bar_path = os.path.join(symlink_dir, 'bar.jar') + hardlink_bar_path = os.path.join(hardlink_dir, 'bar.jar') self.assertEquals( { - os.path.realpath(foo_path): symlink_foo_path, - os.path.realpath(bar_path): symlink_bar_path, + os.path.realpath(foo_path): hardlink_foo_path, + os.path.realpath(bar_path): hardlink_bar_path, }, result_map) - self.assertEquals([symlink_foo_path, symlink_bar_path], result_classpath) + self.assertEquals([hardlink_foo_path, hardlink_bar_path], result_classpath) with open(output_path, 'r') as outpath: - self.assertEquals(symlink_foo_path + os.pathsep + symlink_bar_path, outpath.readline()) - self.assertTrue(os.path.islink(symlink_foo_path)) - self.assertTrue(os.path.exists(symlink_foo_path)) - self.assertTrue(os.path.islink(symlink_bar_path)) - self.assertTrue(os.path.exists(symlink_bar_path)) + self.assertEquals(hardlink_foo_path + os.pathsep + hardlink_bar_path, outpath.readline()) + self.assertFalse(os.path.islink(hardlink_foo_path)) + self.assertTrue(os.path.exists(hardlink_foo_path)) + self.assertFalse(os.path.islink(hardlink_bar_path)) + self.assertTrue(os.path.exists(hardlink_bar_path)) # Reverse the ordering and make sure order is preserved in the output path with open(input_path, 'w') as inpath: inpath.write(os.pathsep.join([bar_path, foo_path])) - IvyUtils.construct_and_load_symlink_map(symlink_dir, + IvyUtils.construct_and_load_hardlink_map(hardlink_dir, mock_cache_dir, input_path, output_path) with open(output_path, 'r') as outpath: - self.assertEquals(symlink_bar_path + os.pathsep + symlink_foo_path, outpath.readline()) + self.assertEquals(hardlink_bar_path + os.pathsep + hardlink_foo_path, outpath.readline()) def test_missing_ivy_report(self): self.set_options_for_scope(IvySubsystem.options_scope, @@ -587,7 +587,7 @@ def test_ivy_resolve_report_copying_fails_when_report_is_missing(self): class IvyUtilsResolveStepsTest(TestBase): - def test_if_not_all_symlinked_files_exist_after_successful_resolve_fail(self): + def test_if_not_all_hardlinked_files_exist_after_successful_resolve_fail(self): resolve = IvyResolveStep( ['default'], 'hash_name', @@ -604,7 +604,7 @@ def test_if_not_all_symlinked_files_exist_after_successful_resolve_fail(self): with self.assertRaises(IvyResolveMappingError): resolve.exec_and_load(None, None, [], None, None, None) - def test_if_not_all_symlinked_files_exist_after_successful_fetch_fail(self): + def test_if_not_all_hardlinked_files_exist_after_successful_fetch_fail(self): fetch = IvyFetchStep(['default'], 'hash_name', False, @@ -620,9 +620,9 @@ def test_if_not_all_symlinked_files_exist_after_successful_fetch_fail(self): with self.assertRaises(IvyResolveMappingError): fetch.exec_and_load(None, None, [], None, None, None) - def test_missing_symlinked_jar_in_candidates(self): - empty_symlink_map = {} - result = IvyResolveResult(['non-existent-file-location'], empty_symlink_map, 'hash-name', + def test_missing_hardlinked_jar_in_candidates(self): + empty_hardlink_map = {} + result = IvyResolveResult(['non-existent-file-location'], empty_hardlink_map, 'hash-name', {'default': self.ivy_report_path('ivy_utils_resources/report_with_diamond.xml') }) From 18c777c02c00f4f02dc72622f8416ab4b3982176 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 20:30:49 -0700 Subject: [PATCH 05/14] use safe_copy --- src/python/pants/backend/jvm/ivy_utils.py | 8 ++------ src/python/pants/backend/jvm/tasks/coursier_resolve.py | 3 ++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/python/pants/backend/jvm/ivy_utils.py b/src/python/pants/backend/jvm/ivy_utils.py index f660f7af2c3..31d721b5e23 100644 --- a/src/python/pants/backend/jvm/ivy_utils.py +++ b/src/python/pants/backend/jvm/ivy_utils.py @@ -17,6 +17,7 @@ from functools import total_ordering import six +from pex.common import safe_copy from twitter.common.collections import OrderedSet from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, @@ -858,12 +859,7 @@ def _hardlink_cachepath(cls, ivy_repository_cache_dir, inpath, hardlink_dir, out # Skip paths that aren't going to be hardlinked. continue safe_mkdir(os.path.dirname(hardlink)) - try: - os.link(path, hardlink) - except OSError as e: - # We don't delete and recreate the hardlink, as this may break concurrently executing code. - if e.errno != errno.EEXIST: - raise + safe_copy(path, hardlink) # (re)create the classpath with all of the paths with safe_open(outpath, 'w') as outfile: diff --git a/src/python/pants/backend/jvm/tasks/coursier_resolve.py b/src/python/pants/backend/jvm/tasks/coursier_resolve.py index e8c91dd0e8a..a2c23909d47 100644 --- a/src/python/pants/backend/jvm/tasks/coursier_resolve.py +++ b/src/python/pants/backend/jvm/tasks/coursier_resolve.py @@ -12,6 +12,7 @@ from collections import defaultdict from future.moves.urllib import parse +from pex.common import safe_copy from pants.backend.jvm.ivy_utils import IvyUtils from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, @@ -613,7 +614,7 @@ def _map_coord_to_resolved_jars(cls, result, coursier_cache_path, pants_jar_path if not os.path.exists(pants_path): safe_mkdir(os.path.dirname(pants_path)) - os.link(jar_path, pants_path) + safe_copy(jar_path, pants_path) coord = cls.to_m2_coord(coord) resolved_jar = ResolvedJar(coord, From 7ecd27f85bf9c863becf1095fb41d7e6e573d250 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 21:47:33 -0700 Subject: [PATCH 06/14] fixed test_when_invalid_coursier_cache_should_trigger_resolve --- .../jvm/tasks/test_coursier_resolve.py | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py index 40b2e67f785..d94b6c7973c 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py @@ -8,8 +8,9 @@ from builtins import str from contextlib import contextmanager +from future.backports.test.support import rmdir from mock import MagicMock -from psutil.tests import safe_remove +from psutil.tests import safe_remove, safe_rmdir from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, PinnedJarArtifactSet) @@ -25,6 +26,7 @@ from pants.java.jar.jar_dependency import JarDependency from pants.task.task import Task from pants.util.contextutil import temporary_dir, temporary_file_path +from pants.util.dirutil import safe_rmtree from pants_test.jvm.jvm_tool_task_test_base import JvmToolTaskTestBase from pants_test.subsystem.subsystem_util import init_subsystem from pants_test.task_test_base import TaskTestBase @@ -219,6 +221,7 @@ def test_second_noop_does_not_invoke_coursier(self): def test_when_invalid_artifact_symlink_should_trigger_resolve(self): jar_lib = self._make_junit_target() with self._temp_workdir(): + self.set_options_for_scope('coursier', cache_dir='coursier') context = self.context(target_roots=[jar_lib]) task = self.execute(context) @@ -247,34 +250,40 @@ def test_when_invalid_artifact_symlink_should_trigger_resolve(self): def test_when_invalid_coursier_cache_should_trigger_resolve(self): jar_lib = self._make_junit_target() with self._temp_workdir(): + with temporary_dir() as couriser_cache_dir: + self.set_options_for_scope('coursier', cache_dir=couriser_cache_dir) - context = self.context(target_roots=[jar_lib]) - task = self.execute(context) - compile_classpath = context.products.get_data('compile_classpath') + context = self.context(target_roots=[jar_lib]) + task = self.execute(context) + compile_classpath = context.products.get_data('compile_classpath') - jar_cp = compile_classpath.get_for_target(jar_lib) + jar_cp = compile_classpath.get_for_target(jar_lib) - # └─ junit:junit:4.12 - # └─ org.hamcrest:hamcrest-core:1.3 - self.assertEquals(2, len(jar_cp)) + # └─ junit:junit:4.12 + # └─ org.hamcrest:hamcrest-core:1.3 + self.assertEquals(2, len(jar_cp)) - # Take a sample jar path, remove it, then call the task again, it should invoke coursier again - conf, path = jar_cp[0] + # Take a sample jar path, remove it, then call the task again, it should invoke coursier again + conf, path = jar_cp[0] - self.assertTrue(os.path.islink(path)) + self.assertFalse(os.path.islink(path)) - safe_remove(os.path.realpath(path)) + # Remove the hard link under .pants.d/ + safe_remove(path) - task.runjava = MagicMock() + # Remove coursier's cache + safe_rmtree(couriser_cache_dir) - # Ignore any error because runjava may fail due to undefined behavior - try: - task.execute() - except TaskError: - pass + task.runjava = MagicMock() - task.runjava.assert_called() + # Ignore any error because runjava may fail due to undefined behavior + try: + task.execute() + except TaskError: + pass + + task.runjava.assert_called() def test_resolve_jarless_pom(self): jar = JarDependency('org.apache.commons', 'commons-weaver-privilizer-parent', '1.3') From 1426c797595ce8435ccb1127db1ac8739b96938b Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 21:48:28 -0700 Subject: [PATCH 07/14] rename test --- .../jvm/tasks/test_coursier_resolve.py | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py index d94b6c7973c..82c8bdae9d2 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py @@ -218,36 +218,7 @@ def test_second_noop_does_not_invoke_coursier(self): task.execute() task.runjava.assert_not_called() - def test_when_invalid_artifact_symlink_should_trigger_resolve(self): - jar_lib = self._make_junit_target() - with self._temp_workdir(): - self.set_options_for_scope('coursier', cache_dir='coursier') - - context = self.context(target_roots=[jar_lib]) - task = self.execute(context) - compile_classpath = context.products.get_data('compile_classpath') - - jar_cp = compile_classpath.get_for_target(jar_lib) - - # └─ junit:junit:4.12 - # └─ org.hamcrest:hamcrest-core:1.3 - self.assertEquals(2, len(jar_cp)) - - # Take a sample jar path, remove it, then call the task again, it should invoke coursier again - conf, path = jar_cp[0] - safe_remove(os.path.realpath(path)) - - task.runjava = MagicMock() - - # Ignore any error because runjava may fail due to undefined behavior - try: - task.execute() - except TaskError: - pass - - task.runjava.assert_called() - - def test_when_invalid_coursier_cache_should_trigger_resolve(self): + def test_when_invalid_hardlink_and_coursier_cache_should_trigger_resolve(self): jar_lib = self._make_junit_target() with self._temp_workdir(): with temporary_dir() as couriser_cache_dir: From 7406b3956388c8726b8d4364b5eba3a2454930ad Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 22:11:04 -0700 Subject: [PATCH 08/14] --amend --- src/python/pants/backend/jvm/ivy_utils.py | 1 - .../pants_test/backend/jvm/tasks/test_coursier_resolve.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/python/pants/backend/jvm/ivy_utils.py b/src/python/pants/backend/jvm/ivy_utils.py index 31d721b5e23..054229dcecc 100644 --- a/src/python/pants/backend/jvm/ivy_utils.py +++ b/src/python/pants/backend/jvm/ivy_utils.py @@ -4,7 +4,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals -import errno import json import logging import os diff --git a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py index 82c8bdae9d2..72b5c9b47f6 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py @@ -8,9 +8,8 @@ from builtins import str from contextlib import contextmanager -from future.backports.test.support import rmdir from mock import MagicMock -from psutil.tests import safe_remove, safe_rmdir +from psutil.tests import safe_remove from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, PinnedJarArtifactSet) From bb477a8b095e793e3cf935b17a5e62f2f4f256ea Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 22:53:57 -0700 Subject: [PATCH 09/14] fix ivy test --- tests/python/pants_test/backend/jvm/tasks/test_ivy_imports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/pants_test/backend/jvm/tasks/test_ivy_imports.py b/tests/python/pants_test/backend/jvm/tasks/test_ivy_imports.py index 0c21ef084ab..77f0545d46a 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_ivy_imports.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_ivy_imports.py @@ -65,6 +65,6 @@ def verify_product_mapping(self, jar_import_products, target, expected_coordinat jars_by_coordinate = dict(jar_import_products.imports(target)) self.assertIn(expected_coordinate, jars_by_coordinate) jar_filename = jars_by_coordinate[expected_coordinate] - self.assertTrue(os.path.islink(jar_filename)) + self.assertFalse(os.path.islink(jar_filename)) # Make sure there is a real .jar there self.assertTrue(zipfile.is_zipfile(jar_filename)) From 63a9284bce3ee01acebdda2293769005237d327e Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 23:00:29 -0700 Subject: [PATCH 10/14] address comments --- src/python/pants/backend/jvm/ivy_utils.py | 5 ++- .../backend/jvm/tasks/coursier_resolve.py | 4 +-- src/python/pants/util/fileutil.py | 33 +++++++++++++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/python/pants/backend/jvm/ivy_utils.py b/src/python/pants/backend/jvm/ivy_utils.py index 054229dcecc..7e8a98e5e06 100644 --- a/src/python/pants/backend/jvm/ivy_utils.py +++ b/src/python/pants/backend/jvm/ivy_utils.py @@ -16,7 +16,6 @@ from functools import total_ordering import six -from pex.common import safe_copy from twitter.common.collections import OrderedSet from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, @@ -32,7 +31,7 @@ from pants.java.jar.jar_dependency_utils import M2Coordinate, ResolvedJar from pants.java.util import execute_runner from pants.util.dirutil import safe_concurrent_creation, safe_mkdir, safe_open -from pants.util.fileutil import atomic_copy +from pants.util.fileutil import atomic_copy, safe_hardlink_or_copy class IvyResolutionStep(object): @@ -858,7 +857,7 @@ def _hardlink_cachepath(cls, ivy_repository_cache_dir, inpath, hardlink_dir, out # Skip paths that aren't going to be hardlinked. continue safe_mkdir(os.path.dirname(hardlink)) - safe_copy(path, hardlink) + safe_hardlink_or_copy(path, hardlink) # (re)create the classpath with all of the paths with safe_open(outpath, 'w') as outfile: diff --git a/src/python/pants/backend/jvm/tasks/coursier_resolve.py b/src/python/pants/backend/jvm/tasks/coursier_resolve.py index a2c23909d47..34943780376 100644 --- a/src/python/pants/backend/jvm/tasks/coursier_resolve.py +++ b/src/python/pants/backend/jvm/tasks/coursier_resolve.py @@ -12,7 +12,6 @@ from collections import defaultdict from future.moves.urllib import parse -from pex.common import safe_copy from pants.backend.jvm.ivy_utils import IvyUtils from pants.backend.jvm.subsystems.jar_dependency_management import (JarDependencyManagement, @@ -30,6 +29,7 @@ from pants.java.jar.jar_dependency_utils import M2Coordinate, ResolvedJar from pants.util.contextutil import temporary_file from pants.util.dirutil import safe_mkdir +from pants.util.fileutil import safe_hardlink_or_copy class CoursierResultNotFound(Exception): @@ -614,7 +614,7 @@ def _map_coord_to_resolved_jars(cls, result, coursier_cache_path, pants_jar_path if not os.path.exists(pants_path): safe_mkdir(os.path.dirname(pants_path)) - safe_copy(jar_path, pants_path) + safe_hardlink_or_copy(jar_path, pants_path) coord = cls.to_m2_coord(coord) resolved_jar = ResolvedJar(coord, diff --git a/src/python/pants/util/fileutil.py b/src/python/pants/util/fileutil.py index 81df4531df2..ea9ab5b0772 100644 --- a/src/python/pants/util/fileutil.py +++ b/src/python/pants/util/fileutil.py @@ -8,6 +8,9 @@ import random import shutil +import errno +from uuid import uuid4 + from pants.util.contextutil import temporary_file @@ -37,3 +40,33 @@ def line_count(filename): 'nosize': lambda srcs: 0, 'random': lambda srcs: random.randint(0, 10000), } + + +# Copied from pex for JVM related code not to depend on pex +# https://github.com/pantsbuild/pex/blob/d1f946c8b46f8c5123b01bcd376c87bb67c8a884/pex/common.py#L25-L50 +def safe_hardlink_or_copy(source, dest, overwrite=False): + def do_copy(): + temp_dest = dest + uuid4().hex + shutil.copyfile(source, temp_dest) + os.rename(temp_dest, dest) + + # If the platform supports hard-linking, use that and fall back to copying. + # Windows does not support hard-linking. + if hasattr(os, 'link'): + try: + os.link(source, dest) + except OSError as e: + if e.errno == errno.EEXIST: + # File already exists. If overwrite=True, write otherwise skip. + if overwrite: + do_copy() + elif e.errno == errno.EXDEV: + # Hard link across devices, fall back on copying + do_copy() + else: + raise + elif os.path.exists(dest): + if overwrite: + do_copy() + else: + do_copy() From 99b74ada582324dbe9036cb3b66fb2010fc06506 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Thu, 26 Jul 2018 23:16:09 -0700 Subject: [PATCH 11/14] sorting --- src/python/pants/util/fileutil.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/python/pants/util/fileutil.py b/src/python/pants/util/fileutil.py index ea9ab5b0772..cc207267346 100644 --- a/src/python/pants/util/fileutil.py +++ b/src/python/pants/util/fileutil.py @@ -4,11 +4,10 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import errno import os import random import shutil - -import errno from uuid import uuid4 from pants.util.contextutil import temporary_file From 997b0deb797857f94d1cfefffcb126014cbf3a8c Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Fri, 27 Jul 2018 09:47:21 -0700 Subject: [PATCH 12/14] minor fix --- src/python/pants/backend/jvm/ivy_utils.py | 2 +- .../pants_test/backend/jvm/tasks/test_coursier_resolve.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/python/pants/backend/jvm/ivy_utils.py b/src/python/pants/backend/jvm/ivy_utils.py index 7e8a98e5e06..173d1c814f3 100644 --- a/src/python/pants/backend/jvm/ivy_utils.py +++ b/src/python/pants/backend/jvm/ivy_utils.py @@ -794,7 +794,7 @@ def _exec_ivy(cls, ivy, confs, ivyxml, args, jvm_options, executor, ivy_args.extend(args) ivy_jvm_options = list(jvm_options) - # Disable cache in File.getCanonicalPath(), makes Ivy work with -hardlink option properly on ng. + # Disable cache in File.getCanonicalPath(), makes Ivy work with -symlink option properly on ng. ivy_jvm_options.append('-Dsun.io.useCanonCaches=false') runner = ivy.runner(jvm_options=ivy_jvm_options, args=ivy_args, executor=executor) diff --git a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py index 72b5c9b47f6..a5a400747d9 100644 --- a/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py +++ b/tests/python/pants_test/backend/jvm/tasks/test_coursier_resolve.py @@ -237,8 +237,6 @@ def test_when_invalid_hardlink_and_coursier_cache_should_trigger_resolve(self): # Take a sample jar path, remove it, then call the task again, it should invoke coursier again conf, path = jar_cp[0] - self.assertFalse(os.path.islink(path)) - # Remove the hard link under .pants.d/ safe_remove(path) From d53afe4e1afee97f3a3ab3cfdc4ca445efd4106f Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Sun, 29 Jul 2018 21:57:08 -0700 Subject: [PATCH 13/14] add tests --- tests/python/pants_test/util/BUILD | 1 + tests/python/pants_test/util/test_fileutil.py | 60 ++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/tests/python/pants_test/util/BUILD b/tests/python/pants_test/util/BUILD index 9243b08fe04..e966b27ee3e 100644 --- a/tests/python/pants_test/util/BUILD +++ b/tests/python/pants_test/util/BUILD @@ -58,6 +58,7 @@ python_tests( name = 'fileutil', sources = ['test_fileutil.py'], dependencies = [ + '3rdparty/python:mock', 'src/python/pants/util:contextutil', 'src/python/pants/util:fileutil', ] diff --git a/tests/python/pants_test/util/test_fileutil.py b/tests/python/pants_test/util/test_fileutil.py index 1f854386d6f..2e3ff29c94b 100644 --- a/tests/python/pants_test/util/test_fileutil.py +++ b/tests/python/pants_test/util/test_fileutil.py @@ -4,12 +4,16 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import errno import os import random +import stat import unittest -from pants.util.contextutil import temporary_file, temporary_file_path -from pants.util.fileutil import atomic_copy, create_size_estimators +from mock import mock + +from pants.util.contextutil import temporary_dir, temporary_file, temporary_file_path +from pants.util.fileutil import atomic_copy, create_size_estimators, safe_hardlink_or_copy class FileutilTest(unittest.TestCase): @@ -37,3 +41,55 @@ def test_random_estimator(self): random.seed(seedValue) with temporary_file_path() as src: self.assertEqual(create_size_estimators()['random']([src]), rand) + + @classmethod + def _is_hard_link(cls, filename, other): + s1 = os.stat(filename) + s2 = os.stat(other) + return (s1[stat.ST_INO], s1[stat.ST_DEV]) == \ + (s2[stat.ST_INO], s2[stat.ST_DEV]) + + def test_hardlink_or_copy(self): + content = b'hello' + + with temporary_dir() as src_dir, temporary_file() as dst: + dst.write(content) + dst.close() + + src_path = os.path.join(src_dir, 'src') + safe_hardlink_or_copy(dst.name, src_path) + + with open(src_path, 'rb') as f: + self.assertEqual(content, f.read()) + + # Make sure it's not symlink + self.assertFalse(os.path.islink(dst.name)) + + # Make sure they point to the same node + self.assertTrue(self._is_hard_link(dst.name, src_path)) + + def test_hardlink_or_copy_cross_device_should_copy(self): + content = b'hello' + + # Mock os.link to throw an CROSS-DEVICE error + with mock.patch('os.link') as os_mock: + err = OSError() + err.errno = errno.EXDEV + os_mock.side_effect = err + + with temporary_dir() as src_dir, temporary_file() as dst: + dst.write(content) + dst.close() + + src_path = os.path.join(src_dir, 'src') + + safe_hardlink_or_copy(dst.name, src_path) + + with open(src_path, 'rb') as f: + self.assertEqual(content, f.read()) + + # Make sure it's not symlink + self.assertFalse(os.path.islink(dst.name)) + + # Make sure they are separate copies + self.assertFalse(self._is_hard_link(dst.name, src_path)) From a1b17a1b431735855620c7378b10c13ec664eba7 Mon Sep 17 00:00:00 2001 From: Yi Cheng Date: Sun, 29 Jul 2018 21:58:52 -0700 Subject: [PATCH 14/14] drop windows --- src/python/pants/util/fileutil.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/src/python/pants/util/fileutil.py b/src/python/pants/util/fileutil.py index cc207267346..eaa92469cb9 100644 --- a/src/python/pants/util/fileutil.py +++ b/src/python/pants/util/fileutil.py @@ -41,31 +41,21 @@ def line_count(filename): } -# Copied from pex for JVM related code not to depend on pex -# https://github.com/pantsbuild/pex/blob/d1f946c8b46f8c5123b01bcd376c87bb67c8a884/pex/common.py#L25-L50 def safe_hardlink_or_copy(source, dest, overwrite=False): def do_copy(): temp_dest = dest + uuid4().hex shutil.copyfile(source, temp_dest) os.rename(temp_dest, dest) - # If the platform supports hard-linking, use that and fall back to copying. - # Windows does not support hard-linking. - if hasattr(os, 'link'): - try: - os.link(source, dest) - except OSError as e: - if e.errno == errno.EEXIST: - # File already exists. If overwrite=True, write otherwise skip. - if overwrite: - do_copy() - elif e.errno == errno.EXDEV: - # Hard link across devices, fall back on copying + try: + os.link(source, dest) + except OSError as e: + if e.errno == errno.EEXIST: + # File already exists. If overwrite=True, write otherwise skip. + if overwrite: do_copy() - else: - raise - elif os.path.exists(dest): - if overwrite: + elif e.errno == errno.EXDEV: + # Hard link across devices, fall back on copying do_copy() - else: - do_copy() + else: + raise