Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate and maybe prune interpreter cache run over run #7225

Merged
merged 5 commits into from Feb 11, 2019
Merged
Diff settings

Always

Just for now

@@ -115,18 +115,19 @@ def select_interpreter_for_targets(self, targets):
# Return the lowest compatible interpreter.
return min(allowed_interpreters)

def _interpreter_from_path(self, path, filters=()):
try:
executable = os.readlink(os.path.join(path, 'python'))
if not os.path.exists(executable):
if os.path.dirname(path) == self._cache_dir:
def _interpreter_from_relpath(self, path, filters=()):
path = os.path.join(self._cache_dir, path)
if os.path.isdir(path):

This comment has been minimized.

Copy link
@jsirois

jsirois Feb 8, 2019

Member

You can ditch this check, the os.readlink below will already raise OSError if any part of the path DNE.

try:
executable = os.readlink(os.path.join(path, 'python'))
if not os.path.exists(executable):
self._purge_interpreter(path)
return None
except OSError:
return None
except OSError:
return None
interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False)
if self._matches(interpreter, filters=filters):
return self._resolve(interpreter)
interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False)
if self._matches(interpreter, filters=filters):
return self._resolve(interpreter)
return None

def _setup_interpreter(self, interpreter, cache_target_path):
@@ -138,22 +139,20 @@ def _setup_interpreter(self, interpreter, cache_target_path):
def _setup_cached(self, filters=()):
"""Find all currently-cached interpreters."""
for interpreter_dir in os.listdir(self._cache_dir):
path = os.path.join(self._cache_dir, interpreter_dir)
if os.path.isdir(path):
pi = self._interpreter_from_path(path, filters=filters)
if pi:
logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity)))
yield pi
pi = self._interpreter_from_relpath(interpreter_dir, filters=filters)
if pi:
logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity)))
yield pi

def _setup_paths(self, paths, filters=()):
"""Find interpreters under paths, and cache them."""
for interpreter in self._matching(PythonInterpreter.all(paths), filters=filters):
identity_str = str(interpreter.identity)
cache_path = os.path.join(self._cache_dir, identity_str)
pi = self._interpreter_from_path(cache_path, filters=filters)
pi = self._interpreter_from_relpath(identity_str, filters=filters)
if pi is None:
cache_path = os.path.join(self._cache_dir, identity_str)
self._setup_interpreter(interpreter, cache_path)

This comment has been minimized.

Copy link
@jsirois

jsirois Feb 8, 2019

Member

It would be a bit more symmetric on the whole and less error-prone here to make _setup_interpreter take a relpath (identity_str) and have it do the final cache path construction.

pi = self._interpreter_from_path(cache_path, filters=filters)
pi = self._interpreter_from_relpath(identity_str, filters=filters)
if pi:
yield pi

@@ -99,10 +99,8 @@ def _interpreter_path_file(self, target_set_id):
return os.path.join(self.workdir, target_set_id, 'interpreter.info')

def _detect_and_purge_invalid_interpreter(self, interpreter_path_file):
with open(interpreter_path_file, 'r') as infile:
lines = infile.readlines()
binary = lines[0].strip()
if not os.path.exists(binary):
interpreter = self._get_interpreter(interpreter_path_file)
if not os.path.exists(interpreter.binary):
self.context.log.info('Stale interpreter reference detected: {}, removing reference and '
'selecting a new interpreter.'.format(binary))
os.remove(interpreter_path_file)
@@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import shutil
from builtins import str
from contextlib import contextmanager

@@ -16,6 +17,7 @@
from pants.backend.python.interpreter_cache import PythonInterpreter, PythonInterpreterCache
from pants.subsystem.subsystem import Subsystem
from pants.util.contextutil import temporary_dir
from pants.util.dirutil import chmod_plus_x, safe_mkdir
from pants_test.backend.python.interpreter_selection_utils import (PY_27, PY_36,
python_interpreter_path,
skip_unless_python27_and_python36)
@@ -171,3 +173,32 @@ def test_setup_cached_warm(self):
def test_setup_cached_cold(self):
with self._setup_cache() as (cache, _):
self.assertEqual([], list(cache._setup_cached()))

def test_interpreter_from_relpath_purges_stale_interpreter(self):
"""
Simulates a stale interpreter cache and tests that _interpreter_from_relpath
properly detects it and removes the stale dist directory.
See https://github.com/pantsbuild/pants/issues/3416 for more info.
"""
with temporary_dir() as temp_dir:
# Setup a interpreter distribution that we can safely mutate.
test_interpreter_binary = os.path.join(temp_dir, 'python2.7')
src = '/usr/bin/python2.7'

This comment has been minimized.

Copy link
@jsirois

jsirois Feb 8, 2019

Member

It realy is more robust to os.path.realpath(sys.executable) - you know that oath must exist and points to the actual binary file and not a symlink.

This comment has been minimized.

Copy link
@CMLivingston

CMLivingston Feb 8, 2019

Author Contributor

I tried to use the output of which python2.7 like above tests with py27 path, however only copying the binary for that path gave me issues with linking against the python runtime. I'll give this a shot.

This comment has been minimized.

Copy link
@CMLivingston

CMLivingston Feb 8, 2019

Author Contributor

I am experiencing a different issue with sys.exe:

[tw-mbp-clivingston pants (clivingston/PY-135)]$ /var/folders/7n/x4k4tqh94k9gwm61g046m2340000gn/T/tmp9p8l3H/python2.7
Could not find platform independent libraries <prefix>
Could not find platform dependent libraries <exec_prefix>
Consider setting $PYTHONHOME to <prefix>[:<exec_prefix>]
ImportError: No module named site

(due to the venv I think)

I think that /usr/bin/python2.7 gets me PATH coherence for free which is why I did that. I'm happy to play around with PYTHONPATH or temp PATH setting.

shutil.copyfile(src, test_interpreter_binary)

This comment has been minimized.

Copy link
@jsirois
chmod_plus_x(test_interpreter_binary)

with self._setup_cache(constraints=[]) as (cache, path):
# Setup cache for test interpreter distribution.
identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity)
cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str)
safe_mkdir(cached_interpreter_dir)
cached_symlink = os.path.join(cached_interpreter_dir, 'python')
os.symlink(test_interpreter_binary, cached_symlink)

# Remove the test interpreter binary from filesystem and assert that the cache is purged.
os.remove(test_interpreter_binary)
self.assertEqual(os.path.exists(test_interpreter_binary), False)
self.assertEqual(os.path.exists(cached_interpreter_dir), True)
cache._interpreter_from_relpath(identity_str)
self.assertEqual(os.path.exists(cached_interpreter_dir), False)
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.