/
pex.py
615 lines (506 loc) · 22.1 KB
/
pex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import absolute_import, print_function
import os
import sys
from distutils import sysconfig
from site import USER_SITE
import pex.third_party.pkg_resources as pkg_resources
from pex import third_party
from pex.bootstrap import Bootstrap
from pex.common import die
from pex.environment import PEXEnvironment
from pex.executor import Executor
from pex.finders import get_entry_point_from_console_script, get_script_from_distributions
from pex.interpreter import PythonInterpreter
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
from pex.third_party.pkg_resources import EntryPoint, WorkingSet, find_distributions
from pex.tracer import TRACER
from pex.util import iter_pth_paths, named_temporary_file
from pex.variables import ENV
class DevNull(object):
def __init__(self):
pass
def write(self, *args, **kw):
pass
def flush(self):
pass
class PEX(object): # noqa: T000
"""PEX, n. A self-contained python environment."""
class Error(Exception): pass
class NotFound(Error): pass
class InvalidEntryPoint(Error): pass
@classmethod
def clean_environment(cls):
try:
del os.environ['MACOSX_DEPLOYMENT_TARGET']
except KeyError:
pass
# Cannot change dictionary size during __iter__
filter_keys = [key for key in os.environ if key.startswith('PEX_')]
for key in filter_keys:
del os.environ[key]
def __init__(self, pex=sys.argv[0], interpreter=None, env=ENV, verify_entry_point=False):
self._pex = pex
self._interpreter = interpreter or PythonInterpreter.get()
self._pex_info = PexInfo.from_pex(self._pex)
self._pex_info_overrides = PexInfo.from_env(env=env)
self._vars = env
self._envs = []
self._working_set = None
if verify_entry_point:
self._do_entry_point_verification()
def _activate(self):
if not self._working_set:
working_set = WorkingSet([])
# set up the local .pex environment
pex_info = self._pex_info.copy()
pex_info.update(self._pex_info_overrides)
pex_info.merge_pex_path(self._vars.PEX_PATH)
self._envs.append(PEXEnvironment(self._pex, pex_info, interpreter=self._interpreter))
# N.B. by this point, `pex_info.pex_path` will contain a single pex path
# merged from pex_path in `PEX-INFO` and `PEX_PATH` set in the environment.
# `PEX_PATH` entries written into `PEX-INFO` take precedence over those set
# in the environment.
if pex_info.pex_path:
# set up other environments as specified in pex_path
for pex_path in filter(None, pex_info.pex_path.split(os.pathsep)):
pex_info = PexInfo.from_pex(pex_path)
pex_info.update(self._pex_info_overrides)
self._envs.append(PEXEnvironment(pex_path, pex_info, interpreter=self._interpreter))
# activate all of them
for env in self._envs:
for dist in env.activate():
working_set.add(dist)
# Ensure that pkg_resources is not imported until at least every pex environment
# (i.e. PEX_PATH) has been merged into the environment
PEXEnvironment.declare_namespace_packages(working_set)
self._working_set = working_set
return self._working_set
@classmethod
def _extras_paths(cls):
standard_lib = sysconfig.get_python_lib(standard_lib=True)
try:
makefile = sysconfig.parse_makefile(sysconfig.get_makefile_filename())
except (AttributeError, IOError):
# This is not available by default in PyPy's distutils.sysconfig or it simply is
# no longer available on the system (IOError ENOENT)
makefile = {}
extras_paths = filter(None, makefile.get('EXTRASPATH', '').split(':'))
for path in extras_paths:
yield os.path.join(standard_lib, path)
# Handle .pth injected paths as extras.
sitedirs = cls._get_site_packages()
for pth_path in cls._scan_pth_files(sitedirs):
TRACER.log('Found .pth file: %s' % pth_path, V=3)
for extras_path in iter_pth_paths(pth_path):
yield extras_path
@staticmethod
def _scan_pth_files(dir_paths):
"""Given an iterable of directory paths, yield paths to all .pth files within."""
for dir_path in dir_paths:
if not os.path.exists(dir_path):
continue
pth_filenames = (f for f in os.listdir(dir_path) if f.endswith('.pth'))
for pth_filename in pth_filenames:
yield os.path.join(dir_path, pth_filename)
@staticmethod
def _get_site_packages():
try:
from site import getsitepackages
return set(getsitepackages())
except ImportError:
return set()
@classmethod
def site_libs(cls):
site_libs = cls._get_site_packages()
site_libs.update([sysconfig.get_python_lib(plat_specific=False),
sysconfig.get_python_lib(plat_specific=True)])
# On windows getsitepackages() returns the python stdlib too.
if sys.prefix in site_libs:
site_libs.remove(sys.prefix)
real_site_libs = set(os.path.realpath(path) for path in site_libs)
return site_libs | real_site_libs
@classmethod
def _tainted_path(cls, path, site_libs):
paths = frozenset([path, os.path.realpath(path)])
return any(path.startswith(site_lib) for site_lib in site_libs for path in paths)
@classmethod
def minimum_sys_modules(cls, site_libs, modules=None):
"""Given a set of site-packages paths, return a "clean" sys.modules.
When importing site, modules within sys.modules have their __path__'s populated with
additional paths as defined by *-nspkg.pth in site-packages, or alternately by distribution
metadata such as *.dist-info/namespace_packages.txt. This can possibly cause namespace
packages to leak into imports despite being scrubbed from sys.path.
NOTE: This method mutates modules' __path__ attributes in sys.modules, so this is currently an
irreversible operation.
"""
modules = modules or sys.modules
new_modules = {}
for module_name, module in modules.items():
# Tainted modules should be dropped.
module_file = getattr(module, '__file__', None)
if module_file and cls._tainted_path(module_file, site_libs):
TRACER.log('Dropping %s' % (module_name,), V=3)
continue
# Untainted non-packages (builtin modules) need no further special handling and can stay.
if not hasattr(module, '__path__'):
new_modules[module_name] = module
continue
# Unexpected objects, e.g. PEP 420 namespace packages, should just be dropped.
if not isinstance(module.__path__, list):
TRACER.log('Dropping %s' % (module_name,), V=3)
continue
# Drop tainted package paths.
for k in reversed(range(len(module.__path__))):
if cls._tainted_path(module.__path__[k], site_libs):
TRACER.log('Scrubbing %s.__path__: %s' % (module_name, module.__path__[k]), V=3)
module.__path__.pop(k)
# The package still contains untainted path elements, so it can stay.
if module.__path__:
new_modules[module_name] = module
return new_modules
_PYTHONPATH = 'PYTHONPATH'
_STASHED_PYTHONPATH = '_PEX_PYTHONPATH'
@classmethod
def stash_pythonpath(cls):
pythonpath = os.environ.pop(cls._PYTHONPATH, None)
if pythonpath is not None:
os.environ[cls._STASHED_PYTHONPATH] = pythonpath
return pythonpath
@classmethod
def unstash_pythonpath(cls):
pythonpath = os.environ.pop(cls._STASHED_PYTHONPATH, None)
if pythonpath is not None:
os.environ[cls._PYTHONPATH] = pythonpath
return pythonpath
@classmethod
def minimum_sys_path(cls, site_libs, inherit_path):
scrub_paths = OrderedSet()
site_distributions = OrderedSet()
user_site_distributions = OrderedSet()
def all_distribution_paths(path):
locations = set(dist.location for dist in find_distributions(path))
return set([path]) | locations | set(os.path.realpath(path) for path in locations)
for path_element in sys.path:
if cls._tainted_path(path_element, site_libs):
TRACER.log('Tainted path element: %s' % path_element)
site_distributions.update(all_distribution_paths(path_element))
else:
TRACER.log('Not a tainted path element: %s' % path_element, V=2)
user_site_distributions.update(all_distribution_paths(USER_SITE))
if inherit_path == 'false':
scrub_paths = site_distributions | user_site_distributions
for path in user_site_distributions:
TRACER.log('Scrubbing from user site: %s' % path)
for path in site_distributions:
TRACER.log('Scrubbing from site-packages: %s' % path)
scrubbed_sys_path = list(OrderedSet(sys.path) - scrub_paths)
pythonpath = cls.unstash_pythonpath()
if pythonpath is not None:
original_pythonpath = pythonpath.split(os.pathsep)
user_pythonpath = list(OrderedSet(original_pythonpath) - set(sys.path))
if original_pythonpath == user_pythonpath:
TRACER.log('Unstashed PYTHONPATH of %s' % pythonpath, V=2)
else:
TRACER.log('Extracted user PYTHONPATH of %s from unstashed PYTHONPATH of %s'
% (os.pathsep.join(user_pythonpath), pythonpath), V=2)
if inherit_path == 'false':
for path in user_pythonpath:
TRACER.log('Scrubbing user PYTHONPATH element: %s' % path)
elif inherit_path == 'prefer':
TRACER.log('Prepending user PYTHONPATH: %s' % os.pathsep.join(user_pythonpath))
scrubbed_sys_path = user_pythonpath + scrubbed_sys_path
elif inherit_path == 'fallback':
TRACER.log('Appending user PYTHONPATH: %s' % os.pathsep.join(user_pythonpath))
scrubbed_sys_path = scrubbed_sys_path + user_pythonpath
scrub_from_importer_cache = filter(
lambda key: any(key.startswith(path) for path in scrub_paths),
sys.path_importer_cache.keys())
scrubbed_importer_cache = dict((key, value) for (key, value) in sys.path_importer_cache.items()
if key not in scrub_from_importer_cache)
for importer_cache_entry in scrub_from_importer_cache:
TRACER.log('Scrubbing from path_importer_cache: %s' % importer_cache_entry, V=2)
return scrubbed_sys_path, scrubbed_importer_cache
@classmethod
def minimum_sys(cls, inherit_path):
"""Return the minimum sys necessary to run this interpreter, a la python -S.
:returns: (sys.path, sys.path_importer_cache, sys.modules) tuple of a
bare python installation.
"""
site_libs = set(cls.site_libs())
for site_lib in site_libs:
TRACER.log('Found site-library: %s' % site_lib)
for extras_path in cls._extras_paths():
TRACER.log('Found site extra: %s' % extras_path)
site_libs.add(extras_path)
site_libs = set(os.path.normpath(path) for path in site_libs)
sys_path, sys_path_importer_cache = cls.minimum_sys_path(site_libs, inherit_path)
sys_modules = cls.minimum_sys_modules(site_libs)
return sys_path, sys_path_importer_cache, sys_modules
@classmethod
def patch_pkg_resources(cls, working_set):
"""Patch pkg_resources given a new working set."""
pkg_resources.working_set = working_set
pkg_resources.require = working_set.require
pkg_resources.iter_entry_points = working_set.iter_entry_points
pkg_resources.run_script = pkg_resources.run_main = working_set.run_script
pkg_resources.add_activation_listener = working_set.subscribe
# Thar be dragons -- when this function exits, the interpreter is potentially in a wonky state
# since the patches here (minimum_sys_modules for example) actually mutate global state.
def patch_sys(self, inherit_path):
"""Patch sys with all site scrubbed."""
def patch_dict(old_value, new_value):
old_value.clear()
old_value.update(new_value)
def patch_all(path, path_importer_cache, modules):
sys.path[:] = path
patch_dict(sys.path_importer_cache, path_importer_cache)
patch_dict(sys.modules, modules)
new_sys_path, new_sys_path_importer_cache, new_sys_modules = self.minimum_sys(inherit_path)
patch_all(new_sys_path, new_sys_path_importer_cache, new_sys_modules)
def _wrap_coverage(self, runner, *args):
if not self._vars.PEX_COVERAGE and self._vars.PEX_COVERAGE_FILENAME is None:
return runner(*args)
try:
import coverage
except ImportError:
die('Could not bootstrap coverage module, aborting.')
pex_coverage_filename = self._vars.PEX_COVERAGE_FILENAME
if pex_coverage_filename is not None:
cov = coverage.coverage(data_file=pex_coverage_filename)
else:
cov = coverage.coverage(data_suffix=True)
TRACER.log('Starting coverage.')
cov.start()
try:
return runner(*args)
finally:
TRACER.log('Stopping coverage')
cov.stop()
# TODO(wickman) Post-process coverage to elide $PEX_ROOT and make
# the report more useful/less noisy. #89
if pex_coverage_filename:
cov.save()
else:
cov.report(show_missing=False, ignore_errors=True, file=sys.stdout)
def _wrap_profiling(self, runner, *args):
if not self._vars.PEX_PROFILE and self._vars.PEX_PROFILE_FILENAME is None:
return runner(*args)
pex_profile_filename = self._vars.PEX_PROFILE_FILENAME
pex_profile_sort = self._vars.PEX_PROFILE_SORT
try:
import cProfile as profile
except ImportError:
import profile
profiler = profile.Profile()
try:
return profiler.runcall(runner, *args)
finally:
if pex_profile_filename is not None:
profiler.dump_stats(pex_profile_filename)
else:
profiler.print_stats(sort=pex_profile_sort)
def path(self):
"""Return the path this PEX was built at."""
return self._pex
def execute(self):
"""Execute the PEX.
This function makes assumptions that it is the last function called by
the interpreter.
"""
teardown_verbosity = self._vars.PEX_TEARDOWN_VERBOSE
try:
pex_inherit_path = self._vars.PEX_INHERIT_PATH
if pex_inherit_path == "false":
pex_inherit_path = self._pex_info.inherit_path
self.patch_sys(pex_inherit_path)
working_set = self._activate()
self.patch_pkg_resources(working_set)
exit_code = self._wrap_coverage(self._wrap_profiling, self._execute)
if exit_code:
sys.exit(exit_code)
except Exception:
# Allow the current sys.excepthook to handle this app exception before we tear things down in
# finally, then reraise so that the exit status is reflected correctly.
sys.excepthook(*sys.exc_info())
raise
except SystemExit as se:
# Print a SystemExit error message, avoiding a traceback in python3.
# This must happen here, as sys.stderr is about to be torn down
if not isinstance(se.code, int) and se.code is not None:
print(se.code, file=sys.stderr)
raise
finally:
# squash all exceptions on interpreter teardown -- the primary type here are
# atexit handlers failing to run because of things such as:
# http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit
if not teardown_verbosity:
sys.stderr.flush()
sys.stderr = DevNull()
sys.excepthook = lambda *a, **kw: None
def _execute(self):
force_interpreter = self._vars.PEX_INTERPRETER
self.clean_environment()
if force_interpreter:
TRACER.log('PEX_INTERPRETER specified, dropping into interpreter')
return self.execute_interpreter()
if self._pex_info_overrides.script and self._pex_info_overrides.entry_point:
die('Cannot specify both script and entry_point for a PEX!')
if self._pex_info.script and self._pex_info.entry_point:
die('Cannot specify both script and entry_point for a PEX!')
if self._pex_info_overrides.script:
return self.execute_script(self._pex_info_overrides.script)
elif self._pex_info_overrides.entry_point:
return self.execute_entry(self._pex_info_overrides.entry_point)
elif self._pex_info.script:
return self.execute_script(self._pex_info.script)
elif self._pex_info.entry_point:
return self.execute_entry(self._pex_info.entry_point)
else:
TRACER.log('No entry point specified, dropping into interpreter')
return self.execute_interpreter()
@classmethod
def demote_bootstrap(cls):
TRACER.log('Bootstrap complete, performing final sys.path modifications...')
should_log = {level: TRACER.should_log(V=level) for level in range(1, 10)}
def log(msg, V=1):
if should_log.get(V, False):
print('pex: {}'.format(msg), file=sys.stderr)
# Remove the third party resources pex uses and demote pex bootstrap code to the end of
# sys.path for the duration of the run to allow conflicting versions supplied by user
# dependencies to win during the course of the execution of user code.
third_party.uninstall()
bootstrap = Bootstrap.locate()
log('Demoting code from %s' % bootstrap, V=2)
for module in bootstrap.demote():
log('un-imported {}'.format(module), V=9)
import pex
log('Re-imported pex from {}'.format(pex.__path__), V=3)
log('PYTHONPATH contains:')
for element in sys.path:
log(' %c %s' % (' ' if os.path.exists(element) else '*', element))
log(' * - paths that do not exist or will be imported via zipimport')
def execute_interpreter(self):
args = sys.argv[1:]
if args:
# NB: We take care here to setup sys.argv to match how CPython does it for each case.
arg = args[0]
if arg == '-c':
content = args[1]
sys.argv = ['-c'] + args[2:]
self.execute_content('-c <cmd>', content, argv0='-c')
elif arg == '-m':
module = args[1]
sys.argv = args[1:]
self.execute_module(module)
else:
try:
if arg == '-':
content = sys.stdin.read()
else:
with open(arg) as fp:
content = fp.read()
except IOError as e:
die("Could not open %s in the environment [%s]: %s" % (arg, sys.argv[0], e))
sys.argv = args
self.execute_content(arg, content)
else:
self.demote_bootstrap()
import code
code.interact()
def execute_script(self, script_name):
dists = list(self._activate())
dist, entry_point = get_entry_point_from_console_script(script_name, dists)
if entry_point:
TRACER.log('Found console_script %r in %r' % (entry_point, dist))
sys.exit(self.execute_entry(entry_point))
dist_script = get_script_from_distributions(script_name, dists)
if not dist_script:
raise self.NotFound('Could not find script %r in pex!' % script_name)
TRACER.log('Found script %r in %r' % (script_name, dist))
return self.execute_content(dist_script.path, dist_script.read_contents(), argv0=script_name)
@classmethod
def execute_content(cls, name, content, argv0=None):
argv0 = argv0 or name
try:
ast = compile(content, name, 'exec', flags=0, dont_inherit=1)
except SyntaxError:
die('Unable to parse %s. PEX script support only supports Python scripts.' % name)
cls.demote_bootstrap()
from pex.compatibility import exec_function
sys.argv[0] = argv0
globals_map = globals().copy()
globals_map['__name__'] = '__main__'
globals_map['__file__'] = name
exec_function(ast, globals_map)
@classmethod
def execute_entry(cls, entry_point):
runner = cls.execute_pkg_resources if ':' in entry_point else cls.execute_module
return runner(entry_point)
@classmethod
def execute_module(cls, module_name):
cls.demote_bootstrap()
import runpy
runpy.run_module(module_name, run_name='__main__')
@classmethod
def execute_pkg_resources(cls, spec):
entry = EntryPoint.parse("run = {}".format(spec))
cls.demote_bootstrap()
runner = entry.resolve()
return runner()
def cmdline(self, args=()):
"""The commandline to run this environment.
:keyword args: Additional arguments to be passed to the application being invoked by the
environment.
"""
cmds = [self._interpreter.binary]
cmds.append(self._pex)
cmds.extend(args)
return cmds
def run(self, args=(), with_chroot=False, blocking=True, setsid=False, **kwargs):
"""Run the PythonEnvironment in an interpreter in a subprocess.
:keyword args: Additional arguments to be passed to the application being invoked by the
environment.
:keyword with_chroot: Run with cwd set to the environment's working directory.
:keyword blocking: If true, return the return code of the subprocess.
If false, return the Popen object of the invoked subprocess.
:keyword setsid: If true, run the PEX in a separate operating system session.
Remaining keyword arguments are passed directly to subprocess.Popen.
"""
self.clean_environment()
cmdline = self.cmdline(args)
TRACER.log('PEX.run invoking %s' % ' '.join(cmdline))
process = Executor.open_process(cmdline,
cwd=self._pex if with_chroot else os.getcwd(),
preexec_fn=os.setsid if setsid else None,
stdin=kwargs.pop('stdin', None),
stdout=kwargs.pop('stdout', None),
stderr=kwargs.pop('stderr', None),
**kwargs)
return process.wait() if blocking else process
def _do_entry_point_verification(self):
entry_point = self._pex_info.entry_point
ep_split = entry_point.split(':')
# a.b.c:m ->
# ep_module = 'a.b.c'
# ep_method = 'm'
# Only module is specified
if len(ep_split) == 1:
ep_module = ep_split[0]
import_statement = 'import {}'.format(ep_module)
elif len(ep_split) == 2:
ep_module = ep_split[0]
ep_method = ep_split[1]
import_statement = 'from {} import {}'.format(ep_module, ep_method)
else:
raise self.InvalidEntryPoint("Failed to parse: `{}`".format(entry_point))
with named_temporary_file() as fp:
fp.write(import_statement.encode('utf-8'))
fp.close()
retcode = self.run([fp.name], env={'PEX_INTERPRETER': '1'})
if retcode != 0:
raise self.InvalidEntryPoint('Invalid entry point: `{}`\n'
'Entry point verification failed: `{}`'
.format(entry_point, import_statement))