/
jail_code.py
378 lines (291 loc) · 12.9 KB
/
jail_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
"""Run code in a jail."""
import logging
import os
import os.path
import resource
import shutil
import sys
from .proxy import run_subprocess_through_proxy
from .subproc import run_subprocess
from .util import temp_directory
log = logging.getLogger("codejail")
# TODO: limit too much stdout data? # pylint: disable=fixme
# Configure the commands
# COMMANDS is a map from an abstract command name to a list of command-line
# pieces, such as subprocess.Popen wants.
COMMANDS = {}
def configure(command, bin_path, user=None):
"""
Configure a command for `jail_code` to use.
`command` is the abstract command you're configuring, such as "python" or
"node". `bin_path` is the path to the binary. `user`, if provided, is
the user name to run the command under.
"""
cmd_argv = [bin_path]
# Command-specific arguments
if command == "python":
# -E means ignore the environment variables PYTHON*
# -B means don't try to write .pyc files.
cmd_argv.extend(['-E', '-B'])
COMMANDS[command] = {
# The start of the command line for this program.
'cmdline_start': cmd_argv,
# The user to run this as, perhaps None.
'user': user,
}
def is_configured(command):
"""
Has `jail_code` been configured for `command`?
Returns true if the abstract command `command` has been configured for use
in the `jail_code` function.
"""
return command in COMMANDS
# By default, look where our current Python is, and maybe there's a
# python-sandbox alongside. Only do this if running in a virtualenv.
# The check for sys.real_prefix covers virtualenv
# the equality of non-empty sys.base_prefix with sys.prefix covers venv
running_in_virtualenv = (
hasattr(sys, 'real_prefix') or
(hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix)
)
if running_in_virtualenv:
# On jenkins
sandbox_user = os.getenv('CODEJAIL_TEST_USER')
sandbox_env = os.getenv('CODEJAIL_TEST_VENV')
if sandbox_env and sandbox_user:
configure("python", f"{sandbox_env}/bin/python", sandbox_user)
# or fall back to defaults
elif os.path.isdir(sys.prefix + "-sandbox"):
configure("python", sys.prefix + "-sandbox/bin/python", "sandbox")
# The resource limits that we unless otherwise configured.
DEFAULT_LIMITS = {
# CPU seconds, defaulting to 1.
"CPU": 1,
# Real time, defaulting to 1 second.
"REALTIME": 1,
# Total process virutal memory, in bytes, defaulting to unlimited.
"VMEM": 0,
# Size of files creatable, in bytes, defaulting to nothing can be written.
"FSIZE": 0,
# The number of processes and threads to allow.
"NPROC": 15,
# Whether to use a proxy process or not. None means use an environment
# variable to decide. NOTE: using a proxy process is NOT THREAD-SAFE, only
# one thread can use CodeJail at a time if you are using a proxy process.
"PROXY": None,
}
# Configured resource limits.
# Modified by calling `set_limit`.
LIMITS = DEFAULT_LIMITS.copy()
# Map from limit_overrides_contexts (strings) to dictionaries in the shape of LIMITS.
# Modified by calling `override_limit`.
LIMIT_OVERRIDES = {}
def set_limit(limit_name, value):
"""
Set a limit for `jail_code`.
`limit_name` is a string, the name of the limit to set. `value` is the
value to use for that limit. The type, meaning, default, and range of
accepted values depend on `limit_name`.
These limits are available:
* `"CPU"`: the maximum number of CPU seconds the jailed code can use.
The value is an integer, defaulting to 1.
* `"REALTIME"`: the maximum number of seconds the jailed code can run,
in real time. The default is 1 second.
* `"VMEM"`: the total virtual memory available to the jailed code, in
bytes. The default is 0 (no memory limit).
* `"FSIZE"`: the maximum size of files creatable by the jailed code,
in bytes. The default is 0 (no files may be created).
* `"NPROC"`: the maximum number of process or threads creatable by the
jailed code. The default is 15.
* `"PROXY"`: 1 to use a proxy process, 0 to not use one. This isn't
really a limit, sorry about that.
Limits are process-wide, and will affect all future calls to jail_code.
Providing a limit of 0 will disable that limit.
"""
LIMITS[limit_name] = value
def get_effective_limits(overrides_context=None):
"""
Calculate the effective limits dictionary.
Arguments:
overrides_context (str|None): Identifies which set of overrides to use.
If None or missing from `LIMIT_OVERRIDES`, then just return `LIMITS` as-is.
"""
overrides = LIMIT_OVERRIDES.get(overrides_context, {}) if overrides_context else {}
return {**LIMITS, **overrides}
def override_limit(limit_name, value, limit_overrides_context):
"""
Override a limit for `jail_code`, but only in the context of `limit_overrides_context`.
See `set_limit` for the meaning of `limit_name` and `value`.
All limits may be overriden except PROXY. Having this setting be different between
executions of code is not supported. If one attempts to override PROXY, the override
will be ignored and the globally-configured value will be used instead.
"""
if limit_name == 'PROXY' and LIMITS['PROXY'] != value:
log.error(
'Tried to override value of PROXY to %s. '
'Overriding PROXY on a per-context basis is not permitted. '
'Will use globally-configured value instead: %s.',
value,
LIMITS['PROXY'],
)
return
if limit_overrides_context not in LIMIT_OVERRIDES:
LIMIT_OVERRIDES[limit_overrides_context] = {}
LIMIT_OVERRIDES[limit_overrides_context][limit_name] = value
class JailResult:
"""
A passive object for us to return from jail_code.
"""
def __init__(self):
self.stdout = self.stderr = self.status = None
def jail_code(command, code=None, files=None, extra_files=None, argv=None,
stdin=None, limit_overrides_context=None, slug=None):
"""
Run code in a jailed subprocess.
`command` is an abstract command ("python", "node", ...) that must have
been configured using `configure`.
`code` is a string containing the code to run. If no code is supplied,
then the code to run must be in one of the `files` copied, and must be
named in the `argv` list.
`files` is a list of file paths, they are all copied to the jailed
directory. Note that no check is made here that the files don't contain
sensitive information. The caller must somehow determine whether to allow
the code access to the files. Symlinks will be copied as symlinks. If the
linked-to file is not accessible to the sandbox, the symlink will be
unreadable as well.
`extra_files` is a list of pairs, each pair is a filename and a bytestring
of contents to write into that file. These files will be created in the
temp directory and cleaned up automatically. No subdirectories are
supported in the filename.
`argv` is the command-line arguments to supply.
`stdin` is a string, the data to provide as the stdin for the process.
`limit_overrides_context` is an optional string to use as a key against the
configured limit overrides contexts. If omitted or if no such limit override context
has been configured, then use the default limits.
`slug` is an arbitrary string, a description that's meaningful to the
caller, that will be used in log messages.
Return an object with:
.stdout: stdout of the program, a string
.stderr: stderr of the program, a string
.status: exit status of the process: an int, 0 for success
"""
# pylint: disable=too-many-statements
if not is_configured(command):
raise Exception("jail_code needs to be configured for %r" % command)
# We make a temp directory to serve as the home of the sandboxed code.
# It has a writable "tmp" directory within it for temp files.
with temp_directory() as homedir:
# Make directory readable by other users ('sandbox' user needs to be
# able to read it).
os.chmod(homedir, 0o775)
# Make a subdir to use for temp files, world-writable so that the
# sandbox user can write to it.
tmptmp = os.path.join(homedir, "tmp")
os.mkdir(tmptmp)
os.chmod(tmptmp, 0o777)
argv = argv or []
# All the supporting files are copied into our directory.
for filename in files or ():
dest = os.path.join(homedir, os.path.basename(filename))
if os.path.islink(filename):
os.symlink(os.readlink(filename), dest)
elif os.path.isfile(filename):
shutil.copy(filename, homedir)
else:
shutil.copytree(filename, dest, symlinks=True)
# Create the main file.
if code:
with open(os.path.join(homedir, "jailed_code"), "wb") as jailed:
code_bytes = bytes(code, 'utf8')
jailed.write(code_bytes)
argv = ["jailed_code"] + argv
# Create extra files requested by the caller:
for name, content in extra_files or ():
with open(os.path.join(homedir, name), "wb") as extra:
extra.write(content)
cmd = []
rm_cmd = []
# Build the command to run.
user = COMMANDS[command]['user']
if user:
# Run as the specified user
cmd.extend(['sudo', '-u', user])
rm_cmd.extend(['sudo', '-u', user])
# Point TMPDIR at our temp directory.
# FIXME: This breaks command execution unless user param has been set.
# Issue: https://github.com/openedx/codejail/issues/162
cmd.extend(['TMPDIR=tmp'])
# Start with the command line dictated by "python" or whatever.
cmd.extend(COMMANDS[command]['cmdline_start'])
# Add the code-specific command line pieces.
cmd.extend(argv)
# Determine effective resource limits.
effective_limits = get_effective_limits(limit_overrides_context)
if slug:
log.info(
"Preparing to execute jailed code %r "
"(overrides context = %r, resource limits = %r).",
slug,
limit_overrides_context,
effective_limits,
)
# Use the configuration and maybe an environment variable to determine
# whether to use a proxy process.
use_proxy = effective_limits["PROXY"]
if use_proxy is None:
use_proxy = int(os.environ.get("CODEJAIL_PROXY", "0"))
if use_proxy:
run_subprocess_fn = run_subprocess_through_proxy
else:
run_subprocess_fn = run_subprocess
if stdin:
stdin = bytes(stdin, 'utf-8')
# Run the subprocess.
status, stdout, stderr = run_subprocess_fn(
cmd=cmd, cwd=homedir, env={}, slug=slug,
stdin=stdin,
realtime=effective_limits["REALTIME"],
rlimits=create_rlimits(effective_limits),
)
result = JailResult()
result.status = status
result.stdout = stdout
result.stderr = stderr
# Remove the tmptmp directory as the sandbox user since the sandbox
# user may have written files that the application user can't delete.
rm_cmd.extend([
'/usr/bin/find', tmptmp,
'-mindepth', '1', '-maxdepth', '1',
'-exec', 'rm', '-rf', '{}', ';'
])
# Run the rm command subprocess.
run_subprocess_fn(rm_cmd, cwd=homedir)
return result
def create_rlimits(effective_limits):
"""
Create a list of resource limits for our jailed processes.
Arguments:
effective_limits (dict)
"""
rlimits = []
# Allow a small number of subprocess and threads. One limit controls both,
# and at least OpenBLAS (imported by numpy) requires threads.
nproc = effective_limits["NPROC"]
if nproc:
rlimits.append((resource.RLIMIT_NPROC, (nproc, nproc)))
# CPU seconds, not wall clock time.
cpu = effective_limits["CPU"]
if cpu:
# Set the soft limit and the hard limit differently. When the process
# reaches the soft limit, a SIGXCPU will be sent, which should kill the
# process. If you set the soft and hard limits the same, then the hard
# limit is reached, and a SIGKILL is sent, which is less distinctive.
rlimits.append((resource.RLIMIT_CPU, (cpu, cpu+1)))
# Total process virtual memory.
vmem = effective_limits["VMEM"]
if vmem:
rlimits.append((resource.RLIMIT_AS, (vmem, vmem)))
# Size of written files. Can be zero (nothing can be written).
fsize = effective_limits["FSIZE"]
rlimits.append((resource.RLIMIT_FSIZE, (fsize, fsize)))
return rlimits