|
3 | 3 |
|
4 | 4 | """Defense-in-depth RestrictedUnpickler for TensorWatch. |
5 | 5 |
|
6 | | -Blocks modules commonly exploited in pickle deserialization attacks while |
7 | | -allowing the data types that TensorWatch legitimately serializes (numpy |
8 | | -arrays, torch tensors, TensorWatch data classes, built-in collections, etc.). |
9 | | -
|
10 | | -WARNING: This is NOT a complete sandbox. A determined attacker may still find |
11 | | -bypass techniques. Do not load pickle data from untrusted sources. |
| 6 | +Uses an **allowlist** approach: only modules that TensorWatch legitimately |
| 7 | +needs for serialization are permitted. Everything else is blocked by default. |
| 8 | +
|
| 9 | +Allowed module families: |
| 10 | + - Python builtins and standard-library data types (collections, datetime, |
| 11 | + decimal, fractions, numbers, uuid) |
| 12 | + - Pickle internals (_codecs, copyreg, _collections — used by the pickle |
| 13 | + protocol itself) |
| 14 | + - numpy, torch, pandas — data-science libraries whose objects appear in |
| 15 | + StreamItem values |
| 16 | + - tensorwatch — the library's own data classes |
| 17 | +
|
| 18 | +WARNING: This is NOT a complete sandbox. Pickle allowlists reduce the attack |
| 19 | +surface dramatically compared to blocklists, but do not eliminate all risk. |
| 20 | +Do not load pickle data from untrusted sources. |
12 | 21 | """ |
13 | 22 |
|
14 | 23 | import io |
15 | 24 | import pickle |
16 | 25 | import logging |
17 | 26 |
|
# ---------------------------------------------------------------------------
# Allowlist of top-level packages.  A module may be unpickled from only when
# the first component of its dotted path is a member of this set; every other
# module is rejected outright.
# ---------------------------------------------------------------------------
_ALLOWED_PREFIXES = frozenset((
    # Pickle protocol internals (required for __reduce__ reconstruction)
    'copyreg', '_codecs',
    # Python builtins and standard-library data types
    # (_collections holds the C-accelerated collection types)
    'builtins',
    'collections', '_collections',
    'datetime', 'decimal', 'fractions', 'numbers', 'uuid',
    # Data-science libraries
    'numpy', 'torch', 'pandas',
    # TensorWatch's own types
    'tensorwatch',
))
39 | 50 |
|
40 | | -# Specific names blocked from builtins module |
| 51 | +# Even within allowed modules, these builtins are too dangerous to permit. |
41 | 52 | _BLOCKED_BUILTINS = frozenset({ |
42 | 53 | 'eval', 'exec', 'compile', '__import__', |
43 | 54 | 'open', 'input', 'breakpoint', |
|
46 | 57 | 'getattr', 'setattr', 'delattr', |
47 | 58 | }) |
48 | 59 |
|
_log = logging.getLogger(__name__)


class RestrictedUnpickler(pickle.Unpickler):
    """Unpickler that only allows globals from explicitly approved modules.

    Uses an allowlist (not a blocklist), so unknown or newly-introduced
    dangerous modules are blocked by default.  In addition, dotted
    (qualified) global names are screened component by component, because
    pickle protocol 4+ resolves ``"a.b.c"`` by walking attributes and a check
    against the raw string alone can be sidestepped (``"eval.__call__"``).

    WARNING: This is NOT a complete sandbox.  Do not load pickle data from
    untrusted sources.
    """

    def find_class(self, module, name):
        """Resolve ``module.name`` only if it passes every restriction.

        Args:
            module: Dotted module path recorded in the pickle stream.
            name: Global name recorded in the pickle stream.  With pickle
                protocol 4+ this may be a dotted qualified name such as
                ``"Outer.Inner"``.

        Returns:
            The object resolved by ``pickle.Unpickler.find_class``.

        Raises:
            pickle.UnpicklingError: If ``name`` is a dotted path containing a
                dunder component, if the top-level package of ``module`` is
                not in the allowlist, or if any component of ``name`` is a
                blocked builtin.
        """
        top_module = module.split('.')[0]
        parts = name.split('.')

        # Reject dunder traversal in qualified names.  Paths such as
        # "__dict__.get", "object.__subclasses__" or "eval.__call__" reach
        # dangerous callables through attributes of otherwise harmless
        # objects.  Single-component names (e.g. "_reconstruct") are left to
        # the checks below so ordinary private helpers keep working.
        if len(parts) > 1 and any(
                part.startswith('__') and part.endswith('__') for part in parts):
            _log.warning("Pickle restricted: blocked %s.%s (dunder attribute traversal)",
                         module, name)
            raise pickle.UnpicklingError(
                "Blocked: attribute traversal via '{}' is not allowed "
                "(attempted to load {}.{})".format(name, module, name))

        # Reject any module not in the allowlist
        if top_module not in _ALLOWED_PREFIXES:
            _log.warning("Pickle restricted: blocked %s.%s (module '%s' not in allowlist)",
                         module, name, top_module)
            raise pickle.UnpicklingError(
                "Blocked: module '{}' is not in the allowlist "
                "(attempted to load {}.{})".format(top_module, module, name))

        # Block dangerous builtins even though 'builtins' is allowed.  Every
        # component is checked, not just the whole string, so a blocked
        # builtin cannot be hidden behind a trailing attribute.
        if top_module == 'builtins' and not _BLOCKED_BUILTINS.isdisjoint(parts):
            _log.warning("Pickle restricted: blocked builtins.%s", name)
            raise pickle.UnpicklingError(
                "Blocked: builtins.{} is not allowed".format(name))

        return super().find_class(module, name)
70 | 88 |
|
|
0 commit comments