/
pkdebug.py
712 lines (584 loc) · 22.2 KB
/
pkdebug.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
# -*- coding: utf-8 -*-
"""Logging or regex-controlled print statements
We take the view that there are two types of logging: permanent
(`pkdp`) and dynamically controlled (`pkdc`). In both cases, you want
to know where the print statement originated from. Python's `logging`
module supports logging the origin, but it doesn't support user-
defined dynamic filters in the form of a regular expression.
Permanent print statements (`pkdp`) is what people traditionally think
of as "logging". What we don't care about is "logging levels", because
one person's "critical" is another person's "whatever". It's up to
operations people to figure out what's an error worth paying attention
to, and what's not. That's managed by external rule bases, not inside
some library module.
Python's logging module gives programmers too many options and
adds complexity to filters. If you don't attach a handler to
the loggers, you'll never see some messages. Or, if the handler
has `logging.WARN` set, and you really want to see `logging.INFO`
messages, you won't see them.
There are cases when you really don't want to see output. We call
these dynamically controlled print statements. They are typically used
by developers when debugging a new module, and sometimes these lines
get left in the code for a while so that you can get detailed (noisy)
debugging information on production systems.
Dynamically controlled print statements (`pkdc`) would overwhelm logs
so they are off by default and can be filtered by regular expressions
supplied via configuration (including environment variables) or
programmatically via `init`.
Examples:
In a module, you would write::
from pykern.debug import pkdc, pkdexc, pkdp
pkdp('user entered: {}', val)
pkdc('user context: name={name}, id={id}', **user_rec)
The first print statement always appears.
The second does not unless you specify a "control" via the
environment variable ``$PYKERN_PKDEBUG_CONTROL``:
PYKERN_PKDEBUG_CONTROL=my_mod python my_prog.py
or with the shortname pkdebug::
pkdebug=my_mod python my_prog.py
Or, if you want a specific conditional print::
PYKERN_PKDEBUG_CONTROL=my_mod.py:52:
You can match any text in the line output with a regular expression, which
is case insensitive.
If you omit args and kwargs, `pkdp` returns its argument, of use when you
want to inspect values within the normal flow of the code, e.g.::
def my_function():
...
return pkdp(res)
or
a = pkdp(my_function_1()) + pkdp(my_function_2())
NOTE: format arguments are converted to strings by this module, and
not by `str.format`. Then they are passed to `str.format`. This means
that you should assume the argument is a string or, better, just a
default type. Arguments are trunctated by `pkdformat`.
If `output` is a string, will open the file to write to. The initial
value of output is ``$PYKERN_PKDEBUG_OUTPUT``.
:copyright: Copyright (c) 2014-2016 RadiaSoft LLC. All Rights Reserved.
:license: http://www.apache.org/licenses/LICENSE-2.0.html
"""
from pykern import pkcompat
from pykern import pkconfig
from pykern import pkconst
from pykern import pkinspect
import datetime
import functools
import inspect
import json
import logging
import numbers
import os
import pprint
import re
import six
import sys
import threading
import traceback
#: Maximum number of exceptions thrown before printing stops
MAX_EXCEPTION_COUNT = 5
#: Was control initialized?
_have_control = False
#: Object which does the writing, initialized every time :func:`init` is called.
_printer = None
#: Get IPython InteractiveShell.write()
# See https://github.com/ipython/ipython/blob/master/IPython/core/interactiveshell.py)
_ipython_write = None
try:
_ipython_write = get_ipython().write_err
except Exception:
pass
#: Type of a regular expression
_RE_TYPE = type(re.compile(""))
#: How to parse thread names
_THREAD_ID_RE = re.compile(r"Thread-(\d+)", re.IGNORECASE)
#: name of function objects can implement to control their own string
# representation
PKDEBUG_STR_FUNCTION_NAME = "pkdebug_str"
#: redeacted value marker
REDACTED = "<REDACTED>"
#: keys whose values will be redacted from logs
SECRETS_RE = re.compile(r"(?:secret|otp\b|passw|private)", re.IGNORECASE)
#: truncated string marker
SNIP = "<SNIP>"
def init(**kwargs):
"""May be called to (re)initialize this module.
`control` is a regular expression, which is used to control the
output of :func:`pkdc`. Messages from :func:`pkdp` and :func:`pkdc`
are written to `output`.
`output` is either an object which implements `write` or a `str`, in which
case it is opened with :func:`io.open`.
Args:
control(str or re.RegexObject): lines matching will be output
output (str or file): where to write messages [error output]
redirect_logging (bool): Redirect Python's logging to output [True]
want_pid_time (bool): display PID and time in messages [False]
"""
global _printer
global _have_control
_printer = _Printer(**kwargs)
_have_control = _printer.have_control
def pkdc(fmt, *args, **kwargs):
"""Conditional print a message to `output` selectively based on `control`.
Controlled output that you can leave in your code permanently.
Args:
fmt (str): how to :func:`str.format`
args: what to format
kwargs: what to format
"""
# Since calls are left in for product, this check has
# some value.
if _have_control:
_printer._write(fmt, args, kwargs, with_control=True)
def pkdexc(exc_info=None):
"""Return last exception and stack as a string
Must be called from an ``except``. This function removes
the last two calls from the stack. It joins the traceback
so you get a complete stack trace.
Will catch exceptions during the formatting and returns a
string in all cases.
Example::
try:
something
except:
pkdlog("exception={}", pkdexc())
Args:
exc_info (tuple): (type(e), e, traceback) [sys.exc_info]
Returns:
str: formatted exception and stack trace
"""
try:
e = exc_info or sys.exc_info()
if hasattr(traceback, "TracebackException"):
return "".join(
traceback.format_exception(*e)
+ ["\nException was printed at:\n\n"]
+ traceback.format_exception_only(e[0], e[1])
+ traceback.format_stack()[:-2],
)
else:
return "".join(
traceback.format_exception_only(e[0], e[1])
+ traceback.format_stack()[:-2]
+ traceback.format_tb(e[2]),
)
except Exception as e:
return "pykern.pkdebug.pkdexc: unable to retrieve exception info"
def pkdformat(fmt, *args, **kwargs):
"""Truncate and redact args & kwargs and then format
NOTE: converts all elements to strings before calling `str.format`.
Args:
fmt (str): how to format
args (list): what to format
kwargs (dict): what to format
Returns:
str: formatted output
"""
try:
args = (_format_arg(a) for a in args)
kwargs = dict(((k, _format_arg(v)) for k, v in kwargs.items()))
return fmt.format(*args, **kwargs)
except Exception:
_printer.exception_count += 1
return "invalid format format={} args={} kwargs={} stack={}".format(
fmt,
args,
kwargs,
pkdexc(),
)
def pkdlog(fmt_or_arg, *args, **kwargs):
"""Print messages that are intended to be permanent logging.
See `pkdp` for usage.
"""
# implementation aliased below
pass
def pkdp(fmt_or_arg, *args, **kwargs):
"""Print a message to `output` unconditionally, possibly returning its arg
Use for print statements in your code that you don't intend
to keep in the system.
Use `pkdlog` for permanent log messages.
Args:
fmt_or_arg (object): how to :func:`str.format`, or object to print
args: what to format
kwargs: what to format
Returns:
object: Will return fmt_or_arg, if args and kwargs are empty
"""
if args or kwargs:
_printer._write(fmt_or_arg, args, kwargs, with_control=False)
else:
_printer._write("{}", [fmt_or_arg], {}, with_control=False)
return fmt_or_arg
# Alias since they are the same
pkdlog = pkdp
def pkdpretty(obj):
"""Return pretty print the object.
If `obj` is JSON, parse and print it. If it is a regular python
data structure, pretty print that. Any exceptions are caught,
and the return value will be `obj`.
Args:
obj (object): JSON string or python object
Returns:
str: pretty printed string
"""
try:
if isinstance(obj, six.string_types):
try:
obj = json.loads(obj)
except Exception:
pass
# try to dump as JSON else dump as Python
try:
return (
json.dumps(
obj,
sort_keys=True,
indent=4,
separators=(",", ": "),
)
+ "\n"
)
except Exception as e:
pass
if pprint.isreadable(obj):
return pprint.pformat(obj, indent=4) + "\n"
except Exception:
pass
return obj
class _LoggingHandler(logging.Handler):
"""Handler added to root logger."""
def emit(self, record):
"""Emit a log record via _printer
Writes all `logging.INFO` and above like `pkdp`, that is,
always. Below `logging.INFO` (i.e. DEBUG) is written like
`pkdc` using the same matching algorithms by converting
log records appropriately.
"""
wc = record.levelno < logging.INFO
_printer._process(
lambda: pkinspect.Call(record),
lambda: "{}:{}:{}".format(
record.levelname, record.name, self.format(record)
),
lambda: (
record.process,
datetime.datetime.utcfromtimestamp(record.created),
),
with_control=wc,
)
class _Printer(object):
"""Internal implementation of :func:`init`. Don't call directly."""
def __init__(self, **kwargs):
self.too_many_exceptions = False
self.exception_count = 0
# Safe values are set here so we can get over initialization errors
for k in cfg:
setattr(self, k, cfg[k])
self.logging_handler = None
try:
self.want_pid_time = self._init_want_pid_time(kwargs)
self.output = self._init_output(kwargs)
self.redirect_logging = self._init_redirect_logging(kwargs)
self.control = self._init_control(kwargs)
self.have_control = bool(self.control)
except Exception:
for k in cfg:
setattr(self, k, cfg[k])
self._err("initialization failed, reverting values", pkdexc())
self._logging_install()
def _err(self, msg, exc):
"""When a logging error occurs."""
self.exception_count += 1
self._out("pykern.pkdebug error: " + msg + "\n" + exc)
def _init_control(self, kwargs):
try:
if "control" in kwargs:
return _cfg_control(kwargs["control"])
except Exception:
self._err("control compile error, using safe value", pkdexc())
return cfg.control
def _init_output(self, kwargs):
try:
if "output" in kwargs:
return _cfg_output(kwargs["output"])
except Exception:
self._err("output could not be opened, using safe value", pkdexc())
return cfg.output
def _init_redirect_logging(self, kwargs):
return bool(kwargs.get("redirect_logging", cfg.redirect_logging))
def _init_want_pid_time(self, kwargs):
return bool(kwargs.get("want_pid_time", cfg.want_pid_time))
def _logging_install(self):
"""Initialize logging based on redirect_logging"""
self.logging_handler = None
self.logging_prev_handlers = None
self.logging_prev_level = None
try:
if _printer:
_printer._logging_uninstall()
if not self.redirect_logging:
return
# Optimization: Loggers check the level first before creating
# the LogRecord, just like pkdc checks _have_control so don't
# want to create LogRecord unnecessarily
rl = logging.getLogger()
self.logging_prev_level = rl.level
self.logging_prev_handlers = []
while rl.handlers:
h = rl.handlers[0]
rl.removeHandler(h)
self.logging_prev_handlers.append(h)
level = logging.DEBUG if self.have_control else logging.INFO
self.logging_handler = _LoggingHandler(level=level)
rl.addHandler(self.logging_handler)
rl.setLevel(level)
except Exception:
self._err("unable to install logging handler", pkdexc())
def _logging_uninstall(self):
"""Remove handler from logging stack and set level to previous"""
try:
if not self.logging_handler:
return
rl = logging.getLogger()
if not self.logging_prev_level is None:
rl.setLevel(self.logging_prev_level)
for h in self.logging_prev_handlers:
rl.addHandler(h)
rl.removeHandler(self.logging_handler)
except Exception:
pass
self.logging_handler = None
self.logging_prev_handlers = None
self.logging_prev_level = None
def _out(self, msg):
"""Writes msg to output (or error output if not output)
If running in IPython, then use ``get_ipython().write_err()``
so that logging comes out in the cell as an error. Otherwise,
use stderr.
If an error occurs, output is reset to None.
Args:
output (file): where to write
msg (str): what to write
"""
try:
output = self.output
if not output:
if _ipython_write:
_ipython_write(msg)
return
output = sys.stderr
output.write(msg)
if hasattr(output, "flush"):
output.flush()
except Exception as e:
self.exception_count += 1
sys.__stderr__.write("output error: " + str(e))
sys.__stderr__.flush()
def _pid_time(self, pid, time):
"""Creates pid-time string for output
Args:
pid (int): process id
time (datetime): when did it happen (UTC)
Returns:
str: formatted
"""
if not self.want_pid_time:
return ""
try:
# Force the thread id to a reasonable length so that
# we don't clutter the logs. It can't be used for anything
# other than identifying "in the small" log line relationships.
i = self._thread_id() % 99999
return "{:%b %d %H:%M:%S} {:5d} {:5d} ".format(time, pid, i)
except Exception:
self.exception_count += 1
self._err("error formatting pid and time", pkdexc())
return "Xxx 00 00:00:00 00000.0"
def _prefix(self, call):
"""Format prefix line from location details
Args:
caller: state of call
Returns:
str: formatted prefix
"""
return "{} ".format(call)
def _process(self, call, message, pid_time_values, with_control):
"""Writes formatted message to output with location prefix.
If not `with_control`, always writes message to
:attr:`output`. If `with_control` and whole expression matches
:attr:`control`, writes message, else nothing is output.
Args:
call (func): returns filename, line, funcname
message (func): returns message with prefix as string
pid_time_values (func): returns pid and time
with_control (bool): respect :attr:`control`
"""
if self.too_many_exceptions or with_control and not self.control:
return
try:
msg = self._prefix(call()) + message()
if not with_control or self.control.search(msg):
self._out(self._pid_time(*pid_time_values()) + msg.rstrip() + "\n")
except Exception:
self._err("unable to process message", pkdexc())
finally:
if self.exception_count >= MAX_EXCEPTION_COUNT:
self.too_many_exceptions = True
def _thread_id(self):
"""Returns a number to identify the current thread
Returns:
int: some number that uniquely identifies the thread
"""
t = threading.current_thread()
n = t.name
if n == "MainThread":
return 0
m = _THREAD_ID_RE.search(t.name)
if m:
return int(m.group(1))
return t.ident
def _write(self, fmt, args, kwargs, with_control=False):
"""Provides formatter for message to _process
Args:
fmt_or_record (str or LogRecord): how to format
args (list): what to format
kwargs (dict): what to format
with_control (bool): respect :attr:`control`
"""
def msg():
return pkdformat(fmt, *args, **kwargs)
def pid_time():
return (os.getpid(), datetime.datetime.utcnow())
def prefix():
return pkinspect.Call(
kwargs.get("pkdebug_frame")
or inspect.currentframe().f_back.f_back.f_back.f_back,
)
self._process(prefix, msg, pid_time, with_control)
@pkconfig.parse_none
def _cfg_control(anything):
if anything is None:
return None
if isinstance(anything, _RE_TYPE):
return anything
return re.compile(anything, flags=re.IGNORECASE)
@pkconfig.parse_none
def _cfg_output(anything):
if anything is None:
return None
if hasattr(anything, "write"):
return anything
return open(anything, "w")
def _format_arg(obj, depth=0):
"""Redact and truncate `obj`
Args:
obj (object): Any argument
depth (int): nested data structure depth [default: 0]
Returns:
object: Redacted and truncated str or obj in exception
"""
def _dict(obj, depth):
return _iterate(
sorted(obj),
lambda k: _format_arg(k, depth)
+ ": "
+ (_redacted(k) or _format_arg(obj[k], depth)),
obj,
)
def _iterate(sequence, format_fn, obj):
r = ""
for i, v in enumerate(sequence):
if i >= cfg.max_elements:
break
r += format_fn(v) + ", "
return _snip(r, len(obj))
def _object(obj, depth):
depth += 1
c = str(type(obj)()) if isinstance(obj, (list, tuple)) else "{}"
if depth > cfg.max_depth:
return c[0] + SNIP + c[1]
m = _dict if isinstance(obj, dict) else _sequence
return c[0] + m(obj, depth) + c[1]
def _redacted(key):
return (
isinstance(key, pkconst.STRING_TYPES)
and SECRETS_RE.search(key)
and REDACTED
)
def _sequence(obj, depth):
return _iterate(
obj,
lambda v: _format_arg(v, depth),
obj,
)
def _snip(truncated, count_of_original_elements):
truncated = truncated[:-2]
return (
truncated + ", " + SNIP
if count_of_original_elements > cfg.max_elements
else truncated
)
def _string(value):
if r"\n" in value:
value = pkcompat.unicode_unescape(value)
# '\n File"' is at the start of stack traces. The end of stack
# traces are more interesting than the beginning so truncate
# the beginning.
if '\n File "' in value and "Exception was printed at" not in value:
return (
SNIP + value[-cfg.max_string :]
if len(value) > cfg.max_string
else value
)
return value[: cfg.max_string] + (value[cfg.max_string :] and SNIP)
try:
f = getattr(obj, PKDEBUG_STR_FUNCTION_NAME, None)
# only call if an instance, otherwise is unbound method
if f and not isinstance(obj, type):
s = f()
else:
if cfg.snip:
if isinstance(obj, (dict, list, set, tuple)):
return _object(obj, depth)
if isinstance(obj, pkconst.STRING_TYPES):
s = _string(obj)
# only enclose in quotes strings contained within another object
if depth > 0:
return "'" + s + "'"
return s
if depth <= 0 and isinstance(obj, numbers.Number):
# allow format string arguments ({.2f}), which only occurs when depth=0
return obj
s = str(obj)
# Always truncates a converted string
return _string(s)
except Exception as e:
_printer._err(
"unable to format obj type={} exception={}".format(type(obj), e),
pkdexc(),
)
# may get another exception later, but that would be in format.
return obj
def _z(msg):
"""Useful for debugging this module"""
with open("/dev/tty", "w") as f:
f.write(str(msg) + "\n")
return msg
cfg = pkconfig.init(
control=(None, _cfg_control, "Pattern to match against pkdc messages"),
max_depth=(10, int, "Maximum depth to recurse into and object when logging"),
max_elements=(30, int, "Maximum number of elements in a dict, list, set, or tuple"),
max_string=(8000, int, "Maximum length of an individual string"),
output=(
None,
_cfg_output,
'Where to write messages either as a "writable" or file name',
),
redirect_logging=(False, bool, "Redirect Python's logging to output"),
snip=(
True,
bool,
"Redact secrets and truncate objects if true",
),
want_pid_time=(False, bool, "Display pid and time in messages"),
)
if cfg:
init()