-
Notifications
You must be signed in to change notification settings - Fork 4
/
logs.py
244 lines (194 loc) · 8.45 KB
/
logs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
"""A handler that serves all app logs for an App Engine HTTP request.
StackDriver Logging API: https://cloud.google.com/logging/docs/apis
"""
import calendar
from datetime import datetime, timedelta, timezone
import html
import logging
import re
import time
import urllib.request, urllib.parse, urllib.error
from flask import request
from google.api_core.exceptions import InvalidArgument
from google.cloud import ndb
from google.cloud.logging import Client
from oauth_dropins.webutil.util import json_dumps, json_loads
from .appengine_info import APP_ID
from . import flask_util, util
from .flask_util import error
logger = logging.getLogger(__name__)
LEVELS = {
logging.DEBUG: 'D',
logging.INFO: 'I',
logging.WARNING: 'W',
logging.ERROR: 'E',
logging.CRITICAL: 'F',
}
CACHE_TIME = timedelta(days=1)
MAX_LOG_AGE = timedelta(days=30)
# App Engine's launch, roughly
MIN_START_TIME = time.mktime(datetime(2008, 4, 1, tzinfo=timezone.utc).timetuple())
MAX_START_TIME = time.mktime(datetime(2099, 1, 1, tzinfo=timezone.utc).timetuple())
SANITIZE_RE = re.compile(r"""
((?:access|api|oauth)?[ _]?
(?:code|accessJwt|consumer_key|consumer_secret|nonce|password|refreshJwt|secret|signature|token|verifier)
(?:u?['"])?
(?:=|:|\ |,\ |%3D)\ *
(?:u?['"])?
)
[^ &='"]+
""", flags=re.VERBOSE | re.IGNORECASE)
def sanitize(msg):
"""Sanitizes access tokens and Authorization headers."""
return SANITIZE_RE.sub(r'\1...', msg)
def url(when, key, **params):
"""Returns the relative URL (no scheme or host) to a log page.
Args:
when (:class:`datetime`)
key (:class:`ndb.Key` or str)
params: included as query params, eg module, path
"""
assert 'start_time' not in params and 'key' not in params, params
if isinstance(params.get('path'), (list, tuple)):
for path in params['path']:
assert ',' not in path, path
params['path'] = ','.join(params['path'])
params.update({
'start_time': calendar.timegm(when.utctimetuple()),
'key': key.urlsafe().decode() if isinstance(key, ndb.Key) else key,
})
return 'log?' + urllib.parse.urlencode(params)
def maybe_link(when, key, time_class='dt-updated', link_class='', **params):
"""Returns an HTML snippet with a timestamp and maybe a log page link.
Example::
<a href="/log?start_time=1513904267&key=aglz..." class="u-bridgy-log">
<time class="dt-updated" datetime="2017-12-22T00:57:47.222060"
title="Fri Dec 22 00:57:47 2017">
3 days ago
</time>
</a>
The ``<a>`` tag is only included if the timestamp is 30 days old or less, since
Stackdriver's basic tier doesn't store logs older than that:
* https://cloud.google.com/monitoring/accounts/tiers#logs_ingestion
* https://github.com/snarfed/bridgy/issues/767
Args:
when (datetime)
key (:class:`ndb.Key` or str)
time_class (str): optional class value for the ``<time>`` tag
link_class (str): optional class value for the ``<a>`` tag (if generated)
params (dict, str: str): query params to include in the link URL,
eg module, path
Returns: string HTML
"""
# always show time zone. assume naive timestamps are UTC.
if when.tzinfo is None:
when = when.replace(tzinfo=timezone.utc)
now = util.now(tz=when.tzinfo)
time = f'<time class="{time_class}" datetime="{when.isoformat()}" title="{when.ctime()} {when.tzname()}">{util.naturaltime(when, when=now)}</time>'
if now > when > now - MAX_LOG_AGE:
return f'<a class="{link_class}" href="/{url(when, key, **params)}">{time}</a>'
return time
# datastore string keys are url-safe-base64 of, say, at least 32(ish) chars.
# https://cloud.google.com/appengine/docs/python/ndb/keyclass#Key_urlsafe
# http://tools.ietf.org/html/rfc3548.html#section-4
BASE64 = 'A-Za-z0-9-_='
DATASTORE_KEY_RE = re.compile("([^%s])(([%s]{8})[%s]{24,})([^%s])" % ((BASE64,) * 4))
def linkify_datastore_keys(msg):
"""Converts string datastore keys to links to the admin console viewer."""
def linkify_key(match):
try:
# Useful for logging, but also causes false positives in the search, we
# find and use log requests instead of real requests.
# logger.debug(f'Linkifying datastore key: {'match.group(2)}')
key = ndb.Key(urlsafe=match.group(2))
tokens = [(kind, f"{'id' if isinstance(id, int) else 'name'}:{id}")
for kind, id in key.pairs()]
key_str = '0/|' + '|'.join(f'{len(kind)}/{kind}|{len(id)}/{id}'
for kind, id in tokens)
key_quoted = urllib.parse.quote(urllib.parse.quote(key_str, safe=''), safe='')
html = f"{match.group(1)}<a title='{match.group(2)}' href='https://console.cloud.google.com/datastore/entities;kind={key.kind()};ns=__$DEFAULT$__/edit;key={key_quoted}?project={APP_ID}'>{match.group(3)}...</a>{match.group(4)}"
# logger.debug(f'Returning {html}')
return html
except BaseException:
# logger.debug("Couldn't linkify candidate datastore key.") # too noisy
return match.group(0)
return DATASTORE_KEY_RE.sub(linkify_key, msg)
def log(module=None, path=None):
"""Flask view that searches for and renders app logs for an HTTP request.
URL parameters:
* ``start_time`` (float): seconds since the epoch
* ``key`` (str): token to find in the first app log of the request
Install with::
app.add_url_rule('/log', view_func=logs.log)
Or::
@app.get('/log')
@cache.cached(600)
def log():
return logs.log()
Args:
module (str): App Engine module to search. Defaults to all.
path (str or sequence of str): optional HTTP request path(s) to
limit logs to.
Returns:
(str response body, dict headers) tuple: Flask response
"""
if not module:
module = request.values.get('module')
if not path:
path = request.values.get('path')
start_time = flask_util.get_required_param('start_time')
if not util.is_float(start_time):
return error(f"Couldn't convert start_time to float: {start_time!r}")
start_time = float(start_time)
if start_time < MIN_START_TIME:
return error(f'start_time must be >= {MIN_START_TIME}')
elif start_time > MAX_START_TIME:
return error(f'start_time must be <= {MAX_START_TIME}')
client = Client()
project = f'projects/{APP_ID}'
key = urllib.parse.unquote_plus(flask_util.get_required_param('key'))
# first, find the individual log message to get the trace id
utcfromtimestamp = datetime.utcfromtimestamp
timestamp_filter = (
f"timestamp>=\"{utcfromtimestamp(start_time - 60).isoformat() + 'Z'}\" "
f"timestamp<=\"{utcfromtimestamp(start_time + 120).isoformat() + 'Z'}\"")
query = f'logName="{project}/logs/python" textPayload:"{key}" {timestamp_filter}'
if module:
query += f' resource.labels.module_id="{module}"'
if path:
or_paths = ' OR '.join(f'"{path}"' for path in path.split(','))
query += f' httpRequest.requestUrl:({or_paths})'
# don't log key; would be a false positive for future searches
logger.info('Searching logs...') # with: {query}')
try:
# https://googleapis.dev/python/logging/latest/client.html#google.cloud.logging_v2.client.Client.list_entries
log = next(iter(client.list_entries(filter_=query, page_size=1)))
except (InvalidArgument, StopIteration):
logger.info('No log found!')
return 'No log found!', 404
logger.info(f'Got insert id {log.insert_id} trace {log.trace}')
# now, print all logs with that trace
resp = """\
<html>
<body style="font-family: monospace; white-space: pre">
"""
query = f'logName="{project}/logs/python" trace="{log.trace}" resource.type="gae_app" {timestamp_filter}'
logger.info(f'Searching logs with: {query}')
# sanitize and render each line
for log in client.list_entries(filter_=query, page_size=1000):
# payload is a union that can be string, JSON, or protobuf
# https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#FIELDS.oneof_payload
msg = log.payload
if not msg:
continue
elif isinstance(msg, (dict, list)):
msg = json_dumps(msg, indent=2)
else:
msg = str(msg)
msg = linkify_datastore_keys(util.linkify(html.escape(
msg if msg.startswith('Created by this poll:') else sanitize(msg),
quote=False)))
resp += '%s %s %s<br />' % (
log.severity[0], log.timestamp, msg.replace('\n', '<br />'))
resp += '</body>\n</html>'
return resp, {'Content-Type': 'text/html; charset=utf-8'}