Skip to content

Commit

Permalink
Return Unicode text from log-fetching methods
Browse files Browse the repository at this point in the history
When streaming or fetching logs, return Unicode text by decoding
the response according to the HTTP headers.

Use response.text to automatically decode the HTTP response when not
streaming it.

When streaming it, response.iter_lines() returns the encoded bytes. Use
the response headers to decide how to decode it; if these are not
present, use guess_json_utf() if JSON is expected, and guess 'utf-8'
otherwise.

We always decode responses before passing them back to the client
because only the response headers say how to interpret the bytes.

Signed-off-by: Tim Waugh <twaugh@redhat.com>
  • Loading branch information
twaugh committed Jun 13, 2017
1 parent fc3e678 commit fc5ea18
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 44 deletions.
11 changes: 7 additions & 4 deletions osbs/cli/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,21 @@ class IterLinesSaver(object):
Wrap HttpStream.iter_lines() and save responses.
"""

def __init__(self, path, fn):
def __init__(self, encoding, path, fn):
self.encoding = encoding or 'utf-8'
self.path = path
self.fn = fn
self.line = 0

def iter_lines(self):
encoding = self.encoding
for line in self.fn():
path = "{f}-{n:0>3}.json".format(f=self.path, n=self.line)
logger.debug("capturing to %s", path)
text = line.decode(encoding, errors='replace')
with open(path, "w") as outf:
try:
json.dump(json.loads(line), outf, sort_keys=True, indent=4)
json.dump(json.loads(text), outf, sort_keys=True, indent=4)
except ValueError:
outf.write(line)

Expand Down Expand Up @@ -68,15 +71,15 @@ def request(self, url, method, *args, **kwargs):

if kwargs.get('stream', False):
stream = self.fn(url, method, *args, **kwargs)
stream.iter_lines = IterLinesSaver(path,
stream.iter_lines = IterLinesSaver(stream.encoding, path,
stream.iter_lines).iter_lines
return stream
else:
response = self.fn(url, method, *args, **kwargs)
logger.debug("capturing to %s.json", path)
with open(path + ".json", "w") as outf:
try:
json.dump(json.loads(response.content), outf,
json.dump(json.loads(response.text), outf,
sort_keys=True, indent=4)
except ValueError:
outf.write(response.content)
Expand Down
47 changes: 35 additions & 12 deletions osbs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
OsbsWatchBuildNotFound, OsbsAuthException)
from osbs.utils import graceful_chain_get
from requests.exceptions import ConnectionError
from requests.utils import guess_json_utf

try:
# py2
Expand All @@ -44,13 +45,15 @@

def check_response(response):
if response.status_code not in (httplib.OK, httplib.CREATED):
if hasattr(response, 'content'):
content = response.content
if hasattr(response, 'text'):
text = response.text
else:
content = ''.join(response.iter_lines())
encoding = response.encoding or 'utf-8'
text = b''.join(response.iter_lines()).decode(encoding,
errors='replace')

logger.error("[%d] %s", response.status_code, content)
raise OsbsResponseException(message=content, status_code=response.status_code)
logger.error("[%d] %s", response.status_code, text)
raise OsbsResponseException(message=text, status_code=response.status_code)


def retry_on_conflict(func, sleep_seconds=0.5, max_attempts=10):
Expand Down Expand Up @@ -413,7 +416,7 @@ def stream_logs(self, build_id):
stream logs from build
:param build_id: str
:return: iterator
:return: iterator yielding Unicode text
"""
kwargs = {'follow': 1}

Expand All @@ -433,13 +436,20 @@ def stream_logs(self, build_id):
headers={'Connection': 'close'})
check_response(response)

encoding = response.encoding
if not encoding:
# The HTTP response headers didn't tell us the
# encoding; make a reasonable guess
encoding = 'utf-8'

for line in response.iter_lines():
last_activity = time.time()
yield line
yield line.decode(encoding)

# NOTE1: If self._get causes ChunkedEncodingError, ConnectionError,
# or IncompleteRead to be raised, they'll be wrapped in
# OsbsNetworkException or OsbsException
# NOTE2: If decode_json or iter_lines causes ChunkedEncodingError, ConnectionError,
# NOTE2: If iter_lines causes ChunkedEncodingError, ConnectionError,
# or IncompleteRead to be raised, it'll simply be silenced.
# NOTE3: An exception may be raised from
# check_response(). In this case, exception will be
Expand Down Expand Up @@ -467,7 +477,7 @@ def logs(self, build_id, follow=False, build_json=None, wait_if_missing=False):
:param follow: bool, fetch logs as they come?
:param build_json: dict, to save one get-build query
:param wait_if_missing: bool, if build doesn't exist, wait
:return: None, str or iterator
:return: None, Unicode text or iterator yielding Unicode text
"""
# does build exist?
try:
Expand All @@ -494,7 +504,7 @@ def logs(self, build_id, follow=False, build_json=None, wait_if_missing=False):
buildlogs_url = self._build_url("builds/%s/log/" % build_id)
response = self._get(buildlogs_url, headers={'Connection': 'close'})
check_response(response)
return response.content
return response.text

def list_builds(self, build_config_id=None, koji_task_id=None,
field_selector=None):
Expand Down Expand Up @@ -583,10 +593,23 @@ def watch_resource(self, resource_type, resource_name=None, **request_args):
while True:
with self._get(url, stream=True, headers={'Connection': 'close'}) as response:
check_response(response)
encoding = response.encoding
for line in response.iter_lines():
logger.debug(line)

if not encoding and len(line) > 3:
encoding = guess_json_utf(line)
try:
text = line.decode(encoding)
except UnicodeDecodeError:
# Not UTF?
logger.error("Unable to guess encoding, skipping")
pass
else:
text = line.decode(encoding)

try:
j = json.loads(line)
j = json.loads(text)
except ValueError:
logger.error("Cannot decode watch event: %s", line)
continue
Expand Down Expand Up @@ -687,7 +710,7 @@ def adjust_attributes_on_object(self, collection, name, things, values, how):
"""
url = self._build_url("%s/%s" % (collection, name))
response = self._get(url)
logger.debug("before modification: %s", response.content)
logger.debug("before modification: %s", response.text)
build_json = response.json()
how(build_json['metadata'], things, values)
response = self._put(url, data=json.dumps(build_json), use_json=True)
Expand Down
12 changes: 6 additions & 6 deletions osbs/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def request(self, url, *args, **kwargs):
return stream

with stream as s:
content = s.req.text
return HttpResponse(s.status_code, s.headers, content)
text = s.req.text
return HttpResponse(s.status_code, s.headers, text)
except requests.exceptions.HTTPError as ex:
raise OsbsNetworkException(url, str(ex), ex.response.status_code,
cause=ex, traceback=sys.exc_info()[2])
Expand Down Expand Up @@ -173,13 +173,13 @@ def __exit__(self, exc_type, exc_value, traceback):


class HttpResponse(object):
def __init__(self, status_code, headers, content):
def __init__(self, status_code, headers, text):
self.status_code = status_code
self.headers = headers
self.content = content
self.text = text

def json(self, check=True):
if check and self.status_code not in (0, requests.codes.OK, requests.codes.CREATED):
raise OsbsResponseException(self.content, self.status_code)
raise OsbsResponseException(self.text, self.status_code)

return json.loads(self.content)
return json.loads(self.text)
3 changes: 2 additions & 1 deletion tests/fake_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ def __init__(self, status_code=200, content=b'', headers=None):
self.status_code = status_code
self.content = content
self.headers = headers or {}
self.encoding = 'utf-8'

def iter_lines(self):
yield self.content.decode("utf-8")
yield self.content

def __enter__(self):
return self
Expand Down
30 changes: 16 additions & 14 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,44 +36,46 @@ class Response(object):
def __init__(self, status_code, content=None, iterable=None):
self.status_code = status_code
self.iterable = iterable
self.encoding = 'utf-8'
if content is not None:
self.content = content
self.text = content.decode(self.encoding)

def iter_lines(self):
for line in self.iterable:
yield line


class TestCheckResponse(object):
@pytest.mark.parametrize('content', [None, 'OK'])
@pytest.mark.parametrize('content', [None, b'OK'])
@pytest.mark.parametrize('status_code', [httplib.OK, httplib.CREATED])
def test_check_response_ok(self, status_code, content):
response = Response(status_code, content=content)
check_response(response)

def test_check_response_bad_stream(self, caplog):
iterable = ['iter', 'lines']
iterable = [x.encode('utf-8') for x in [u'itér', u'línes']]
status_code = httplib.CONFLICT
response = Response(status_code, iterable=iterable)
with pytest.raises(OsbsResponseException):
check_response(response)

logged = [l.getMessage() for l in caplog.records()]
assert len(logged) == 1
assert logged[0] == '[{code}] {message}'.format(code=status_code,
message='iterlines')
assert logged[0] == u'[{code}] {message}'.format(code=status_code,
message=u'itérlínes')

def test_check_response_bad_nostream(self, caplog):
status_code = httplib.CONFLICT
content = 'content'
response = Response(status_code, content=content)
text = u'Téxt'
response = Response(status_code, content=text.encode('utf-8'))
with pytest.raises(OsbsResponseException):
check_response(response)

logged = [l.getMessage() for l in caplog.records()]
assert len(logged) == 1
assert logged[0] == '[{code}] {message}'.format(code=status_code,
message=content)
assert logged[0] == u'[{code}] {message}'.format(code=status_code,
message=text)


class TestOpenshift(object):
Expand All @@ -85,10 +87,10 @@ def test_set_labels_on_build(self, openshift): # noqa
ConnectionError('Connection aborted.', httplib.BadStatusLine("''",)),
])
def test_stream_logs_bad_initial_connection(self, openshift, exc):
response = flexmock(status_code=httplib.OK)
response = flexmock(status_code=httplib.OK, encoding='utf-8')
(response
.should_receive('iter_lines')
.and_return(["{'stream': 'foo\n'}"])
.and_return([b"{'stream': 'foo\n'}"])
.and_raise(StopIteration))

wrapped_exc = OsbsNetworkException('http://spam.com', str(exc), status_code=None,
Expand All @@ -109,10 +111,10 @@ def test_stream_logs_bad_initial_connection(self, openshift, exc):
assert len([log for log in logs]) == 1

def test_stream_logs_utf8(self, openshift): # noqa
response = flexmock(status_code=httplib.OK)
response = flexmock(status_code=httplib.OK, encoding='utf-8')
(response
.should_receive('iter_lines')
.and_return(["{'stream': 'Uňícode íš hářd\n'}"])
.and_return([u"{'stream': 'Uňícode íš hářd\n'}".encode('utf-8')])
.and_raise(StopIteration))

(flexmock(openshift)
Expand Down Expand Up @@ -272,10 +274,10 @@ def test_retry_update_attributes(self, openshift,
for status_code in status_codes:
get_response = HttpResponse(httplib.OK,
headers={},
content='{"metadata": {}}')
text='{"metadata": {}}')
put_response = HttpResponse(status_code,
headers={},
content='')
text='')
get_expectation = get_expectation.and_return(get_response)
put_expectation = put_expectation.and_return(put_response)

Expand Down
14 changes: 7 additions & 7 deletions tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class TestHttpSession(object):
def test_single_multi_secure_without_redirs(self, s):
response_single = s.get("https://httpbin.org/get")
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)
assert len(response_single.headers) > 2
assert response_single.headers['content-type'] == 'application/json'
response_multi = s.get("https://httpbin.org/stream/3", stream=True)
Expand All @@ -63,7 +63,7 @@ def test_single_multi_secure_without_redirs(self, s):
def test_single_multi_without_redirs(self, s):
response_single = s.get("http://httpbin.org/get")
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)
response_multi = s.get("http://httpbin.org/stream/3", stream=True)
with response_multi as r:
for line in r.iter_lines():
Expand All @@ -72,7 +72,7 @@ def test_single_multi_without_redirs(self, s):
def test_single_multi_secure(self, s):
response_single = s.get("https://httpbin.org/get", allow_redirects=False)
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)
response_multi = s.get("https://httpbin.org/stream/3", stream=True, allow_redirects=False)
with response_multi as r:
for line in r.iter_lines():
Expand All @@ -81,7 +81,7 @@ def test_single_multi_secure(self, s):
def test_single_multi(self, s):
response_single = s.get("http://httpbin.org/get", allow_redirects=False)
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)
response_multi = s.get("http://httpbin.org/stream/3", stream=True, allow_redirects=False)
with response_multi as r:
for line in r.iter_lines():
Expand All @@ -103,7 +103,7 @@ def test_single_multi_multi(self, s):
response_single = s.get("http://httpbin.org/basic-auth/user/pwd",
username="user", password="pwd")
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)
response = s.get("http://httpbin.org/stream/3", stream=True)
logger.debug(response.headers)
with response as r:
Expand All @@ -123,12 +123,12 @@ def test_multi_single(self, s):
logger.debug(line)
response_single = s.get("http://httpbin.org/get")
logger.debug(response_single.headers)
logger.debug(response_single.content)
logger.debug(response_single.text)

def test_utf8_encoding(self, s):
response_multi = s.get("http://httpbin.org/encoding/utf8")
logger.debug(response_multi.headers)
logger.debug(response_multi.content)
logger.debug(response_multi.text)

def test_raise(self, s):
with pytest.raises(RuntimeError):
Expand Down

0 comments on commit fc5ea18

Please sign in to comment.