Skip to content

Commit

Permalink
Return Unicode text from log-fetching methods
Browse files Browse the repository at this point in the history
When streaming or fetching logs, return Unicode text by decoding
the response according to the HTTP headers.

Use response.text to automatically decode the HTTP response when not
streaming it.

When streaming it, response.iter_lines() returns the encoded bytes. Use
the response headers to decide how to decode it; if these are not
present, use guess_json_utf() if JSON is expected, and guess 'utf-8'
otherwise.

We always decode responses before passing them back to the client
because only the response headers say how to interpret the bytes.

Signed-off-by: Tim Waugh <twaugh@redhat.com>
  • Loading branch information
twaugh committed Jun 13, 2017
1 parent fc3e678 commit b31d4cb
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 55 deletions.
16 changes: 12 additions & 4 deletions osbs/cli/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,26 @@ class IterLinesSaver(object):
Wrap HttpStream.iter_lines() and save responses.
"""

def __init__(self, path, fn):
def __init__(self, encoding, path, fn):
self.encoding = encoding or 'utf-8'
self.path = path
self.fn = fn
self.line = 0

def iter_lines(self):
encoding = self.encoding
for line in self.fn():
path = "{f}-{n:0>3}.json".format(f=self.path, n=self.line)
logger.debug("capturing to %s", path)
try:
text = line.decode(encoding, errors='replace')
except TypeError:
# py26
text = line.decode(encoding)

with open(path, "w") as outf:
try:
json.dump(json.loads(line), outf, sort_keys=True, indent=4)
json.dump(json.loads(text), outf, sort_keys=True, indent=4)
except ValueError:
outf.write(line)

Expand Down Expand Up @@ -68,15 +76,15 @@ def request(self, url, method, *args, **kwargs):

if kwargs.get('stream', False):
stream = self.fn(url, method, *args, **kwargs)
stream.iter_lines = IterLinesSaver(path,
stream.iter_lines = IterLinesSaver(stream.encoding, path,
stream.iter_lines).iter_lines
return stream
else:
response = self.fn(url, method, *args, **kwargs)
logger.debug("capturing to %s.json", path)
with open(path + ".json", "w") as outf:
try:
json.dump(json.loads(response.content), outf,
json.dump(json.loads(response.text), outf,
sort_keys=True, indent=4)
except ValueError:
outf.write(response.content)
Expand Down
52 changes: 40 additions & 12 deletions osbs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
OsbsWatchBuildNotFound, OsbsAuthException)
from osbs.utils import graceful_chain_get
from requests.exceptions import ConnectionError
from requests.utils import guess_json_utf

try:
# py2
Expand All @@ -44,13 +45,19 @@

def check_response(response):
if response.status_code not in (httplib.OK, httplib.CREATED):
if hasattr(response, 'content'):
content = response.content
if hasattr(response, 'text'):
text = response.text
else:
content = ''.join(response.iter_lines())
encoding = response.encoding or 'utf-8'
content = b''.join(response.iter_lines())
try:
text = content.decode(encoding, errors='replace')
except TypeError:
# py26
text = content.decode(encoding)

logger.error("[%d] %s", response.status_code, content)
raise OsbsResponseException(message=content, status_code=response.status_code)
logger.error("[%d] %s", response.status_code, text)
raise OsbsResponseException(message=text, status_code=response.status_code)


def retry_on_conflict(func, sleep_seconds=0.5, max_attempts=10):
Expand Down Expand Up @@ -413,7 +420,7 @@ def stream_logs(self, build_id):
stream logs from build
:param build_id: str
:return: iterator
:return: iterator yielding Unicode text
"""
kwargs = {'follow': 1}

Expand All @@ -433,13 +440,20 @@ def stream_logs(self, build_id):
headers={'Connection': 'close'})
check_response(response)

encoding = response.encoding
if not encoding:
# The HTTP response headers didn't tell us the
# encoding; make a reasonable guess
encoding = 'utf-8'

for line in response.iter_lines():
last_activity = time.time()
yield line
yield line.decode(encoding)

# NOTE1: If self._get causes ChunkedEncodingError, ConnectionError,
# or IncompleteRead to be raised, they'll be wrapped in
# OsbsNetworkException or OsbsException
# NOTE2: If decode_json or iter_lines causes ChunkedEncodingError, ConnectionError,
# NOTE2: If iter_lines causes ChunkedEncodingError, ConnectionError,
# or IncompleteRead to be raised, it'll simply be silenced.
# NOTE3: An exception may be raised from
# check_response(). In this case, exception will be
Expand Down Expand Up @@ -467,7 +481,7 @@ def logs(self, build_id, follow=False, build_json=None, wait_if_missing=False):
:param follow: bool, fetch logs as they come?
:param build_json: dict, to save one get-build query
:param wait_if_missing: bool, if build doesn't exist, wait
:return: None, str or iterator
:return: None, Unicode text or iterator yielding Unicode text
"""
# does build exist?
try:
Expand All @@ -494,7 +508,7 @@ def logs(self, build_id, follow=False, build_json=None, wait_if_missing=False):
buildlogs_url = self._build_url("builds/%s/log/" % build_id)
response = self._get(buildlogs_url, headers={'Connection': 'close'})
check_response(response)
return response.content
return response.text

def list_builds(self, build_config_id=None, koji_task_id=None,
field_selector=None):
Expand Down Expand Up @@ -583,10 +597,24 @@ def watch_resource(self, resource_type, resource_name=None, **request_args):
while True:
with self._get(url, stream=True, headers={'Connection': 'close'}) as response:
check_response(response)
encoding = response.encoding
for line in response.iter_lines():
logger.debug(line)

if not encoding and len(line) > 3:
encoding = guess_json_utf(line)
try:
text = line.decode(encoding)
except UnicodeDecodeError:
# Not UTF?
logger.error("Unable to guess encoding, skipping")
encoding = None
continue
else:
text = line.decode(encoding)

try:
j = json.loads(line)
j = json.loads(text)
except ValueError:
logger.error("Cannot decode watch event: %s", line)
continue
Expand Down Expand Up @@ -687,7 +715,7 @@ def adjust_attributes_on_object(self, collection, name, things, values, how):
"""
url = self._build_url("%s/%s" % (collection, name))
response = self._get(url)
logger.debug("before modification: %s", response.content)
logger.debug("before modification: %s", response.text)
build_json = response.json()
how(build_json['metadata'], things, values)
response = self._put(url, data=json.dumps(build_json), use_json=True)
Expand Down
18 changes: 8 additions & 10 deletions osbs/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def request(self, url, *args, **kwargs):
return stream

with stream as s:
content = s.req.text
return HttpResponse(s.status_code, s.headers, content)
text = s.req.text
return HttpResponse(s.status_code, s.headers, text)
except requests.exceptions.HTTPError as ex:
raise OsbsNetworkException(url, str(ex), ex.response.status_code,
cause=ex, traceback=sys.exc_info()[2])
Expand Down Expand Up @@ -134,17 +134,15 @@ def __init__(self, url, method, data=None, kerberos_auth=False,

self.headers = self.req.headers
self.status_code = self.req.status_code
self.encoding = self.req.encoding

def _get_received_data(self):
return self.req.text

def iter_chunks(self):
return self.req.iter_content(None)

def iter_lines(self):
kwargs = {
'decode_unicode': True
}
def iter_lines(self, **kwargs):
if requests.__version__.startswith('2.6.'):
kwargs['chunk_size'] = 1
try:
Expand Down Expand Up @@ -173,13 +171,13 @@ def __exit__(self, exc_type, exc_value, traceback):


class HttpResponse(object):
def __init__(self, status_code, headers, content):
def __init__(self, status_code, headers, text):
self.status_code = status_code
self.headers = headers
self.content = content
self.text = text

def json(self, check=True):
if check and self.status_code not in (0, requests.codes.OK, requests.codes.CREATED):
raise OsbsResponseException(self.content, self.status_code)
raise OsbsResponseException(self.text, self.status_code)

return json.loads(self.content)
return json.loads(self.text)
5 changes: 3 additions & 2 deletions tests/fake_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ def __init__(self, status_code=200, content=b'', headers=None):
self.status_code = status_code
self.content = content
self.headers = headers or {}
self.encoding = 'utf-8'

def iter_lines(self):
yield self.content.decode("utf-8")
yield self.content

def __enter__(self):
return self
Expand Down Expand Up @@ -395,7 +396,7 @@ def get_response_content(self, file_name):
json_path = os.path.join(this_dir, "mock_jsons", self.version, file_name)
logger.debug("File: %s", json_path)
with open(json_path, "r") as fd:
return fd.read().encode("utf-8")
return fd.read()

def response_mapping(self, url_path, method):
key, value_to_use = self.lookup(url_path)
Expand Down
2 changes: 1 addition & 1 deletion tests/mock_jsons/0.5.4/build_test-build-123_logs.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
line 1
lîne 1
2 changes: 1 addition & 1 deletion tests/mock_jsons/1.0.4/build_test-build-123_logs.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
line 1
lîne 1
13 changes: 9 additions & 4 deletions tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""
Copyright (c) 2015 Red Hat, Inc
All rights reserved.
Expand Down Expand Up @@ -831,16 +832,20 @@ def test_login_api_kerberos(self, osbs): # noqa
# osbs is a fixture here
def test_build_logs_api(self, osbs): # noqa
logs = osbs.get_build_logs(TEST_BUILD)
assert isinstance(logs, six.string_types)
assert logs == "line 1"
# Logs are returned as Unicode text
assert isinstance(logs, six.text_type)
assert logs == u"lîne 1"

# osbs is a fixture here
def test_build_logs_api_follow(self, osbs): # noqa
logs = osbs.get_build_logs(TEST_BUILD, follow=True)
assert isinstance(logs, GeneratorType)
assert next(logs) == "line 1"
text = next(logs)
# Logs are returned as Unicode text
assert isinstance(text, six.text_type)
assert text == u"lîne 1"
with pytest.raises(StopIteration):
assert next(logs)
assert isinstance(next(logs), six.text_type)

# osbs is a fixture here
def test_pause_builds(self, osbs): # noqa
Expand Down
Loading

0 comments on commit b31d4cb

Please sign in to comment.