Skip to content

Commit

Permalink
http: handle receiving incomplete utf-8 sequences
Browse files Browse the repository at this point in the history
Whenever we received a chunk that ended with the start of a multi-byte UTF-8
character, we'd get a UnicodeDecodeError. The decoding is now done using
codecs.IncrementalDecoder, which decodes what it can and remembers the
rest for the next time we use it.
  • Loading branch information
mmilata committed Aug 17, 2015
1 parent 6d51801 commit 00fcc10
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 3 deletions.
7 changes: 5 additions & 2 deletions osbs/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import sys
import json
import time
import codecs
import logging
from io import BytesIO

Expand Down Expand Up @@ -160,6 +161,7 @@ def __init__(self, url, method, data=None, kerberos_auth=False,
self.headers = None
self.response_buffer = BytesIO()
self.headers_buffer = BytesIO()
self.response_decoder = None

self.url = url
headers = headers or {}
Expand Down Expand Up @@ -230,6 +232,7 @@ def __init__(self, url, method, data=None, kerberos_auth=False,

self.headers = parse_headers(self.headers_buffer.getvalue())
self.status_code = self.c.getinfo(pycurl.HTTP_CODE)
self.response_decoder = codecs.getincrementaldecoder(self.encoding)()

def _perform(self):
while True:
Expand Down Expand Up @@ -261,10 +264,10 @@ def _any_data_received(self):
return self.response_buffer.tell() != 0

def _get_received_data(self):
# Drain the bytes accumulated in self.response_buffer and return them as
# decoded text.
#
# NOTE(review): this is a rendered diff hunk whose +/- markers were lost, so
# the pre-commit and post-commit lines appear together. Going by the commit
# message and the "5 additions & 2 deletions" count, the one-shot
# `.decode(self.encoding)` line and the bare `return result` are the removed
# lines; the raw `getvalue()` line and the `response_decoder.decode(...)`
# return are the added ones.

# Old behaviour: decode the whole buffer in one shot. Per the commit message,
# this raised UnicodeDecodeError when a chunk ended with the start of a
# multi-byte character.
result = self.response_buffer.getvalue().decode(self.encoding)
# New behaviour: take the raw bytes and leave decoding to the incremental
# decoder used in the final return below.
result = self.response_buffer.getvalue()
# Empty the buffer and rewind it so the next received chunk is written from
# offset 0.
self.response_buffer.truncate(0)
self.response_buffer.seek(0)
# Old behaviour: return the already-decoded text.
return result
# New behaviour: the incremental decoder decodes what it can and keeps any
# trailing partial sequence for the next call; `final` is set from
# self.finished.
return self.response_decoder.decode(result, final=self.finished)

def iter_chunks(self):
while True:
Expand Down
43 changes: 42 additions & 1 deletion tests/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytest
import logging
import six
import pycurl
from .fake_api import openshift, osbs
from osbs.build.manipulate import DockJsonManipulator
from osbs.build.build_response import BuildResponse
Expand All @@ -30,7 +31,8 @@
from osbs.constants import PROD_WITH_SECRET_BUILD_TYPE
from osbs.exceptions import OsbsValidationException
from osbs import utils
from osbs.http import HttpResponse, parse_headers
from osbs.http import HttpSession, HttpStream, HttpResponse, parse_headers
import osbs.http as osbs_http
from tests.constants import TEST_BUILD, TEST_BUILD_CONFIG, TEST_LABEL, TEST_LABEL_VALUE
from tests.constants import TEST_GIT_URI, TEST_GIT_REF, TEST_GIT_BRANCH, TEST_USER
from tests.constants import TEST_COMPONENT, TEST_TARGET, TEST_ARCH
Expand Down Expand Up @@ -988,3 +990,42 @@ def test_force_str():
s = u"s"
assert str_on_2_unicode_on_3(s) == b
assert str_on_2_unicode_on_3(b) == b


@pytest.mark.parametrize('chunks,expected_content', [
    ([b'foo', b'', b'bar', b'baz'], u'foobarbaz'),
    ([b'a', b'b', b'\xc4', b'\x8d', b'x'], u'ab\u010dx'),
    ([b'\xe2', b'\x8a', b'\x86'], u'\u2286'),
    ([b'\xe2\x8a', b'\x86'], u'\u2286'),
    ([b'\xe2', b'\x8a\x86'], u'\u2286'),
    ([b'aaaa', b'\xe2\x8a', b'\x86'], u'aaaa\u2286'),
    ([b'aaaa\xe2\x8a', b'\x86'], u'aaaa\u2286'),
    ([b'\xe2\x8a', b'\x86ffff'], u'\u2286ffff'),
])
def test_http_multibyte_decoding(chunks, expected_content):
    """Check that a response split in the middle of a multi-byte character
    still decodes to the expected text.

    :param chunks: list of byte strings fed into the response buffer one
                   per ``_perform`` call; some end part-way through a
                   multi-byte UTF-8 sequence
    :param expected_content: unicode text ``r.content`` must equal once all
                             chunks have been delivered
    """
    class Whatever(object):
        """Stand-in for pycurl handles: any attribute access or call
        returns the object itself, so arbitrary call chains succeed."""

        def __getattr__(self, name):
            return self

        def __call__(self, *args, **kwargs):
            return self

    flexmock(pycurl).should_receive('Curl').and_return(Whatever())
    flexmock(pycurl).should_receive('CurlMulti').and_return(Whatever())
    # Headers are mocked to declare a UTF-8 charset; presumably this is
    # what the response encoding is derived from -- confirm in osbs.http.
    (flexmock(osbs_http).should_receive('parse_headers')
     .and_return({'content-type': 'application/json; charset=utf-8'}))
    flexmock(HttpStream, _select=lambda: None)

    # Consume a private copy: the parametrized list is created once at
    # collection time, so popping from it directly would leave it empty
    # for any re-run of the same test case.
    pending = list(chunks)

    def mock_perform(self):
        # Deliver one chunk per call; flag the stream as finished once
        # there is nothing left to deliver.
        if pending:
            self.response_buffer.write(pending.pop(0))
        else:
            self.finished = True

    # Save the original before entering ``try`` so the ``finally`` clause
    # can never reference an unbound name.
    orig_perform = HttpStream._perform
    HttpStream._perform = mock_perform
    try:
        r = HttpSession(verbose=True).get('http://')
        assert r.content == expected_content
    finally:
        HttpStream._perform = orig_perform

0 comments on commit 00fcc10

Please sign in to comment.