Skip to content

Commit

Permalink
Fix processing of upload data in debug=True mode
Browse files Browse the repository at this point in the history
  • Loading branch information
lorien committed Nov 22, 2015
1 parent 340cdf0 commit 8602a58
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 17 deletions.
24 changes: 13 additions & 11 deletions grab/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ def default_config():

class Grab(DeprecatedThings):

__slots__ = ('request_head', 'request_log', 'request_body',
__slots__ = ('request_head', 'request_body',
#'request_log',
'proxylist', 'config',
'transport',
'transport_param', 'request_method', 'request_counter',
Expand All @@ -200,7 +201,8 @@ class Grab(DeprecatedThings):

# Attributes which should be processed when a clone
# of a Grab instance is being created
clonable_attributes = ('request_head', 'request_log', 'request_body',
clonable_attributes = ('request_head', 'request_body',
#'request_log',
'proxylist')

# Complex config items which point to mutable objects
Expand Down Expand Up @@ -272,7 +274,7 @@ def reset(self):
"""

self.request_head = None
self.request_log = None
#self.request_log = None
self.request_body = None
self.request_method = None
self.transport.reset()
Expand Down Expand Up @@ -580,7 +582,7 @@ def copy_request_data(self):
# TODO: Maybe request object?
self.request_head = self.transport.request_head
self.request_body = self.transport.request_body
self.request_log = self.transport.request_log
#self.request_log = self.transport.request_log

def setup_document(self, content, **kwargs):
"""
Expand Down Expand Up @@ -666,15 +668,15 @@ def save_dumps(self):
thread_name = '-%s' % thread_name
file_name = os.path.join(self.config['log_dir'], '%02d%s.log' % (
self.request_counter, thread_name))
with open(file_name, 'w') as out:
out.write('Request headers:\n')
with open(file_name, 'wb') as out:
out.write(b'Request headers:\n')
out.write(self.request_head)
out.write('\n')
out.write('Request body:\n')
out.write(b'\n')
out.write(b'Request body:\n')
out.write(self.request_body)
out.write('\n\n')
out.write('Response headers:\n')
out.write(self.doc.head.decode('ascii'))
out.write(b'\n\n')
out.write(b'Response headers:\n')
out.write(self.doc.head)

file_extension = 'html'
file_name = os.path.join(self.config['log_dir'], '%02d%s.%s' % (
Expand Down
15 changes: 12 additions & 3 deletions grab/transport/curl.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def reset(self):
self.verbose_logging = False

# Maybe move to super-class???
self.request_head = ''
self.request_body = ''
self.request_log = ''
self.request_head = b''
self.request_body = b''
#self.request_log = ''

def header_processor(self, chunk):
"""
Expand Down Expand Up @@ -156,9 +156,18 @@ def debug_processor(self, _type, text):
"""

if _type == pycurl.INFOTYPE_HEADER_OUT:
if isinstance(text, six.text_type):
text = text.encode('utf-8')
self.request_head += text

if _type == pycurl.INFOTYPE_DATA_OUT:
# Until version 7.19.5.2,
# pycurl gives unicode in the `text` variable
# (WTF???). This code will probably fail
# or do unexpected things if you use
# pycurl<7.19.5.2
if isinstance(text, six.text_type):
text = text.encode('utf-8')
self.request_body += text

"""
Expand Down
13 changes: 10 additions & 3 deletions test/grab_debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def test_log_dir_request_content_headers_and_post(self):
g.go(self.server.get_url())
self.assertEqual(sorted(os.listdir(tmp_dir)), ['01.html', '01.log'])
log_file_content = open(os.path.join(tmp_dir, '01.log')).read()
if not 'x-name' in log_file_content.lower():
print('CONTENT OF 01.log:')
print(log_file_content)
#if not 'x-name' in log_file_content.lower():
# print('CONTENT OF 01.log:')
# print(log_file_content)
self.assertTrue('x-name' in log_file_content.lower())
self.assertTrue('xxx=post' in log_file_content.lower())

Expand All @@ -100,6 +100,13 @@ def test_debug_post(self):
g.go(self.server.get_url())
self.assertEqual(b'x', g.doc.body)

def test_debug_nonascii_post(self):
g = build_grab(debug=True)
g.setup(post=u'фыва'.encode('cp1251'))
g.go(self.server.get_url())
g.setup(multipart_post=[('x', u'фыва'.encode('cp1251'))])
g.go(self.server.get_url())

def test_debug_post_integer_bug(self):
g = build_grab(debug_post=True)
g.setup(post={'foo': 3})
Expand Down

0 comments on commit 8602a58

Please sign in to comment.