Skip to content

Commit 8602a58

Browse files
committed
Fix processing of upload data in debug=True mode
1 parent 340cdf0 commit 8602a58

File tree

3 files changed

+35
-17
lines changed

3 files changed

+35
-17
lines changed

grab/base.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ def default_config():
186186

187187
class Grab(DeprecatedThings):
188188

189-
__slots__ = ('request_head', 'request_log', 'request_body',
189+
__slots__ = ('request_head', 'request_body',
190+
#'request_log',
190191
'proxylist', 'config',
191192
'transport',
192193
'transport_param', 'request_method', 'request_counter',
@@ -200,7 +201,8 @@ class Grab(DeprecatedThings):
200201

201202
# Attributes which should be processed when a clone
202203
# of a Grab instance is being created
203-
clonable_attributes = ('request_head', 'request_log', 'request_body',
204+
clonable_attributes = ('request_head', 'request_body',
205+
#'request_log',
204206
'proxylist')
205207

206208
# Complex config items which point to mutable objects
@@ -272,7 +274,7 @@ def reset(self):
272274
"""
273275

274276
self.request_head = None
275-
self.request_log = None
277+
#self.request_log = None
276278
self.request_body = None
277279
self.request_method = None
278280
self.transport.reset()
@@ -580,7 +582,7 @@ def copy_request_data(self):
580582
# TODO: Maybe request object?
581583
self.request_head = self.transport.request_head
582584
self.request_body = self.transport.request_body
583-
self.request_log = self.transport.request_log
585+
#self.request_log = self.transport.request_log
584586

585587
def setup_document(self, content, **kwargs):
586588
"""
@@ -666,15 +668,15 @@ def save_dumps(self):
666668
thread_name = '-%s' % thread_name
667669
file_name = os.path.join(self.config['log_dir'], '%02d%s.log' % (
668670
self.request_counter, thread_name))
669-
with open(file_name, 'w') as out:
670-
out.write('Request headers:\n')
671+
with open(file_name, 'wb') as out:
672+
out.write(b'Request headers:\n')
671673
out.write(self.request_head)
672-
out.write('\n')
673-
out.write('Request body:\n')
674+
out.write(b'\n')
675+
out.write(b'Request body:\n')
674676
out.write(self.request_body)
675-
out.write('\n\n')
676-
out.write('Response headers:\n')
677-
out.write(self.doc.head.decode('ascii'))
677+
out.write(b'\n\n')
678+
out.write(b'Response headers:\n')
679+
out.write(self.doc.head)
678680

679681
file_extension = 'html'
680682
file_name = os.path.join(self.config['log_dir'], '%02d%s.%s' % (

grab/transport/curl.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,9 @@ def reset(self):
104104
self.verbose_logging = False
105105

106106
# Maybe move to super-class???
107-
self.request_head = ''
108-
self.request_body = ''
109-
self.request_log = ''
107+
self.request_head = b''
108+
self.request_body = b''
109+
#self.request_log = ''
110110

111111
def header_processor(self, chunk):
112112
"""
@@ -156,9 +156,18 @@ def debug_processor(self, _type, text):
156156
"""
157157

158158
if _type == pycurl.INFOTYPE_HEADER_OUT:
159+
if isinstance(text, six.text_type):
160+
text = text.encode('utf-8')
159161
self.request_head += text
160162

161163
if _type == pycurl.INFOTYPE_DATA_OUT:
164+
# Until version 7.19.5.2
165+
# pycurl gives unicode in `text` variable
166+
# WTF??? Probably this code would fail
167+
# or do unexpected things if you use
168+
# pycurl<7.19.5.2
169+
if isinstance(text, six.text_type):
170+
text = text.encode('utf-8')
162171
self.request_body += text
163172

164173
"""

test/grab_debug.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,9 @@ def test_log_dir_request_content_headers_and_post(self):
8787
g.go(self.server.get_url())
8888
self.assertEqual(sorted(os.listdir(tmp_dir)), ['01.html', '01.log'])
8989
log_file_content = open(os.path.join(tmp_dir, '01.log')).read()
90-
if not 'x-name' in log_file_content.lower():
91-
print('CONTENT OF 01.log:')
92-
print(log_file_content)
90+
#if not 'x-name' in log_file_content.lower():
91+
# print('CONTENT OF 01.log:')
92+
# print(log_file_content)
9393
self.assertTrue('x-name' in log_file_content.lower())
9494
self.assertTrue('xxx=post' in log_file_content.lower())
9595

@@ -100,6 +100,13 @@ def test_debug_post(self):
100100
g.go(self.server.get_url())
101101
self.assertEqual(b'x', g.doc.body)
102102

103+
def test_debug_nonascii_post(self):
104+
g = build_grab(debug=True)
105+
g.setup(post=u'фыва'.encode('cp1251'))
106+
g.go(self.server.get_url())
107+
g.setup(multipart_post=[('x', u'фыва'.encode('cp1251'))])
108+
g.go(self.server.get_url())
109+
103110
def test_debug_post_integer_bug(self):
104111
g = build_grab(debug_post=True)
105112
g.setup(post={'foo': 3})

0 commit comments

Comments
 (0)