Skip to content

Commit

Permalink
Code cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
lorien committed Feb 14, 2017
1 parent b907acf commit 2fa0326
Show file tree
Hide file tree
Showing 33 changed files with 208 additions and 166 deletions.
3 changes: 2 additions & 1 deletion grab/util/log.py
Expand Up @@ -34,7 +34,8 @@ def default_logging(grab_log=None,#'/tmp/grab.log',

class PycurlSigintHandler(TextIOBase):
# TextIOBase to avoid errors in py36: https://bugs.python.org/issue29130
def __init__(self):
def __init__(self, *args, **kwargs):
super(PycurlSigintHandler, self).__init__(*args, **kwargs)
self.orig_stderr = None
self.buf = []

Expand Down
1 change: 1 addition & 0 deletions pylintrc
@@ -1,6 +1,7 @@
[MASTER]
jobs=4
extension-pkg-whitelist=pycurl,MySQLdb,lxml
ignore=invalid_import.py

[TYPECHECK]
ignored-modules=six.moves.urllib, six.moves.http_cookiejar, six.moves.queue, six.moves.socketserver, six.moves.SimpleHTTPServer
Expand Down
1 change: 1 addition & 0 deletions requirements_dev.txt
Expand Up @@ -19,3 +19,4 @@ pycurl;platform_system!="Windows"
item;platform_system!="Windows"
pylint
psutil
tox
1 change: 1 addition & 0 deletions runtest.py
Expand Up @@ -40,6 +40,7 @@
'test.grab_limit_option',
'test.grab_charset_issue',
'test.grab_pickle', # FIXME: fix tests excluded for urllib3
'test.proxy',
# *** Extensions
'test.ext_text',
'test.ext_rex',
Expand Down
26 changes: 22 additions & 4 deletions test/ext_form.py
Expand Up @@ -95,29 +95,37 @@ def test_choose_form(self):

# check results
self.g.choose_form(0)
# pylint: disable=no-member,protected-access
self.assertEqual('form', self.g.doc._lxml_form.tag)
self.assertEqual('search_form', self.g.doc._lxml_form.get('id'))
# pylint: enable=no-member,protected-access

# reset current form
self.g.doc._lxml_form = None
self.g.doc._lxml_form = None # pylint: disable=protected-access

self.g.choose_form(id='common_form')
# pylint: disable=no-member,protected-access
self.assertEqual('form', self.g.doc._lxml_form.tag)
self.assertEqual('common_form', self.g.doc._lxml_form.get('id'))
# pylint: enable=no-member,protected-access

# reset current form
self.g.doc._lxml_form = None
self.g.doc._lxml_form = None # pylint: disable=protected-access

self.g.choose_form(name='dummy')
# pylint: disable=no-member,protected-access
self.assertEqual('form', self.g.doc._lxml_form.tag)
self.assertEqual('dummy', self.g.doc._lxml_form.get('name'))
# pylint: enable=no-member,protected-access

# reset current form
self.g.doc._lxml_form = None
self.g.doc._lxml_form = None # pylint: disable=protected-access

self.g.choose_form(xpath='//form[contains(@action, "/dummy")]')
# pylint: disable=no-member,protected-access
self.assertEqual('form', self.g.doc._lxml_form.tag)
self.assertEqual('dummy', self.g.doc._lxml_form.get('name'))
# pylint: enable=no-member,protected-access

def assertEqualQueryString(self, qs1, qs2):
args1 = set([(x, y) for x, y in parse_qsl(qs1)])
Expand Down Expand Up @@ -173,23 +181,33 @@ def test_set_methods(self):
self.server.response['get.data'] = FORMS
g.go(self.server.get_url())

# pylint: disable=protected-access
self.assertEqual(g.doc._lxml_form, None)
# pylint: enable=protected-access

g.set_input('gender', '1')
# pylint: disable=no-member,protected-access
self.assertEqual('common_form', g.doc._lxml_form.get('id'))
# pylint: enable=no-member,protected-access

# pylint: disable=no-member,protected-access
self.assertRaises(KeyError, lambda: g.set_input('query', 'asdf'))
# pylint: enable=no-member,protected-access

g.doc._lxml_form = None
g.doc._lxml_form = None # pylint: disable=protected-access
g.set_input_by_id('search_box', 'asdf')
# pylint: disable=no-member,protected-access
self.assertEqual('search_form', g.doc._lxml_form.get('id'))
# pylint: enable=no-member,protected-access

g.choose_form(xpath='//form[@id="common_form"]')
g.set_input_by_number(0, 'asdf')

# pylint: disable=no-member,protected-access
g.doc._lxml_form = None
g.set_input_by_xpath('//*[@name="gender"]', '2')
self.assertEqual('common_form', g.doc._lxml_form.get('id'))
# pylint: enable=no-member,protected-access

def test_html_without_forms(self):
g = build_grab()
Expand Down
2 changes: 1 addition & 1 deletion test/grab_charset.py
Expand Up @@ -38,7 +38,7 @@ def test_document_charset_lowercase(self):
self.server.response['charset'] = 'UTF-8'
g = build_grab()
g.go(self.server.get_url())
self.assertEquals('utf-8', g.doc.charset)
self.assertEqual('utf-8', g.doc.charset)


def test_dash_issue(self):
Expand Down
4 changes: 2 additions & 2 deletions test/grab_cookies.py
Expand Up @@ -26,8 +26,8 @@ def test_multiple_cookies(self):
g.setup(cookies={'foo': '1', 'bar': '2'})
g.go(self.server.get_url())
self.assertEqual(
set(map(lambda item: item.strip(),
self.server.request['headers']['Cookie'].split('; '))),
set([x.strip() for x in
self.server.request['headers']['Cookie'].split('; ')]),
set(['foo=1', 'bar=2']))

def test_session(self):
Expand Down
6 changes: 4 additions & 2 deletions test/grab_deprecated.py
Expand Up @@ -112,8 +112,10 @@ def test_pyquery(self):
data = b'''<form><input type="text" id="f" name="foo" value="val">
</form>'''
g = build_grab(data)
# pylint: disable=no-member
self.assertEqual(g.doc('//input').node().value,
g.pyquery('input')[0].value)
# pylint: enable=no-member

def test_assert_xpath(self):
data = b'''<h1>tet</h1>'''
Expand Down Expand Up @@ -141,10 +143,10 @@ def test_find_link_rex(self):
data = b'''<a href="http://ya.ru/">ya.ru</a>'''
g = build_grab(data)
self.assertEqual('http://ya.ru/',
g.find_link_rex(re.compile('ya\.ru'),
g.find_link_rex(re.compile(r'ya\.ru'),
make_absolute=True))
self.assertEqual(None,
g.find_link_rex(re.compile('google\.ru'),
g.find_link_rex(re.compile(r'google\.ru'),
make_absolute=True))

def test_find_link(self):
Expand Down
18 changes: 10 additions & 8 deletions test/grab_proxy.py
Expand Up @@ -55,7 +55,7 @@ def test_load_proxylist(self):
g.load_proxylist(tmp_file, 'text_file', auto_change=False)
self.assertEqual(g.config['proxy_auto_change'], False)
servers = set()
for x in six.moves.range(10):
for _ in six.moves.range(10):
g.go('http://yandex.ru')
servers.add(g.config['proxy'])
self.assertEqual(len(servers), 1)
Expand Down Expand Up @@ -90,7 +90,9 @@ def test_change_proxy(self):

g.load_proxylist(tmp_file, 'text_file', auto_init=True,
auto_change=False)
# pylint: disable=unsupported-membership-test
self.assertTrue('server-' in g.config['proxy'])
# pylint: enable=unsupported-membership-test

def test_list_proxysource(self):
g = build_grab()
Expand Down Expand Up @@ -133,30 +135,30 @@ def test_global_proxy_userpwd_argument(self):
g = build_grab()
items = [PROXY1]
g.proxylist.load_list(items)
self.assertEquals(g.proxylist.get_next_proxy().username, None)
self.assertEqual(g.proxylist.get_next_proxy().username, None)

g.proxylist.load_list(items, proxy_userpwd='foo:bar')
proxy = g.proxylist.get_next_proxy()
self.assertEquals(proxy.username, 'foo')
self.assertEquals(proxy.password, 'bar')
self.assertEqual(proxy.username, 'foo')
self.assertEqual(proxy.password, 'bar')

items = [PROXY1 + ':admin:test', PROXY2]
g.proxylist.load_list(items, proxy_userpwd='foo:bar')
proxy = g.proxylist.get_next_proxy()
self.assertEquals(proxy.username, 'admin')
self.assertEquals(proxy.password, 'test')
self.assertEqual(proxy.username, 'admin')
self.assertEqual(proxy.password, 'test')

def test_global_proxy_type_argument(self):
g = build_grab()
items = [PROXY1]

g.proxylist.load_list(items)
proxy = g.proxylist.get_next_proxy()
self.assertEquals(proxy.proxy_type, 'http')
self.assertEqual(proxy.proxy_type, 'http')

g.proxylist.load_list(items, proxy_type='socks')
proxy = g.proxylist.get_next_proxy()
self.assertEquals(proxy.proxy_type, 'socks')
self.assertEqual(proxy.proxy_type, 'socks')

def test_setup_with_proxyline(self):
g = build_grab()
Expand Down
14 changes: 7 additions & 7 deletions test/grab_request.py
Expand Up @@ -13,20 +13,20 @@ def setUp(self):
def test_get_method(self):
g = build_grab()
g.go(self.server.get_url())
self.assertEquals('GET', self.server.request['method'])
self.assertEqual('GET', self.server.request['method'])

def test_delete_method(self):
g = build_grab()
g.setup(method='delete')
g.go(self.server.get_url())
self.assertEquals('DELETE', self.server.request['method'])
self.assertEqual('DELETE', self.server.request['method'])

def test_put_method(self):
g = build_grab()
g.setup(method='put', post=b'abc')
g.go(self.server.get_url())
self.assertEquals('PUT', self.server.request['method'])
self.assertEquals('3', self.server.request['headers']['Content-Length'])
self.assertEqual('PUT', self.server.request['method'])
self.assertEqual('3', self.server.request['headers']['Content-Length'])

def test_head_with_invalid_bytes(self):
def callback(server):
Expand Down Expand Up @@ -62,13 +62,13 @@ def test_options_method(self):
g = build_grab()
g.setup(method='options', post=b'abc')
g.go(self.server.get_url())
self.assertEquals('OPTIONS', self.server.request['method'])
self.assertEquals('3', self.server.request['headers']['Content-Length'])
self.assertEqual('OPTIONS', self.server.request['method'])
self.assertEqual('3', self.server.request['headers']['Content-Length'])

g = build_grab()
g.setup(method='options')
g.go(self.server.get_url())
self.assertEquals('OPTIONS', self.server.request['method'])
self.assertEqual('OPTIONS', self.server.request['method'])
self.assertTrue('Content-Length' not in self.server.request['headers'])

@exclude_grab_transport('urllib3')
Expand Down
12 changes: 9 additions & 3 deletions test/grab_response_body_processing.py
Expand Up @@ -23,7 +23,9 @@ def test_body_inmemory_false(self):
self.assertTrue(os.path.exists(g.response.body_path))
self.assertTrue(tmp_dir in g.response.body_path)
self.assertEqual(b'foo', open(g.response.body_path, 'rb').read())
# pylint: disable=protected-access
self.assertEqual(g.response._bytes_body, None)
# pylint: enable=protected-access
old_path = g.response.body_path

g.go(self.server.get_url())
Expand All @@ -42,29 +44,33 @@ def test_body_inmemory_false(self):
self.assertEqual(os.path.join(tmp_dir, 'music.mp3'),
g.response.body_path)
self.assertEqual(g.response.body, b'foo')
# pylint: disable=protected-access
self.assertEqual(g.response._bytes_body, None)
# pylint: enable=protected-access

def test_body_inmemory_true(self):
g = build_grab()
self.server.response['data'] = b'bar'
g.go(self.server.get_url())
# pylint: disable=protected-access
self.assertEqual(g.response._bytes_body, b'bar')
# pylint: enable=protected-access

def test_assign_unicode_to_body(self):
g = build_grab()
g.doc.body = b'abc'
g.doc.body = b'def'

def bad_func():
with self.assertRaises(GrabMisuseError):
g.doc.body = u'Спутник'

self.assertRaises(GrabMisuseError, bad_func)

def test_empty_response(self):
self.server.response['data'] = b''
g = build_grab()
g.go(self.server.get_url())
# pylint: disable=pointless-statement
g.doc.tree # should not raise exception
# pylint: enable=pointless-statement

#def test_emoji_processing(self):
# #html = u'''
Expand Down
4 changes: 2 additions & 2 deletions test/grab_upload_file.py
Expand Up @@ -51,7 +51,7 @@ def test_upload_content_random_filename(self):
g.doc.submit()
self.assertEqual(data,
self.server.request['files']['image'][0]['body'])
self.assertTrue(
self.assertEqual(
10, len(self.server.request['files']['image'][0]['filename']))
self.assertEqual(
'application/octet-stream',
Expand All @@ -70,7 +70,7 @@ def test_upload_content_content_type(self):
g.doc.submit()
self.assertEqual(data,
self.server.request['files']['image'][0]['body'])
self.assertTrue(
self.assertEqual(
10, len(self.server.request['files']['image'][0]['filename']))
self.assertEqual(
'application/grab',
Expand Down
8 changes: 4 additions & 4 deletions test/grab_url_processing.py
Expand Up @@ -25,13 +25,13 @@ def test_nonascii_path(self):
g = build_grab()
self.server.response['data'] = 'medved'
g.go(self.server.get_url(u'/превед'))
self.assertEquals(b'medved', g.doc.body)
self.assertEquals('/%D0%BF%D1%80%D0%B5%D0%B2%D0%B5%D0%B4',
self.assertEqual(b'medved', g.doc.body)
self.assertEqual('/%D0%BF%D1%80%D0%B5%D0%B2%D0%B5%D0%B4',
self.server.request['path'])

def test_nonascii_query(self):
g = build_grab()
self.server.response['data'] = 'medved'
g.go(self.server.get_url(u'/search?q=превед'))
self.assertEquals(b'medved', g.doc.body)
self.assertEquals(u'превед', self.server.request['args']['q'])
self.assertEqual(b'medved', g.doc.body)
self.assertEqual(u'превед', self.server.request['args']['q'])
37 changes: 16 additions & 21 deletions test/proxy.py
@@ -1,5 +1,5 @@
# coding: utf-8
from test.util import build_grab, get_temp_file
from test.util import build_grab, temp_file
from test.util import BaseGrabTestCase

from grab.proxylist import ProxyList
Expand All @@ -13,12 +13,6 @@ class GrabProxyTestCase(BaseGrabTestCase):
def setUp(self):
self.server.reset()

def generate_plist_file(self, data=DEFAULT_PLIST_DATA):
path = get_temp_file()
with open(path, 'w') as out:
out.write(data)
return path

def test_no_proxy_list(self):
g = build_grab()
self.assertEqual(0, g.proxylist.size())
Expand All @@ -28,8 +22,7 @@ class ProxyListTestCase(BaseGrabTestCase):
def setUp(self):
self.server.reset()

def generate_plist_file(self, data=DEFAULT_PLIST_DATA):
path = get_temp_file()
def generate_plist_file(self, path, data=DEFAULT_PLIST_DATA):
with open(path, 'w') as out:
out.write(data)
return path
Expand All @@ -40,10 +33,11 @@ def test_basic(self):


def test_file_proxy_source(self):
pl = ProxyList()
path = self.generate_plist_file()
pl.load_file(path)
self.assertEqual(2, pl.size())
with temp_file() as path:
pl = ProxyList()
self.generate_plist_file(path)
pl.load_file(path)
self.assertEqual(2, pl.size())

def test_web_proxy_source(self):
pl = ProxyList()
Expand All @@ -52,11 +46,12 @@ def test_web_proxy_source(self):
self.assertEqual(2, pl.size())

def test_get_next_proxy(self):
pl = ProxyList()
path = self.generate_plist_file('foo:1\nbar:1')
pl.load_file(path)
self.assertEqual(pl.get_next_proxy().host, 'foo')
self.assertEqual(pl.get_next_proxy().host, 'bar')
self.assertEqual(pl.get_next_proxy().host, 'foo')
pl.load_file(path)
self.assertEqual(pl.get_next_proxy().host, 'foo')
with temp_file() as path:
pl = ProxyList()
self.generate_plist_file(path, 'foo:1\nbar:1')
pl.load_file(path)
self.assertEqual(pl.get_next_proxy().host, 'foo')
self.assertEqual(pl.get_next_proxy().host, 'bar')
self.assertEqual(pl.get_next_proxy().host, 'foo')
pl.load_file(path)
self.assertEqual(pl.get_next_proxy().host, 'foo')

0 comments on commit 2fa0326

Please sign in to comment.