Skip to content

Commit

Permalink
Yet another cookie engine refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
lorien committed Apr 29, 2015
1 parent 959547b commit f242aa9
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ docs/libpeerconnection.log
/docs/ru/_build/
/build/
/en
/.v4
47 changes: 22 additions & 25 deletions grab/transport/curl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from weblib.user_agent import random_user_agent
from weblib.encoding import make_str, decode_list, decode_pairs
import six
from six.moves.http_cookies import SimpleCookie
from six.moves.http_cookiejar import CookieJar

from grab.cookie import create_cookie, CookieManager
Expand Down Expand Up @@ -369,7 +368,7 @@ def process_config(self, grab):
grab.config['reject_file_size'])

def process_cookie_options(self, grab, request_url):
host = urlsplit(request_url).netloc.split(':')[0]
request_host = urlsplit(request_url).netloc.split(':')[0]

# `cookiefile` option should be processed before `cookies` option
# because `load_cookies` updates `cookies` option
Expand All @@ -385,15 +384,9 @@ def process_cookie_options(self, grab, request_url):
if not isinstance(grab.config['cookies'], dict):
raise error.GrabMisuseError('cookies option should be a dict')
for name, value in grab.config['cookies'].items():
if '.' in host:
domain = '.' + host
else:
# TODO: should I do that?
domain = ''
grab.cookies.set(
name=name,
value=value,
domain=domain
value=value
)

# Erase known cookies stored in pycurl handler
Expand All @@ -408,22 +401,26 @@ def process_cookie_options(self, grab, request_url):
# Pycurl cookie engine is smart enough to send
# only cookies that belong to the current request's host name
for cookie in grab.cookies.cookiejar:
cookies = SimpleCookie()
cname = cookie.name
# python2: should be py2 <str>
# python3: should be py3 <str>
if six.PY2:
cname = cname.encode('ascii')
cookies[cname] = cookie.value
cookies[cname]['domain'] = cookie.domain
cookies[cname]['httponly'] =\
cookie.get_nonstandard_attr('HttpOnly')
for key in ('path', 'comment', 'expires', 'secure',
'version'):
val = getattr(cookie, key)
if val:
cookies[cname][key] = getattr(cookie, key)
self.curl.setopt(pycurl.COOKIELIST, cookies.output())
self.curl.setopt(pycurl.COOKIELIST,
self.get_netscape_cookie_spec(cookie,
request_host))

def get_netscape_cookie_spec(self, cookie, request_host):
    """
    Build one Netscape cookie-file line for pycurl's COOKIELIST option.

    The line consists of seven tab-separated fields: host,
    include-subdomains flag, path, secure flag, expiry, name and value.

    :param cookie: cookie object exposing ``domain``, ``path``,
        ``secure``, ``expires``, ``name``, ``value`` attributes and
        the ``get_nonstandard_attr`` method
    :param request_host: host name of the current request, used when
        the cookie carries no domain of its own
    :returns: the tab-separated cookie line as text
    """
    host = cookie.domain or request_host
    if cookie.get_nonstandard_attr('HttpOnly'):
        # HttpOnly cookies are flagged by a special prefix on the
        # host field of the Netscape cookie line
        host = '#HttpOnly_' + host
    items = [
        host,
        'TRUE',
        # A missing path would break the join below; fall back to
        # the root path
        cookie.path or '/',
        'TRUE' if cookie.secure else 'FALSE',
        # The expiry field must be a number of seconds since the
        # epoch, not an HTTP date string; "0" denotes a session cookie
        str(cookie.expires) if cookie.expires else '0',
        cookie.name,
        # A cookie without value is serialized as an empty field
        '' if cookie.value is None else cookie.value,
    ]
    return u'\t'.join(items)

def request(self):

Expand Down
7 changes: 6 additions & 1 deletion test/grab_cookies.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def test_cookiefile(self):
self.server.response['cookies'] = {'godzilla': 'monkey'}.items()
g.setup(cookiefile=TMP_FILE, debug=True)
g.go(self.server.get_url())
print(g.request_head)
self.assertEqual(self.server.request['cookies']['spam'].value, 'ham')

# This is the correct result of combining two cookies
Expand Down Expand Up @@ -164,7 +165,11 @@ def test_different_domains(self):

self.server.response['cookies'] = {'bar': 'bar'}.items()
g.go('http://bar:%d' % self.server.port)
self.assertEqual(dict(g.response.cookies.items()), {'bar': 'bar'})

# response.cookies contains cookies from both domains
# because it just accumulates cookies over time
self.assertEqual(dict(g.response.cookies.items()), {'foo': 'foo',
'bar': 'bar'})

def test_cookie_domain(self):
g = Grab()
Expand Down

0 comments on commit f242aa9

Please sign in to comment.