Skip to content

Commit

Permalink
Merge caeeb09 into cfae62f
Browse the repository at this point in the history
  • Loading branch information
dangra committed Aug 24, 2015
2 parents cfae62f + caeeb09 commit b1b0f99
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 5 deletions.
1 change: 1 addition & 0 deletions requirements-py3.txt
Expand Up @@ -5,3 +5,4 @@ cssselect>=0.9
queuelib>=1.1.1
w3lib>=1.8.0
service_identity
https://github.com/dangra/txtulip/archive/0a30192.zip#egg=txtulip
5 changes: 5 additions & 0 deletions scrapy/_monkeypatches.py
@@ -1,4 +1,5 @@
import sys
import six
from six.moves import copyreg

if sys.version_info[0] == 2:
Expand All @@ -14,6 +15,10 @@
from urlparse import uses_query
uses_query.append('s3')

# Enable asyncio reactor
# On Python 3, install txtulip's asyncio-backed Twisted reactor.
# NOTE(review): presumably this must execute before anything imports
# twisted.internet.reactor (installing a reactor twice raises) — confirm
# that this module is imported early enough in Scrapy's startup.
if six.PY3:
    from txtulip.reactor import install
    install()

# Undo what Twisted's perspective broker adds to pickle register
# to prevent bugs like Twisted#7989 while serializing requests
Expand Down
56 changes: 56 additions & 0 deletions scrapy/core/downloader/handlers/aiohttp.py
@@ -0,0 +1,56 @@
"""Download handlers for http and https schemes"""
import logging
import asyncio

import aiohttp
from twisted.internet import defer

from scrapy.http import Headers
from scrapy.responsetypes import responsetypes

logger = logging.getLogger(__name__)


class HTTPDownloadHandler(object):
    """aiohttp-based download handler for the http and https schemes.

    Bridges aiohttp coroutines into Twisted Deferreds (via the module
    helper ``_force_deferred``) so the rest of the crawler keeps its
    Deferred-based control flow.
    """

    def __init__(self, settings):
        # Kept for interface parity with the other handlers; nothing is
        # read from the settings object here.
        self.settings = settings

    def download_request(self, request, spider):
        """Return a deferred for the HTTP download"""
        # Flatten Scrapy's multi-valued header mapping (name -> [values])
        # into the flat (name, value) pairs aiohttp expects, decoding the
        # raw bytes as latin1.
        header_pairs = [
            (name.decode('latin1'), value.decode('latin1'))
            for name, values in request.headers.items()
            for value in values
        ]

        response_coro = aiohttp.request(
            method=request.method,
            url=request.url,
            data=request.body,
            allow_redirects=False,  # redirects are handled by middleware
            headers=header_pairs,
        )
        dfd = _force_deferred(response_coro)

        def _got_response(aio_response):
            # Reading the body is itself a coroutine; chain it and carry
            # the response object along to the final callback.
            body_dfd = _force_deferred(aio_response.read())
            return body_dfd.addCallback(_got_body, aio_response=aio_response)

        def _got_body(body, aio_response):
            url = request.url
            status = aio_response.status
            # NOTE(review): wrapping each value in a one-element list
            # presumably collapses duplicate response headers (e.g.
            # Set-Cookie) — verify against aiohttp's multidict API.
            headers = Headers(
                (key.encode('latin1'), [value.encode('latin1')])
                for key, value in aio_response.headers.items()
            )
            respcls = responsetypes.from_args(headers=headers, url=url)
            return respcls(url=url, status=status, headers=headers,
                           body=body, flags=[])

        return dfd.addCallback(_got_response)


def _force_deferred(coro):
    """Wrap an asyncio coroutine/awaitable into a Twisted Deferred.

    The returned Deferred fires with the coroutine's result once the
    asyncio task completes.  ``f.result()`` re-raises any exception the
    coroutine raised, which Twisted then turns into an errback.
    """
    dfd = defer.Deferred().addCallback(lambda f: f.result())
    # asyncio.async() was deprecated in Python 3.4.4 and is a SyntaxError
    # from 3.7 on ("async" became a keyword); ensure_future is the
    # drop-in replacement with identical semantics.
    future = asyncio.ensure_future(coro)
    future.add_done_callback(dfd.callback)
    return dfd
12 changes: 9 additions & 3 deletions scrapy/downloadermiddlewares/redirect.py
Expand Up @@ -62,24 +62,30 @@ def process_response(self, request, response, spider):

if request.method == 'HEAD':
if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
redirected_url = urljoin(request.url, response.headers['location'])
redirected_url = self._urljoin_location(request, response)
redirected = request.replace(url=redirected_url)
return self._redirect(redirected, request, spider, response.status)
else:
return response

if response.status in [302, 303] and 'Location' in response.headers:
redirected_url = urljoin(request.url, response.headers['location'])
redirected_url = self._urljoin_location(request, response)
redirected = self._redirect_request_using_get(request, redirected_url)
return self._redirect(redirected, request, spider, response.status)

if response.status in [301, 307] and 'Location' in response.headers:
redirected_url = urljoin(request.url, response.headers['location'])
redirected_url = self._urljoin_location(request, response)
redirected = request.replace(url=redirected_url)
return self._redirect(redirected, request, spider, response.status)

return response

def _urljoin_location(self, request, response):
return urljoin(
request.url,
response.headers['location'].decode('latin1')
)


class MetaRefreshMiddleware(BaseRedirectMiddleware):

Expand Down
4 changes: 2 additions & 2 deletions scrapy/settings/default_settings.py
Expand Up @@ -66,8 +66,8 @@
DOWNLOAD_HANDLERS = {}
DOWNLOAD_HANDLERS_BASE = {
'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
'http': 'scrapy.core.downloader.handlers.aiohttp.HTTPDownloadHandler',
'https': 'scrapy.core.downloader.handlers.aiohttp.HTTPDownloadHandler',
's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}
Expand Down

0 comments on commit b1b0f99

Please sign in to comment.