Skip to content

Commit

Permalink
Merge pull request #1815 from redapple/backport-1.1-pr1794
Browse files Browse the repository at this point in the history
[backport][1.1] Use best practices for TLS connections when using Twisted>=14.0
  • Loading branch information
redapple committed Feb 24, 2016
2 parents 300f162 + bdc1356 commit 394f424
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 24 deletions.
116 changes: 93 additions & 23 deletions scrapy/core/downloader/contextfactory.py
@@ -1,28 +1,98 @@
from OpenSSL import SSL
from twisted.internet.ssl import ClientContextFactory

try:
# available since twisted 14.0

from zope.interface.declarations import implementer

# the following should be available from Twisted 14.0.0
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
from twisted.internet._sslverify import ClientTLSOptions
from twisted.web.client import BrowserLikePolicyForHTTPS
from twisted.web.iweb import IPolicyForHTTPS

@implementer(IPolicyForHTTPS)
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
    """
    HTTPS context factory that does not verify the peer certificate.

    The OpenSSL method defaults to SSLv23_METHOD (also called TLS_METHOD),
    which allows TLS protocol negotiation:

    'A TLS/SSL connection established with [this method] may
     understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
    """

    def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
        """Initialise the parent policy and remember the SSL/TLS `method`.

        Extra positional and keyword arguments are forwarded untouched to
        the parent class initialiser.
        """
        super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
        self._ssl_method = method

    def getCertificateOptions(self):
        """Build `CertificateOptions` with certificate verification off."""
        # verify=True would require supplying the CAs to verify against,
        # so it is not a simple switch to flip here.
        #
        # Backward-compatible choice of the SSL/TLS method:
        #
        # * a `method` attribute wins, which honours the frequently
        #   recommended `ScrapyClientContextFactory` subclass recipe
        #   (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
        #
        # * `_ssl_method` is read through getattr() so that context
        #   factories which never call super(..., self).__init__ still work
        fallback_method = getattr(self, '_ssl_method', None)
        chosen_method = getattr(self, 'method', fallback_method)
        return CertificateOptions(verify=False, method=chosen_method)

    # Retained for the old-style HTTP/1.0 downloader, where Twisted asks
    # for a context directly (e.g. through connectSSL()).
    def getContext(self, hostname=None, port=None):
        """Return the OpenSSL context of the current certificate options.

        `hostname` and `port` are accepted but not used.
        """
        certificate_options = self.getCertificateOptions()
        return certificate_options.getContext()

    def creatorForNetloc(self, hostname, port):
        """Return `ClientTLSOptions` for `hostname` (ASCII-decoded).

        `port` is accepted but not used.
        """
        ascii_hostname = hostname.decode("ascii")
        return ClientTLSOptions(ascii_hostname, self.getContext())


@implementer(IPolicyForHTTPS)
class BrowserLikeContextFactory(ScrapyClientContextFactory):
    """
    Twisted-recommended context factory for web clients.

    Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html:
    "The default is to use a BrowserLikePolicyForHTTPS,
    so unless you have special requirements you can leave this as-is."

    creatorForNetloc() is the same as BrowserLikePolicyForHTTPS
    except this context factory allows setting the TLS/SSL method to use.

    Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
    which allows TLS protocol negotiation.
    """

    def creatorForNetloc(self, hostname, port):
        """Return client TLS options for `hostname` using platform trust.

        `port` is accepted but not used.
        """
        ascii_hostname = hostname.decode("ascii")

        # platformTrust() selects the root CAs shipped with the platform.
        #
        # As a consequence a site such as https://www.cacert.org is rejected
        # by default, because the CAcert.org CA certificate is seldom shipped.
        trust_root = platformTrust()

        certificate_overrides = {'method': self._ssl_method}
        return optionsForClientTLS(
            ascii_hostname,
            trustRoot=trust_root,
            extraCertificateOptions=certificate_overrides)

except ImportError:
ClientTLSOptions = None


class ScrapyClientContextFactory(ClientContextFactory):
    """An SSL context factory which is more permissive against SSL bugs."""
    # Background:
    #   https://github.com/scrapy/scrapy/issues/82
    #   https://github.com/scrapy/scrapy/issues/26
    #   https://github.com/scrapy/scrapy/issues/981

    def __init__(self):
        """Select TLSv1_METHOD as the SSL method."""
        # Why TLSv1_METHOD is the default here:
        # https://github.com/scrapy/scrapy/issues/194
        self.method = SSL.TLSv1_METHOD

    def getContext(self, hostname=None, port=None):
        """Return the parent's context with all OpenSSL workarounds enabled.

        When `hostname` is truthy and `ClientTLSOptions` could be imported,
        the context is also passed through `ClientTLSOptions` as a TLS SNI
        workaround. `port` is accepted but not used.
        """
        context = ClientContextFactory.getContext(self)
        # SSL.OP_ALL turns on every SSL bug workaround documented at
        # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
        context.set_options(SSL.OP_ALL)
        if not hostname or ClientTLSOptions is None:
            return context
        # workaround for TLS SNI
        ClientTLSOptions(hostname, context)
        return context

class ScrapyClientContextFactory(ClientContextFactory):
    """An SSL context factory which is more permissive against SSL bugs."""
    # Background:
    #   https://github.com/scrapy/scrapy/issues/82
    #   https://github.com/scrapy/scrapy/issues/26
    #   https://github.com/scrapy/scrapy/issues/981

    def __init__(self, method=SSL.SSLv23_METHOD):
        """Store the OpenSSL `method` on the instance as `self.method`."""
        self.method = method

    def getContext(self, hostname=None, port=None):
        """Return the parent's context with all OpenSSL workarounds enabled.

        `hostname` and `port` are accepted but not used.
        """
        context = ClientContextFactory.getContext(self)
        # SSL.OP_ALL turns on every SSL bug workaround documented at
        # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
        context.set_options(SSL.OP_ALL)
        return context
17 changes: 16 additions & 1 deletion scrapy/core/downloader/handlers/http11.py
Expand Up @@ -4,6 +4,7 @@
import logging
from io import BytesIO
from time import time
import warnings
from six.moves.urllib.parse import urldefrag

from zope.interface import implementer
Expand All @@ -18,6 +19,7 @@
from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.webclient import _parse
from scrapy.core.downloader.tls import openssl_methods
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_bytes, to_unicode
from scrapy import twisted_version
Expand All @@ -31,8 +33,21 @@ def __init__(self, settings):
self._pool = HTTPConnectionPool(reactor, persistent=True)
self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
self._pool._factory.noisy = False

self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
self._contextFactory = self._contextFactoryClass()
# try method-aware context factory
try:
self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
except TypeError:
# use context factory defaults
self._contextFactory = self._contextFactoryClass()
msg = """
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
e.g. OpenSSL.SSL.SSLv23_METHOD).\
Please upgrade your context factory class to handle it or ignore it.""" % (
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
warnings.warn(msg)
self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
self._disconnect_timeout = 1
Expand Down
16 changes: 16 additions & 0 deletions scrapy/core/downloader/tls.py
@@ -0,0 +1,16 @@
from OpenSSL import SSL


# Symbolic names for the supported SSL/TLS methods; these are the keys of
# `openssl_methods` below.
METHOD_SSLv3 = 'SSLv3'
METHOD_TLS = 'TLS'
METHOD_TLSv10 = 'TLSv1.0'
METHOD_TLSv11 = 'TLSv1.1'
METHOD_TLSv12 = 'TLSv1.2'

# Maps each method name above to the corresponding `OpenSSL.SSL` method
# constant. The TLS 1.1 / 1.2 entries use getattr() with an integer default
# so the table can still be built when `OpenSSL.SSL` lacks those attributes.
# NOTE(review): 5 and 6 are presumably OpenSSL's numeric ids for these
# methods -- confirm against pyOpenSSL.
openssl_methods = {
    METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
    METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
    METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
    METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
    METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
}
2 changes: 2 additions & 0 deletions scrapy/settings/default_settings.py
Expand Up @@ -83,6 +83,8 @@

# Dotted path of the HTTP client factory class.
DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
# Dotted path of the SSL/TLS context factory class.
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
# Name of the SSL/TLS method to use.
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
# also allowing negotiation

DOWNLOADER_MIDDLEWARES = {}

Expand Down

0 comments on commit 394f424

Please sign in to comment.