Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport][1.1] Use best practices for TLS connections when using Twisted>=14.0 #1815

Merged
merged 1 commit into from
Feb 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
116 changes: 93 additions & 23 deletions scrapy/core/downloader/contextfactory.py
@@ -1,28 +1,98 @@
from OpenSSL import SSL
from twisted.internet.ssl import ClientContextFactory

try:
# available since twisted 14.0

from zope.interface.declarations import implementer

# the following should be available from Twisted 14.0.0
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
from twisted.internet._sslverify import ClientTLSOptions
from twisted.web.client import BrowserLikePolicyForHTTPS
from twisted.web.iweb import IPolicyForHTTPS

@implementer(IPolicyForHTTPS)
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
"""
Non-peer-certificate verifying HTTPS context factory

Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
which allows TLS protocol negotiation

'A TLS/SSL connection established with [this method] may
understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
"""

def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
self._ssl_method = method

def getCertificateOptions(self):
# setting verify=True will require you to provide CAs
# to verify against; in other words: it's not that simple

# backward-compatible SSL/TLS method:
#
# * this will respect `method` attribute in often recommended
# `ScrapyClientContextFactory` subclass
# (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
#
# * getattr() for `_ssl_method` attribute for context factories
# not calling super(..., self).__init__
return CertificateOptions(verify=False,
method=getattr(self, 'method',
getattr(self, '_ssl_method', None)))

# kept for old-style HTTP/1.0 downloader context twisted calls,
# e.g. connectSSL()
def getContext(self, hostname=None, port=None):
return self.getCertificateOptions().getContext()

def creatorForNetloc(self, hostname, port):
return ClientTLSOptions(hostname.decode("ascii"), self.getContext())


@implementer(IPolicyForHTTPS)
class BrowserLikeContextFactory(ScrapyClientContextFactory):
"""
Twisted-recommended context factory for web clients.

Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html:
"The default is to use a BrowserLikePolicyForHTTPS,
so unless you have special requirements you can leave this as-is."

creatorForNetloc() is the same as BrowserLikePolicyForHTTPS
except this context factory allows setting the TLS/SSL method to use.

Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
which allows TLS protocol negotiation.
"""
def creatorForNetloc(self, hostname, port):

# trustRoot set to platformTrust() will use the platform's root CAs.
#
# This means that a website like https://www.cacert.org will be rejected
# by default, since CAcert.org CA certificate is seldom shipped.
return optionsForClientTLS(hostname.decode("ascii"),
trustRoot=platformTrust(),
extraCertificateOptions={
'method': self._ssl_method,
})

except ImportError:
ClientTLSOptions = None


class ScrapyClientContextFactory(ClientContextFactory):
"A SSL context factory which is more permissive against SSL bugs."
# see https://github.com/scrapy/scrapy/issues/82
# and https://github.com/scrapy/scrapy/issues/26
# and https://github.com/scrapy/scrapy/issues/981

def __init__(self):
# see this issue on why we use TLSv1_METHOD by default
# https://github.com/scrapy/scrapy/issues/194
self.method = SSL.TLSv1_METHOD

def getContext(self, hostname=None, port=None):
ctx = ClientContextFactory.getContext(self)
# Enable all workarounds to SSL bugs as documented by
# http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
ctx.set_options(SSL.OP_ALL)
if hostname and ClientTLSOptions is not None: # workaround for TLS SNI
ClientTLSOptions(hostname, ctx)
return ctx

class ScrapyClientContextFactory(ClientContextFactory):
"A SSL context factory which is more permissive against SSL bugs."
# see https://github.com/scrapy/scrapy/issues/82
# and https://github.com/scrapy/scrapy/issues/26
# and https://github.com/scrapy/scrapy/issues/981

def __init__(self, method=SSL.SSLv23_METHOD):
self.method = method

def getContext(self, hostname=None, port=None):
ctx = ClientContextFactory.getContext(self)
# Enable all workarounds to SSL bugs as documented by
# http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
ctx.set_options(SSL.OP_ALL)
return ctx
17 changes: 16 additions & 1 deletion scrapy/core/downloader/handlers/http11.py
Expand Up @@ -4,6 +4,7 @@
import logging
from io import BytesIO
from time import time
import warnings
from six.moves.urllib.parse import urldefrag

from zope.interface import implementer
Expand All @@ -18,6 +19,7 @@
from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.webclient import _parse
from scrapy.core.downloader.tls import openssl_methods
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_bytes, to_unicode
from scrapy import twisted_version
Expand All @@ -31,8 +33,21 @@ def __init__(self, settings):
self._pool = HTTPConnectionPool(reactor, persistent=True)
self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
self._pool._factory.noisy = False

self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
self._contextFactory = self._contextFactoryClass()
# try method-aware context factory
try:
self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
except TypeError:
# use context factory defaults
self._contextFactory = self._contextFactoryClass()
msg = """
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
e.g. OpenSSL.SSL.SSLv23_METHOD).\
Please upgrade your context factory class to handle it or ignore it.""" % (
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
warnings.warn(msg)
self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
self._disconnect_timeout = 1
Expand Down
16 changes: 16 additions & 0 deletions scrapy/core/downloader/tls.py
@@ -0,0 +1,16 @@
from OpenSSL import SSL


METHOD_SSLv3 = 'SSLv3'
METHOD_TLS = 'TLS'
METHOD_TLSv10 = 'TLSv1.0'
METHOD_TLSv11 = 'TLSv1.1'
METHOD_TLSv12 = 'TLSv1.2'

openssl_methods = {
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
}
2 changes: 2 additions & 0 deletions scrapy/settings/default_settings.py
Expand Up @@ -83,6 +83,8 @@

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
# also allowing negotiation

DOWNLOADER_MIDDLEWARES = {}

Expand Down