Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for overriding OpenSSL ciphers #3442

Merged
merged 4 commits into from Aug 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 21 additions & 2 deletions docs/topics/settings.rst
Expand Up @@ -442,8 +442,27 @@ or even enable client-side authentication (and various other things).

If you do use a custom ContextFactory, make sure its ``__init__`` method
accepts a ``method`` parameter (this is the ``OpenSSL.SSL`` method mapping
:setting:`DOWNLOADER_CLIENT_TLS_METHOD`) and a ``tls_verbose_logging``
parameter (``bool``).
:setting:`DOWNLOADER_CLIENT_TLS_METHOD`), a ``tls_verbose_logging``
parameter (``bool``) and a ``tls_ciphers`` parameter (see
:setting:`DOWNLOADER_CLIENT_TLS_CIPHERS`).

.. setting:: DOWNLOADER_CLIENT_TLS_CIPHERS

DOWNLOADER_CLIENT_TLS_CIPHERS
-----------------------------

Default: ``'DEFAULT'``
kmike marked this conversation as resolved.
Show resolved Hide resolved

Use this setting to customize the TLS/SSL ciphers used by the default
HTTP/1.1 downloader.

The setting should contain a string in the `OpenSSL cipher list format`_,
these ciphers will be used as client ciphers. Changing this setting may be
necessary to access certain HTTPS websites: for example, you may need to use
``'DEFAULT:!DH'`` for a website with weak DH parameters or enable a
specific cipher that is not included in ``DEFAULT`` if a website requires it.

.. _OpenSSL cipher list format: https://www.openssl.org/docs/manmaster/man1/ciphers.html#CIPHER-LIST-FORMAT

.. setting:: DOWNLOADER_CLIENT_TLS_METHOD

Expand Down
13 changes: 9 additions & 4 deletions scrapy/core/downloader/contextfactory.py
@@ -1,5 +1,5 @@
from OpenSSL import SSL
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust, AcceptableCiphers
from twisted.web.client import BrowserLikePolicyForHTTPS
from twisted.web.iweb import IPolicyForHTTPS
from zope.interface.declarations import implementer
Expand All @@ -19,15 +19,20 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
"""

def __init__(self, method=SSL.SSLv23_METHOD, tls_verbose_logging=False, *args, **kwargs):
def __init__(self, method=SSL.SSLv23_METHOD, tls_verbose_logging=False, tls_ciphers=None, *args, **kwargs):
super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
self._ssl_method = method
self.tls_verbose_logging = tls_verbose_logging
if tls_ciphers:
self.tls_ciphers = AcceptableCiphers.fromOpenSSLCipherString(tls_ciphers)
else:
self.tls_ciphers = DEFAULT_CIPHERS

@classmethod
def from_settings(cls, settings, method=SSL.SSLv23_METHOD, *args, **kwargs):
tls_verbose_logging = settings.getbool('DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING')
return cls(method=method, tls_verbose_logging=tls_verbose_logging, *args, **kwargs)
tls_ciphers = settings['DOWNLOADER_CLIENT_TLS_CIPHERS']
return cls(method=method, tls_verbose_logging=tls_verbose_logging, tls_ciphers=tls_ciphers, *args, **kwargs)

def getCertificateOptions(self):
# setting verify=True will require you to provide CAs
Expand All @@ -45,7 +50,7 @@ def getCertificateOptions(self):
method=getattr(self, 'method',
getattr(self, '_ssl_method', None)),
fixBrokenPeers=True,
acceptableCiphers=DEFAULT_CIPHERS)
acceptableCiphers=self.tls_ciphers)

# kept for old-style HTTP/1.0 downloader context twisted calls,
# e.g. connectSSL()
Expand Down
2 changes: 1 addition & 1 deletion scrapy/core/downloader/handlers/http11.py
Expand Up @@ -54,7 +54,7 @@ def __init__(self, settings):
)
msg = """
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
e.g. OpenSSL.SSL.SSLv23_METHOD) and/or `tls_verbose_logging` argument.\
e.g. OpenSSL.SSL.SSLv23_METHOD) and/or `tls_verbose_logging` argument and/or `tls_ciphers` argument.\
Please upgrade your context factory class to handle them or ignore them.""" % (
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
warnings.warn(msg)
Expand Down
1 change: 1 addition & 0 deletions scrapy/settings/default_settings.py
Expand Up @@ -85,6 +85,7 @@

DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_CIPHERS = 'DEFAULT'
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
# also allowing negotiation
DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False
Expand Down
5 changes: 5 additions & 0 deletions scrapy/utils/ssl.py
Expand Up @@ -6,6 +6,11 @@
from scrapy.utils.python import to_native_str


# The OpenSSL symbol is present since 1.1.1 but it's not currently supported in any version of pyOpenSSL.
# Using the binding directly, as this code does, requires cryptography 2.4.
SSL_OP_NO_TLSv1_3 = getattr(pyOpenSSLutil.lib, 'SSL_OP_NO_TLSv1_3', 0)


def ffi_buf_to_string(buf):
return to_native_str(pyOpenSSLutil.ffi.string(buf))

Expand Down
13 changes: 10 additions & 3 deletions tests/mockserver.py
Expand Up @@ -3,6 +3,7 @@
from six.moves.urllib.parse import urlencode
from subprocess import Popen, PIPE

from OpenSSL import SSL
from twisted.web.server import Site, NOT_DONE_YET
from twisted.web.resource import Resource
from twisted.web.static import File
Expand All @@ -13,8 +14,8 @@
from twisted.internet import reactor, ssl
from twisted.internet.task import deferLater


from scrapy.utils.python import to_bytes, to_unicode
from scrapy.utils.ssl import SSL_OP_NO_TLSv1_3


def getarg(request, name, default=None, type=None):
Expand Down Expand Up @@ -215,11 +216,17 @@ def url(self, path, is_secure=False):
return host + path


def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.crt'):
return ssl.DefaultOpenSSLContextFactory(
def ssl_context_factory(keyfile='keys/localhost.key', certfile='keys/localhost.crt', cipher_string=None):
factory = ssl.DefaultOpenSSLContextFactory(
os.path.join(os.path.dirname(__file__), keyfile),
os.path.join(os.path.dirname(__file__), certfile),
)
if cipher_string:
ctx = factory.getContext()
# disabling TLS1.2+ because it unconditionally enables some strong ciphers
ctx.set_options(SSL.OP_CIPHER_SERVER_PREFERENCE | SSL.OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3)
ctx.set_cipher_list(to_bytes(cipher_string))
return factory


if __name__ == "__main__":
Expand Down
41 changes: 41 additions & 0 deletions tests/test_downloader_handlers.py
Expand Up @@ -553,6 +553,47 @@ def setUp(self):
super(Https11InvalidDNSPattern, self).setUp()


class Https11CustomCiphers(unittest.TestCase):
scheme = 'https'
download_handler_cls = HTTP11DownloadHandler

keyfile = 'keys/localhost.key'
certfile = 'keys/localhost.crt'

def setUp(self):
self.tmpname = self.mktemp()
os.mkdir(self.tmpname)
FilePath(self.tmpname).child("file").setContent(b"0123456789")
r = static.File(self.tmpname)
self.site = server.Site(r, timeout=None)
self.wrapper = WrappingFactory(self.site)
self.host = 'localhost'
self.port = reactor.listenSSL(
0, self.wrapper, ssl_context_factory(self.keyfile, self.certfile, cipher_string='CAMELLIA256-SHA'),
interface=self.host)
self.portno = self.port.getHost().port
self.download_handler = self.download_handler_cls(
Settings({'DOWNLOADER_CLIENT_TLS_CIPHERS': 'CAMELLIA256-SHA'}))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you please also add a test which shows that downloading doesn't work with default DOWNLOADER_CLIENT_TLS_CIPHERS, to make sure the other test is valid?

self.download_request = self.download_handler.download_request

@defer.inlineCallbacks
def tearDown(self):
yield self.port.stopListening()
if hasattr(self.download_handler, 'close'):
yield self.download_handler.close()
shutil.rmtree(self.tmpname)

def getURL(self, path):
return "%s://%s:%d/%s" % (self.scheme, self.host, self.portno, path)

def test_download(self):
request = Request(self.getURL('file'))
d = self.download_request(request, Spider('foo'))
d.addCallback(lambda r: r.body)
d.addCallback(self.assertEqual, b"0123456789")
return d


class Http11MockServerTestCase(unittest.TestCase):
"""HTTP 1.1 test case with MockServer"""

Expand Down
57 changes: 57 additions & 0 deletions tests/test_webclient.py
Expand Up @@ -6,6 +6,7 @@
import six
import shutil

import OpenSSL.SSL
from twisted.trial import unittest
from twisted.web import server, static, util, resource
from twisted.internet import reactor, defer
Expand All @@ -15,8 +16,12 @@
from twisted.internet.defer import inlineCallbacks

from scrapy.core.downloader import webclient as client
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from scrapy.http import Request, Headers
from scrapy.settings import Settings
from scrapy.utils.misc import create_instance
from scrapy.utils.python import to_bytes, to_unicode
from tests.mockserver import ssl_context_factory


def getPage(url, contextFactory=None, response_transform=None, *args, **kwargs):
Expand Down Expand Up @@ -363,3 +368,55 @@ def _check_Encoding(self, response, original_body):
self.assertEqual(content_encoding, EncodingResource.out_encoding)
self.assertEqual(
response.body.decode(content_encoding), to_unicode(original_body))


class WebClientSSLTestCase(unittest.TestCase):
context_factory = None

def _listen(self, site):
return reactor.listenSSL(
0, site,
contextFactory=self.context_factory or ssl_context_factory(),
interface="127.0.0.1")

def getURL(self, path):
return "https://127.0.0.1:%d/%s" % (self.portno, path)

def setUp(self):
self.tmpname = self.mktemp()
os.mkdir(self.tmpname)
FilePath(self.tmpname).child("file").setContent(b"0123456789")
r = static.File(self.tmpname)
r.putChild(b"payload", PayloadResource())
self.site = server.Site(r, timeout=None)
self.wrapper = WrappingFactory(self.site)
self.port = self._listen(self.wrapper)
self.portno = self.port.getHost().port

@inlineCallbacks
def tearDown(self):
yield self.port.stopListening()
shutil.rmtree(self.tmpname)

def testPayload(self):
s = "0123456789" * 10
return getPage(self.getURL("payload"), body=s).addCallback(
self.assertEqual, to_bytes(s))


class WebClientCustomCiphersSSLTestCase(WebClientSSLTestCase):
# we try to use a cipher that is not enabled by default in OpenSSL
custom_ciphers = 'CAMELLIA256-SHA'
context_factory = ssl_context_factory(cipher_string=custom_ciphers)

def testPayload(self):
s = "0123456789" * 10
settings = Settings({'DOWNLOADER_CLIENT_TLS_CIPHERS': self.custom_ciphers})
client_context_factory = create_instance(ScrapyClientContextFactory, settings=settings, crawler=None)
return getPage(self.getURL("payload"), body=s,
contextFactory=client_context_factory).addCallback(self.assertEqual, to_bytes(s))

def testPayloadDefaultCiphers(self):
s = "0123456789" * 10
d = getPage(self.getURL("payload"), body=s, contextFactory=ScrapyClientContextFactory())
return self.assertFailure(d, OpenSSL.SSL.Error)