Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log cipher, certificate and temp key info on establishing an SSL connection #3450

Merged
merged 9 commits into from Jul 23, 2019
21 changes: 18 additions & 3 deletions docs/topics/settings.rst
Expand Up @@ -438,9 +438,10 @@ or even enable client-side authentication (and various other things).
which uses the platform's certificates to validate remote endpoints.
**This is only available if you use Twisted>=14.0.**

If you do use a custom ContextFactory, make sure it accepts a ``method``
parameter at init (this is the ``OpenSSL.SSL`` method mapping
:setting:`DOWNLOADER_CLIENT_TLS_METHOD`).
If you do use a custom ContextFactory, make sure its ``__init__`` method accepts
Gallaecio marked this conversation as resolved.
Show resolved Hide resolved
a ``method`` parameter (this is the ``OpenSSL.SSL`` method mapping
:setting:`DOWNLOADER_CLIENT_TLS_METHOD`) and a ``settings`` parameter (this is
the Scrapy settings object).
Gallaecio marked this conversation as resolved.
Show resolved Hide resolved

.. setting:: DOWNLOADER_CLIENT_TLS_METHOD

Expand Down Expand Up @@ -468,6 +469,20 @@ This setting must be one of these string values:
We recommend that you use PyOpenSSL>=0.13 and Twisted>=0.13
or above (Twisted>=14.0 if you can).

.. setting:: DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING

DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING
-------------------------------------

Default: ``False``

Setting this to ``True`` will enable DEBUG level messages about TLS connection
parameters after establishing HTTPS connections. The kind of information logged
depends on the versions of OpenSSL and pyOpenSSL.

This setting is only used for the default
:setting:`DOWNLOADER_CLIENTCONTEXTFACTORY`.

.. setting:: DOWNLOADER_MIDDLEWARES

DOWNLOADER_MIDDLEWARES
Expand Down
14 changes: 12 additions & 2 deletions scrapy/core/downloader/contextfactory.py
Expand Up @@ -2,6 +2,7 @@
from twisted.internet.ssl import ClientContextFactory

from scrapy import twisted_version
from scrapy.utils.misc import create_instance

if twisted_version >= (14, 0, 0):

Expand All @@ -28,9 +29,17 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
"""

def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
def __init__(self, method=SSL.SSLv23_METHOD, settings=None, *args, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a common practice is to pass tls_verbose_logging, not Settings instance, and extract option value in from_settings / from_crawler.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with changing this, though I don't like that we need to list all optional args in the documentation and the error message, as I was going to pass yet another setting here in a different PR, maybe we can rephrase the messages.

super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
self._ssl_method = method
if settings:
self.tls_verbose_logging = settings['DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.tls_verbose_logging = settings['DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING']
self.tls_verbose_logging = settings.getbool('DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING')

Without it, -s DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING=0 in command-line may be evaluated as True, because self.tls_verbose_logging will be '0'.

else:
self.tls_verbose_logging = False

@classmethod
def from_settings(cls, settings, method=SSL.SSLv23_METHOD, *args, **kwargs):
    """Alternate constructor that forwards the settings object to ``__init__``.

    :param settings: settings object passed on as the ``settings`` argument
    :param method: ``OpenSSL.SSL`` method constant passed on as ``method``
    :param args: extra positional arguments for ``__init__``
    :param kwargs: extra keyword arguments for ``__init__``
    :return: a new instance of ``cls``
    """
    if args:
        # Extra positionals are bound before keywords, so combining
        # ``method=...`` with ``*args`` would raise "got multiple values for
        # argument 'method'". Pass the leading parameters positionally here.
        return cls(method, settings, *args, **kwargs)
    # No extra positionals: keep keyword passing so that ``__init__``
    # signatures with a different parameter order keep working.
    return cls(method=method, settings=settings, **kwargs)

def getCertificateOptions(self):
# setting verify=True will require you to provide CAs
Expand All @@ -56,7 +65,8 @@ def getContext(self, hostname=None, port=None):
return self.getCertificateOptions().getContext()

def creatorForNetloc(self, hostname, port):
return ScrapyClientTLSOptions(hostname.decode("ascii"), self.getContext())
return ScrapyClientTLSOptions(hostname.decode("ascii"), self.getContext(),
verbose_logging=self.tls_verbose_logging)


@implementer(IPolicyForHTTPS)
Expand Down
7 changes: 4 additions & 3 deletions scrapy/core/downloader/handlers/http10.py
@@ -1,7 +1,7 @@
"""Download handlers for http and https schemes
"""
from twisted.internet import reactor
from scrapy.utils.misc import load_object
from scrapy.utils.misc import load_object, create_instance
from scrapy.utils.python import to_unicode


Expand All @@ -11,6 +11,7 @@ class HTTP10DownloadHandler(object):
def __init__(self, settings):
    """Resolve the configured factory objects and remember ``settings``.

    The dotted paths stored under ``DOWNLOADER_HTTPCLIENTFACTORY`` and
    ``DOWNLOADER_CLIENTCONTEXTFACTORY`` are passed through ``load_object``;
    the settings object itself is kept for later use when connecting.
    """
    http_factory_path = settings['DOWNLOADER_HTTPCLIENTFACTORY']
    self.HTTPClientFactory = load_object(http_factory_path)

    context_factory_path = settings['DOWNLOADER_CLIENTCONTEXTFACTORY']
    self.ClientContextFactory = load_object(context_factory_path)

    self._settings = settings

def download_request(self, request, spider):
"""Return a deferred for the HTTP download"""
Expand All @@ -21,7 +22,7 @@ def download_request(self, request, spider):
def _connect(self, factory):
host, port = to_unicode(factory.host), factory.port
if factory.scheme == b'https':
return reactor.connectSSL(host, port, factory,
self.ClientContextFactory())
client_context_factory = create_instance(self.ClientContextFactory, settings=self._settings, crawler=None)
return reactor.connectSSL(host, port, factory, client_context_factory)
else:
return reactor.connectTCP(host, port, factory)
11 changes: 6 additions & 5 deletions scrapy/core/downloader/handlers/http11.py
Expand Up @@ -25,7 +25,7 @@
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.webclient import _parse
from scrapy.core.downloader.tls import openssl_methods
from scrapy.utils.misc import load_object
from scrapy.utils.misc import load_object, create_instance
from scrapy.utils.python import to_bytes, to_unicode
from scrapy import twisted_version

Expand All @@ -44,14 +44,15 @@ def __init__(self, settings):
self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
# try method-aware context factory
try:
self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
self._contextFactory = create_instance(self._contextFactoryClass, settings=settings, crawler=None,
method=self._sslMethod)
except TypeError:
# use context factory defaults
self._contextFactory = self._contextFactoryClass()
self._contextFactory = create_instance(self._contextFactoryClass, settings=settings, crawler=None)
msg = """
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
e.g. OpenSSL.SSL.SSLv23_METHOD).\
Please upgrade your context factory class to handle it or ignore it.""" % (
e.g. OpenSSL.SSL.SSLv23_METHOD) and/or `settings` argument.\
Please upgrade your context factory class to handle them or ignore them.""" % (
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
warnings.warn(msg)
self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
Expand Down
30 changes: 29 additions & 1 deletion scrapy/core/downloader/tls.py
Expand Up @@ -2,6 +2,7 @@
from OpenSSL import SSL

from scrapy import twisted_version
from scrapy.utils.ssl import x509name_to_string, get_temp_key_info


logger = logging.getLogger(__name__)
Expand All @@ -20,6 +21,7 @@
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
}


if twisted_version >= (14, 0, 0):
# ClientTLSOptions requires a recent-enough version of Twisted.
# Not having ScrapyClientTLSOptions should not matter for older
Expand Down Expand Up @@ -65,13 +67,39 @@ class ScrapyClientTLSOptions(ClientTLSOptions):
Same as Twisted's private _sslverify.ClientTLSOptions,
except that VerificationError, CertificateError and ValueError
exceptions are caught, so that the connection is not closed, only
logging warnings.
logging warnings. Also, HTTPS connection parameters logging is added.
"""

def __init__(self, hostname, ctx, verbose_logging=False):
    """Initialise the TLS options and store the verbose-logging flag.

    :param hostname: hostname forwarded to the parent ``__init__``
    :param ctx: SSL context forwarded to the parent ``__init__``
    :param verbose_logging: when true, connection parameters are logged at
        DEBUG level once the handshake is done
    """
    # Explicit two-argument form: zero-argument super() only exists on
    # Python 3, and the rest of this changeset uses the explicit spelling.
    super(ScrapyClientTLSOptions, self).__init__(hostname, ctx)
    self.verbose_logging = verbose_logging

def _identityVerifyingInfoCallback(self, connection, where, ret):
if where & SSL_CB_HANDSHAKE_START:
set_tlsext_host_name(connection, self._hostnameBytes)
elif where & SSL_CB_HANDSHAKE_DONE:
if self.verbose_logging:
if hasattr(connection, 'get_cipher_name'): # requires pyOPenSSL 0.15
if hasattr(connection, 'get_protocol_version_name'): # requires pyOPenSSL 16.0.0
logger.debug('SSL connection to %s using protocol %s, cipher %s',
self._hostnameASCII,
connection.get_protocol_version_name(),
connection.get_cipher_name(),
)
else:
logger.debug('SSL connection to %s using cipher %s',
self._hostnameASCII,
connection.get_cipher_name(),
)
server_cert = connection.get_peer_certificate()
logger.debug('SSL connection certificate: issuer "%s", subject "%s"',
x509name_to_string(server_cert.get_issuer()),
x509name_to_string(server_cert.get_subject()),
)
key_info = get_temp_key_info(connection._ssl)
if key_info:
logger.debug('SSL temp key: %s', key_info)

try:
verifyHostname(connection, self._hostnameASCII)
except verification_errors as e:
Expand Down
1 change: 1 addition & 0 deletions scrapy/settings/default_settings.py
Expand Up @@ -87,6 +87,7 @@
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
# also allowing negotiation
DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False

DOWNLOADER_MIDDLEWARES = {}

Expand Down
50 changes: 50 additions & 0 deletions scrapy/utils/ssl.py
@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*-

import OpenSSL._util as pyOpenSSLutil

from scrapy.utils.python import to_native_str


def ffi_buf_to_string(buf):
    """Return the contents of the cffi buffer ``buf`` as a native string."""
    raw_value = pyOpenSSLutil.ffi.string(buf)
    return to_native_str(raw_value)


def x509name_to_string(x509name):
    """Render an X509 name object as a one-line string.

    The name is written by ``X509_NAME_oneline`` into a 512-character
    buffer, which is then converted with ``ffi_buf_to_string``.
    """
    # from OpenSSL.crypto.X509Name.__repr__
    ffi = pyOpenSSLutil.ffi
    buffer_size = 512
    oneline = ffi.new("char[]", buffer_size)
    pyOpenSSLutil.lib.X509_NAME_oneline(x509name._name, oneline, buffer_size)
    return ffi_buf_to_string(oneline)


def get_temp_key_info(ssl_object):
    """Describe the server's temporary key for an established connection.

    :param ssl_object: the low-level ``SSL *`` cdata of the connection
    :return: a comma-separated description such as ``"ECDH, <curve>, N bits"``,
        or ``None`` when the OpenSSL binding lacks ``SSL_get_server_tmp_key``
        or no temporary key is available
    """
    lib = pyOpenSSLutil.lib
    ffi = pyOpenSSLutil.ffi

    if not hasattr(lib, 'SSL_get_server_tmp_key'):  # requires OpenSSL 1.0.2
        return None

    # adapted from OpenSSL apps/s_cb.c::ssl_print_tmp_key()
    temp_key_p = ffi.new("EVP_PKEY **")
    # Failure is reported through the return value; ``temp_key_p`` itself is
    # a freshly allocated pointer and therefore never NULL.
    if not lib.SSL_get_server_tmp_key(ssl_object, temp_key_p):
        return None
    temp_key = temp_key_p[0]
    if temp_key == ffi.NULL:
        return None

    # ffi.gc() returns a *new* cdata that owns the destructor. It must be
    # kept alive, otherwise the key is freed immediately and every use below
    # would touch freed memory.
    temp_key = ffi.gc(temp_key, lib.EVP_PKEY_free)

    key_info = []
    key_type = lib.EVP_PKEY_id(temp_key)
    if key_type == lib.EVP_PKEY_RSA:
        key_info.append('RSA')
    elif key_type == lib.EVP_PKEY_DH:
        key_info.append('DH')
    elif key_type == lib.EVP_PKEY_EC:
        key_info.append('ECDH')
        ec_key = lib.EVP_PKEY_get1_EC_KEY(temp_key)
        # Same ownership rule as above: keep the gc-wrapped object.
        ec_key = ffi.gc(ec_key, lib.EC_KEY_free)
        nid = lib.EC_GROUP_get_curve_name(lib.EC_KEY_get0_group(ec_key))
        cname = lib.EC_curve_nid2nist(nid)
        if cname == ffi.NULL:
            # No NIST name for this curve: fall back to the short name.
            cname = lib.OBJ_nid2sn(nid)
        key_info.append(ffi_buf_to_string(cname))
    else:
        key_info.append(ffi_buf_to_string(lib.OBJ_nid2sn(key_type)))
    key_info.append('%s bits' % lib.EVP_PKEY_bits(temp_key))
    return ', '.join(key_info)