From de8cdfca08ac612a3437041f93b693a9b580d1a5 Mon Sep 17 00:00:00 2001 From: "Nathaniel J. Smith" Date: Sun, 6 Aug 2017 14:23:43 -0700 Subject: [PATCH] [bpo-28414] In SSL module, store server_hostname as an A-label MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically, our handling of international domain names (IDNs) in the ssl module has been very broken. The flow went like: 1. User passes server_hostname= to the SSLSocket/SSLObject constructor. This gets normalized to an A-label by using the PyArg_Parse "et" mode: bytes objects get passed through unchanged (assumed to already be A-labels); str objects get run through .encode("idna") to convert them into A-labels. 2. newPySSLSocket takes this A-label, and for some reason decodes it *back* to a U-label, and stores that as the object's server_hostname attribute. 3. Later, this U-label server_hostname attribute gets passed to match_hostname, to compare against the hostname seen in the certificate. But certificates contain A-labels, and match_hostname expects to be passed an A-label, so this doesn't work at all. This PR fixes the problem by removing the pointless decoding at step 2, so that internally we always use A-labels, which matches how internet protocols are designed in general: A-labels are used everywhere internally and on-the-wire, and U-labels are basically just for user interfaces. This also matches the general advice to handle encoding/decoding once at the edges, though for backwards-compatibility we continue to use 'str' objects to store A-labels, even though they're now always ASCII. Technically there is a minor compatibility break here: if a user examines the .server_hostname attribute of an ssl-wrapped socket, then previously they would have seen a U-label like "pythön.org", and now they'll see an A-label like "xn--pythn-mua.org". But this only affects non-ASCII domain names, which have never worked in the first place, so it seems unlikely that anyone is relying on the old behavior. This PR also adds an end-to-end test for IDN hostname validation. Previously there were no tests for this functionality. Fixes bpo-28414. --- Doc/library/ssl.rst | 6 +++ Doc/whatsnew/3.7.rst | 10 ++++ Lib/test/ssl-idn-ca.pem | 15 ++++++ Lib/test/ssl-idn-cert.pem | 31 +++++++++++ Lib/test/test_ssl.py | 52 +++++++++++++++---- .../2017-08-06-14-43-45.bpo-28414.mzZ6vD.rst | 2 + Modules/_ssl.c | 5 +- 7 files changed, 110 insertions(+), 11 deletions(-) create mode 100644 Lib/test/ssl-idn-ca.pem create mode 100644 Lib/test/ssl-idn-cert.pem create mode 100644 Misc/NEWS.d/next/Security/2017-08-06-14-43-45.bpo-28414.mzZ6vD.rst diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index 9b3bdd5489d4a9..3e377bf864975c 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -1243,6 +1243,12 @@ SSL sockets also have the following additional methods and attributes: .. versionadded:: 3.2 + .. versionchanged:: 3.7 + When ``server_hostname`` is an internationalized domain name + (IDN), this attribute now stores the A-label form + (``"xn--pythn-mua.org"``), rather than the U-label form + (``"pythön.org"``). + .. attribute:: SSLSocket.session The :class:`SSLSession` for this SSL connection. The session is available diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index a0c20a0ad88c3d..f12c70551d3c11 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -297,6 +297,16 @@ string expression pattern for braced placeholders and non-braced placeholders separately. (Contributed by Barry Warsaw in :issue:`1198569`.) +ssl +--- + +Added support for validating server certificates containing +internationalized domain names (IDNs). As part of this change, the +:attr:`ssl.SSLSocket.server_hostname` attribute now stores the +expected hostname in A-label form (``"xn--pythn-mua.org"``), rather +than the U-label form (``"pythön.org"``). (Contributed by +Nathaniel J. Smith in :issue:`28414`.) + unittest.mock ------------- diff --git a/Lib/test/ssl-idn-ca.pem b/Lib/test/ssl-idn-ca.pem new file mode 100644 index 00000000000000..0034eb8b2e607f --- /dev/null +++ b/Lib/test/ssl-idn-ca.pem @@ -0,0 +1,15 @@ +-----BEGIN CERTIFICATE----- +MIICQDCCAamgAwIBAgIUBg5Z+vupJbrxjKmrYeJ6nb3xnQEwDQYJKoZIhvcNAQEL +BQAwQDEXMBUGA1UECgwOdHJ1c3RtZSB2MC40LjAxJTAjBgNVBAsMHFRlc3Rpbmcg +Q0EgI3QxM0tzY2dCQm8xQzZpTnUwIBcNMDAwMTAxMDAwMDAwWhgPMzAwMDAxMDEw +MDAwMDBaMEAxFzAVBgNVBAoMDnRydXN0bWUgdjAuNC4wMSUwIwYDVQQLDBxUZXN0 +aW5nIENBICN0MTNLc2NnQkJvMUM2aU51MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB +iQKBgQDRWbKkz1+y31q1bu5P5J/XOjSwEac1ESG2G7W6hbYsVTn6OqqjXvebE3ex ++pNd/ciBHMv0SzlqKyo5l0BNLOjlth8C7j9LbUimddl4rpkpmtEuu4acwPT9pzts +jHxSPehJsF26ixReg8qi/E8Rsrri+3swFbI0pos6pQZo81HvjwIDAQABozUwMzAd +BgNVHQ4EFgQUYDMWkOTMwi9BwFK0blya/ou4r/YwEgYDVR0TAQH/BAgwBgEB/wIB +CTANBgkqhkiG9w0BAQsFAAOBgQC6BjPf9juAfJPNVqRX3qAWIf6wpOVWX1CO/Qtc +HSiCxxpTv2xAGX9ZAwK8liBKR4qGd0lDmujpKVLKAdsWlhFWJNgO3VyQgTkOYBzf +6fq2RE1oKXmqg2H7ndcku6TACNVCyFv4hbN4RISXO0Al/gR4h3lL9+05BXxT2eb5 +dcUonA== +-----END CERTIFICATE----- diff --git a/Lib/test/ssl-idn-cert.pem b/Lib/test/ssl-idn-cert.pem new file mode 100644 index 00000000000000..26ae6853a74771 --- /dev/null +++ b/Lib/test/ssl-idn-cert.pem @@ -0,0 +1,31 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXQIBAAKBgQCjbzNwZrxp50RAVF55jSw5M//KD5/kswwdpUePux00JS5WnHh3 +SE98rnrWS34ryblBEMOEgIZPuYFjLs0fVN1XgmUHxs2cDPFUOpBrK2tf+nMDN0o9 +AZG4V3e6wbwOKPyIybJyhkyCe1jj5oXjhrYTcDB0TIteAmtRkLU4nZlLrwIDAQAB +AoGAYhHyTfp4CRyLaga2gj3iUZkQXpGtorCGDqwFGwxu48GD4tkVuI4dlHWmpDy8 +w03S6mZCzJnK/sAUEg4dbDWic1D9QyocWtpFFPJ3RyWKEuzN9Ka518dAzRtKya+9 +oUovXCfCAiw3gwi2sO6QeADPbnScNgLlSiOZTOyTTJhydyECQQDPCnRxqXjLJjgT +Wpur6oSLiLmtitG1KsU80d7X2yqCnScTysw4IwYoOdq4BmiwSSnwV2Glha4pZKz1 +trghZ9opAkEAyhT1vLNON4WDc6vpbWYCGFf3TaJRSqdd/hjaUmBeR9Wsa+bUccxS +6Jk2RcfP0dv8NT4ZY6bILNktCUr4V9liFwJBAJ+jTA2nwn/BRFOH9agk93YvQhvR +gcjS5anzmIOPdcOoMM1N/RD70G+LzF1Ac9AZWcD7X0slPBimi8YZ0PfQ/6ECQQCU +hAT6EvlIGsq6Jz0d1ptxkqzBFKsT559PkKpbYlHID4RxpKq7m8PPCFL3w9q7TCa2 +ZpY4Q6nYNCBCNSQBRFUvAkBhGZjMj25DqZO3vsW2LqwlBVk+hIih0LrLHThCuzT1 +rKdC2AdCvu3FZCbNg0lpjiYXEYCXvCP4c5TqS3H8uaC8 +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICjTCCAfagAwIBAgIUHg5Af4BmKSy4Xyot/yAGeSkO+PswDQYJKoZIhvcNAQEL +BQAwQDEXMBUGA1UECgwOdHJ1c3RtZSB2MC40LjAxJTAjBgNVBAsMHFRlc3Rpbmcg +Q0EgI3QxM0tzY2dCQm8xQzZpTnUwIBcNMDAwMTAxMDAwMDAwWhgPMzAwMDAxMDEw +MDAwMDBaMEkxFzAVBgNVBAoMDnRydXN0bWUgdjAuNC4wMS4wLAYDVQQLDCVUZXN0 +aW5nIHNlcnZlciBjZXJ0ICNaRlFmTEh1MDZaYU56UFYxMIGfMA0GCSqGSIb3DQEB +AQUAA4GNADCBiQKBgQCjbzNwZrxp50RAVF55jSw5M//KD5/kswwdpUePux00JS5W +nHh3SE98rnrWS34ryblBEMOEgIZPuYFjLs0fVN1XgmUHxs2cDPFUOpBrK2tf+nMD +N0o9AZG4V3e6wbwOKPyIybJyhkyCe1jj5oXjhrYTcDB0TIteAmtRkLU4nZlLrwID +AQABo3kwdzAdBgNVHQ4EFgQU1BZqNqO4rmUwk8V015AXCb8+keQwDAYDVR0TAQH/ +BAIwADAfBgNVHSMEGDAWgBRgMxaQ5MzCL0HAUrRuXJr+i7iv9jAnBgNVHREBAf8E +HTAbghl4bi0tcHl0aG4tbXVhLmV4YW1wbGUub3JnMA0GCSqGSIb3DQEBCwUAA4GB +ABJ4tUqfj9gHEYGxousPf7HSX/ZHF8e9HW+qpOX/urPRdGM0ObYrUlPgKJ1NIlA2 +HSOPWGVQgvk6P84s0oBYLAJ0C2CrKg2AQsusFn9s8dAM9hlYNEK9rfTQILxrnCyz +vpg6hKEGXN0UjYPb5HBPFKsWF0DbbNaWrr0co32yH2L8 +-----END CERTIFICATE----- diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index aa2429ac9827ae..f0520bfecbc64d 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -80,6 +80,15 @@ def data_file(*name): DHFILE = data_file("dh1024.pem") BYTES_DHFILE = os.fsencode(DHFILE) +# These were generated by doing 'pip install trustme', and then: +# import trustme +# ca = trustme.CA() +# cert = ca.issue_server_cert("pythön.example.org") +# ca.cert_pem.write_to_path("ssl-idn-ca.pem") +# cert.private_key_and_cert_chain_pem.write_to_path("ssl-idn-cert.pem") +IDN_CA = data_file("ssl-idn-ca.pem") +IDN_CERT = data_file("ssl-idn-cert.pem") + # Not defined in all versions of OpenSSL OP_NO_COMPRESSION = getattr(ssl, "OP_NO_COMPRESSION", 0) OP_SINGLE_DH_USE = getattr(ssl, "OP_SINGLE_DH_USE", 0) @@ -1473,16 +1482,6 @@ def test_subclass(self): # For compatibility self.assertEqual(cm.exception.errno, ssl.SSL_ERROR_WANT_READ) - def test_bad_idna_in_server_hostname(self): - # Note: this test is testing some code that probably shouldn't exist - # in the first place, so if it starts failing at some point because - # you made the ssl module stop doing IDNA decoding then please feel - # free to remove it. The test was mainly added because this case used - # to cause memory corruption (see bpo-30594). - ctx = ssl.create_default_context() - with self.assertRaises(UnicodeError): - ctx.wrap_bio(ssl.MemoryBIO(), ssl.MemoryBIO(), - server_hostname="xn--.com") class MemoryBIOTests(unittest.TestCase): @@ -2521,6 +2520,39 @@ def test_check_hostname(self): "check_hostname requires server_hostname"): client_context.wrap_socket(s) + def test_check_hostname_idn(self): + if support.verbose: + sys.stdout.write("\n") + + server_context = ssl.SSLContext(ssl.PROTOCOL_TLS) + server_context.load_cert_chain(IDN_CERT) + + context = ssl.SSLContext(ssl.PROTOCOL_TLS) + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + context.load_verify_locations(IDN_CA) + + # correct hostname should verify, when specified in several + # different ways + for server_hostname in ["pythön.example.org", + "xn--pythn-mua.example.org", + b"xn--pythn-mua.example.org"]: + server = ThreadedEchoServer(context=server_context, chatty=True) + with server: + with context.wrap_socket(socket.socket(), + server_hostname=server_hostname) as s: + s.connect((HOST, server.port)) + cert = s.getpeercert() + self.assertTrue(cert, "Can't get peer certificate.") + + # incorrect hostname should raise an exception + server = ThreadedEchoServer(context=server_context, chatty=True) + with server: + with context.wrap_socket(socket.socket(), + server_hostname="python.example.org") as s: + with self.assertRaises(ssl.CertificateError): + s.connect((HOST, server.port)) + def test_wrong_cert(self): """Connecting when the server rejects the client's certificate diff --git a/Misc/NEWS.d/next/Security/2017-08-06-14-43-45.bpo-28414.mzZ6vD.rst b/Misc/NEWS.d/next/Security/2017-08-06-14-43-45.bpo-28414.mzZ6vD.rst new file mode 100644 index 00000000000000..8423c1852b8825 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2017-08-06-14-43-45.bpo-28414.mzZ6vD.rst @@ -0,0 +1,2 @@ +The ssl module can now validate hostnames that contain non-ASCII +characters (IDNs). diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 380dd1b72db0f1..2c6757355e54cf 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -716,8 +716,11 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, self->owner = NULL; self->server_hostname = NULL; if (server_hostname != NULL) { + /* server_hostname was encoded to an A-label by our caller; put it + * back into a str object, but still as an A-label (bpo-28414) + */ PyObject *hostname = PyUnicode_Decode(server_hostname, strlen(server_hostname), - "idna", "strict"); + "ascii", "strict"); if (hostname == NULL) { Py_DECREF(self); return NULL;