Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

allow specifying https:// proxy #9119

Closed
wants to merge 11 commits into from
1 change: 1 addition & 0 deletions changelog.d/9119.feature
@@ -0,0 +1 @@
Add support for https connections to a proxy server.
71 changes: 52 additions & 19 deletions synapse/http/proxyagent.py
Expand Up @@ -22,6 +22,7 @@

from twisted.internet import defer
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
from twisted.internet.interfaces import IReactorCore
from twisted.python.failure import Failure
from twisted.web.client import URI, BrowserLikePolicyForHTTPS, _AgentBase
from twisted.web.error import SchemeNotSupported
Expand Down Expand Up @@ -81,6 +82,10 @@ class ProxyAgent(_AgentBase):

use_proxy (bool): Whether proxy settings should be discovered and used
from conventional environment variables.
This currently supports http:// and https:// proxies.
A hostname without scheme is assumed to be http.
Comment on lines +85 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a bit confusing. A hostname where?

... and yes, of course it supports http and https proxies - what other sorts of proxies would an http/https agent possibly care about?

Suggest removing these lines.


Raises: ValueError if given a proxy with a scheme we don't support.
"""

def __init__(
Expand Down Expand Up @@ -121,11 +126,11 @@ def __init__(
self.https_proxy_creds, https_proxy = parse_username_password(https_proxy)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will break for https://username:password@host, I think? Probably best to call parse_proxy here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why only credentials were parsed for https proxy? Only security reason or anything else?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will break for https://username:password@host, I think? Probably best to call parse_proxy here

I would suggest a solution like this.
This change can make parse_username_password compatible to https://username:password@host

diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py
index c747b99..3c4fee6 100644
--- a/synapse/http/proxyagent.py
+++ b/synapse/http/proxyagent.py
@@ -293,7 +293,7 @@
 def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], bytes]:
     """
     Parses the username and password from a proxy declaration e.g
-    username:password@hostname:port.
+    username:password@hostname:port or https://username:password@hostname:port
 
     Args:
         proxy: The proxy connection string.
@@ -304,9 +304,15 @@
         ProxyCredentials instance is replaced with None.
     """
     if proxy and b"@" in proxy:
+        scheme, host, port = parse_proxy(proxy)
         # We use rsplit here as the password could contain an @ character
-        credentials, proxy_without_credentials = proxy.rsplit(b"@", 1)
-        return ProxyCredentials(credentials), proxy_without_credentials
+        credentials, proxy_without_credentials = host.rsplit(b"@", 1)
+        return (
+            ProxyCredentials(credentials),
+            b"".join(
+                [scheme, b"://", proxy_without_credentials, b":", str(port).encode()]
+            ),
+        )
 
     return None, proxy

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why only credentials were parsed for https proxy? Only security reason or anything else?

I don't think there is much reason at all. Support for credentials was added in #9657: perhaps http_proxy was just fogotten?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest a solution like this.

seems sensible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why only credentials were parsed for https proxy? Only security reason or anything else?

I don't think there is much reason at all. Support for credentials was added in #9657: perhaps http_proxy was just fogotten?

In addition the issue for this #9000


self.http_proxy_endpoint = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, **self._endpoint_kwargs
http_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.https_proxy_endpoint = _http_proxy_endpoint(
https_proxy, self.proxy_reactor, **self._endpoint_kwargs
https_proxy, self.proxy_reactor, contextFactory, **self._endpoint_kwargs
)

self.no_proxy = no_proxy
Expand Down Expand Up @@ -243,28 +248,46 @@ def request(self, method, uri, headers=None, bodyProducer=None):
)


def _http_proxy_endpoint(proxy: Optional[bytes], reactor, **kwargs):
def _http_proxy_endpoint(
proxy: Optional[bytes],
reactor: IReactorCore,
tls_options_factory: IPolicyForHTTPS,
**kwargs,
):
"""Parses an http proxy setting and returns an endpoint for the proxy
Bubu marked this conversation as resolved.
Show resolved Hide resolved

Args:
proxy: the proxy setting in the form: [<username>:<password>@]<host>[:<port>]
Note that compared to other apps, this function currently lacks support
for specifying a protocol schema (i.e. protocol://...).
proxy: the proxy setting in the form: [scheme://][<username>:<password>@]<host>[:<port>]
This currently supports http:// and https:// proxies.
A hostname without scheme is assumed to be http.

reactor: reactor to be used to connect to the proxy

tls_options_factory: the TLS options to use when connecting through a https proxy
kwargs: other args to be passed to HostnameEndpoint

Returns:
interfaces.IStreamClientEndpoint|None: endpoint to use to connect to the proxy,
or None

Raises: ValueError if given a proxy with a scheme we don't support.
Bubu marked this conversation as resolved.
Show resolved Hide resolved
Bubu marked this conversation as resolved.
Show resolved Hide resolved
"""
if proxy is None:
return None

# Parse the connection string
host, port = parse_host_port(proxy, default_port=1080)
return HostnameEndpoint(reactor, host, port, **kwargs)
# Note: we can't use urlsplit/urlparse as that is completely broken for things without a scheme://
Bubu marked this conversation as resolved.
Show resolved Hide resolved
scheme, host, port = parse_proxy(proxy)

if scheme not in (b"http", b"https"):
raise ValueError("Proxy scheme '{}' not supported".format(scheme.decode()))

proxy_endpoint = HostnameEndpoint(reactor, host, port, **kwargs)

if scheme == b"https":
tls_options = tls_options_factory.creatorForNetloc(host, port)
proxy_endpoint = wrapClientTLS(tls_options, proxy_endpoint)

return proxy_endpoint


def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], bytes]:
Expand All @@ -288,25 +311,35 @@ def parse_username_password(proxy: bytes) -> Tuple[Optional[ProxyCredentials], b
return None, proxy


def parse_host_port(hostport: bytes, default_port: int = None) -> Tuple[bytes, int]:
def parse_proxy(
proxy: bytes, default_scheme: bytes = b"http", default_port: int = 1080
) -> Tuple[bytes, bytes, int]:
"""
Parse the hostname and port from a proxy connection byte string.
Parse the scheme, hostname and port from a proxy connection byte string.

Args:
hostport: The proxy connection string. Must be in the form 'host[:port]'.
default_port: The default port to return if one is not found in `hostport`.
proxy: The proxy connection string. Must be in the form '[scheme://]host[:port]'.
default_scheme: The default scheme to return if one is not found in `proxy`. Defaults to http
default_port: The default port to return if one is not found in `proxy`. Defaults to 1080

Returns:
A tuple containing the hostname and port. Uses `default_port` if one was not found.
A tuple containing the scheme, hostname and port.
"""
if b":" in hostport:
host, port = hostport.rsplit(b":", 1)
# First check if we have a scheme present
if b"://" in proxy:
scheme, host = proxy.split(b"://", 1)
Comment on lines +329 to +330
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have thought about the problem with Python 3.9 and urlparse. See: #9119 (comment)

I would suggest to work with urlparse after all. For compatibility reasons we can check if there is a sheme and if not we add this. Somthing like this

Suggested change
if b"://" in proxy:
scheme, host = proxy.split(b"://", 1)
if not b"://" in proxy:
proxy = b"".join([default_scheme, b"://", proxy])

else:
scheme, host = default_scheme, proxy
# Now check the leftover part for a port
if b":" in host:
new_host, port = host.rsplit(b":", 1)
try:
port = int(port)
return host, port
return scheme, new_host, port
except ValueError:
# the thing after the : wasn't a valid port; presumably this is an
# IPv6 address.
# TODO: this doesn't work when the last part of the IP is also just a number.
# We probably need to require ipv6's being wrapped in square brackets: [2001:db8:0:0:1::1]
pass

return hostport, default_port
return scheme, host, default_port
179 changes: 171 additions & 8 deletions tests/http/test_proxyagent.py
Expand Up @@ -14,19 +14,20 @@
import base64
import logging
import os
from typing import Optional
from typing import Iterable, Optional
from unittest.mock import patch

import treq
from netaddr import IPSet

from twisted.internet import interfaces # noqa: F401
from twisted.internet.endpoints import HostnameEndpoint, _WrapperEndpoint
from twisted.internet.protocol import Factory
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.web.http import HTTPChannel

from synapse.http.client import BlacklistingReactorWrapper
from synapse.http.proxyagent import ProxyAgent
from synapse.http.proxyagent import ProxyAgent, parse_proxy

from tests.http import TestServerTLSConnectionFactory, get_test_https_policy
from tests.server import FakeTransport, ThreadedMemoryReactorClock
Expand All @@ -37,12 +38,117 @@
HTTPFactory = Factory.forProtocol(HTTPChannel)


class ProxyParserTests(TestCase):
def test_parse_proxy_host_only(self):
Bubu marked this conversation as resolved.
Show resolved Hide resolved
url = b"localhost"
self.assertEqual((b"http", b"localhost", 1080), parse_proxy(url))

def test_parse_proxy_host_port(self):
url = b"localhost:9988"
self.assertEqual((b"http", b"localhost", 9988), parse_proxy(url))

def test_parse_proxy_scheme_host(self):
url = b"https://localhost"
self.assertEqual((b"https", b"localhost", 1080), parse_proxy(url))

def test_parse_proxy_scheme_host_port(self):
url = b"https://localhost:1234"
self.assertEqual((b"https", b"localhost", 1234), parse_proxy(url))

def test_parse_proxy_host_only_ipv4(self):
url = b"1.2.3.4"
self.assertEqual((b"http", b"1.2.3.4", 1080), parse_proxy(url))

def test_parse_proxy_host_port_ipv4(self):
url = b"1.2.3.4:9988"
self.assertEqual((b"http", b"1.2.3.4", 9988), parse_proxy(url))

def test_parse_proxy_scheme_host_ipv4(self):
url = b"https://1.2.3.4"
self.assertEqual((b"https", b"1.2.3.4", 1080), parse_proxy(url))

def test_parse_proxy_scheme_host_port_ipv4(self):
url = b"https://1.2.3.4:9988"
self.assertEqual((b"https", b"1.2.3.4", 9988), parse_proxy(url))

def test_parse_proxy_host_ipv6(self):
url = b"2001:0db8:85a3:0000:0000:8a2e:0370:effe"
self.assertEqual(
(b"http", b"2001:0db8:85a3:0000:0000:8a2e:0370:effe", 1080),
parse_proxy(url),
)

# currently broken
url = b"2001:0db8:85a3:0000:0000:8a2e:0370:1234"
# self.assertEqual((b"http", b"2001:0db8:85a3:0000:0000:8a2e:0370:1234", 1080), parse_proxy(url))

# also broken
url = b"::1"
# self.assertEqual((b"http", b"::1", 1080), parse_proxy(url))
url = b"::ffff:0.0.0.0"
self.assertEqual((b"http", b"::ffff:0.0.0.0", 1080), parse_proxy(url))

def test_parse_proxy_host_port_ipv6(self):
url = b"2001:0db8:85a3:0000:0000:8a2e:0370:effe:9988"
self.assertEqual(
(b"http", b"2001:0db8:85a3:0000:0000:8a2e:0370:effe", 9988),
parse_proxy(url),
)

# currently broken
url = b"2001:0db8:85a3:0000:0000:8a2e:0370:1234:9988"
# self.assertEqual((b"http", b"2001:0db8:85a3:0000:0000:8a2e:0370:1234", 9988), parse_proxy(url))

url = b"::1:9988"
self.assertEqual((b"http", b"::1", 9988), parse_proxy(url))
url = b"::ffff:0.0.0.0:9988"
self.assertEqual((b"http", b"::ffff:0.0.0.0", 9988), parse_proxy(url))

def test_parse_proxy_scheme_host_ipv6(self):
url = b"https://2001:0db8:85a3:0000:0000:8a2e:0370:effe"
self.assertEqual(
(b"https", b"2001:0db8:85a3:0000:0000:8a2e:0370:effe", 1080),
parse_proxy(url),
)

# currently broken
url = b"https://2001:0db8:85a3:0000:0000:8a2e:0370:1234"
# self.assertEqual((b"https", b"2001:0db8:85a3:0000:0000:8a2e:0370:1234", 1080), parse_proxy(url))

# also broken
url = b"https://::1"
# self.assertEqual((b"https", b"::1", 1080), parse_proxy(url))
url = b"https://::ffff:0.0.0.0:1080"
self.assertEqual((b"https", b"::ffff:0.0.0.0", 1080), parse_proxy(url))

def test_parse_proxy_scheme_host_port_ipv6(self):
url = b"https://2001:0db8:85a3:0000:0000:8a2e:0370:effe:9988"
self.assertEqual(
(b"https", b"2001:0db8:85a3:0000:0000:8a2e:0370:effe", 9988),
parse_proxy(url),
)

# currently broken
url = b"https://2001:0db8:85a3:0000:0000:8a2e:0370:1234:9988"
# self.assertEqual((b"https", b"2001:0db8:85a3:0000:0000:8a2e:0370:1234", 9988), parse_proxy(url))

url = b"https://::1:9988"
self.assertEqual((b"https", b"::1", 9988), parse_proxy(url))
url = b"https://::ffff:0.0.0.0:9988"
self.assertEqual((b"https", b"::ffff:0.0.0.0", 9988), parse_proxy(url))


class MatrixFederationAgentTests(TestCase):
def setUp(self):
self.reactor = ThreadedMemoryReactorClock()

def _make_connection(
self, client_factory, server_factory, ssl=False, expected_sni=None
self,
client_factory,
server_factory,
ssl=False,
expected_sni=None,
tls_sanlist: Optional[Iterable[bytes]] = None,
):
"""Builds a test server, and completes the outgoing client connection

Expand All @@ -59,11 +165,14 @@ def _make_connection(

expected_sni (bytes|None): the expected SNI value

tls_sanlist: list of SAN entries for the TLS cert presented by the server.
Defaults to [b'DNS:test.com']

Returns:
IProtocol: the server Protocol returned by server_factory
"""
if ssl:
server_factory = _wrap_server_factory_for_tls(server_factory)
server_factory = _wrap_server_factory_for_tls(server_factory, tls_sanlist)

server_protocol = server_factory.buildProtocol(None)

Expand Down Expand Up @@ -203,10 +312,16 @@ def test_https_request_via_no_proxy_star(self):
)
self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
def test_http_request_via_proxy(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
def _test_request_proxy_connection(
self, agent: ProxyAgent, ssl: bool = False
) -> None:
"""Send a request via an agent and check that it is correctly received at the proxy

Args:
agent: the Agent to send the request via. It is expected to send requests
to a proxy at 'proxy.com:8888'.
ssl: True if we expect the Agent to connect via https
"""
self.reactor.lookups["proxy.com"] = "1.2.3.5"
d = agent.request(b"GET", b"http://test.com")

Expand All @@ -219,7 +334,11 @@ def test_http_request_via_proxy(self):

# make a test server, and wire up the client
http_server = self._make_connection(
client_factory, _get_test_protocol_factory()
client_factory,
_get_test_protocol_factory(),
ssl=ssl,
tls_sanlist=[b"DNS:proxy.com"] if ssl else None,
expected_sni=b"proxy.com" if ssl else None,
)

# the FakeTransport is async, so we need to pump the reactor
Expand All @@ -241,6 +360,20 @@ def test_http_request_via_proxy(self):
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
def test_http_request_via_proxy(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_proxy_connection(agent)

@patch.dict(
os.environ, {"http_proxy": "https://proxy.com:8888", "no_proxy": "unused.com"}
)
def test_http_request_via_https_proxy(self):
agent = ProxyAgent(
self.reactor, use_proxy=True, contextFactory=get_test_https_policy()
)
self._test_request_proxy_connection(agent, ssl=True)

@patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
def test_https_request_via_proxy(self):
"""Tests that TLS-encrypted requests can be made through a proxy"""
Expand Down Expand Up @@ -490,6 +623,36 @@ def test_https_request_via_uppercase_proxy_with_blacklist(self):
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")

@patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
Bubu marked this conversation as resolved.
Show resolved Hide resolved
def test_proxy_with_no_scheme(self):
http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)

@patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"})
def test_proxy_with_unsupported_scheme(self):
with self.assertRaises(ValueError):
_ = ProxyAgent(self.reactor, use_proxy=True)
Bubu marked this conversation as resolved.
Show resolved Hide resolved

@patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"})
def test_proxy_with_http_scheme(self):
http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
self.assertIsInstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint)
self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com")
self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888)

@patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"})
def test_proxy_with_https_scheme(self):
https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True)
self.assertIsInstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint)
self.assertEqual(
https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com"
)
self.assertEqual(
https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._port, 8888
)


def _wrap_server_factory_for_tls(factory, sanlist=None):
"""Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory
Expand Down