From 4b3a1faa08f5ad16e0e00dc629fb25be520575d7 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 28 Sep 2020 00:23:35 +0100 Subject: [PATCH 01/12] typo --- synapse/storage/databases/main/schema/delta/56/event_labels.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/schema/delta/56/event_labels.sql b/synapse/storage/databases/main/schema/delta/56/event_labels.sql index 5e29c1da19e7..ccf287971cb4 100644 --- a/synapse/storage/databases/main/schema/delta/56/event_labels.sql +++ b/synapse/storage/databases/main/schema/delta/56/event_labels.sql @@ -13,7 +13,7 @@ * limitations under the License. */ --- room_id and topoligical_ordering are denormalised from the events table in order to +-- room_id and topological_ordering are denormalised from the events table in order to -- make the index work. CREATE TABLE IF NOT EXISTS event_labels ( event_id TEXT, From 450ec4844599b6f06ff6c699a8edc067fa7d4217 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 28 Sep 2020 13:15:00 +0100 Subject: [PATCH 02/12] A pair of tiny cleanups in the federation request code. (#8401) --- changelog.d/8401.misc | 1 + synapse/handlers/federation.py | 2 +- synapse/http/matrixfederationclient.py | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8401.misc diff --git a/changelog.d/8401.misc b/changelog.d/8401.misc new file mode 100644 index 000000000000..27fd7ab129d9 --- /dev/null +++ b/changelog.d/8401.misc @@ -0,0 +1 @@ +A pair of tiny cleanups in the federation request code. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 9f773aefa7fa..5bcfb231b2cb 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -281,7 +281,7 @@ async def on_receive_pdu(self, origin, pdu, sent_to_us_directly=False) -> None: raise Exception( "Error fetching missing prev_events for %s: %s" % (event_id, e) - ) + ) from e # Update the set of things we've seen after trying to # fetch the missing stuff diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 3c86cbc546db..b02c74ab2d4b 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -473,8 +473,6 @@ async def _send_request( ) response = await request_deferred - except TimeoutError as e: - raise RequestSendFailed(e, can_retry=True) from e except DNSLookupError as e: raise RequestSendFailed(e, can_retry=retry_on_dns_fail) from e except Exception as e: From bd715e12786f4e48d7a8a1973119bbc0502ecff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= Date: Mon, 28 Sep 2020 15:35:02 +0100 Subject: [PATCH 03/12] Add `ui_auth_sessions_ips` table to `synapse_port_db` ignore list (#8410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This table was created in #8034 (1.20.0). It references `ui_auth_sessions`, which is ignored, so this one should be too. Signed-off-by: Dagfinn Ilmari Mannsåker --- changelog.d/8410.bugfix | 1 + scripts/synapse_port_db | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/8410.bugfix diff --git a/changelog.d/8410.bugfix b/changelog.d/8410.bugfix new file mode 100644 index 000000000000..1323ddc525db --- /dev/null +++ b/changelog.d/8410.bugfix @@ -0,0 +1 @@ +Fix a v1.20.0 regression in the `synapse_port_db` script regarding the `ui_auth_sessions_ips` table. 
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 684a518b8e5f..ae2887b7d2f4 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -145,6 +145,7 @@ IGNORED_TABLES = { # the sessions are transient anyway, so ignore them. "ui_auth_sessions", "ui_auth_sessions_credentials", + "ui_auth_sessions_ips", } From 5e3ca12b158b4abefe2e3a54259ab5255dca93d8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 28 Sep 2020 17:58:33 +0100 Subject: [PATCH 04/12] Create a mechanism for marking tests "logcontext clean" (#8399) --- changelog.d/8399.misc | 1 + synapse/logging/context.py | 43 +++++++++++++++++++----------------- tests/crypto/test_keyring.py | 3 +++ tests/unittest.py | 15 ++++++++++++- 4 files changed, 41 insertions(+), 21 deletions(-) create mode 100644 changelog.d/8399.misc diff --git a/changelog.d/8399.misc b/changelog.d/8399.misc new file mode 100644 index 000000000000..ce6e8123cf8b --- /dev/null +++ b/changelog.d/8399.misc @@ -0,0 +1 @@ +Create a mechanism for marking tests "logcontext clean". diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 2e282d9d670e..ca0c774cc5bc 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -65,6 +65,11 @@ def get_thread_resource_usage() -> "Optional[resource._RUsage]": return None +# a hook which can be set during testing to assert that we aren't abusing logcontexts. +def logcontext_error(msg: str): + logger.warning(msg) + + # get an id for the current thread. # # threading.get_ident doesn't actually return an OS-level tid, and annoyingly, @@ -330,10 +335,9 @@ def __enter__(self) -> "LoggingContext": """Enters this logging context into thread local storage""" old_context = set_current_context(self) if self.previous_context != old_context: - logger.warning( - "Expected previous context %r, found %r", - self.previous_context, - old_context, + logcontext_error( + "Expected previous context %r, found %r" + % (self.previous_context, old_context,) ) return self @@ -346,10 +350,10 @@ def __exit__(self, type, value, traceback) -> None: current = set_current_context(self.previous_context) if current is not self: if current is SENTINEL_CONTEXT: - logger.warning("Expected logging context %s was lost", self) + logcontext_error("Expected logging context %s was lost" % (self,)) else: - logger.warning( - "Expected logging context %s but found %s", self, current + logcontext_error( + "Expected logging context %s but found %s" % (self, current) ) # the fact that we are here suggests that the caller thinks that everything @@ -387,16 +391,16 @@ def start(self, rusage: "Optional[resource._RUsage]") -> None: support getrusuage. 
""" if get_thread_id() != self.main_thread: - logger.warning("Started logcontext %s on different thread", self) + logcontext_error("Started logcontext %s on different thread" % (self,)) return if self.finished: - logger.warning("Re-starting finished log context %s", self) + logcontext_error("Re-starting finished log context %s" % (self,)) # If we haven't already started record the thread resource usage so # far if self.usage_start: - logger.warning("Re-starting already-active log context %s", self) + logcontext_error("Re-starting already-active log context %s" % (self,)) else: self.usage_start = rusage @@ -414,7 +418,7 @@ def stop(self, rusage: "Optional[resource._RUsage]") -> None: try: if get_thread_id() != self.main_thread: - logger.warning("Stopped logcontext %s on different thread", self) + logcontext_error("Stopped logcontext %s on different thread" % (self,)) return if not rusage: @@ -422,9 +426,9 @@ def stop(self, rusage: "Optional[resource._RUsage]") -> None: # Record the cpu used since we started if not self.usage_start: - logger.warning( - "Called stop on logcontext %s without recording a start rusage", - self, + logcontext_error( + "Called stop on logcontext %s without recording a start rusage" + % (self,) ) return @@ -584,14 +588,13 @@ def __exit__(self, type, value, traceback) -> None: if context != self._new_context: if not context: - logger.warning( - "Expected logging context %s was lost", self._new_context + logcontext_error( + "Expected logging context %s was lost" % (self._new_context,) ) else: - logger.warning( - "Expected logging context %s but found %s", - self._new_context, - context, + logcontext_error( + "Expected logging context %s but found %s" + % (self._new_context, context,) ) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 5cf408f21faf..8ff1460c0d7a 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -41,6 +41,7 @@ from tests import unittest from tests.test_utils import make_awaitable +from tests.unittest import logcontext_clean class MockPerspectiveServer: @@ -67,6 +68,7 @@ def sign_response(self, res): signedjson.sign.sign_json(res, self.server_name, self.key) +@logcontext_clean class KeyringTestCase(unittest.HomeserverTestCase): def check_context(self, val, expected): self.assertEquals(getattr(current_context(), "request", None), expected) @@ -309,6 +311,7 @@ async def get_keys2(keys_to_fetch): mock_fetcher2.get_keys.assert_called_once() +@logcontext_clean class ServerKeyFetcherTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): self.http_client = Mock() diff --git a/tests/unittest.py b/tests/unittest.py index dabf69cff405..bbe50c38513b 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -23,7 +23,7 @@ import time from typing import Optional, Tuple, Type, TypeVar, Union -from mock import Mock +from mock import Mock, patch from canonicaljson import json @@ -169,6 +169,19 @@ def INFO(target): return target +def logcontext_clean(target): + """A decorator which marks the TestCase or method as 'logcontext_clean' + + ... ie, any logcontext errors should cause a test failure + """ + + def logcontext_error(msg): + raise AssertionError("logcontext error: %s" % (msg)) + + patcher = patch("synapse.logging.context.logcontext_error", new=logcontext_error) + return patcher(target) + + class HomeserverTestCase(TestCase): """ A base TestCase that reduces boilerplate for HomeServer-using test cases. 
From bd380d942fdf91cf1214d6859f2bc97d12a92ab4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 28 Sep 2020 18:00:30 +0100 Subject: [PATCH 05/12] Add checks for postgres sequence consistency (#8402) --- changelog.d/8402.misc | 1 + docs/postgres.md | 11 +++ .../storage/databases/main/registration.py | 3 + synapse/storage/databases/state/store.py | 3 + synapse/storage/util/id_generators.py | 5 ++ synapse/storage/util/sequence.py | 90 ++++++++++++++++++- tests/storage/test_id_generators.py | 22 ++++- tests/unittest.py | 31 ++++++- 8 files changed, 160 insertions(+), 6 deletions(-) create mode 100644 changelog.d/8402.misc diff --git a/changelog.d/8402.misc b/changelog.d/8402.misc new file mode 100644 index 000000000000..ad1804d207aa --- /dev/null +++ b/changelog.d/8402.misc @@ -0,0 +1 @@ +Add checks on startup that PostgreSQL sequences are consistent with their associated tables. diff --git a/docs/postgres.md b/docs/postgres.md index e71a1975d8d2..c30cc1fd8cef 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -106,6 +106,17 @@ Note that the above may fail with an error about duplicate rows if corruption has already occurred, and such duplicate rows will need to be manually removed. +## Fixing inconsistent sequences error + +Synapse uses Postgres sequences to generate IDs for various tables. A sequence +and associated table can get out of sync if, for example, Synapse has been +downgraded and then upgraded again. + +To fix the issue, shut down Synapse (including any and all workers) and run the +SQL command included in the error message. Once done, Synapse should start +successfully. + + ## Tuning Postgres The default settings should be fine for most deployments. For larger diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 48ce7ecd1638..a83df7759d79 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -41,6 +41,9 @@ def __init__(self, database: DatabasePool, db_conn, hs): self.config = hs.config self.clock = hs.get_clock() + # Note: we don't check this sequence for consistency as we'd have to + # call `find_max_generated_user_id_localpart` each time, which is + # expensive if there are many entries. self._user_id_seq = build_sequence_generator( database.engine, find_max_generated_user_id_localpart, "user_id_seq", ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index bec3780a32b1..989f0cbc9d3b 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -99,6 +99,9 @@ def get_max_state_group_txn(txn: Cursor): self._state_group_seq_gen = build_sequence_generator( self.database_engine, get_max_state_group_txn, "state_group_id_seq" ) + self._state_group_seq_gen.check_consistency( + db_conn, table="state_groups", id_column="id" + ) @cached(max_entries=10000, iterable=True) async def get_state_group_delta(self, state_group): diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 4269eaf9187e..4fd7573e260d 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -258,6 +258,11 @@ def __init__( self._sequence_gen = PostgresSequenceGenerator(sequence_name) + # We check that the table and sequence haven't diverged. + self._sequence_gen.check_consistency( + db_conn, table=table, id_column=id_column, positive=positive + ) + # This goes and fills out the above state from the database.
self._load_current_ids(db_conn, table, instance_column, id_column) diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py index ffc189474890..2dd95e270920 100644 --- a/synapse/storage/util/sequence.py +++ b/synapse/storage/util/sequence.py @@ -13,11 +13,34 @@ # See the License for the specific language governing permissions and # limitations under the License. import abc +import logging import threading from typing import Callable, List, Optional -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine -from synapse.storage.types import Cursor +from synapse.storage.engines import ( + BaseDatabaseEngine, + IncorrectDatabaseSetup, + PostgresEngine, +) +from synapse.storage.types import Connection, Cursor + +logger = logging.getLogger(__name__) + + +_INCONSISTENT_SEQUENCE_ERROR = """ +Postgres sequence '%(seq)s' is inconsistent with associated +table '%(table)s'. This can happen if Synapse has been downgraded and +then upgraded again, or due to a bad migration. + +To fix this error, shut down Synapse (including any and all workers) +and run the following SQL: + + SELECT setval('%(seq)s', ( + %(max_id_sql)s + )); + +See docs/postgres.md for more information. +""" class SequenceGenerator(metaclass=abc.ABCMeta): @@ -28,6 +51,19 @@ def get_next_id_txn(self, txn: Cursor) -> int: """Gets the next ID in the sequence""" ... + @abc.abstractmethod + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + """Should be called during start up to test that the current value of + the sequence is greater than or equal to the maximum ID in the table. + + This is to handle various cases where the sequence value can get out + of sync with the table, e.g. if Synapse gets rolled back to a previous + version and then rolled forwards again. + """ + ... + class PostgresSequenceGenerator(SequenceGenerator): """An implementation of SequenceGenerator which uses a postgres sequence""" @@ -45,6 +81,50 @@ def get_next_mult_txn(self, txn: Cursor, n: int) -> List[int]: ) return [i for (i,) in txn] + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + txn = db_conn.cursor() + + # First we get the current max ID from the table. + table_sql = "SELECT GREATEST(%(agg)s(%(id)s), 0) FROM %(table)s" % { + "id": id_column, + "table": table, + "agg": "MAX" if positive else "-MIN", + } + + txn.execute(table_sql) + row = txn.fetchone() + if not row: + # Table is empty, so nothing to do. + txn.close() + return + + # Now we fetch the current value from the sequence and compare with the + # above. + max_stream_id = row[0] + txn.execute( + "SELECT last_value, is_called FROM %(seq)s" % {"seq": self._sequence_name} + ) + last_value, is_called = txn.fetchone() + txn.close() + + # If `is_called` is False then `last_value` is actually the value that + # will be generated next, so we decrement to get the true "last value".
+            if not is_called: + last_value -= 1 + + if max_stream_id > last_value: + logger.warning( + "Postgres sequence %s is behind table %s: %d < %d", + self._sequence_name, + table, + last_value, + max_stream_id, + ) + raise IncorrectDatabaseSetup( + _INCONSISTENT_SEQUENCE_ERROR + % {"seq": self._sequence_name, "table": table, "max_id_sql": table_sql} + ) + GetFirstCallbackType = Callable[[Cursor], int] @@ -81,6 +161,12 @@ def get_next_id_txn(self, txn: Cursor) -> int: self._current_max_id += 1 return self._current_max_id + def check_consistency( + self, db_conn: Connection, table: str, id_column: str, positive: bool = True + ): + # There is nothing to do for in memory sequences + pass + def build_sequence_generator( database_engine: BaseDatabaseEngine, diff --git a/tests/storage/test_id_generators.py b/tests/storage/test_id_generators.py index d4ff55fbff7d..4558bee7be85 100644 --- a/tests/storage/test_id_generators.py +++ b/tests/storage/test_id_generators.py @@ -12,9 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - from synapse.storage.database import DatabasePool +from synapse.storage.engines import IncorrectDatabaseSetup from synapse.storage.util.id_generators import MultiWriterIdGenerator from tests.unittest import HomeserverTestCase @@ -59,7 +58,7 @@ def _create(conn): writers=writers, ) - return self.get_success(self.db_pool.runWithConnection(_create)) + return self.get_success_or_raise(self.db_pool.runWithConnection(_create)) def _insert_rows(self, instance_name: str, number: int): """Insert N rows as the given instance, inserting with stream IDs pulled @@ -411,6 +410,23 @@ async def _get_next_async(): self.get_success(_get_next_async()) self.assertEqual(id_gen_3.get_persisted_upto_position(), 6) + def test_sequence_consistency(self): + """Test that we error out if the table and sequence diverge. """ + + # Prefill with some rows + self._insert_row_with_id("master", 3) + + # Now we add a row *without* updating the stream ID + def _insert(txn): + txn.execute("INSERT INTO foobar VALUES (26, 'master')") + + self.get_success(self.db_pool.runInteraction("_insert", _insert)) + + # Creating the ID gen should error + with self.assertRaises(IncorrectDatabaseSetup): + self._create_id_generator("first") + class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase): """Tests MultiWriterIdGenerator that produce *negative* stream IDs. diff --git a/tests/unittest.py b/tests/unittest.py index bbe50c38513b..e654c0442d6c 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import gc import hashlib import hmac @@ -28,6 +27,7 @@ from canonicaljson import json from twisted.internet.defer import Deferred, ensureDeferred, succeed +from twisted.python.failure import Failure from twisted.python.threadpool import ThreadPool from twisted.trial import unittest @@ -476,6 +476,35 @@ def get_failure(self, d, exc): self.pump() return self.failureResultOf(d, exc) + def get_success_or_raise(self, d, by=0.0): + """Drive deferred to completion and return result or raise exception + on failure.
+        """ + + if inspect.isawaitable(d): + deferred = ensureDeferred(d) + if not isinstance(deferred, Deferred): + return d + + results = [] # type: list + deferred.addBoth(results.append) + + self.pump(by=by) + + if not results: + self.fail( + "Success result expected on {!r}, found no result instead".format( + deferred + ) + ) + + result = results[0] + + if isinstance(result, Failure): + result.raiseException() + + return result + def register_user(self, username, password, admin=False): """ Register a user. Requires the Admin API be registered. From d4605d1f16b5d71c72dbf07b1ffeaa81c0cb87a9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 28 Sep 2020 18:46:59 +0100 Subject: [PATCH 06/12] Don't check whether a 3pid is allowed to register during password reset This endpoint should only deal with emails that have already been approved, and are attached to the user's account. There's no need to re-check them here. --- synapse/rest/client/v2_alpha/account.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index c3ce0f62592a..ed0d0772f88a 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -96,13 +96,6 @@ async def on_POST(self, request): send_attempt = body["send_attempt"] next_link = body.get("next_link") # Optional param - if not check_3pid_allowed(self.hs, "email", email): - raise SynapseError( - 403, - "Your email domain is not authorized on this server", - Codes.THREEPID_DENIED, - ) - # Raise if the provided next_link value isn't valid assert_valid_next_link(self.hs, next_link) From fe443acaee36900757d79dbf7d2fb5629df38e3c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 28 Sep 2020 18:51:41 +0100 Subject: [PATCH 07/12] Changelog --- changelog.d/8414.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8414.bugfix diff --git a/changelog.d/8414.bugfix b/changelog.d/8414.bugfix new file mode 100644 index 000000000000..315876e89238 --- /dev/null +++ b/changelog.d/8414.bugfix @@ -0,0 +1 @@ +Remove unnecessary 3PID registration check when resetting password via an email address. Bug introduced in v0.34.0rc2. \ No newline at end of file From 1c262431f9bf768d106bf79a568479fa5a0784a1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Sep 2020 10:29:21 +0100 Subject: [PATCH 08/12] Fix handling of connection timeouts in outgoing http requests (#8400) * Remove `on_timeout_cancel` from `timeout_deferred` The `on_timeout_cancel` param to `timeout_deferred` wasn't always called on a timeout (in particular if the canceller raised an exception), so it was unreliable. It was also only used in one place, and to be honest it's easier to do what it does a different way. * Fix handling of connection timeouts in outgoing http requests Turns out that if we get a timeout during connection, then a different exception is raised, which wasn't always handled correctly. To fix it, catch the exception in SimpleHttpClient and turn it into a RequestTimedOutError (which is already a documented exception). Also add a description to RequestTimedOutError so that we can see which stage it failed at. * Fix incorrect handling of timeouts reading federation responses This was trapping the wrong sort of TimeoutError, so was never being hit. The effect was relatively minor, but we should fix this so that it does the expected thing.
* Fix inconsistent handling of `timeout` param between methods `get_json`, `put_json` and `delete_json` were applying a different timeout to the response body to `post_json`; bring them in line and test. Co-authored-by: Patrick Cloke Co-authored-by: Erik Johnston --- changelog.d/8400.bugfix | 1 + synapse/handlers/identity.py | 25 ++-- synapse/http/__init__.py | 17 +-- synapse/http/client.py | 54 +++++--- synapse/http/matrixfederationclient.py | 55 ++++++-- synapse/http/proxyagent.py | 16 ++- synapse/util/async_helpers.py | 47 +++---- tests/http/test_fedclient.py | 14 +- tests/http/test_simple_client.py | 180 +++++++++++++++++++++++++ 9 files changed, 311 insertions(+), 98 deletions(-) create mode 100644 changelog.d/8400.bugfix create mode 100644 tests/http/test_simple_client.py diff --git a/changelog.d/8400.bugfix b/changelog.d/8400.bugfix new file mode 100644 index 000000000000..835658ba5eac --- /dev/null +++ b/changelog.d/8400.bugfix @@ -0,0 +1 @@ +Fix incorrect handling of timeouts on outgoing HTTP requests. diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index ab15570f7a97..bc3e9607ca82 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -21,8 +21,6 @@ import urllib.parse from typing import Awaitable, Callable, Dict, List, Optional, Tuple -from twisted.internet.error import TimeoutError - from synapse.api.errors import ( CodeMessageException, Codes, @@ -30,6 +28,7 @@ SynapseError, ) from synapse.config.emailconfig import ThreepidBehaviour +from synapse.http import RequestTimedOutError from synapse.http.client import SimpleHttpClient from synapse.types import JsonDict, Requester from synapse.util import json_decoder @@ -93,7 +92,7 @@ async def threepid_from_creds( try: data = await self.http_client.get_json(url, query_params) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.info( @@ -173,7 +172,7 @@ async def bind_threepid( if e.code != 404 or not use_v2: logger.error("3PID bind failed with Matrix error: %r", e) raise e.to_synapse_error() - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except CodeMessageException as e: data = json_decoder.decode(e.msg) # XXX WAT? 
@@ -273,7 +272,7 @@ async def try_unbind_threepid_with_id_server( else: logger.error("Failed to unbind threepid on identity server: %s", e) raise SynapseError(500, "Failed to contact identity server") - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") await self.store.remove_user_bound_threepid( @@ -419,7 +418,7 @@ async def requestEmailToken( except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") async def requestMsisdnToken( @@ -471,7 +470,7 @@ async def requestMsisdnToken( except HttpResponseException as e: logger.info("Proxied requestToken failed: %r", e) raise e.to_synapse_error() - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") assert self.hs.config.public_baseurl @@ -553,7 +552,7 @@ async def proxy_msisdn_submit_token( id_server + "/_matrix/identity/api/v1/validate/msisdn/submitToken", body, ) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.warning("Error contacting msisdn account_threepid_delegate: %s", e) @@ -627,7 +626,7 @@ async def _lookup_3pid_v1( # require or validate it. See the following for context: # https://github.com/matrix-org/synapse/issues/5253#issuecomment-666246950 return data["mxid"] - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except IOError as e: logger.warning("Error from v1 identity server lookup: %s" % (e,)) @@ -655,7 +654,7 @@ async def _lookup_3pid_v2( "%s%s/_matrix/identity/v2/hash_details" % (id_server_scheme, id_server), {"access_token": id_access_token}, ) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") if not isinstance(hash_details, dict): @@ -727,7 +726,7 @@ async def _lookup_3pid_v2( }, headers=headers, ) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except Exception as e: logger.warning("Error when performing a v2 3pid lookup: %s", e) @@ -823,7 +822,7 @@ async def ask_id_server_for_third_party_invite( invite_config, {"Authorization": create_id_access_token_header(id_access_token)}, ) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: if e.code != 404: @@ -841,7 +840,7 @@ async def ask_id_server_for_third_party_invite( data = await self.blacklisting_http_client.post_json_get_json( url, invite_config ) - except TimeoutError: + except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") except HttpResponseException as e: logger.warning( diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 8eb363859146..59b01b812c53 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -16,8 +16,6 @@ import re from twisted.internet import task -from twisted.internet.defer import CancelledError -from twisted.python import failure from twisted.web.client import FileBodyProducer from synapse.api.errors import SynapseError @@ -26,19 +24,8 @@ class RequestTimedOutError(SynapseError): """Exception representing timeout of an outbound request""" - def __init__(self): - 
super().__init__(504, "Timed out") - - -def cancelled_to_request_timed_out_error(value, timeout): - """Turns CancelledErrors into RequestTimedOutErrors. - - For use with async.add_timeout_to_deferred - """ - if isinstance(value, failure.Failure): - value.trap(CancelledError) - raise RequestTimedOutError() - return value + def __init__(self, msg): + super().__init__(504, msg) ACCESS_TOKEN_RE = re.compile(r"(\?.*access(_|%5[Ff])token=)[^&]*(.*)$") diff --git a/synapse/http/client.py b/synapse/http/client.py index 4694adc400b8..8324632cb625 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging import urllib from io import BytesIO @@ -38,7 +37,7 @@ from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE -from twisted.internet import defer, protocol, ssl +from twisted.internet import defer, error as twisted_error, protocol, ssl from twisted.internet.interfaces import ( IReactorPluggableNameResolver, IResolutionReceiver, @@ -46,17 +45,18 @@ from twisted.internet.task import Cooperator from twisted.python.failure import Failure from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, HTTPConnectionPool, readBody +from twisted.web.client import ( + Agent, + HTTPConnectionPool, + ResponseNeverReceived, + readBody, +) from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers from twisted.web.iweb import IResponse from synapse.api.errors import Codes, HttpResponseException, SynapseError -from synapse.http import ( - QuieterFileBodyProducer, - cancelled_to_request_timed_out_error, - redact_uri, -) +from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri from synapse.http.proxyagent import ProxyAgent from synapse.logging.context import make_deferred_yieldable from synapse.logging.opentracing import set_tag, start_active_span, tags @@ -332,8 +332,6 @@ async def request( RequestTimedOutError if the request times out before the headers are read """ - # A small wrapper around self.agent.request() so we can easily attach - # counters to it outgoing_requests_counter.labels(method).inc() # log request but strip `access_token` (AS requests for example include this) @@ -362,15 +360,17 @@ async def request( data=body_producer, headers=headers, **self._extra_treq_args - ) + ) # type: defer.Deferred + # we use our own timeout mechanism rather than treq's as a workaround # for https://twistedmatrix.com/trac/ticket/9534. request_deferred = timeout_deferred( - request_deferred, - 60, - self.hs.get_reactor(), - cancelled_to_request_timed_out_error, + request_deferred, 60, self.hs.get_reactor(), ) + + # turn timeouts into RequestTimedOutErrors + request_deferred.addErrback(_timeout_to_request_timed_out_error) + response = await make_deferred_yieldable(request_deferred) incoming_responses_counter.labels(method, response.code).inc() @@ -410,7 +410,7 @@ async def post_urlencoded_get_json( parsed json Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. 
@@ -461,7 +461,7 @@ async def post_json_get_json( parsed json Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. @@ -506,7 +506,7 @@ async def get_json( Returns: Succeeds when we get a 2xx HTTP response, with the HTTP body as JSON. Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. @@ -538,7 +538,7 @@ async def put_json( Returns: Succeeds when we get a 2xx HTTP response, with the HTTP body as JSON. Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. @@ -586,7 +586,7 @@ async def get_raw( Succeeds when we get a 2xx HTTP response, with the HTTP body as bytes. Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. @@ -631,7 +631,7 @@ async def get_file( headers, absolute URI of the response and HTTP response code. Raises: - RequestTimedOutException: if there is a timeout before the response headers + RequestTimedOutError: if there is a timeout before the response headers are received. Note there is currently no timeout on reading the response body. @@ -684,6 +684,18 @@ async def get_file( ) +def _timeout_to_request_timed_out_error(f: Failure): + if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): + # The TCP connection has its own timeout (set by the 'connectTimeout' param + # on the Agent), which raises twisted_error.TimeoutError exception. + raise RequestTimedOutError("Timeout connecting to remote server") + elif f.check(defer.TimeoutError, ResponseNeverReceived): + # this one means that we hit our overall timeout on the request + raise RequestTimedOutError("Timeout waiting for response from remote server") + + return f + + # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b02c74ab2d4b..c23a4d7c0cc0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -171,7 +171,7 @@ async def _handle_json_response( d = timeout_deferred(d, timeout=timeout_sec, reactor=reactor) body = await make_deferred_yieldable(d) - except TimeoutError as e: + except defer.TimeoutError as e: logger.warning( "{%s} [%s] Timed out reading response - %s %s", request.txn_id, @@ -655,10 +655,14 @@ async def put_json( long_retries (bool): whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response headers - (including connecting to the server), *for each attempt*. + timeout (int|None): number of milliseconds to wait for the response. self._default_timeout (60s) by default. 
+ Note that we may make several attempts to send the request; this + timeout applies to the time spent waiting for response headers for + *each* attempt (including connection time) as well as the time spent + reading the response body after a 200 response. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. backoff_on_404 (bool): True if we should count a 404 response as @@ -704,8 +708,13 @@ async def put_json( timeout=timeout, ) + if timeout is not None: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout + body = await _handle_json_response( - self.reactor, self.default_timeout, request, response, start_ms + self.reactor, _sec_timeout, request, response, start_ms ) return body @@ -734,10 +743,14 @@ async def post_json( long_retries (bool): whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response headers - (including connecting to the server), *for each attempt*. + timeout (int|None): number of milliseconds to wait for the response. self._default_timeout (60s) by default. + Note that we may make several attempts to send the request; this + timeout applies to the time spent waiting for response headers for + *each* attempt (including connection time) as well as the time spent + reading the response body after a 200 response. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. @@ -801,10 +814,14 @@ async def get_json( args (dict|None): A dictionary used to create query strings, defaults to None. - timeout (int|None): number of milliseconds to wait for the response headers - (including connecting to the server), *for each attempt*. + timeout (int|None): number of milliseconds to wait for the response. self._default_timeout (60s) by default. + Note that we may make several attempts to send the request; this + timeout applies to the time spent waiting for response headers for + *each* attempt (including connection time) as well as the time spent + reading the response body after a 200 response. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. @@ -840,8 +857,13 @@ async def get_json( timeout=timeout, ) + if timeout is not None: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout + body = await _handle_json_response( - self.reactor, self.default_timeout, request, response, start_ms + self.reactor, _sec_timeout, request, response, start_ms ) return body @@ -865,10 +887,14 @@ async def delete_json( long_retries (bool): whether to use the long retry algorithm. See docs on _send_request for details. - timeout (int|None): number of milliseconds to wait for the response headers - (including connecting to the server), *for each attempt*. + timeout (int|None): number of milliseconds to wait for the response. self._default_timeout (60s) by default. + Note that we may make several attempts to send the request; this + timeout applies to the time spent waiting for response headers for + *each* attempt (including connection time) as well as the time spent + reading the response body after a 200 response. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. 
@@ -900,8 +926,13 @@ async def delete_json( ignore_backoff=ignore_backoff, ) + if timeout is not None: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout + body = await _handle_json_response( - self.reactor, self.default_timeout, request, response, start_ms + self.reactor, _sec_timeout, request, response, start_ms ) return body diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index 332da02a8d18..e32d3f43e0f3 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -44,8 +44,11 @@ class ProxyAgent(_AgentBase): `BrowserLikePolicyForHTTPS`, so unless you have special requirements you can leave this as-is. - connectTimeout (float): The amount of time that this Agent will wait - for the peer to accept a connection. + connectTimeout (Optional[float]): The amount of time that this Agent will wait + for the peer to accept a connection, in seconds. If 'None', + HostnameEndpoint's default (30s) will be used. + + This is used for connections to both proxies and destination servers. bindAddress (bytes): The local address for client sockets to bind to. @@ -108,6 +111,15 @@ def request(self, method, uri, headers=None, bodyProducer=None): Returns: Deferred[IResponse]: completes when the header of the response has been received (regardless of the response status code). + + Can fail with: + SchemeNotSupported: if the uri is not http or https + + twisted.internet.error.TimeoutError if the server we are connecting + to (proxy or destination) does not accept a connection before + connectTimeout. + + ... other things too. """ uri = uri.strip() if not _VALID_URI.match(uri): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 67ce9a5f39a1..382f0cf3f0d2 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -449,18 +449,8 @@ def _ctx_manager(): R = TypeVar("R") -def _cancelled_to_timed_out_error(value: R, timeout: float) -> R: - if isinstance(value, failure.Failure): - value.trap(CancelledError) - raise defer.TimeoutError(timeout, "Deferred") - return value - - def timeout_deferred( - deferred: defer.Deferred, - timeout: float, - reactor: IReactorTime, - on_timeout_cancel: Optional[Callable[[Any, float], Any]] = None, + deferred: defer.Deferred, timeout: float, reactor: IReactorTime, ) -> defer.Deferred: """The in built twisted `Deferred.addTimeout` fails to time out deferreds that have a canceller that throws exceptions. This method creates a new @@ -469,27 +459,21 @@ def timeout_deferred( (See https://twistedmatrix.com/trac/ticket/9534) - NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred + NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred. + + NOTE: the TimeoutError raised by the resultant deferred is + twisted.internet.defer.TimeoutError, which is *different* to the built-in + TimeoutError, as well as various other TimeoutErrors you might have imported. Args: deferred: The Deferred to potentially timeout. timeout: Timeout in seconds reactor: The twisted reactor to use - on_timeout_cancel: A callable which is called immediately - after the deferred times out, and not if this deferred is - otherwise cancelled before the timeout. - It takes an arbitrary value, which is the value of the deferred at - that exact point in time (probably a CancelledError Failure), and - the timeout. - - The default callable (if none is provided) will translate a - CancelledError Failure into a defer.TimeoutError. Returns: - A new Deferred. 
+        A new Deferred, which will errback with defer.TimeoutError on timeout. """ - new_d = defer.Deferred() timed_out = [False] @@ -502,18 +486,23 @@ def time_it_out(): except: # noqa: E722, if we throw any exception it'll break time outs logger.exception("Canceller failed during timeout") + # the cancel() call should have set off a chain of errbacks which + # will have errbacked new_d, but in case it hasn't, errback it now. + if not new_d.called: - new_d.errback(defer.TimeoutError(timeout, "Deferred")) + new_d.errback(defer.TimeoutError("Timed out after %gs" % (timeout,))) delayed_call = reactor.callLater(timeout, time_it_out) - def convert_cancelled(value): - if timed_out[0]: - to_call = on_timeout_cancel or _cancelled_to_timed_out_error - return to_call(value, timeout) + def convert_cancelled(value: failure.Failure): + # if the original deferred was cancelled, and our timeout has fired, then + # the reason it was cancelled was due to our timeout. Turn the CancelledError + # into a TimeoutError. + if timed_out[0] and value.check(CancelledError): + raise defer.TimeoutError("Timed out after %gs" % (timeout,)) return value - deferred.addBoth(convert_cancelled) + deferred.addErrback(convert_cancelled) def cancel_timeout(result): # stop the pending call to cancel the deferred if it's been fired diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 5604af379522..212484a7fecb 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -318,14 +318,14 @@ def test_client_gets_headers(self): r = self.successResultOf(d) self.assertEqual(r.code, 200) - def test_client_headers_no_body(self): + @parameterized.expand(["get_json", "post_json", "delete_json", "put_json"]) + def test_timeout_reading_body(self, method_name: str): """ If the HTTP request is connected, but gets no response before being - timed out, it'll give a ResponseNeverReceived. + timed out, it'll give a RequestSendFailed with can_retry. """ - d = defer.ensureDeferred( - self.cl.post_json("testserv:8008", "foo/bar", timeout=10000) - ) + method = getattr(self.cl, method_name) + d = defer.ensureDeferred(method("testserv:8008", "foo/bar", timeout=10000)) self.pump() @@ -349,7 +349,9 @@ def test_client_headers_no_body(self): self.reactor.advance(10.5) f = self.failureResultOf(d) - self.assertIsInstance(f.value, TimeoutError) + self.assertIsInstance(f.value, RequestSendFailed) + self.assertTrue(f.value.can_retry) + self.assertIsInstance(f.value.inner_exception, defer.TimeoutError) def test_client_requires_trailing_slashes(self): """ diff --git a/tests/http/test_simple_client.py b/tests/http/test_simple_client.py new file mode 100644 index 000000000000..a1cf0862d4fe --- /dev/null +++ b/tests/http/test_simple_client.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+from mock import Mock + +from netaddr import IPSet + +from twisted.internet import defer +from twisted.internet.error import DNSLookupError + +from synapse.http import RequestTimedOutError +from synapse.http.client import SimpleHttpClient +from synapse.server import HomeServer + +from tests.unittest import HomeserverTestCase + + +class SimpleHttpClientTests(HomeserverTestCase): + def prepare(self, reactor, clock, hs: "HomeServer"): + # Add a DNS entry for a test server + self.reactor.lookups["testserv"] = "1.2.3.4" + + self.cl = hs.get_simple_http_client() + + def test_dns_error(self): + """ + If the DNS lookup returns an error, it will bubble up. + """ + d = defer.ensureDeferred(self.cl.get_json("http://testserv2:8008/foo/bar")) + self.pump() + + f = self.failureResultOf(d) + self.assertIsInstance(f.value, DNSLookupError) + + def test_client_connection_refused(self): + d = defer.ensureDeferred(self.cl.get_json("http://testserv:8008/foo/bar")) + + self.pump() + + # Nothing happened yet + self.assertNoResult(d) + + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8008) + e = Exception("go away") + factory.clientConnectionFailed(None, e) + self.pump(0.5) + + f = self.failureResultOf(d) + + self.assertIs(f.value, e) + + def test_client_never_connect(self): + """ + If the HTTP request is not connected and is timed out, it'll give a + ConnectingCancelledError or TimeoutError. + """ + d = defer.ensureDeferred(self.cl.get_json("http://testserv:8008/foo/bar")) + + self.pump() + + # Nothing happened yet + self.assertNoResult(d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + self.assertEqual(clients[0][0], "1.2.3.4") + self.assertEqual(clients[0][1], 8008) + + # Deferred is still without a result + self.assertNoResult(d) + + # Push by enough to time it out + self.reactor.advance(120) + f = self.failureResultOf(d) + + self.assertIsInstance(f.value, RequestTimedOutError) + + def test_client_connect_no_response(self): + """ + If the HTTP request is connected, but gets no response before being + timed out, it'll give a ResponseNeverReceived. 
+ """ + d = defer.ensureDeferred(self.cl.get_json("http://testserv:8008/foo/bar")) + + self.pump() + + # Nothing happened yet + self.assertNoResult(d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + self.assertEqual(clients[0][0], "1.2.3.4") + self.assertEqual(clients[0][1], 8008) + + conn = Mock() + client = clients[0][2].buildProtocol(None) + client.makeConnection(conn) + + # Deferred is still without a result + self.assertNoResult(d) + + # Push by enough to time it out + self.reactor.advance(120) + f = self.failureResultOf(d) + + self.assertIsInstance(f.value, RequestTimedOutError) + + def test_client_ip_range_blacklist(self): + """Ensure that Synapse does not try to connect to blacklisted IPs""" + + # Add some DNS entries we'll blacklist + self.reactor.lookups["internal"] = "127.0.0.1" + self.reactor.lookups["internalv6"] = "fe80:0:0:0:0:8a2e:370:7337" + ip_blacklist = IPSet(["127.0.0.0/8", "fe80::/64"]) + + cl = SimpleHttpClient(self.hs, ip_blacklist=ip_blacklist) + + # Try making a GET request to a blacklisted IPv4 address + # ------------------------------------------------------ + # Make the request + d = defer.ensureDeferred(cl.get_json("http://internal:8008/foo/bar")) + self.pump(1) + + # Check that it was unable to resolve the address + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 0) + + self.failureResultOf(d, DNSLookupError) + + # Try making a POST request to a blacklisted IPv6 address + # ------------------------------------------------------- + # Make the request + d = defer.ensureDeferred( + cl.post_json_get_json("http://internalv6:8008/foo/bar", {}) + ) + + # Move the reactor forwards + self.pump(1) + + # Check that it was unable to resolve the address + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 0) + + # Check that it was due to a blacklisted DNS lookup + self.failureResultOf(d, DNSLookupError) + + # Try making a GET request to a non-blacklisted IPv4 address + # ---------------------------------------------------------- + # Make the request + d = defer.ensureDeferred(cl.get_json("http://testserv:8008/foo/bar")) + + # Nothing has happened yet + self.assertNoResult(d) + + # Move the reactor forwards + self.pump(1) + + # Check that it was able to resolve the address + clients = self.reactor.tcpClients + self.assertNotEqual(len(clients), 0) + + # Connection will still fail as this IP address does not resolve to anything + self.failureResultOf(d, RequestTimedOutError) From 866c84da8ddda04f1da94b8e03719147c6d4875e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 29 Sep 2020 11:06:11 +0100 Subject: [PATCH 09/12] Add metrics to track success/otherwise of replication requests (#8406) One hope is that this might provide some insights into #3365. --- changelog.d/8406.feature | 1 + synapse/replication/http/_base.py | 40 +++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 changelog.d/8406.feature diff --git a/changelog.d/8406.feature b/changelog.d/8406.feature new file mode 100644 index 000000000000..1c6472ae7eae --- /dev/null +++ b/changelog.d/8406.feature @@ -0,0 +1 @@ +Add prometheus metrics for replication requests. 
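The diff below pairs a Gauge (how many replication requests are currently in flight, labelled by method name) with a Counter (how many have completed, labelled by name and result code). A minimal, self-contained sketch of the same pattern — the metric and function names here are illustrative, not Synapse's:

    from prometheus_client import Counter, Gauge

    # Hypothetical metric names, for illustration only.
    _pending = Gauge(
        "myapp_pending_requests", "In-flight requests, by method name", ["name"]
    )
    _results = Counter(
        "myapp_requests", "Completed requests, by name and result", ["name", "code"]
    )

    def fetch():
        return "ok"  # stand-in for the real work

    # track_inprogress() keeps the gauge raised for the duration of each call
    @_pending.labels("fetch").track_inprogress()
    def instrumented_fetch():
        try:
            result = fetch()
        except Exception:
            _results.labels("fetch", "ERR").inc()  # lump all failures under "ERR"
            raise
        _results.labels("fetch", 200).inc()  # success
        return result

Counting all failures under a single "ERR" code keeps the label cardinality bounded, which is the same trade-off the change below makes.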
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index b448da671038..64edadb624c1 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -20,18 +20,28 @@ from inspect import signature from typing import Dict, List, Tuple -from synapse.api.errors import ( - CodeMessageException, - HttpResponseException, - RequestSendFailed, - SynapseError, -) +from prometheus_client import Counter, Gauge + +from synapse.api.errors import HttpResponseException, SynapseError +from synapse.http import RequestTimedOutError from synapse.logging.opentracing import inject_active_span_byte_dict, trace from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import random_string logger = logging.getLogger(__name__) +_pending_outgoing_requests = Gauge( + "synapse_pending_outgoing_replication_requests", + "Number of active outgoing replication requests, by replication method name", + ["name"], +) + +_outgoing_request_counter = Counter( + "synapse_outgoing_replication_requests", + "Number of outgoing replication requests, by replication method name and result", + ["name", "code"], +) + class ReplicationEndpoint(metaclass=abc.ABCMeta): """Helper base class for defining new replication HTTP endpoints. @@ -138,7 +148,10 @@ def make_client(cls, hs): instance_map = hs.config.worker.instance_map + outgoing_gauge = _pending_outgoing_requests.labels(cls.NAME) + @trace(opname="outgoing_replication_request") + @outgoing_gauge.track_inprogress() async def send_request(instance_name="master", **kwargs): if instance_name == local_instance_name: raise Exception("Trying to send HTTP request to self") @@ -193,23 +206,26 @@ async def send_request(instance_name="master", **kwargs): try: result = await request_func(uri, data, headers=headers) break - except CodeMessageException as e: - if e.code != 504 or not cls.RETRY_ON_TIMEOUT: + except RequestTimedOutError: + if not cls.RETRY_ON_TIMEOUT: raise - logger.warning("%s request timed out", cls.NAME) + logger.warning("%s request timed out; retrying", cls.NAME) # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. await clock.sleep(1) except HttpResponseException as e: # We convert to SynapseError as we know that it was a SynapseError - # on the master process that we should send to the client. (And + # on the main process that we should send to the client. (And # importantly, not stack traces everywhere) + _outgoing_request_counter.labels(cls.NAME, e.code).inc() raise e.to_synapse_error() - except RequestSendFailed as e: - raise SynapseError(502, "Failed to talk to master") from e + except Exception as e: + _outgoing_request_counter.labels(cls.NAME, "ERR").inc() + raise SynapseError(502, "Failed to talk to main process") from e + _outgoing_request_counter.labels(cls.NAME, 200).inc() return result return send_request From 1c6b8752b891c1a25524d8dfaa8efb7176c0dbec Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 29 Sep 2020 12:36:44 +0100 Subject: [PATCH 10/12] Only assert valid next_link params when provided (#8417) Broken in https://github.com/matrix-org/synapse/pull/8275 and has yet to be put in a release. Fixes https://github.com/matrix-org/synapse/issues/8418. `next_link` is an optional parameter. However, we were checking whether the `next_link` param was valid, even if it wasn't provided. In that case, `next_link` was `None`, which would clearly not be a valid URL. 
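The fix makes the validity check conditional on the parameter actually being present — a minimal sketch of the guard, mirroring the handler change in the diff below:

    next_link = body.get("next_link")  # Optional param, may be None

    if next_link:
        # Raise if the provided next_link value isn't valid
        assert_valid_next_link(self.hs, next_link)

Previously the `None` value was passed straight to the validator even when the client had sent nothing.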
This would prevent password reset and other operations if `next_link` was not provided, and the `next_link_domain_whitelist` config option was set. --- changelog.d/8417.feature | 1 + synapse/rest/client/v2_alpha/account.py | 15 +++++++++------ tests/rest/client/v2_alpha/test_account.py | 6 ++++++ 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 changelog.d/8417.feature diff --git a/changelog.d/8417.feature b/changelog.d/8417.feature new file mode 100644 index 000000000000..17549c3df39a --- /dev/null +++ b/changelog.d/8417.feature @@ -0,0 +1 @@ +Add a config option to specify a whitelist of domains that a user can be redirected to after validating their email or phone number. \ No newline at end of file diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index c3ce0f62592a..9245214f36eb 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -103,8 +103,9 @@ async def on_POST(self, request): Codes.THREEPID_DENIED, ) - # Raise if the provided next_link value isn't valid - assert_valid_next_link(self.hs, next_link) + if next_link: + # Raise if the provided next_link value isn't valid + assert_valid_next_link(self.hs, next_link) # The email will be sent to the stored address. # This avoids a potential account hijack by requesting a password reset to @@ -379,8 +380,9 @@ async def on_POST(self, request): Codes.THREEPID_DENIED, ) - # Raise if the provided next_link value isn't valid - assert_valid_next_link(self.hs, next_link) + if next_link: + # Raise if the provided next_link value isn't valid + assert_valid_next_link(self.hs, next_link) existing_user_id = await self.store.get_user_id_by_threepid("email", email) @@ -453,8 +455,9 @@ async def on_POST(self, request): Codes.THREEPID_DENIED, ) - # Raise if the provided next_link value isn't valid - assert_valid_next_link(self.hs, next_link) + if next_link: + # Raise if the provided next_link value isn't valid + assert_valid_next_link(self.hs, next_link) existing_user_id = await self.store.get_user_id_by_threepid("msisdn", msisdn) diff --git a/tests/rest/client/v2_alpha/test_account.py b/tests/rest/client/v2_alpha/test_account.py index 93f899d86133..ae2cd67f35de 100644 --- a/tests/rest/client/v2_alpha/test_account.py +++ b/tests/rest/client/v2_alpha/test_account.py @@ -732,6 +732,12 @@ def test_next_link_file_uri(self): @override_config({"next_link_domain_whitelist": ["example.com", "example.org"]}) def test_next_link_domain_whitelist(self): """Tests next_link parameters must fit the whitelist if provided""" + + # Ensure not providing a next_link parameter still works + self._request_token( + "something@example.com", "some_secret", next_link=None, expect_code=200, + ) + self._request_token( "something@example.com", "some_secret", From 8676d8ab2e5667d7c12774effc64b3ab99344a8d Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Tue, 29 Sep 2020 13:11:02 +0100 Subject: [PATCH 11/12] Filter out appservices from mau count (#8404) This is an attempt to fix #8403. --- changelog.d/8404.misc | 1 + .../databases/main/monthly_active_users.py | 9 ++++++++- tests/storage/test_monthly_active_users.py | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 changelog.d/8404.misc diff --git a/changelog.d/8404.misc b/changelog.d/8404.misc new file mode 100644 index 000000000000..7aadded6c1dd --- /dev/null +++ b/changelog.d/8404.misc @@ -0,0 +1 @@ +Do not include appservice users when calculating the total MAU for a server. 
diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index e0cedd1aacc9..e93aad33cd89 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -41,7 +41,14 @@ async def get_monthly_active_count(self) -> int: """ def _count_users(txn): - sql = "SELECT COALESCE(count(*), 0) FROM monthly_active_users" + # Exclude app service users + sql = """ + SELECT COALESCE(count(*), 0) + FROM monthly_active_users + LEFT JOIN users + ON monthly_active_users.user_id=users.name + WHERE (users.appservice_id IS NULL OR users.appservice_id = ''); + """ txn.execute(sql) (count,) = txn.fetchone() return count diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 643072bbaf18..8d97b6d4cdf4 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -137,6 +137,21 @@ def test_can_insert_and_count_mau(self): count = self.get_success(self.store.get_monthly_active_count()) self.assertEqual(count, 1) + def test_appservice_user_not_counted_in_mau(self): + self.get_success( + self.store.register_user( + user_id="@appservice_user:server", appservice_id="wibble" + ) + ) + count = self.get_success(self.store.get_monthly_active_count()) + self.assertEqual(count, 0) + + d = self.store.upsert_monthly_active_user("@appservice_user:server") + self.get_success(d) + + count = self.get_success(self.store.get_monthly_active_count()) + self.assertEqual(count, 0) + def test_user_last_seen_monthly_active(self): user_id1 = "@user1:server" user_id2 = "@user2:server" @@ -383,7 +398,7 @@ def test_get_monthly_active_count_by_service(self): self.get_success(self.store.upsert_monthly_active_user(appservice2_user1)) count = self.get_success(self.store.get_monthly_active_count()) - self.assertEqual(count, 4) + self.assertEqual(count, 1) d = self.store.get_monthly_active_count_by_service() result = self.get_success(d) From 12f0d18611f406df5e741c124cac8246fcfd9c14 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 29 Sep 2020 13:47:47 +0100 Subject: [PATCH 12/12] Add support for running Complement against the local checkout (#8317) This PR adds a script that: * Builds the local Synapse checkout using our existing `docker/Dockerfile` image. * Downloads [Complement](https://github.com/matrix-org/complement/)'s source code. * Builds the [Synapse.Dockerfile](https://github.com/matrix-org/complement/blob/master/dockerfiles/Synapse.Dockerfile) using the above dockerfile as a base. * Builds and runs Complement against it. This setup differs slightly from [that of the dendrite repo](https://github.com/matrix-org/dendrite/blob/master/build/scripts/complement.sh) (`complement.sh`, `Complement.Dockerfile`), which instead stores a separate, but slightly modified, dockerfile in Dendrite's repo rather than running the one stored in Complement's repo. The Synapse equivalent of that dockerfile (`Synapse.Dockerfile`) in Complement's repo is just based on top of `matrixdotorg/synapse:latest`, which we opt to build here locally. Thus copying over the files from Complement's repo wouldn't change any functionality, and would result in two instances of the same files. So we decided to just use the dockerfile in Complement's repo instead.
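With this in place, the whole suite can be exercised with a single command — e.g. running `./scripts-dev/complement.sh` from the repository root — provided a local Docker daemon and a Go toolchain are available, since the script (below) shells out to `docker build` and `go test`.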
--- changelog.d/8317.feature | 1 + scripts-dev/complement.sh | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 changelog.d/8317.feature create mode 100755 scripts-dev/complement.sh diff --git a/changelog.d/8317.feature b/changelog.d/8317.feature new file mode 100644 index 000000000000..f9edda099c40 --- /dev/null +++ b/changelog.d/8317.feature @@ -0,0 +1 @@ +Support testing the local Synapse checkout against the [Complement homeserver test suite](https://github.com/matrix-org/complement/). \ No newline at end of file diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh new file mode 100755 index 000000000000..3cde53f5c051 --- /dev/null +++ b/scripts-dev/complement.sh @@ -0,0 +1,22 @@ +#! /bin/bash -eu +# This script is designed for developers who want to test their code +# against Complement. +# +# It makes a Synapse image which represents the current checkout, +# then downloads Complement and runs it with that image. + +cd "$(dirname $0)/.." + +# Build the base Synapse image from the local checkout +docker build -t matrixdotorg/synapse:latest -f docker/Dockerfile . + +# Download Complement +wget -N https://github.com/matrix-org/complement/archive/master.tar.gz +tar -xzf master.tar.gz +cd complement-master + +# Build the Synapse image from Complement, based on the above image we just built +docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles + +# Run the tests on the resulting image! +COMPLEMENT_BASE_IMAGE=complement-synapse go test -v -count=1 ./tests