From b8fca461fe881330c5d479c360b73440076d1bd7 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 2 Feb 2023 08:16:43 +0100 Subject: [PATCH 1/5] Add account data to export command --- docs/usage/administration/admin_faq.md | 3 ++ synapse/app/admin_cmd.py | 28 +++++++++++++++- synapse/handlers/admin.py | 46 ++++++++++++++++++-------- tests/handlers/test_admin.py | 27 +++++++++++++++ 4 files changed, 89 insertions(+), 15 deletions(-) diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 7a2774119964..925e1d175e64 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -71,6 +71,9 @@ output-directory │ ├───invite_state │ └───knock_state └───user_data + ├───account_data + │ ├───global + │ └───<room_id> ├───connections ├───devices └───profile diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index fe7afb94755e..60069c12e5c5 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -17,7 +17,7 @@ import os import sys import tempfile -from typing import List, Optional +from typing import Dict, List, Optional from twisted.internet import defer, task @@ -222,6 +222,32 @@ def write_connections(self, connections: List[JsonDict]) -> None: with open(connection_file, "a") as f: print(json.dumps(connection), file=f) + def write_room_account_data( + self, room_id: str, account_data: Dict[str, JsonDict] + ) -> None: + account_data_directory = os.path.join( + self.base_directory, "user_data", "account_data" + ) + os.makedirs(account_data_directory, exist_ok=True) + + room_file = os.path.join(account_data_directory, room_id) + + with open(room_file, "a") as f: + print(json.dumps(account_data), file=f) + + def write_account_data( + self, file_name: str, account_data: Dict[str, JsonDict] + ) -> None: + account_data_directory = os.path.join( + self.base_directory, "user_data", "account_data" + ) + os.makedirs(account_data_directory, 
exist_ok=True) + + account_data_file = os.path.join(account_data_directory, file_name) + + with open(account_data_file, "a") as f: + print(json.dumps(account_data), file=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index b03c214b145a..2329dd30cf04 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -29,7 +29,7 @@ class AdminHandler: def __init__(self, hs: "HomeServer"): - self.store = hs.get_datastores().main + self._store = hs.get_datastores().main self._device_handler = hs.get_device_handler() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state @@ -38,7 +38,7 @@ def __init__(self, hs: "HomeServer"): async def get_whois(self, user: UserID) -> JsonDict: connections = [] - sessions = await self.store.get_user_ip_and_agents(user) + sessions = await self._store.get_user_ip_and_agents(user) for session in sessions: connections.append( { @@ -57,7 +57,7 @@ async def get_whois(self, user: UserID) -> JsonDict: async def get_user(self, user: UserID) -> Optional[JsonDict]: """Function to get user details""" - user_info_dict = await self.store.get_user_by_id(user.to_string()) + user_info_dict = await self._store.get_user_by_id(user.to_string()) if user_info_dict is None: return None @@ -89,11 +89,11 @@ async def get_user(self, user: UserID) -> Optional[JsonDict]: } # Add additional user metadata - profile = await self.store.get_profileinfo(user.localpart) - threepids = await self.store.user_get_threepids(user.to_string()) + profile = await self._store.get_profileinfo(user.localpart) + threepids = await self._store.user_get_threepids(user.to_string()) external_ids = [ ({"auth_provider": auth_provider, "external_id": external_id}) - for auth_provider, external_id in await self.store.get_external_ids_by_user( + for auth_provider, external_id in await self._store.get_external_ids_by_user( user.to_string() ) ] 
@@ -101,7 +101,7 @@ async def get_user(self, user: UserID) -> Optional[JsonDict]: user_info_dict["avatar_url"] = profile.avatar_url user_info_dict["threepids"] = threepids user_info_dict["external_ids"] = external_ids - user_info_dict["erased"] = await self.store.is_user_erased(user.to_string()) + user_info_dict["erased"] = await self._store.is_user_erased(user.to_string()) return user_info_dict @@ -117,7 +117,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> The returned value is that returned by `writer.finished()`. """ # Get all rooms the user is in or has been in - rooms = await self.store.get_rooms_for_local_user_where_membership_is( + rooms = await self._store.get_rooms_for_local_user_where_membership_is( user_id, membership_list=( Membership.JOIN, @@ -131,7 +131,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> # We only try and fetch events for rooms the user has been in. If # they've been e.g. invited to a room without joining then we handle # those separately. 
- rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id) + rooms_user_has_been_in = await self._store.get_rooms_user_has_been_in(user_id) for index, room in enumerate(rooms): room_id = room.room_id @@ -140,7 +140,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms) ) - forgotten = await self.store.did_forget(user_id, room_id) + forgotten = await self._store.did_forget(user_id, room_id) if forgotten: logger.info("[%s] User forgot room %d, ignoring", user_id, room_id) continue @@ -152,14 +152,14 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> if room.membership == Membership.INVITE: event_id = room.event_id - invite = await self.store.get_event(event_id, allow_none=True) + invite = await self._store.get_event(event_id, allow_none=True) if invite: invited_state = invite.unsigned["invite_room_state"] writer.write_invite(room_id, invite, invited_state) if room.membership == Membership.KNOCK: event_id = room.event_id - knock = await self.store.get_event(event_id, allow_none=True) + knock = await self._store.get_event(event_id, allow_none=True) if knock: knock_state = knock.unsigned["knock_room_state"] writer.write_knock(room_id, knock, knock_state) @@ -170,7 +170,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> # were joined. We estimate that point by looking at the # stream_ordering of the last membership if it wasn't a join. if room.membership == Membership.JOIN: - stream_ordering = self.store.get_room_max_stream_ordering() + stream_ordering = self._store.get_room_max_stream_ordering() else: stream_ordering = room.stream_ordering @@ -197,7 +197,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> # events that we have and then filtering, this isn't the most # efficient method perhaps but it does guarantee we get everything. 
while True: - events, _ = await self.store.paginate_room_events( + events, _ = await self._store.paginate_room_events( room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS ) if not events: @@ -263,6 +263,12 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> connections["devices"][""]["sessions"][0]["connections"] ) + # Get all account data the user has global and in rooms + global_data, by_room_data = await self._store.get_account_data_for_user(user_id) + writer.write_account_data("global", global_data) + for room_id in by_room_data: + writer.write_account_data(room_id, by_room_data[room_id]) + return writer.finished() @@ -340,6 +346,18 @@ def write_connections(self, connections: List[JsonDict]) -> None: """ raise NotImplementedError() + @abc.abstractmethod + def write_account_data( + self, file_name: str, account_data: Dict[str, JsonDict] + ) -> None: + """Write the account data of a user. + + Args: + file_name: file name to write data + account_data: dict of global or room account_data + """ + raise NotImplementedError() + @abc.abstractmethod def finished(self) -> Any: """Called when all data has successfully been exported and written. 
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index 6f300b8e1119..1b97aaeed134 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -296,3 +296,30 @@ def test_connections(self) -> None: self.assertEqual(args[0][0]["user_agent"], "user_agent") self.assertGreater(args[0][0]["last_seen"], 0) self.assertNotIn("access_token", args[0][0]) + + def test_account_data(self) -> None: + """Tests that user account data get exported.""" + # add account data + self.get_success( + self._store.add_account_data_for_user(self.user2, "m.global", {"a": 1}) + ) + self.get_success( + self._store.add_account_data_to_room( + self.user2, "test_room", "m.per_room", {"b": 2} + ) + ) + + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + # two calls, one call for user data and one call for room data + writer.write_account_data.assert_called() + + args = writer.write_account_data.call_args_list[0][0] + self.assertEqual(args[0], "global") + self.assertEqual(args[1]["m.global"]["a"], 1) + + args = writer.write_account_data.call_args_list[1][0] + self.assertEqual(args[0], "test_room") + self.assertEqual(args[1]["m.per_room"]["b"], 2) From 9811c51f50c7b839f1783d963f1558af984b061f Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 2 Feb 2023 08:19:18 +0100 Subject: [PATCH 2/5] newsfile --- changelog.d/14969.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14969.feature diff --git a/changelog.d/14969.feature b/changelog.d/14969.feature new file mode 100644 index 000000000000..d22741d079af --- /dev/null +++ b/changelog.d/14969.feature @@ -0,0 +1 @@ +Adds profile information, devices and connections to the user data export via command line. 
\ No newline at end of file From ab2b0306413787892f81fdd2afb14fac2ae6d640 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:08:22 +0100 Subject: [PATCH 3/5] Remove unneeded write_room_account_data function --- synapse/app/admin_cmd.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 60069c12e5c5..160fba8ca5e3 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -222,19 +222,6 @@ def write_connections(self, connections: List[JsonDict]) -> None: with open(connection_file, "a") as f: print(json.dumps(connection), file=f) - def write_room_account_data( - self, room_id: str, account_data: Dict[str, JsonDict] - ) -> None: - account_data_directory = os.path.join( - self.base_directory, "user_data", "account_data" - ) - os.makedirs(account_data_directory, exist_ok=True) - - room_file = os.path.join(account_data_directory, room_id) - - with open(room_file, "a") as f: - print(json.dumps(account_data), file=f) - def write_account_data( self, file_name: str, account_data: Dict[str, JsonDict] ) -> None: From 16e74953bd8fcde915b9e3bfa7d7f25b44d3d62e Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:11:01 +0100 Subject: [PATCH 4/5] update newsfile --- changelog.d/14969.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/14969.feature b/changelog.d/14969.feature index d22741d079af..a4680ef9c89b 100644 --- a/changelog.d/14969.feature +++ b/changelog.d/14969.feature @@ -1 +1 @@ -Adds profile information, devices and connections to the user data export via command line. \ No newline at end of file +Add account data to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.78/usage/administration/admin_faq.html#how-can-i-export-user-data). 
\ No newline at end of file From 6dfbf5ac9796b8f543ac4167d766448f2135e24b Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 14 Feb 2023 13:58:34 +0100 Subject: [PATCH 5/5] adopt #14973 --- synapse/app/admin_cmd.py | 4 ++-- synapse/handlers/admin.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 160fba8ca5e3..ad51f33165e3 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -17,7 +17,7 @@ import os import sys import tempfile -from typing import Dict, List, Optional +from typing import List, Mapping, Optional from twisted.internet import defer, task @@ -223,7 +223,7 @@ def write_connections(self, connections: List[JsonDict]) -> None: print(json.dumps(connection), file=f) def write_account_data( - self, file_name: str, account_data: Dict[str, JsonDict] + self, file_name: str, account_data: Mapping[str, JsonDict] ) -> None: account_data_directory = os.path.join( self.base_directory, "user_data", "account_data" diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 2329dd30cf04..8b7760b2cc07 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -14,7 +14,7 @@ import abc import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set from synapse.api.constants import Direction, Membership from synapse.events import EventBase @@ -264,7 +264,8 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> ) # Get all account data the user has global and in rooms - global_data, by_room_data = await self._store.get_account_data_for_user(user_id) + global_data = await self._store.get_global_account_data_for_user(user_id) + by_room_data = await self._store.get_room_account_data_for_user(user_id) writer.write_account_data("global", global_data) for room_id in by_room_data: 
writer.write_account_data(room_id, by_room_data[room_id]) @@ -348,13 +349,13 @@ def write_connections(self, connections: List[JsonDict]) -> None: @abc.abstractmethod def write_account_data( - self, file_name: str, account_data: Dict[str, JsonDict] + self, file_name: str, account_data: Mapping[str, JsonDict] ) -> None: """Write the account data of a user. Args: file_name: file name to write data - account_data: dict of global or room account_data + account_data: mapping of global or room account_data """ raise NotImplementedError()