From 07c394cb7ba2613362fd4e9c8571464f4db63873 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 16 Apr 2024 18:34:58 +0300 Subject: [PATCH 1/2] gh-86650: Fix IndexError when parsing emails with invalid Message-ID In particular, one-off addresses generated by Microsoft Outlook: https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses --- Lib/email/_header_value_parser.py | 3 +++ Lib/test/test_email/test__header_value_parser.py | 6 ++++++ .../Library/2024-04-16-18-34-11.gh-issue-86650.Zeydyg.rst | 2 ++ 3 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-04-16-18-34-11.gh-issue-86650.Zeydyg.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index e4a342d446f6a3..38194aaf128a9b 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1514,6 +1514,9 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) + if not obs_local_part: + raise errors.HeaderParseError( + "expected obs-local-part but found '{}'".format(value)) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and obs_local_part[1].token_type=='dot'): diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index f7e80749c456f8..1951bdc9f38220 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2698,6 +2698,12 @@ def test_get_msg_id_no_angle_end(self): ) self.assertEqual(msg_id.token_type, 'msg-id') + def test_get_msg_id_with_brackets(self): + # Microsof Outlook generates non-standard one-off addresses: + # https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses + with self.assertRaises(errors.HeaderParseError): + parser.get_msg_id("<[abrakadabra@microsoft.com]>") + @parameterize diff --git 
a/Misc/NEWS.d/next/Library/2024-04-16-18-34-11.gh-issue-86650.Zeydyg.rst b/Misc/NEWS.d/next/Library/2024-04-16-18-34-11.gh-issue-86650.Zeydyg.rst new file mode 100644 index 00000000000000..8a1626fa63c804 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-16-18-34-11.gh-issue-86650.Zeydyg.rst @@ -0,0 +1,2 @@ +Fix IndexError when parsing some emails with invalid Message-ID (including +one-off addresses generated by Microsoft Outlook). From a5b578cef19802650e6fbf89d1bacae0ae886ea9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 16 Apr 2024 22:26:02 +0300 Subject: [PATCH 2/2] Fix more cases. --- Lib/email/_header_value_parser.py | 2 ++ .../test_email/test__header_value_parser.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 38194aaf128a9b..e1491e689e3f5b 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1519,11 +1519,13 @@ def get_obs_local_part(value): "expected obs-local-part but found '{}'".format(value)) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[1].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) if (obs_local_part[-1].token_type == 'dot' or obs_local_part[-1].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[-2].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' 
in local part")) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 1951bdc9f38220..ff55aa1cce4a2c 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2698,12 +2698,31 @@ def test_get_msg_id_no_angle_end(self): ) self.assertEqual(msg_id.token_type, 'msg-id') + def test_get_msg_id_empty_id_left(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_msg_id("<@domain>") + + def test_get_msg_id_empty_id_right(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_msg_id("<simplelocal@>") + def test_get_msg_id_with_brackets(self): # Microsof Outlook generates non-standard one-off addresses: # https://learn.microsoft.com/en-us/office/client-developer/outlook/mapi/one-off-addresses with self.assertRaises(errors.HeaderParseError): parser.get_msg_id("<[abrakadabra@microsoft.com]>") + def test_get_msg_id_ws_only_local(self): + msg_id = self._test_get_x( + parser.get_msg_id, + "< @domain>", + "< @domain>", + "< @domain>", + [errors.ObsoleteHeaderDefect], + "" + ) + self.assertEqual(msg_id.token_type, 'msg-id') + @parameterize