Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve parsing of author information #521

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions src/poetry/core/masonry/builders/builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import logging
import re
import sys
import warnings

Expand All @@ -14,8 +13,6 @@
from poetry.core.poetry import Poetry


AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+) <(?P<email>.+?)>$")

METADATA_BASE = """\
Metadata-Version: 2.1
Name: {name}
Expand Down Expand Up @@ -343,14 +340,10 @@ def convert_script_files(self) -> list[Path]:
return script_files

@classmethod
def convert_author(cls, author: str) -> dict[str, str]:
m = AUTHOR_REGEX.match(author)
if m is None:
raise RuntimeError(f"{author} does not match regex")

name = m.group("name")
email = m.group("email")
def convert_author(cls, author: str) -> dict[str, str | None]:
from poetry.core.utils.helpers import parse_author

name, email = parse_author(author)
return {"name": name, "email": email}


Expand Down
27 changes: 5 additions & 22 deletions src/poetry/core/packages/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from poetry.core.packages.dependency_group import MAIN_GROUP
from poetry.core.packages.specification import PackageSpecification
from poetry.core.packages.utils.utils import create_nested_marker
from poetry.core.utils.helpers import parse_author
from poetry.core.version.exceptions import InvalidVersion
from poetry.core.version.markers import parse_marker

Expand All @@ -32,6 +33,8 @@

T = TypeVar("T", bound="Package")

# TODO: once poetry.console.commands.init.InitCommand._validate_author
# uses poetry.core.utils.helpers.parse_author, this can be removed.
AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"():&]+)(?: <(?P<email>.+?)>)?$")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you stack this upon #517, please?



Expand Down Expand Up @@ -231,34 +234,14 @@ def _get_author(self) -> dict[str, str | None]:
if not self._authors:
return {"name": None, "email": None}

m = AUTHOR_REGEX.match(self._authors[0])

if m is None:
raise ValueError(
"Invalid author string. Must be in the format: "
"John Smith <john@example.com>"
)

name = m.group("name")
email = m.group("email")

name, email = parse_author(self._authors[0])
return {"name": name, "email": email}

def _get_maintainer(self) -> dict[str, str | None]:
if not self._maintainers:
return {"name": None, "email": None}

m = AUTHOR_REGEX.match(self._maintainers[0])

if m is None:
raise ValueError(
"Invalid maintainer string. Must be in the format: "
"John Smith <john@example.com>"
)

name = m.group("name")
email = m.group("email")

name, email = parse_author(self._maintainers[0])
return {"name": name, "email": email}

@property
Expand Down
21 changes: 21 additions & 0 deletions src/poetry/core/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import warnings

from contextlib import contextmanager
from email.utils import parseaddr
from pathlib import Path
from typing import Any
from typing import Iterator
Expand Down Expand Up @@ -105,3 +106,23 @@ def readme_content_type(path: str | Path) -> str:
return "text/markdown"
else:
return "text/plain"


def parse_author(address: str) -> tuple[str, str | None]:
    """Parse name and address parts from an email address string.

    >>> parse_author("John Doe <john.doe@example.com>")
    ('John Doe', 'john.doe@example.com')
    >>> parse_author("John Doe")
    ('John Doe', None)

    :param address: the email address string to parse.
    :return: a 2-tuple with the parsed name and optional email address.
    :raises ValueError: if the parsed string does not contain a name, or if
        it is not written as ``name <email>`` / ``"name" <email>``.
    """
    # An empty or whitespace-only string carries no name at all. Without this
    # guard the "@"-less shortcut below would hand it back as the name,
    # contradicting the documented ValueError.
    if not address.strip():
        raise ValueError(f"Invalid author string: {address!r}")

    # No "@" means there is no email part: the whole string is the name.
    if "@" not in address:
        return address, None

    name, email = parseaddr(address)

    # parseaddr() is lenient and may silently drop or rearrange parts of the
    # input. Only accept the result if re-assembling it reproduces the input
    # exactly, in either the plain or the quoted-name spelling.
    canonical_forms = (f"{name} <{email}>", f'"{name}" <{email}>')
    if not name or (email and address not in canonical_forms):
        raise ValueError(f"Invalid author string: {address!r}")

    return name, email or None
21 changes: 9 additions & 12 deletions tests/packages/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,11 @@ def test_package_authors() -> None:
def test_package_authors_invalid() -> None:
package = Package("foo", "0.1.0")

package.authors.insert(0, "<John Doe")
package.authors.insert(0, "john.doe@example.com")
with pytest.raises(ValueError) as e:
package.author_name

assert (
str(e.value)
== "Invalid author string. Must be in the format: John Smith <john@example.com>"
)
assert str(e.value) == "Invalid author string: 'john.doe@example.com'"


@pytest.mark.parametrize(
Expand All @@ -78,11 +75,14 @@ def test_package_authors_invalid() -> None:
("Doe, John", None),
("(Doe, John)", None),
("John Doe", "john@john.doe"),
("Doe, John", "dj@john.doe"),
("MyCompanyName R&D", "rnd@MyCompanyName.MyTLD"),
("John-Paul: Doe", None),
("John-Paul: Doe", "jp@nomail.none"),
("John Doe the 3rd", "3rd@jd.net"),
("<John Doe", None),
("John? Doe", None),
("Jane+Doe", None),
("~John Doe", None),
("John~Doe", None),
],
)
def test_package_authors_valid(name: str, email: str | None) -> None:
Expand All @@ -102,11 +102,8 @@ def test_package_authors_valid(name: str, email: str | None) -> None:
[
("<john@john.doe>",),
("john@john.doe",),
("<John Doe",),
("John? Doe",),
("Jane+Doe",),
("~John Doe",),
("John~Doe",),
("Doe, John <dj@john.doe>",),
("John-Paul: Doe <jp@nomail.none>",),
],
)
def test_package_author_names_invalid(name: str) -> None:
Expand Down
50 changes: 50 additions & 0 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from poetry.core.utils.helpers import combine_unicode
from poetry.core.utils.helpers import parse_author
from poetry.core.utils.helpers import parse_requires
from poetry.core.utils.helpers import readme_content_type
from poetry.core.utils.helpers import temporary_directory
Expand Down Expand Up @@ -118,3 +119,52 @@ def test_utils_helpers_readme_content_type(
readme: str | Path, content_type: str
) -> None:
assert readme_content_type(readme) == content_type


@pytest.mark.parametrize(
    "author, name, email",
    [
        # The (probably) most common form: plain name followed by an address
        ("John Doe <john.doe@example.com>", "John Doe", "john.doe@example.com"),
        # A bare name without any address
        ("John Doe", "John Doe", None),
        # A name containing a “special” character, followed by an address
        (
            "R&D <researchanddevelopment@example.com>",
            "R&D",
            "researchanddevelopment@example.com",
        ),
        # A bare name containing a “special” character
        ("R&D", "R&D", None),
        # A name containing a non-ASCII character, followed by an address
        (
            "my·fancy corp <my-fancy-corp@example.com>",
            "my·fancy corp",
            "my-fancy-corp@example.com",
        ),
        # A bare name containing a non-ASCII character
        ("my·fancy corp", "my·fancy corp", None),
    ],
)
def test_utils_helpers_parse_author(author: str, name: str, email: str | None) -> None:
    """Check that :func:`parse_author` splits valid author strings correctly."""
    expected = (name, email)
    parsed = parse_author(author)
    assert parsed == expected


@pytest.mark.parametrize(
    "author",
    [
        # Only an email address, enclosed in angle brackets
        "<john.doe@example.com>",
        # Only an email address, no brackets
        "john.doe@example.com",
        # Unquoted commas, which do not conform to the RFC
        "asf,dfu@t.b",
        "asf,<dfu@t.b>",
        "asf, dfu@t.b",
    ],
)
def test_utils_helpers_parse_author_invalid(author: str) -> None:
    """Check that :func:`parse_author` rejects invalid author strings."""
    # Callable form of pytest.raises: invokes parse_author(author) and
    # requires it to raise ValueError.
    pytest.raises(ValueError, parse_author, author)