Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid nested quantifiers with overlapping character space on git url parsing (#1902) #1913

Merged
merged 5 commits into from Jan 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
102 changes: 70 additions & 32 deletions poetry/vcs/git.py
Expand Up @@ -7,46 +7,84 @@
from poetry.utils._compat import decode


# Reusable regex fragments for the individual components of a git URL.
# Every fragment is a single, non-nested character-class quantifier, which
# keeps the composed patterns below free of nested quantifiers.
pattern_formats = {
    "protocol": r"\w+",
    "user": r"[a-zA-Z0-9_.-]+",
    "resource": r"[a-zA-Z0-9_.-]+",
    "port": r"\d+",
    "path": r"[\w\-/\\]+",
    "name": r"[\w\-]+",
    "rev": r"[^@#]+",
}

# Candidate patterns for a git URL. Each pattern exposes (a subset of) the
# named groups: protocol, user, resource, port, pathname, name and rev.
#
# The fragments are injected with ``str.format(**pattern_formats)``; keys a
# given pattern does not reference are simply ignored by ``str.format``.
PATTERNS = [
    # Explicit, well-known protocol; user, resource and port are optional.
    # Both "/" and "\" are accepted as path separators. Note that
    # ``{name}?`` expands to ``[\w\-]+?``, i.e. a lazy quantifier.
    re.compile(
        r"^(git\+)?"
        r"(?P<protocol>https?|git|ssh|rsync|file)://"
        r"(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource})?"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>[:/\\]({path}[/\\])?"
        r"((?P<name>{name}?)(\.git|[/\\])?)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # Any ``\w+`` protocol followed by "://"; the resource is mandatory.
    re.compile(
        r"(git\+)?"
        r"((?P<protocol>{protocol})://)"
        r"(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource}:?)"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>({path})"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # No protocol: ``[user@]resource[:port]`` followed by a ":" or "/"
    # prefixed path, e.g. ``git@github.com:org/repo``.
    re.compile(
        r"^(?:(?P<user>{user})@)?"
        r"(?P<resource>{resource})"
        r"(:(?P<port>{port}))?"
        r"(?P<pathname>([:/]{path}/)"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
    # No protocol and no port: resource and path are separated by one or two
    # ":" / "/" characters. The braces of the ``{1,2}`` quantifier are
    # doubled so that ``str.format`` emits them literally.
    re.compile(
        r"((?P<user>{user})@)?"
        r"(?P<resource>{resource})"
        r"[:/]{{1,2}}"
        r"(?P<pathname>({path})"
        r"(?P<name>{name})(\.git|/)?)"
        r"([@#](?P<rev>{rev}))?"
        r"$".format(**pattern_formats)
    ),
]

Expand Down
169 changes: 169 additions & 0 deletions tests/vcs/test_git.py
Expand Up @@ -2,6 +2,7 @@

from poetry.vcs.git import Git
from poetry.vcs.git import GitUrl
from poetry.vcs.git import ParsedUrl


@pytest.mark.parametrize(
Expand Down Expand Up @@ -74,3 +75,171 @@
)
def test_normalize_url(url, normalized):
assert normalized == Git.normalize_url(url)


@pytest.mark.parametrize(
    "url, parsed",
    [
        (
            "git+ssh://user@hostname:project.git#commit",
            ParsedUrl(
                "ssh",
                "hostname",
                ":project.git",
                "user",
                None,
                "project",
                "commit",
            ),
        ),
        (
            "git+http://user@hostname/project/blah.git@commit",
            ParsedUrl(
                "http",
                "hostname",
                "/project/blah.git",
                "user",
                None,
                "blah",
                "commit",
            ),
        ),
        (
            "git+https://user@hostname/project/blah.git",
            ParsedUrl(
                "https",
                "hostname",
                "/project/blah.git",
                "user",
                None,
                "blah",
                None,
            ),
        ),
        (
            "git+https://user@hostname:project/blah.git",
            ParsedUrl(
                "https",
                "hostname",
                ":project/blah.git",
                "user",
                None,
                "blah",
                None,
            ),
        ),
        (
            "git+ssh://git@github.com:sdispater/poetry.git#v1.0.27",
            ParsedUrl(
                "ssh",
                "github.com",
                ":sdispater/poetry.git",
                "git",
                None,
                "poetry",
                "v1.0.27",
            ),
        ),
        (
            "git+ssh://git@github.com:/sdispater/poetry.git",
            ParsedUrl(
                "ssh",
                "github.com",
                ":/sdispater/poetry.git",
                "git",
                None,
                "poetry",
                None,
            ),
        ),
        (
            "git+ssh://git@github.com:org/repo",
            ParsedUrl(
                "ssh",
                "github.com",
                ":org/repo",
                "git",
                None,
                "repo",
                None,
            ),
        ),
        (
            "git+ssh://git@github.com/org/repo",
            ParsedUrl(
                "ssh",
                "github.com",
                "/org/repo",
                "git",
                None,
                "repo",
                None,
            ),
        ),
        (
            "git+ssh://foo:22/some/path",
            ParsedUrl(
                "ssh",
                "foo",
                "/some/path",
                None,
                "22",
                "path",
                None,
            ),
        ),
        (
            "git@github.com:org/repo",
            ParsedUrl(
                None,
                "github.com",
                ":org/repo",
                "git",
                None,
                "repo",
                None,
            ),
        ),
        (
            "git+https://github.com/sdispater/pendulum",
            ParsedUrl(
                "https",
                "github.com",
                "/sdispater/pendulum",
                None,
                None,
                "pendulum",
                None,
            ),
        ),
        (
            "git+https://github.com/sdispater/pendulum#7a018f2d075b03a73409e8356f9b29c9ad4ea2c5",
            ParsedUrl(
                "https",
                "github.com",
                "/sdispater/pendulum",
                None,
                None,
                "pendulum",
                "7a018f2d075b03a73409e8356f9b29c9ad4ea2c5",
            ),
        ),
        (
            "git+ssh://git@git.example.com:b/b.git#v1.0.0",
            ParsedUrl(
                "ssh",
                "git.example.com",
                ":b/b.git",
                "git",
                None,
                "b",
                "v1.0.0",
            ),
        ),
        (
            "git+ssh://git@github.com:sdispater/pendulum.git#foo/bar",
            ParsedUrl(
                "ssh",
                "github.com",
                ":sdispater/pendulum.git",
                "git",
                None,
                "pendulum",
                "foo/bar",
            ),
        ),
        (
            "git+file:///foo/bar.git",
            ParsedUrl(
                "file",
                None,
                "/foo/bar.git",
                None,
                None,
                "bar",
                None,
            ),
        ),
        (
            "git+file://C:\\Users\\hello\\testing.git#zkat/windows-files",
            ParsedUrl(
                "file",
                "C",
                ":\\Users\\hello\\testing.git",
                None,
                None,
                "testing",
                "zkat/windows-files",
            ),
        ),
        (
            "git+https://git.example.com/sdispater/project/my_repo.git",
            ParsedUrl(
                "https",
                "git.example.com",
                "/sdispater/project/my_repo.git",
                None,
                None,
                "my_repo",
                None,
            ),
        ),
        (
            "git+ssh://git@git.example.com:sdispater/project/my_repo.git",
            ParsedUrl(
                "ssh",
                "git.example.com",
                ":sdispater/project/my_repo.git",
                "git",
                None,
                "my_repo",
                None,
            ),
        ),
    ],
)
def test_parse_url(url, parsed):
    """Check that ``ParsedUrl.parse(url)`` agrees with ``parsed`` field by field."""
    result = ParsedUrl.parse(url)

    # Compared one attribute at a time, in the same order as before, so a
    # failure points at the first field that differs.
    compared_fields = (
        "name",
        "pathname",
        "port",
        "protocol",
        "resource",
        "rev",
        "url",
        "user",
    )
    for field in compared_fields:
        assert getattr(parsed, field) == getattr(result, field)
assert parsed.user == result.user


def test_parse_url_should_fail():
    """A URL made of 64 consecutive "@" characters must be rejected.

    ``ParsedUrl.parse`` is expected to raise ``ValueError`` for it.
    """
    # Same value as "https://" + "@" * 64 + "!".
    unparsable_url = "".join(["https://", "@" * 64, "!"])

    with pytest.raises(ValueError):
        ParsedUrl.parse(unparsable_url)