Skip to content

Commit

Permalink
Improve URLs parsing (#7565)
Browse files Browse the repository at this point in the history
* Improve URLs parsing

* test and fix

* black
  • Loading branch information
orsinium committed Mar 19, 2020
1 parent 4146653 commit 6eda363
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
13 changes: 13 additions & 0 deletions tests/unit/packaging/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ def test_has_meta_false(self, db_session):
]
),
),
# project_urls takes priority over home_page and download_url
(
"https://example.com/home/",
"https://example.com/download/",
Expand All @@ -259,6 +260,18 @@ def test_has_meta_false(self, db_session):
]
),
),
# ignore invalid links (entries with an empty name or without a "name,url" pair)
(
None,
None,
[
" ,https://example.com/home/",
",https://example.com/home/",
"https://example.com/home/",
"Download,https://example.com/download/",
],
OrderedDict([("Download", "https://example.com/download/")]),
),
],
)
def test_urls(self, db_session, home_page, download_url, project_urls, expected):
Expand Down
21 changes: 11 additions & 10 deletions warehouse/packaging/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,23 +420,24 @@ def urls(self):

if self.home_page:
_urls["Homepage"] = self.home_page
if self.download_url:
_urls["Download"] = self.download_url

for urlspec in self.project_urls:
name, url = [x.strip() for x in urlspec.split(",", 1)]
_urls[name] = url

if self.download_url and "Download" not in _urls:
_urls["Download"] = self.download_url
name, _, url = urlspec.partition(",")
name = name.strip()
url = url.strip()
if name and url:
_urls[name] = url

return _urls

@property
def github_repo_info_url(self):
for parsed in [urlparse(url) for url in self.urls.values()]:
segments = parsed.path.strip("/").rstrip("/").split("/")
if (
parsed.netloc == "github.com" or parsed.netloc == "www.github.com"
) and len(segments) >= 2:
for url in self.urls.values():
parsed = urlparse(url)
segments = parsed.path.strip("/").split("/")
if parsed.netloc in {"github.com", "www.github.com"} and len(segments) >= 2:
user_name, repo_name = segments[:2]
return f"https://api.github.com/repos/{user_name}/{repo_name}"

Expand Down

0 comments on commit 6eda363

Please sign in to comment.