Skip to content

Commit

Permalink
Merge pull request psf#111 from andrewsg/make-absolute
Browse files (browse the repository at this point in the history)
Add tests for absolute_links (_make_absolute and base_url) and make them pass
  • Loading branch information
kennethreitz committed Mar 7, 2018
2 parents 89c001a + 14da46f commit 0ab4a53
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 9 deletions.
29 changes: 20 additions & 9 deletions requests_html.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
import asyncio
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlunparse, urljoin
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures._base import TimeoutError
from functools import partial
Expand Down Expand Up @@ -307,15 +307,20 @@ def _make_absolute(self, link):
# Parse the link with stdlib.
parsed = urlparse(link)._asdict()

# Appears to be a relative link:
# If link is relative, then join it with base_url.
if not parsed['netloc']:
parsed['netloc'] = urlparse(self.base_url).netloc
return urljoin(self.base_url, link)

# Link is absolute; if it lacks a scheme, add one from base_url.
if not parsed['scheme']:
parsed['scheme'] = urlparse(self.base_url).scheme

# Re-construct URL, with new data.
parsed = (v for v in parsed.values())
return urlunparse(parsed)
# Reconstruct the URL to incorporate the new scheme.
parsed = (v for v in parsed.values())
return urlunparse(parsed)

# Link is absolute and complete with scheme; nothing to be done here.
return link


@property
Expand All @@ -342,9 +347,15 @@ def base_url(self) -> _URL:
if result:
return result

url = '/'.join(self.url.split('/')[:-1])
if url.endswith('/'):
url = url[:-1]
# Parse the url to separate out the path
parsed = urlparse(self.url)._asdict()

# Remove any part of the path after the last '/'
path = '/'.join(parsed['path'].split('/')[:-1])

# Reconstruct the url with the modified path
parsed = (v for v in parsed.values())
url = urlunparse(parsed)

return url

Expand Down
25 changes: 25 additions & 0 deletions tests/test_requests_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,31 @@ def test_anchor_links():
assert '#site-map' in r.html.links


@pytest.mark.ok
@pytest.mark.parametrize('url,link,expected', [
    ('http://example.com/', 'test.html', 'http://example.com/test.html'),
    ('http://example.com', 'test.html', 'http://example.com/test.html'),
    ('http://example.com/foo/', 'test.html', 'http://example.com/foo/test.html'),
    ('http://example.com/foo/bar', 'test.html', 'http://example.com/foo/test.html'),
    ('http://example.com/foo/', '/test.html', 'http://example.com/test.html'),
    ('http://example.com/', 'http://xkcd.com/about/', 'http://xkcd.com/about/'),
    ('http://example.com/', '//xkcd.com/about/', 'http://xkcd.com/about/'),
])
def test_absolute_links(url, link, expected):
    """Check that a single anchor's href resolves to ``expected``.

    Each parametrized case is exercised twice:

    1. ``url`` is passed as the document URL and the markup has no
       ``<base>`` tag.
    2. ``url`` is supplied through a ``<base href=...>`` tag while the
       document URL is an unrelated address.

    Both scenarios must produce the same absolute link.
    """
    anchor_markup = """<body><a href='{}'>Next</a></body>""".format(link)
    base_markup = """<head><base href='{}'></head>""".format(url)

    # Scenario 1: no `<base>` tag, so the document url acts as the base.
    page = HTML(html=anchor_markup, url=url)
    assert page.absolute_links.pop() == expected

    # Scenario 2: the `<base>` tag carries `url`; the document url is a
    # different address.
    page = HTML(
        html=base_markup + anchor_markup,
        url='http://example.com/foobar/',
    )
    assert page.absolute_links.pop() == expected


@pytest.mark.render
def test_render():
r = get()
Expand Down

0 comments on commit 0ab4a53

Please sign in to comment.