-
Notifications
You must be signed in to change notification settings - Fork 107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds Range header support to content app #1473
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Fixed bug where content app would not respond to ``Range`` HTTP Header in requests when | ||
``remote.policy`` was either ``on_demand`` or ``streamed``. For example this request is used by | ||
Anaconda clients. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -665,7 +665,7 @@ def get_remote_artifacts_blocking(): | |
return response | ||
|
||
except (ClientResponseError, UnsupportedDigestValidationError) as e: | ||
log.warn( | ||
log.warning( | ||
_("Could not download remote artifact at '{}': {}").format( | ||
remote_artifact.url, str(e) | ||
) | ||
|
@@ -809,15 +809,50 @@ def cast_remote_blocking(): | |
|
||
remote = await loop.run_in_executor(None, cast_remote_blocking) | ||
|
||
async def handle_headers(headers): | ||
range_start, range_stop = request.http_range.start, request.http_range.stop | ||
if range_start or range_stop: | ||
response.set_status(206) | ||
|
||
async def handle_response_headers(headers): | ||
for name, value in headers.items(): | ||
if name.lower() in self.hop_by_hop_headers: | ||
lower_name = name.lower() | ||
if lower_name in self.hop_by_hop_headers: | ||
continue | ||
|
||
if response.status == 206 and lower_name == "content-length": | ||
range_bytes = int(value) | ||
start = 0 if range_start is None else range_start | ||
stop = range_bytes if range_stop is None else range_stop | ||
|
||
range_bytes = range_bytes - range_start | ||
range_bytes = range_bytes - (int(value) - stop) | ||
response.headers[name] = str(range_bytes) | ||
|
||
response.headers["Content-Range"] = "bytes {0}-{1}/{2}".format( | ||
start, stop - start + 1, int(value) | ||
) | ||
continue | ||
|
||
response.headers[name] = value | ||
await response.prepare(request) | ||
|
||
data_size_handled = 0 | ||
|
||
async def handle_data(data): | ||
await response.write(data) | ||
nonlocal data_size_handled | ||
if range_start or range_stop: | ||
start_byte_pos = 0 | ||
end_byte_pos = len(data) | ||
if range_start: | ||
start_byte_pos = max(0, range_start - data_size_handled) | ||
if range_stop: | ||
end_byte_pos = min(len(data), range_stop - data_size_handled) | ||
|
||
data_for_client = data[start_byte_pos:end_byte_pos] | ||
await response.write(data_for_client) | ||
data_size_handled = data_size_handled + len(data) | ||
else: | ||
await response.write(data) | ||
if remote.policy != Remote.STREAMED: | ||
await original_handle_data(data) | ||
Comment on lines
856
to
857
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now this I think would not work. I'll add a streamed test and also use the local var instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's because it's not using chunking! I'm switching all of these tests to use the chunking fixture... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes this new test is failing. I'm fixing it. |
||
|
||
|
@@ -826,7 +861,7 @@ async def finalize(): | |
await original_finalize() | ||
|
||
downloader = remote.get_downloader( | ||
remote_artifact=remote_artifact, headers_ready_callback=handle_headers | ||
remote_artifact=remote_artifact, headers_ready_callback=handle_response_headers | ||
) | ||
original_handle_data = downloader.handle_data | ||
downloader.handle_data = handle_data | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,10 +4,11 @@ | |
import unittest | ||
from urllib.parse import urljoin | ||
|
||
from pulp_smash import api, config, utils | ||
from pulp_smash import api, cli, config, utils | ||
from pulp_smash.pulp3.bindings import delete_orphans | ||
from pulp_smash.pulp3.utils import ( | ||
download_content_unit, | ||
download_content_unit_return_requests_response, | ||
gen_distribution, | ||
gen_repo, | ||
get_content, | ||
|
@@ -329,24 +330,113 @@ def test_content_served_immediate(self): | |
self.setup_download_test("immediate") | ||
self.do_test_content_served() | ||
|
||
@unittest.skip("https://pulp.plan.io/issues/8865") | ||
def test_content_served_on_demand_with_range_request(self): | ||
"""Assert that on_demand content can be properly downloaded with range requests.""" | ||
self.setup_download_test("on_demand") | ||
self.do_range_request_download_test() | ||
def test_content_served_streamed(self): | ||
"""Assert that streamed content can be properly downloaded.""" | ||
self.setup_download_test("streamed") | ||
self.do_test_content_served() | ||
|
||
def test_content_served_immediate_with_range_request(self): | ||
def test_content_served_immediate_with_range_request_inside_one_chunk(self): | ||
"""Assert that downloaded content can be properly downloaded with range requests.""" | ||
self.setup_download_test("immediate") | ||
self.do_range_request_download_test() | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add this URL to constants.py and import it to avoid duplicates. |
||
) | ||
range_headers = {"Range": "bytes=1048586-1049586"} | ||
num_bytes = 1001 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_immediate_with_range_request_over_three_chunks(self): | ||
"""Assert that downloaded content can be properly downloaded with range requests.""" | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
range_headers = {"Range": "bytes=1048176-2248576"} | ||
num_bytes = 1200401 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_on_demand_with_range_request_over_three_chunks(self): | ||
"""Assert that on_demand content can be properly downloaded with range requests.""" | ||
self.setup_download_test( | ||
"on_demand", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
range_headers = {"Range": "bytes=1048176-2248576"} | ||
num_bytes = 1200401 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_streamed_with_range_request_over_three_chunks(self): | ||
"""Assert that streamed content can be properly downloaded with range requests.""" | ||
self.setup_download_test( | ||
"streamed", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
range_headers = {"Range": "bytes=1048176-2248576"} | ||
num_bytes = 1200401 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_immediate_with_multiple_different_range_requests(self): | ||
"""Assert that multiple requests with different Range header values work as expected.""" | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
range_headers = {"Range": "bytes=1048176-2248576"} | ||
num_bytes = 1200401 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
range_headers = {"Range": "bytes=2042176-3248576"} | ||
num_bytes = 1206401 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_immediate_with_range_request_invalid_start_value(self): | ||
"""Assert that range requests with a negative start value errors as expected.""" | ||
cfg = config.get_config() | ||
cli_client = cli.Client(cfg) | ||
storage = utils.get_pulp_setting(cli_client, "DEFAULT_FILE_STORAGE") | ||
if storage != "pulpcore.app.models.storage.FileSystem": | ||
self.skipTest("The S3 test API project doesn't handle invalid Range values correctly") | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
|
||
def setup_download_test(self, policy): | ||
self.assertRaises( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should ensure that HTTP 416 is being raised, which is the correct status code. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/416 |
||
HTTPError, | ||
download_content_unit_return_requests_response, | ||
self.cfg, | ||
self.distribution.to_dict(), | ||
"1.iso", | ||
headers={"Range": "bytes=-1-11"}, | ||
) | ||
|
||
def test_content_served_immediate_with_range_request_too_large_end_value(self): | ||
"""Assert that a range request with a end value that is larger than the data works still.""" | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
range_headers = {"Range": "bytes=10485260-10485960"} | ||
num_bytes = 500 | ||
self.do_range_request_download_test(range_headers, num_bytes) | ||
|
||
def test_content_served_immediate_with_range_request_start_value_larger_than_content(self): | ||
"""Assert that a range request with a start value larger than the content errors.""" | ||
self.setup_download_test( | ||
"immediate", url="https://fixtures.pulpproject.org/file-chunked/PULP_MANIFEST" | ||
) | ||
self.assertRaises( | ||
HTTPError, | ||
download_content_unit_return_requests_response, | ||
self.cfg, | ||
self.distribution.to_dict(), | ||
"1.iso", | ||
headers={"Range": "bytes=10485860-10485870"}, | ||
) | ||
|
||
def setup_download_test(self, policy, url=None): | ||
# Create a repository | ||
self.repo = self.repo_api.create(gen_repo()) | ||
self.addCleanup(self.repo_api.delete, self.repo.pulp_href) | ||
|
||
# Create a remote | ||
self.remote = self.remote_api.create(gen_file_remote(policy=policy)) | ||
remote_options = {"policy": policy} | ||
if url: | ||
remote_options["url"] = url | ||
|
||
self.remote = self.remote_api.create(gen_file_remote(**remote_options)) | ||
self.addCleanup(self.remote_api.delete, self.remote.pulp_href) | ||
|
||
# Sync the repository. | ||
|
@@ -388,19 +478,26 @@ def do_test_content_served(self): | |
|
||
self.assertEqual(len(pulp_manifest), FILE_FIXTURE_COUNT, pulp_manifest) | ||
|
||
def do_range_request_download_test(self): | ||
def do_range_request_download_test(self, range_header, expected_bytes): | ||
file_path = "1.iso" | ||
|
||
headers = {"Range": "bytes=0-9"} # first 10 bytes | ||
NUM_BYTES = 10 | ||
|
||
req1 = download_content_unit( | ||
self.cfg, self.distribution.to_dict(), file_path, headers=headers | ||
req1_reponse = download_content_unit_return_requests_response( | ||
self.cfg, self.distribution.to_dict(), file_path, headers=range_header | ||
) | ||
req2 = download_content_unit( | ||
self.cfg, self.distribution.to_dict(), file_path, headers=headers | ||
req2_response = download_content_unit_return_requests_response( | ||
self.cfg, self.distribution.to_dict(), file_path, headers=range_header | ||
) | ||
|
||
self.assertEqual(NUM_BYTES, len(req1)) | ||
self.assertEqual(NUM_BYTES, len(req2)) | ||
self.assertEqual(req1, req2) | ||
self.assertEqual(expected_bytes, len(req1_reponse.content)) | ||
self.assertEqual(expected_bytes, len(req2_response.content)) | ||
self.assertEqual(req1_reponse.content, req2_response.content) | ||
|
||
self.assertEqual(req1_reponse.status_code, 206) | ||
self.assertEqual(req1_reponse.status_code, req2_response.status_code) | ||
|
||
self.assertEqual(str(expected_bytes), req1_reponse.headers["Content-Length"]) | ||
self.assertEqual(str(expected_bytes), req2_response.headers["Content-Length"]) | ||
|
||
self.assertEqual( | ||
req1_reponse.headers["Content-Range"], req2_response.headers["Content-Range"] | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I understand this part. Why are we rewriting the content-length?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we pass the
Content-Length
header back unmodified then even though say the range is asking for 10 bytes of 1024, and we're only returning 10 bytes, theContent-Length
would show 1024 incorrectly. Also we do this here because some server's don't return theContent-Length
at which point we don't know enough to set it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if this math for these values are right. Testing with httpie,
http HEAD https://fixtures.pulpproject.org/file/1.iso Range:bytes=40-50
returnscontent-length: 11 content-range: bytes 40-50/1024
. Is either the range start or stop auto decremented byaiohttp
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at the tests constants below, they seem to be correct after manually checking them against httpie and the fixtures, so I think this math must be correct if the tests pass even if it's not intuitive from the code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah it's a little confusing but aiohttp does modify it by one I believe because start is inclusive and end is exclusive. https://docs.aiohttp.org/en/stable/web_reference.html#aiohttp.web.BaseRequest.http_range
Take this test for example, it asserts the content-length is 1200401 and when I run
http HEAD https://fixtures.pulpproject.org/file-chunked/1.iso Range:bytes=1048176-2248576
I getI really really like the thoroughness and attention to detail you're showing here!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gerrod3 Since the ranges start from 0, index 10 is the 11th byte, and likewise in this instance. So I think the math is correct.
Here's what the HTTP standard document says https://datatracker.ietf.org/doc/html/rfc7233#section-4.2