Skip to content

Commit 400c226

Browse files
authored
Sanitize filename in Content-Disposition (#10584)
Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
1 parent d10f342 commit 400c226

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

Diff for: mlflow/data/http_dataset_source.py

+13
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
from mlflow.utils.rest_utils import augmented_raise_for_status, cloud_storage_http_request
1212

1313

14+
def _is_path(filename: str) -> bool:
15+
"""
16+
Return True if `filename` is a path, False otherwise. For example,
17+
"foo/bar" is a path, but "bar" is not.
18+
"""
19+
return os.path.basename(filename) != filename
20+
21+
1422
class HTTPDatasetSource(DatasetSource):
1523
"""
1624
Represents the source of a dataset stored at a web location and referred to
@@ -57,6 +65,11 @@ def load(self, dst_path=None) -> str:
5765
):
5866
# NB: If the filename is quoted, unquote it
5967
basename = file_name[1].strip("'\"")
68+
if _is_path(basename):
69+
raise MlflowException.invalid_parameter_value(
70+
f"Invalid filename in Content-Disposition header: {basename}. "
71+
"It must be a file name, not a path."
72+
)
6073
elif path is not None and len(posixpath.basename(path)) > 0:
6174
basename = posixpath.basename(path)
6275
else:

Diff for: tests/data/test_http_dataset_source.py

+27
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,30 @@ def download_with_mock_content_disposition_headers(*args, **kwargs):
128128
loaded = source.load()
129129
assert os.path.exists(loaded)
130130
assert os.path.basename(loaded) == expected_filename
131+
132+
133+
@pytest.mark.parametrize(
134+
"filename",
135+
[
136+
"/foo/bar.txt",
137+
"./foo/bar.txt",
138+
"../foo/bar.txt",
139+
"foo/bar.txt",
140+
],
141+
)
142+
def test_source_load_with_content_disposition_header_invalid_filename(filename):
143+
def download_with_mock_content_disposition_headers(*args, **kwargs):
144+
response = cloud_storage_http_request(*args, **kwargs)
145+
response.headers["Content-Disposition"] = f"attachment; filename={filename}"
146+
return response
147+
148+
with mock.patch(
149+
"mlflow.data.http_dataset_source.cloud_storage_http_request",
150+
side_effect=download_with_mock_content_disposition_headers,
151+
):
152+
source = HTTPDatasetSource(
153+
"https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
154+
)
155+
156+
with pytest.raises(MlflowException, match="Invalid filename in Content-Disposition header"):
157+
source.load()

0 commit comments

Comments
 (0)