Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SSE-CPK support added to MLflow #6927

Merged
merged 20 commits into from
Oct 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b23e245
[ML-24419] Updated artifact downloads to pass in http headers (#6525)
kriscon-db Aug 30, 2022
8fbe11b
Merge branch 'SSE-CPK-1.0' of github.com:mlflow/mlflow into SSE-CPK-1.0
kriscon-db Aug 31, 2022
8ebd666
[ML-24419] Updated artifact downloads to pass in http headers (#6525)
kriscon-db Aug 30, 2022
01a17d3
Merge branch 'SSE-CPK-1.0' of github.com:mlflow/mlflow into SSE-CPK-1.0
kriscon-db Sep 7, 2022
c358cf9
[ML-24419] Updated artifact downloads to pass in http headers (#6525)
kriscon-db Aug 30, 2022
747e3f8
Merge branch 'SSE-CPK-1.0' of github.com:mlflow/mlflow into SSE-CPK-1.0
kriscon-db Sep 16, 2022
c892278
Added ADLS Gen2 support to mlflow (#6730)
kriscon-db Sep 19, 2022
6284ca8
Add unit test for DatabricksArtifactRepository uploading artifacts th…
mingyu89 Sep 26, 2022
d81e5eb
[ML-24419] Updated artifact downloads to pass in http headers (#6525)
kriscon-db Aug 30, 2022
39d184c
Added ADLS Gen2 support to mlflow (#6730)
kriscon-db Sep 19, 2022
10af8ef
Add unit test for DatabricksArtifactRepository uploading artifacts th…
mingyu89 Sep 26, 2022
e663c2c
Merge branch 'SSE-CPK-1.0' of github.com:mlflow/mlflow into SSE-CPK-1.0
kriscon-db Sep 26, 2022
141fb21
Fix to make manual test passes (#6902)
liangz1 Sep 27, 2022
2d4e597
Updated docstrings and switched helper methods (#6893)
kriscon-db Sep 27, 2022
c7ab1bf
Fix single upload (#6911)
kriscon-db Sep 28, 2022
dd6a5ed
CR change for readability
kriscon-db Sep 30, 2022
2a09929
Remove commented out line
kriscon-db Sep 30, 2022
cd2d4b4
CR changes
kriscon-db Sep 30, 2022
b4af918
Black formatting
kriscon-db Oct 1, 2022
987a5c8
Revert spark.py change
kriscon-db Oct 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
158 changes: 158 additions & 0 deletions mlflow/azure/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,84 @@
}


def put_adls_file_creation(sas_url, headers):
    """
    Issues the ADLS Gen2 `Put` request that creates a file at the given location
    (https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/create)

    The `resource=file` query parameter is appended to `sas_url` before the request is sent,
    and the response is handed to `rest_utils.augmented_raise_for_status`.

    :param sas_url: A shared access signature URL referring to the Azure ADLS server
                    to which the file creation command should be issued.
    :param headers: Mapping of header names to values for the Put request. Only names accepted
                    by `_is_valid_adls_put_header` are forwarded; every other entry is dropped
                    and reported at debug level.
    """
    create_url = _append_query_parameters(sas_url, {"resource": "file"})

    accepted_headers = {}
    for header_name, header_value in headers.items():
        if not _is_valid_adls_put_header(header_name):
            _logger.debug(
                "Removed unsupported '%s' header for ADLS Gen2 Put operation", header_name
            )
            continue
        accepted_headers[header_name] = header_value

    with rest_utils.cloud_storage_http_request(
        "put", create_url, headers=accepted_headers
    ) as response:
        rest_utils.augmented_raise_for_status(response)


def patch_adls_file_upload(sas_url, data, position, headers, is_single):
    """
    Performs an ADLS Azure file append `Patch` operation
    (https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update)

    :param sas_url: A shared access signature URL referring to the Azure ADLS server
                    to which the file update command should be issued.
    :param data: Data to include in the Patch request body.
    :param position: Positional offset of the data in the Patch request. It is converted with
                     `str()` and sent as the `position` query parameter.
    :param headers: Additional headers to include in the Patch request. Names rejected by
                    `_is_valid_adls_patch_header` are dropped and reported at debug level.
    :param is_single: When truthy, `flush=true` is added to the query parameters of the append
                      request, so the caller does not need a separate flush call for an upload
                      that consists of this single append.
    """
    new_params = {"action": "append", "position": str(position)}
    if is_single:
        # Ask the service to flush as part of this append request.
        new_params["flush"] = "true"
    request_url = _append_query_parameters(sas_url, new_params)

    request_headers = {}
    for name, value in headers.items():
        if _is_valid_adls_patch_header(name):
            request_headers[name] = value
        else:
            _logger.debug("Removed unsupported '%s' header for ADLS Gen2 Patch operation", name)

    with rest_utils.cloud_storage_http_request(
        "patch", request_url, data=data, headers=request_headers
    ) as response:
        rest_utils.augmented_raise_for_status(response)


def patch_adls_flush(sas_url, position, headers):
    """
    Performs an ADLS Azure file flush `Patch` operation
    (https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update)

    :param sas_url: A shared access signature URL referring to the Azure ADLS server
                    to which the file update command should be issued.
    :param position: The final size of the file to flush. It is converted with `str()` and sent
                     as the `position` query parameter.
    :param headers: Additional headers to include in the Patch request. Names rejected by
                    `_is_valid_adls_patch_header` are dropped and reported at debug level.
    """
    request_url = _append_query_parameters(sas_url, {"action": "flush", "position": str(position)})

    request_headers = {}
    for name, value in headers.items():
        # Flush is a Path Update (Patch) request, so it is filtered with the Patch allowlist,
        # the same one used for the append request, rather than the Path Create (Put) one.
        if _is_valid_adls_patch_header(name):
            request_headers[name] = value
        else:
            _logger.debug("Removed unsupported '%s' header for ADLS Gen2 Patch operation", name)

    with rest_utils.cloud_storage_http_request(
        "patch", request_url, headers=request_headers
    ) as response:
        rest_utils.augmented_raise_for_status(response)


def put_block(sas_url, block_id, data, headers):
"""
Performs an Azure `Put Block` operation
Expand Down Expand Up @@ -146,3 +224,83 @@ def _is_valid_put_block_header(header_name):
"x-ms-encryption-key-sha256",
"x-ms-encryption-algorithm",
}


def _is_valid_adls_put_header(header_name):
"""
:return: True if the specified header name is a valid header for the ADLS Put operation, False
otherwise. For a list of valid headers, see
https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/create
"""
return header_name in {
"Cache-Control",
"Content-Encoding",
"Content-Language",
"Content-Disposition",
"x-ms-cache-control",
"x-ms-content-type",
"x-ms-content-encoding",
"x-ms-content-language",
"x-ms-content-disposition",
"x-ms-rename-source",
"x-ms-lease-id",
"x-ms-properties",
"x-ms-permissions",
"x-ms-umask",
"x-ms-owner",
"x-ms-group",
"x-ms-acl",
"x-ms-proposed-lease-id",
"x-ms-expiry-option",
"x-ms-expiry-time",
"If-Match",
"If-None-Match",
"If-Modified-Since",
"If-Unmodified-Since",
"x-ms-source-if-match",
"x-ms-source-if-none-match",
"x-ms-source-if-modified-since",
"x-ms-source-if-unmodified-since",
"x-ms-encryption-key",
"x-ms-encryption-key-sha256",
"x-ms-encryption-algorithm",
"x-ms-encryption-context",
"x-ms-client-request-id",
"x-ms-date",
"x-ms-version",
}


def _is_valid_adls_patch_header(header_name):
"""
:return: True if the specified header name is a valid header for the ADLS Patch operation, False
otherwise. For a list of valid headers, see
https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/update
"""
return header_name in {
"Content-Length",
"Content-MD5",
"x-ms-lease-id",
"x-ms-cache-control",
"x-ms-content-type",
"x-ms-content-disposition",
"x-ms-content-encoding",
"x-ms-content-language",
"x-ms-content-md5",
"x-ms-properties",
"x-ms-owner",
"x-ms-group",
"x-ms-permissions",
"x-ms-acl",
"If-Match",
"If-None-Match",
"If-Modified-Since",
"If-Unmodified-Since",
"x-ms-encryption-key",
"x-ms-encryption-key-sha256",
"x-ms-encryption-algorithm",
"x-ms-encryption-context",
"x-ms-client-request-id",
"x-ms-date",
"x-ms-version",
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions mlflow/protos/databricks_artifacts.proto
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ enum ArtifactCredentialType {
// The credential is a GCP Signed URL. For more information, see
// https://cloud.google.com/storage/docs/access-control/signed-urls
GCP_SIGNED_URL = 3;

// The credential is an Azure Shared Access Signature URI for ADLS. For more
// information, see
// https://docs.microsoft.com/en-us/rest/api/storageservices/data-lake-storage-gen2
// and
// https://docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview
AZURE_ADLS_GEN2_SAS_URI = 4;
}

message ArtifactCredentialInfo {
Expand Down
9 changes: 5 additions & 4 deletions mlflow/protos/databricks_artifacts_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.