diff --git a/mlrun/datastore/datastore.py b/mlrun/datastore/datastore.py
index 686148f22bc..b586f426ef4 100644
--- a/mlrun/datastore/datastore.py
+++ b/mlrun/datastore/datastore.py
@@ -223,6 +223,11 @@ def get_or_create_store(
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url
 
+        elif schema in get_local_file_schema():
+            # parse_url() drops the Windows drive letter from paths such as "c:\a\b", so the
+            # subpath is taken from the raw url instead, minus a leading "file://" if present.
+            subpath = url[len("file://") :] if url.startswith("file://") else url
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -241,8 +246,7 @@ def get_or_create_store(
         )
         if not secrets and not mlrun.config.is_running_as_api():
             self._stores[store_key] = store
-        # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
-        return store, url if store.kind == "file" else subpath, url
+        return store, subpath, url
 
     def reset_secrets(self):
         self._secrets = {}
diff --git a/mlrun/datastore/s3.py b/mlrun/datastore/s3.py
index 0b8e3109a3d..b0a34054a3e 100644
--- a/mlrun/datastore/s3.py
+++ b/mlrun/datastore/s3.py
@@ -198,6 +198,16 @@ def listdir(self, key):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]
 
+    def rm(self, path, recursive=False, maxdepth=None):
+        """Remove ``path`` by delegating to ``self.filesystem.rm``.
+
+        ``path`` is resolved with ``get_bucket_and_key`` and passed on as ``bucket/key``;
+        ``recursive`` and ``maxdepth`` are forwarded unchanged.
+        """
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+
 
 def parse_s3_bucket_and_key(s3_path):
     try:
diff --git a/tests/datastore/test_filestore.py b/tests/datastore/test_filestore.py
new file mode 100644
index 00000000000..e296f63acbe
--- /dev/null
+++ b/tests/datastore/test_filestore.py
@@ -0,0 +1,47 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os.path
+import tempfile
+
+import pytest
+
+import mlrun
+
+
+class TestFileStore:
+    @pytest.mark.parametrize(
+        "prefix",
+        ["", "file://"],
+    )
+    def test_put_stat_delete(self, prefix):
+        """Put, stat and delete a local file via a data item, for plain paths and file:// urls."""
+        # Create the file and close its handle right away: on Windows a NamedTemporaryFile
+        # cannot be opened a second time (or removed) while the original handle is still open.
+        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as temp_file:
+            temp_file_path = temp_file.name
+        try:
+            object_url = f"{prefix}{temp_file_path}"
+            data_item = mlrun.run.get_dataitem(object_url)
+            test_text = "test string"
+            data_item.put(test_text)
+            assert data_item.stat().size == len(test_text)
+            data_item.delete()
+            with pytest.raises(FileNotFoundError):
+                data_item.stat()
+            assert not os.path.exists(temp_file_path)
+        finally:
+            # Only needed when the test failed before the data item deleted the file.
+            if os.path.exists(temp_file_path):
+                os.remove(temp_file_path)
diff --git a/tests/integration/aws_s3/test_aws_s3.py b/tests/integration/aws_s3/test_aws_s3.py
index e5d1fe352f5..230f72e0baa 100644
--- a/tests/integration/aws_s3/test_aws_s3.py
+++ b/tests/integration/aws_s3/test_aws_s3.py
@@ -21,6 +21,7 @@
 import pandas as pd
 import pytest
 import yaml
+from botocore.exceptions import ClientError
 
 import mlrun
 import mlrun.errors
@@ -144,6 +145,10 @@ def _perform_aws_s3_tests(self, secrets=None):
         upload_data_item.upload(self.test_file)
         response = upload_data_item.get()
         assert response.decode() == self.test_string
+        upload_data_item.delete()
+        with pytest.raises(ClientError) as client_exception:
+            upload_data_item.stat()
+        assert client_exception.value.response["Error"]["Code"] == "404"
 
         # Verify as_df() creates a proper DF. Note that the AWS case as_df() works through the fsspec interface, that's
         # why it's important to test it as well.