Skip to content

Commit

Permalink
Merge pull request #1403 from mabel-dev/#1402
Browse files (browse the repository at this point in the history)
  • Loading branch information
joocer committed Jan 26, 2024
2 parents 8844a48 + 72d8e55 commit c7e06d7
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-__build__ = 238
+__build__ = 240

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
4 changes: 3 additions & 1 deletion opteryx/connectors/aws_s3_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
bucket + "/" + blob.object_name for blob in blobs if not blob.object_name.endswith("/")
)

-return [blob for blob in blobs if ("." + blob.split(".")[-1].lower()) in VALID_EXTENSIONS]
+return sorted(
+blob for blob in blobs if ("." + blob.split(".")[-1].lower()) in VALID_EXTENSIONS
+)

def read_dataset(self, columns: list = None, **kwargs) -> pyarrow.Table:
blob_names = self.partition_scheme.get_blobs_in_partition(
Expand Down
4 changes: 2 additions & 2 deletions opteryx/connectors/disk_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
Returns:
A list of blob filenames.
"""
-return [
+return sorted(
os.path.join(root, file)
for root, _, files in os.walk(prefix)
for file in files
if os.path.splitext(file)[1] in VALID_EXTENSIONS
-]
+)

def read_dataset(
self, columns: list = None, predicates: list = None, just_schema: bool = False, **kwargs
Expand Down
4 changes: 2 additions & 2 deletions opteryx/connectors/gcp_cloudstorage_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,12 @@ def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
raise Exception(f"Error fetching blob list: {response.text}")

blob_data = response.json()
-blob_names = [
+blob_names = sorted(
f"{bucket}/{blob['name']}"
for blob in blob_data.get("items", [])
if not blob["name"].endswith("/")
and any(blob["name"].endswith(ext) for ext in VALID_EXTENSIONS)
-]
+)

return blob_names

Expand Down

0 comments on commit c7e06d7

Please sign in to comment.