Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-25881: Add extra logging to S3 datastore #322

Merged
merged 2 commits into from
Jul 10, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 9 additions & 2 deletions python/lsst/daf/butler/datastores/s3Datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def _artifact_exists(self, location: Location) -> bool:
exists : `bool`
True if the location can be found, false otherwise.
"""
log.debug("Checking if file exists: %s", location.uri)
exists, _ = s3CheckFileExists(location, client=self.client)
return exists

Expand All @@ -124,7 +125,9 @@ def _delete_artifact(self, location: Location) -> None:
location : `Location`
Location of the artifact associated with this datastore.
"""
log.debug("Deleting file: %s", location.uri)
self.client.delete_object(Bucket=location.netloc, Key=location.relativeToPathRoot)
log.debug("Successfully deleted file: %s", location.uri)

def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
ref: DatasetRef, isComponent: bool = False) -> Any:
Expand All @@ -134,8 +137,10 @@ def _read_artifact_into_memory(self, getInfo: DatastoreFileGetInformation,
# might as well use the HEADER metadata for size comparison instead.
# s3CheckFileExists would just duplicate GET/LIST charges in this case.
try:
log.debug("Reading file: %s", location.uri)
response = self.client.get_object(Bucket=location.netloc,
Key=location.relativeToPathRoot)
log.debug("Successfully read file: %s", location.uri)
except self.client.exceptions.ClientError as err:
errorcode = err.response["ResponseMetadata"]["HTTPStatusCode"]
# head_object returns 404 when object does not exist only when user
Expand Down Expand Up @@ -208,17 +213,19 @@ def _write_in_memory_to_artifact(self, inMemoryDataset: Any, ref: DatasetRef) ->
# _toBytes is not implemented
try:
serializedDataset = formatter.toBytes(inMemoryDataset)
log.debug("Writing file directly to %s", location.uri)
self.client.put_object(Bucket=location.netloc, Key=location.relativeToPathRoot,
Body=serializedDataset)
log.debug("Wrote file directly to %s", location.uri)
log.debug("Successfully wrote file directly to %s", location.uri)
except NotImplementedError:
with tempfile.NamedTemporaryFile(suffix=location.getExtension()) as tmpFile:
formatter._fileDescriptor.location = Location(*os.path.split(tmpFile.name))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we also need a log message around line 225 (new) where we upload this temporary file. This is the code path that will trigger for fits files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding line 225 (new), before it attempts the put_object via a temporary file.

formatter.write(inMemoryDataset)
with open(tmpFile.name, 'rb') as f:
log.debug("Writing file to %s via a temporary directory.", location.uri)
self.client.put_object(Bucket=location.netloc,
Key=location.relativeToPathRoot, Body=f)
log.debug("Wrote file to %s via a temporary directory.", location.uri)
log.debug("Successfully wrote file to %s via a temporary directory.", location.uri)

if self._transaction is None:
raise RuntimeError("Attempting to write artifact without transaction enabled")
Expand Down