-
Notifications
You must be signed in to change notification settings - Fork 61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix syncing offline runs with file upload #1211
Changes from all commits
782c5a7
3ac11ed
ad314b7
4615354
9ba4378
d6824f3
1b2c25e
cb02c03
ac78c7f
1214707
1b80867
a59260d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,6 @@ | |
import os | ||
from dataclasses import dataclass | ||
from datetime import datetime | ||
from pathlib import Path | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Generic, | ||
|
@@ -29,9 +28,13 @@ | |
TypeVar, | ||
) | ||
|
||
from neptune.common.exceptions import InternalClientError | ||
from neptune.common.exceptions import ( | ||
InternalClientError, | ||
NeptuneException, | ||
) | ||
from neptune.new.exceptions import MalformedOperation | ||
from neptune.new.internal.container_type import ContainerType | ||
from neptune.new.internal.operation_processors.operation_storage import OperationStorage | ||
from neptune.new.internal.types.file_types import FileType | ||
from neptune.new.types.atoms.file import File | ||
|
||
|
@@ -57,7 +60,7 @@ class Operation(abc.ABC): | |
def accept(self, visitor: "OperationVisitor[Ret]") -> Ret: | ||
pass | ||
|
||
def clean(self): | ||
def clean(self, operation_storage: OperationStorage): | ||
pass | ||
|
||
def to_dict(self) -> dict: | ||
|
@@ -185,33 +188,30 @@ def from_dict(data: dict) -> "AssignArtifact": | |
class UploadFile(Operation): | ||
|
||
ext: str | ||
file_path: str | ||
file_path: str = None | ||
tmp_file_name: str = None | ||
# `clean_after_upload` is for backward compatibility and should be removed in the future | ||
aniezurawski marked this conversation as resolved.
Show resolved
Hide resolved
|
||
clean_after_upload: bool = False | ||
|
||
@classmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, why not. |
||
def of_file(cls, value: File, attribute_path: List[str], upload_path: Path): | ||
def of_file(cls, value: File, attribute_path: List[str], operation_storage: OperationStorage): | ||
if value.file_type is FileType.LOCAL_FILE: | ||
operation = UploadFile( | ||
path=attribute_path, | ||
ext=value.extension, | ||
file_path=os.path.abspath(value.path), | ||
) | ||
elif value.file_type in (FileType.IN_MEMORY, FileType.STREAM): | ||
tmp_file_path = cls.get_upload_path(attribute_path, value.extension, upload_path) | ||
value._save(tmp_file_path) | ||
operation = UploadFile( | ||
path=attribute_path, | ||
ext=value.extension, | ||
file_path=os.path.abspath(tmp_file_path), | ||
clean_after_upload=True, | ||
) | ||
tmp_file_name = cls.get_tmp_file_name(attribute_path, value.extension) | ||
value._save(operation_storage.upload_path / tmp_file_name) | ||
operation = UploadFile(path=attribute_path, ext=value.extension, tmp_file_name=tmp_file_name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Now we determine if operation should be cleaned by existence of
|
||
else: | ||
raise ValueError(f"Unexpected FileType: {value.file_type}") | ||
return operation | ||
|
||
def clean(self): | ||
if self.clean_after_upload: | ||
os.remove(self.file_path) | ||
def clean(self, operation_storage: OperationStorage): | ||
if self.clean_after_upload or self.tmp_file_name: | ||
os.remove(self.get_absolute_path(operation_storage)) | ||
|
||
def accept(self, visitor: "OperationVisitor[Ret]") -> Ret: | ||
return visitor.visit_upload_file(self) | ||
|
@@ -220,20 +220,35 @@ def to_dict(self) -> dict: | |
ret = super().to_dict() | ||
ret["ext"] = self.ext | ||
ret["file_path"] = self.file_path | ||
ret["tmp_file_name"] = self.tmp_file_name | ||
ret["clean_after_upload"] = self.clean_after_upload | ||
return ret | ||
|
||
@staticmethod | ||
def from_dict(data: dict) -> "UploadFile": | ||
return UploadFile(data["path"], data["ext"], data["file_path"], data.get("clean_after_upload", False)) | ||
return UploadFile( | ||
data["path"], | ||
data["ext"], | ||
data.get("file_path"), | ||
data.get("tmp_file_name"), | ||
data.get("clean_after_upload", False), | ||
) | ||
|
||
@staticmethod | ||
def get_upload_path(attribute_path: List[str], extension: str, upload_path: Path): | ||
def get_tmp_file_name(attribute_path: List[str], extension: str): | ||
now = datetime.now() | ||
tmp_file_name = ( | ||
f"{'_'.join(attribute_path)}-{now.timestamp()}-{now.strftime('%Y-%m-%d_%H.%M.%S.%f')}.{extension}" | ||
) | ||
return upload_path / tmp_file_name | ||
return tmp_file_name | ||
|
||
def get_absolute_path(self, operation_storage: OperationStorage) -> str: | ||
if self.file_path: | ||
return self.file_path | ||
elif self.tmp_file_name: | ||
return str(operation_storage.upload_path / self.tmp_file_name) | ||
|
||
raise NeptuneException("Expected 'file_path' or 'tmp_file_name' to be filled.") | ||
|
||
|
||
@dataclass | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like both -
clear
andget_absolute_path
should be defined in OperationStorage. Operation should be a simple serializable dataclass with minimum logic. And it would be better to pass operation object instead of raw path as an argument.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I'll give it a try.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But if we define
clear
andget_absolute_path
inOperationStorage
we'll have to passOperation
(in particularUploadFile
) to those functions because we would like to clear or get path of particular operation.At the same time we need
upload_path
ofOperationStorage
when creating operation (when we createUploadFile
ofin-memory
file we also save file to.neptune
catalog).It'll require
OperationStorage
to be some kind of proxy onUploadFile
operation irresponsible also for creating it.If you're ok with that I can do that, what do you think?