Skip to content

Commit

Permalink
Human readable pack (#65)
Browse files Browse the repository at this point in the history
* scaffold for simple pack

* simple test case working

* new tests, all passing

* improved testing, catching duplicate dataset names case

* removing debug print

* oops didn't need duplicate code

* flake8 fixes
  • Loading branch information
erickmartins committed Sep 27, 2023
1 parent 64e6d9f commit baeb509
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 32 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ Note that, if you are packing a `Plate` or `Screen`, default OMERO settings prev

`--barchive` creates a package compliant with Bioimage Archive submission standards - see below for more detail.

`--rocrate` generates a RO-Crate compliant package with flat structure (all image
files in a single folder). A JSON metadata file is added with basic information
about the files (name, mimetype).

`--simple` creates a "human-readable" package; one folder per project or dataset is created and image files are placed according to where they came from in the OMERO server. Note that a package generated with this option is not guaranteed to work with `unpack`, though it often will.

`--metadata` allows you to specify which transfer metadata will be saved in `transfer.xml` as possible MapAnnotation values for the images. Defaults to image ID, timestamp, software version, source hostname, md5, source username, source group.

Examples:
```
omero transfer pack Image:123 transfer_pack.tar
Expand All @@ -77,6 +85,8 @@ Note that unpack needs to be able to identify the images it imports inequivocall
already owns entities with the same name as ones defined in `transfer.xml`,
effectively merging the "new" unpacked entities with existing ones.

`--metadata` allows you to specify which transfer metadata from `transfer.xml` will be used as MapAnnotation values for the images. Fields that do not exist in `transfer.xml` will be ignored. Defaults to image ID, timestamp, software version, source hostname, md5, source username, source group.

Examples:
```
omero transfer unpack transfer_pack.zip
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def read(fname):
packages=['', 'omero.plugins'],
package_dir={"": "src"},
name="omero-cli-transfer",
version='0.7.3',
version='0.8.0',
maintainer="Erick Ratamero",
maintainer_email="erick.ratamero@jax.org",
description=("A set of utilities for exporting a transfer"
Expand Down
105 changes: 77 additions & 28 deletions src/generate_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,24 +347,34 @@ def create_shapes(roi: RoiI) -> List[Shape]:


def create_filepath_annotations(id: str, conn: BlitzGateway,
filename: Union[str,
PathLike] = ".",
plate_path: Optional[str] = None
simple: bool,
filename: Union[str, PathLike] = ".",
plate_path: Optional[str] = None,
ds: Optional[str] = None,
proj: Optional[str] = None,
) -> Tuple[List[CommentAnnotation],
List[AnnotationRef]]:
ns = id
anns = []
anrefs = []
fp_type = ns.split(":")[0]
clean_id = int(ns.split(":")[-1])
if not ds:
ds = ""
if not proj:
proj = ""
if fp_type == "Image":
fpaths = ezomero.get_original_filepaths(conn, clean_id)
if len(fpaths) > 1:
allpaths = []
for f in fpaths:
f = Path(f)
allpaths.append(f.parts)
common_root = Path(*os.path.commonprefix(allpaths))
if not simple:
allpaths = []
for f in fpaths:
f = Path(f)
allpaths.append(f.parts)
common_root = Path(*os.path.commonprefix(allpaths))
else:
common_root = "./"
common_root = Path(common_root) / proj / ds
path = os.path.join(common_root, 'mock_folder')
uid = (-1) * uuid4().int
an = CommentAnnotation(id=uid,
Expand All @@ -375,19 +385,43 @@ def create_filepath_annotations(id: str, conn: BlitzGateway,
anref = AnnotationRef(id=an.id)
anrefs.append(anref)
else:
if simple:
common_root = "./"
if fpaths:
f = fpaths[0]
if simple:
filename = Path(f).name
f = Path(common_root) / proj / ds / filename
uid = (-1) * uuid4().int
an = CommentAnnotation(id=uid,
namespace=ns,
value=str(f)
)
anns.append(an)
anref = AnnotationRef(id=an.id)
anrefs.append(anref)
else:
if simple:
f = f'{clean_id}.tiff'
f = Path(common_root) / proj / ds / f
uid = (-1) * uuid4().int
an = CommentAnnotation(id=uid,
namespace=ns,
value=str(f)
)
anns.append(an)
anref = AnnotationRef(id=an.id)
anrefs.append(anref)
f = f'pixel_images/{clean_id}.tiff'
uid = (-1) * uuid4().int
an = CommentAnnotation(id=uid,
namespace=ns,
value=str(f)
)
anns.append(an)
anref = AnnotationRef(id=an.id)
anrefs.append(anref)

uid = (-1) * uuid4().int
an = CommentAnnotation(id=uid,
namespace=ns,
value=f
)
anns.append(an)
anref = AnnotationRef(id=an.id)
anrefs.append(anref)
elif fp_type == "Annotation":
filename = str(Path(filename).name)
f = f'file_annotations/{clean_id}/{filename}'
Expand Down Expand Up @@ -650,7 +684,9 @@ def populate_roi(obj: RoiI, roi_obj: IObject, ome: OME, conn: BlitzGateway


def populate_image(obj: ImageI, ome: OME, conn: BlitzGateway, hostname: str,
metadata: List[str], fset: Union[None, Fileset] = None
metadata: List[str], simple: bool,
fset: Union[None, Fileset] = None,
ds: Optional[str] = None, proj: Optional[str] = None,
) -> ImageRef:
id = obj.getId()
name = obj.getName()
Expand All @@ -671,7 +707,9 @@ def populate_image(obj: ImageI, ome: OME, conn: BlitzGateway, hostname: str,
ome.structured_annotations.append(kv)
if ref:
img.annotation_refs.append(ref)
filepath_anns, refs = create_filepath_annotations(img_id, conn)
filepath_anns, refs = create_filepath_annotations(img_id, conn,
simple, ds=ds,
proj=proj)
for i in range(len(filepath_anns)):
ome.structured_annotations.append(filepath_anns[i])
img.annotation_refs.append(refs[i])
Expand All @@ -692,12 +730,15 @@ def populate_image(obj: ImageI, ome: OME, conn: BlitzGateway, hostname: str,
for fs_image in fset.copyImages():
fs_img_id = f"Image:{str(fs_image.getId())}"
if fs_img_id not in [i.id for i in ome.images]:
populate_image(fs_image, ome, conn, hostname, metadata, fset)
populate_image(fs_image, ome, conn, hostname, metadata,
simple, fset)
return img_ref


def populate_dataset(obj: DatasetI, ome: OME, conn: BlitzGateway,
hostname: str, metadata: List[str]) -> DatasetRef:
hostname: str, metadata: List[str], simple: bool,
proj: Optional[str] = None,
) -> DatasetRef:
id = obj.getId()
name = obj.getName()
desc = obj.getDescription()
Expand All @@ -707,7 +748,9 @@ def populate_dataset(obj: DatasetI, ome: OME, conn: BlitzGateway,
add_annotation(ds, ann, ome, conn)
for img in obj.listChildren():
img_obj = conn.getObject('Image', img.getId())
img_ref = populate_image(img_obj, ome, conn, hostname, metadata)
img_ref = populate_image(img_obj, ome, conn, hostname, metadata,
simple, ds=str(id) + "_" + name,
proj=proj)
ds.image_refs.append(img_ref)
ds_id = f"Dataset:{str(ds.id)}"
if ds_id not in [i.id for i in ome.datasets]:
Expand All @@ -716,16 +759,19 @@ def populate_dataset(obj: DatasetI, ome: OME, conn: BlitzGateway,


def populate_project(obj: ProjectI, ome: OME, conn: BlitzGateway,
hostname: str, metadata: List[str]):
hostname: str, metadata: List[str], simple: bool):
id = obj.getId()
name = obj.getName()
desc = obj.getDescription()
proj, _ = create_proj_and_ref(id=id, name=name, description=desc)
for ann in obj.listAnnotations():
add_annotation(proj, ann, ome, conn)

for ds in obj.listChildren():
ds_obj = conn.getObject('Dataset', ds.getId())
ds_ref = populate_dataset(ds_obj, ome, conn, hostname, metadata)
ds_ref = populate_dataset(ds_obj, ome, conn, hostname, metadata,
simple, proj=str(id) + "_" + name)

proj.dataset_refs.append(ds_ref)
ome.projects.append(proj)

Expand Down Expand Up @@ -773,6 +819,7 @@ def populate_plate(obj: PlateI, ome: OME, conn: BlitzGateway,
int(ann.id.split(":")[-1]) < 0):
plate_path = ann.value
filepath_anns, refs = create_filepath_annotations(pl.id, conn,
simple=False,
plate_path=plate_path)
for i in range(len(filepath_anns)):
ome.structured_annotations.append(filepath_anns[i])
Expand All @@ -794,7 +841,8 @@ def populate_well(obj: WellI, ome: OME, conn: BlitzGateway,
ws_obj = obj.getWellSample(index)
ws_id = ws_obj.getId()
ws_img = ws_obj.getImage()
ws_img_ref = populate_image(ws_img, ome, conn, hostname, metadata)
ws_img_ref = populate_image(ws_img, ome, conn, hostname, metadata,
simple=False)
ws_index = int(ws_img_ref.id.split(":")[-1])
ws = WellSample(id=ws_id, index=ws_index, image_ref=ws_img_ref)
samples.append(ws)
Expand Down Expand Up @@ -862,6 +910,7 @@ def add_annotation(obj: Union[Project, Dataset, Image, Plate, Screen,
filepath_anns, refs = create_filepath_annotations(
f.id,
conn,
simple=False,
filename=ann.getFile().getName())
for i in range(len(filepath_anns)):
ome.structured_annotations.append(filepath_anns[i])
Expand All @@ -881,16 +930,16 @@ def list_file_ids(ome: OME) -> dict:


def populate_xml(datatype: str, id: int, filepath: str, conn: BlitzGateway,
hostname: str, barchive: bool,
hostname: str, barchive: bool, simple: bool,
metadata: List[str]) -> Tuple[OME, dict]:
ome = OME()
obj = conn.getObject(datatype, id)
if datatype == 'Project':
populate_project(obj, ome, conn, hostname, metadata)
populate_project(obj, ome, conn, hostname, metadata, simple)
elif datatype == 'Dataset':
populate_dataset(obj, ome, conn, hostname, metadata)
populate_dataset(obj, ome, conn, hostname, metadata, simple)
elif datatype == 'Image':
populate_image(obj, ome, conn, hostname, metadata)
populate_image(obj, ome, conn, hostname, metadata, simple)
elif datatype == 'Screen':
populate_screen(obj, ome, conn, hostname, metadata)
elif datatype == 'Plate':
Expand Down
60 changes: 57 additions & 3 deletions src/omero_cli_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

import ezomero
from ome_types.model import CommentAnnotation, OME
from ome_types import from_xml
from ome_types import from_xml, to_xml
from omero.sys import Parameters
from omero.rtypes import rstring
from omero.cli import CLI, GraphControl
Expand Down Expand Up @@ -64,7 +64,17 @@
--zip packs the object into a compressed zip file rather than a tarball.
--barchive creates a package compliant with Bioimage Archive submission
standards - see repo README for more detail.
standards - see repo README for more detail. This package format is not
compatible with unpack usage.
--rocrate generates a RO-Crate compliant package with flat structure (all image
files in a single folder). A JSON metadata file is added with basic information
about the files (name, mimetype).
--simple creates a package that is "human readable" - folders will be created
for projects/datasets, with files being placed according to where they come
from in the server. Note that a package generated with this option is NOT
guaranteed to work with unpack.
--metadata allows you to specify which transfer metadata will be saved in
`transfer.xml` as possible MapAnnotation values to the images. Default is `all`
Expand Down Expand Up @@ -188,6 +198,9 @@ def _configure(self, parser):
"--rocrate", help="Pack into a file compliant with "
"RO-Crate standards",
action="store_true")
pack.add_argument(
"--simple", help="Pack into a human-readable package file",
action="store_true")
pack.add_argument(
"--metadata",
choices=['all', 'none', 'img_id', 'timestamp',
Expand Down Expand Up @@ -321,6 +334,36 @@ def _process_metadata(self, metadata: Union[List[str], None]):
metadata = list(set(metadata))
self.metadata = metadata

def _fix_pixels_image_simple(self, ome: OME, folder: str, filepath: str
                             ) -> OME:
    """Relocate ``pixel_images`` files for a "simple" pack and rewrite the XML.

    Works on a deep copy of ``ome``. For every annotation of ``ome`` whose
    string value starts with ``"pixel_images"``:

    * the image in the copy that references the annotation is located;
    * the destination is taken from another string-valued annotation
      referenced by that same image (the last match wins);
    * the reference and the annotation are dropped from the copy;
    * the file is moved from ``folder/<pixel_images path>`` to
      ``folder/<destination>``, creating parent folders with ``DIR_PERM``.

    If the owning image or a destination cannot be resolved, that file and
    its annotation are left untouched, and the ``pixel_images`` folder is
    kept so the file is not lost.

    NOTE(review): the destination is presumably the per-project/dataset
    path annotation written during XML generation; nothing here checks the
    annotation namespace, so any string-valued annotation on the image can
    be picked up -- confirm against ``generate_xml``.

    Args:
        ome: metadata tree describing the package; it is not modified.
        folder: root folder of the package on disk.
        filepath: where the updated XML is written.

    Returns:
        The modified copy of ``ome``, which is also serialized to
        ``filepath`` with ``to_xml``.
    """
    newome = copy.deepcopy(ome)
    root = Path(folder)
    left_in_place = False
    for ann in ome.structured_annotations:
        # Not every annotation type is guaranteed to expose ``value``.
        src = getattr(ann, "value", None)
        if not (isinstance(src, str) and src.startswith("pixel_images")):
            continue

        # Resolve owner and destination afresh for each annotation so that
        # results from a previous iteration can never be reused by mistake.
        owner = None
        for img in newome.images:
            if any(ref.id == ann.id for ref in img.annotation_refs):
                owner = img
        dest = None
        if owner is not None:
            for ref in owner.annotation_refs:
                if ref.id == ann.id:
                    continue
                for other in newome.structured_annotations:
                    if other.id == ref.id and \
                            isinstance(getattr(other, "value", None), str):
                        dest = other.value
        if owner is None or dest is None:
            left_in_place = True
            continue

        # Drop the temporary pixel_images reference/annotation from the
        # copy; iterate over snapshots because the lists are mutated.
        for img in newome.images:
            for ref in list(img.annotation_refs):
                if ref.id == ann.id:
                    img.annotation_refs.remove(ref)
        if ann in newome.structured_annotations:
            newome.structured_annotations.remove(ann)

        target = root / dest
        os.makedirs(str(target.parent), mode=DIR_PERM, exist_ok=True)
        shutil.move(str(root / src), str(target))

    pixel_dir = root / "pixel_images"
    # Only discard the staging folder when every file was moved out of it.
    if not left_in_place and pixel_dir.exists():
        shutil.rmtree(str(pixel_dir))
    with open(filepath, 'w') as fp:
        print(to_xml(newome), file=fp)
    return newome

def __pack(self, args):
if isinstance(args.object, Image) or isinstance(args.object, Plate) \
or isinstance(args.object, Screen):
Expand All @@ -331,6 +374,9 @@ def __pack(self, args):
if args.rocrate:
raise ValueError("Single image, plate or screen cannot be "
"packaged in a RO-Crate")
if args.simple:
raise ValueError("Single plate or screen cannot be "
"packaged in human-readable format")
if isinstance(args.object, Image):
src_datatype, src_dataid = "Image", args.object.id
elif isinstance(args.object, Dataset):
Expand All @@ -344,6 +390,11 @@ def __pack(self, args):
else:
print("Object is not a project, dataset, screen, plate or image")
return
export_types = (args.rocrate, args.barchive, args.simple)
if sum(1 for ct in export_types if ct) > 1:
raise ValueError("Only one special export type (RO-Crate, Bioimage"
" Archive, human-readable) can be specified at "
"once")
self.metadata = []
self._process_metadata(args.metadata)
obj = self.gateway.getObject(src_datatype, src_dataid)
Expand All @@ -363,10 +414,13 @@ def __pack(self, args):
print(f"Saving metadata at {md_fp}.")
ome, path_id_dict = populate_xml(src_datatype, src_dataid, md_fp,
self.gateway, self.hostname,
args.barchive, self.metadata)
args.barchive, args.simple,
self.metadata)

print("Starting file copy...")
self._copy_files(path_id_dict, folder, self.gateway)
if args.simple:
self._fix_pixels_image_simple(ome, folder, md_fp)
if args.barchive:
print(f"Creating Bioimage Archive TSV at {md_fp}.")
populate_tsv(src_datatype, ome, md_fp,
Expand Down
Loading

0 comments on commit baeb509

Please sign in to comment.