# Local Packaging demo


## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import asyncio
import logging
import os
import platform
import sys
import urllib
from pathlib import Path

from IPython.display import HTML
from jinja2 import Template
from linkml_runtime.dumpers import json_dumper, yaml_dumper
from linkml_runtime.loaders import json_loader, yaml_loader

from omero_quay.clients.excel import excel_request
from omero_quay.clients.http import get_manifest, post_request
from omero_quay.core.config import get_conf
from omero_quay.core.connect import irods_sudo_conn, omero_conn
from omero_quay.core.manifest import Manifest, Store
from omero_quay.core.provenance import (
    get_data_root,
    get_provenance,
    set_default_route,
    set_route,
)
from omero_quay.core.utils import find_by_id, pprint
from omero_quay.managers.filesystem import FSManager
from omero_quay.managers.irods import iRODSManager, put_directory
from omero_quay.managers.omero import OmeroManager
from omero_quay.parsers.excel import XlsxParser, parse_xlsx

# Logger named "omero_quay"; its level is set to WARNING for this demo
log = logging.getLogger("omero_quay")
log.setLevel("WARNING")

## Settings


See [the documentation](../docs/configuration_file_content.md)







#### Configuration file path 

In [None]:
# Select the configuration file through the QUAY_CONF environment variable
# (the path is relative to the directory the notebook runs from)
os.environ["QUAY_CONF"] = "../tests/containers/quay_docker_host.yml"

print(f"Using configuration from {os.environ['QUAY_CONF']}")

#### Data Path settings

In [None]:
conf = get_conf()

# Root of the local test data: taken from QUAY_TEST_DATA, or the current
# directory when that variable is unset. The demo excel file is looked up
# under this root as well.
test_data_path = Path(os.environ.get("QUAY_TEST_DATA", ".")).resolve()

# Point the ingest settings at the local data and at the `provenance`
# directory found under ../tests/containers
conf["ingest"]["DATA_ROOT"] = test_data_path
conf["ingest"]["PROVENANCE_URL"] = (
    f"file://{Path('../tests/containers/provenance').resolve()}"
)

# Directory holding the source data set used throughout the demo
source_data_path = test_data_path.joinpath("facility0", "S-JCBD-200902093").resolve()
if not source_data_path.exists():
    print(f"{source_data_path} does not exist")
else:
    print("Using data from ", source_data_path)

Update and display the source data directory tree

In [None]:
!cd { source_data_path } && tree -H . > source_data_tree.html

In [None]:
# Read the page produced by `tree -H` in the source data directory and
# retitle it before displaying it
content = (
    (source_data_path / "source_data_tree.html")
    .read_text()
    .replace("Directory Tree", "Source Directory Tree")
)

HTML(content)

# Importing data

## Local packaging only

### Parsing the excel file


This logs a list of **Import Links** that will be managed by the clerks.

Remember that the paths we print here are local to each store's container.

We use the `test_JCB_local.xlsx` spreadsheet as input

In [None]:
%pdb

In [None]:
# Path to the excel file, built from `test_data_path`
xlsx_path = test_data_path / "excels" / "test_JCB_local.xlsx"

# Stop here with an explicit message when the spreadsheet is missing,
# instead of reporting the problem and then parsing a non-existent file.
if not xlsx_path.exists():
    raise FileNotFoundError(f"Excel file not found: {xlsx_path}")

print(f"Using {xlsx_path.resolve()}")

# Build the manifest from the spreadsheet and the current configuration
manifest = parse_xlsx(xlsx_path, conf)

In [None]:
# List every import link declared on the assays of the manifest
for assay in manifest.assays:
    for link in assay.importlinks:
        print("assay: ", assay.name, "Import URL: ", link.srce_url)

# Print the route as a small table, one row per store, between the
# "source" and "destination" markers
print("\n | source - \n | ", end="")
for position, store in enumerate(manifest.route):
    data_root = get_data_root(manifest, store.id, store.scheme, template=True)
    print(
        f"{position} | {store.id} | {store.post_url} | {data_root} |",
        end="\n | ",
    )
print("destination")

# Short key -> store id, used to summarise a route in a few characters
available_stores = {
    "l1": "localUserFile",
    "l2": "localCoopFile",
    "lo": "localOmero",
    "f1": "facilityUserFile",
    "f1i": "facilityUserResc",
    "f2": "facilityCoopFile",
    "fo": "facilityOmero",
    "f2i": "facilityCoopResc",
    "m1i": "mesoUserResc",
    "m2i": "mesoCoopResc",
    "mo": "mesoOmero",
}

# Reverse lookup (store id -> short key), then join the keys along the route
store_keys = dict(zip(available_stores.values(), available_stores.keys()))
route_digest = "-".join([store_keys[store.id] for store in manifest.route])

print("Route digest: ", route_digest)

# Let's make sure we know where we're going :)
assert route_digest == "l1-l2-lo"

In [None]:
# Display the `timestamps` attribute of the manifest as the cell output
manifest.timestamps

We go from the unstructured local user store to the cooperative space, where data is structured as ISA. The local OMERO Clerk creates an `ome.xml` file with all the metadata.   

### State of a manifest

Manifests have a list of **states**, one added each time a clerk is instantiated. Each state records a scheme, a store, a host and a status, as the next cell shows.

In [None]:
# One (scheme, store, host, status) tuple per state of the manifest,
# all printed on a single line
print(*[(st.scheme, st.store, st.host, st.status) for st in manifest.states])

### Delete previously imported test data


In [None]:
# Remove the `isa` tree left by a previous run of this demo.
# The location is derived from `test_data_path` rather than from an absolute
# path that only exists on one machine; a missing directory is not an error
# (same behaviour as `rm -Rf`).
import shutil

shutil.rmtree(test_data_path / "facility0" / "isa", ignore_errors=True)

## En route!

Manifests follow their route step by step.

The `get_data_root` helper tells you where the data is on a given drive.
For example, for our first two stages:



In [None]:
# Data roots of the first two stores on the route, labelled as the source
# and the target of the first transfer
for label, position in (("source data root: ", 0), ("target data root: ", 1)):
    print(label, get_data_root(manifest, manifest.route[position].id, template=True))


We spawn a clerk to pass from source to target, and call their `parse`, `crud`, and `transfer` methods.

Ideally these methods are idempotent. 

* The `parse` method reads the manifest and looks for **importlinks** at the assay level.
* The `transfer` ..
**TODO**

Example code using a clerk (this requires elevating to admin access, which is not possible on a real deployment):

```python
from omero_quay.users.omero import OmeroUserClerk
from omero_quay.core.manifest import User
from omero_quay.core.utils import temp_user
conf["omero"]["OMERO_ADMIN"] = "root"
conf["omero"]["OMERO_ADMIN_PASS"] = "omero"

with OmeroUserClerk(conf, manifest, "omero") as clerk:
    clerk.log.setLevel("DEBUG")
    clerk.parse()
    clerk.crud()
    clerk.update_manifest_members()
    print("members: ", *(m.name for m in clerk.manifest.members))
    
_ = conf["omero"].pop("OMERO_ADMIN")
_ = conf["omero"].pop("OMERO_ADMIN_PASS")
```
    

In [None]:
# Select step 0 of the manifest (presumably the first leg of the route)
manifest.step = 0
# FSManager is a file system clerk
with FSManager(conf, manifest) as clerk:

    # adjust the log level if you need
    # detailed output (e.g. set to INFO)
    clerk.log.setLevel("WARNING")
    print(clerk.state)
    # Read the manifest and look for import links
    clerk.parse()
    # A clerk has a nice printed representation
    print(clerk)

    # One count per entry of `clerk.destinations`
    print(
        "Number of files to transfer: ",
        *(len(dest) for dest in clerk.destinations.values()),
    )
    clerk.transfer()
    # Parse again once the transfer is done; the second print lets you
    # compare the clerk's representation before and after
    clerk.parse()

    print(clerk)

    # Set the clerk's state to "changed"
    clerk.set_state("changed")

## Using `absolute_path` to access data

In [None]:
# Absolute path of the last assay of the manifest, for the clerk's target store
clerk.absolute_path(clerk.manifest.assays[-1], clerk.trgt_store.id)

We can thus check that the data has been imported

In [None]:
# Absolute path of the first investigation, for the clerk's target store;
# reused by the `tree` call in the next cell
inv_path = clerk.absolute_path(clerk.manifest.investigations[0], clerk.trgt_store.id)

In [None]:
!tree {inv_path}

# 2. From CoopFile to Omero



In [None]:
# Select step 1 of the manifest (presumably the second leg of the route)
manifest.step = 1

# Same sequence as with the file system clerk, plus a `crud` call
with OmeroManager(conf, manifest) as clerk:

    clerk.log.setLevel("WARNING")
    print(clerk.state)
    print("source: ", clerk.srce_store)
    print("target: ", clerk.trgt_store)
    clerk.parse()
    clerk.crud()
    # One count per entry of `clerk.destinations`
    print(
        "Number of files to transfer: ",
        *(len(dest) for dest in clerk.destinations.values()),
    )

    clerk.transfer()
    # Set the clerk's state to "changed"
    clerk.set_state("changed")
    # Advance the step counter of the clerk's manifest
    clerk.manifest.step += 1

Notice that now we have images as such, not only as files in the manifest. This translates into the presence of an ome.xml annotation. 


Congrats, your data just got ✨fairer✨

## The end of local route

Next steps must happen on a machine with an OMERO server. 

In [None]:
# Names of the images now listed in the manifest, one per line
print(*[image.name for image in manifest.images], sep="\n")

In [None]:
# Number of stores on the route
print("Route length: ", len(manifest.route))
# `manifest.step` is shown with an offset of one
print("Current step: ", manifest.step + 1)

In [None]:
# Status of every state recorded on the manifest, on a single line
print(*[st.status for st in manifest.states])

In [None]:
# Imported here, where it is first used
import ome_types

# Load the `ome.xml` file found under each assay's path in the
# "localCoopFile" store, then list its images and annotation namespaces
for assay in manifest.assays:
    ome_xml_path = clerk.absolute_path(assay, "localCoopFile") / "ome.xml"
    ome = ome_types.from_xml(ome_xml_path)
    for img in ome.images:
        print(img.name, " :", img.pixels.size_y)
    for annotation in ome.structured_annotations:
        print("ns:", annotation.namespace)

In [None]:
# Show the type of the last `ome` object loaded with `ome_types.from_xml`
type(ome)