From e2cbae93221e45e377201d69b7077e5ab58df0c4 Mon Sep 17 00:00:00 2001 From: cwognum Date: Mon, 25 Mar 2024 17:09:52 -0400 Subject: [PATCH 1/6] Changed pointer paths to be relative and save Zarr archive to dataset --- polaris/dataset/_dataset.py | 97 ++++++++--------------------- polaris/dataset/_factory.py | 7 ++- polaris/dataset/converters/_base.py | 4 +- polaris/dataset/converters/_sdf.py | 4 +- polaris/dataset/converters/_zarr.py | 10 ++- polaris/hub/polarisfs.py | 5 +- tests/conftest.py | 2 +- tests/test_dataset.py | 14 ++--- 8 files changed, 50 insertions(+), 93 deletions(-) diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index e22f1ba8..6c7a618f 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -1,6 +1,5 @@ import json import os.path -from collections import defaultdict from hashlib import md5 from typing import Dict, List, Optional, Tuple, Union @@ -11,6 +10,7 @@ from loguru import logger from pydantic import ( Field, + PrivateAttr, computed_field, field_serializer, field_validator, @@ -25,7 +25,6 @@ from polaris.utils.constants import DEFAULT_CACHE_DIR from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidDatasetError, PolarisChecksumError -from polaris.utils.io import get_zarr_root, robust_copy from polaris.utils.types import AccessType, HttpUrlString, HubOwner, License # Constants @@ -51,6 +50,7 @@ class Dataset(BaseArtifactModel): path to a `.parquet` file or a `pandas.DataFrame`. default_adapters: The adapters that the Dataset recommends to use by default to change the format of the data for specific columns. + zarr_archive: The data for any pointer column should be saved in the Zarr archive this path points to. md5sum: The checksum is used to verify the version of the dataset specification. If specified, it will raise an error if the specified checksum doesn't match the computed checksum. readme: Markdown text that can be used to provide a formatted description of the dataset. @@ -72,6 +72,7 @@ class Dataset(BaseArtifactModel): # Data table: Union[pd.DataFrame, str] default_adapters: Dict[str, Adapter] = Field(default_factory=dict) + zarr_archive: Optional[str] = None md5sum: Optional[str] = None # Additional meta-data @@ -85,7 +86,7 @@ class Dataset(BaseArtifactModel): cache_dir: Optional[str] = None # Where to cache the data to if cache() is called. # Private attributes - _path_to_hash: Dict[str, Dict[str, str]] = defaultdict(dict) + _zarr_root: Optional[zarr.Group] = PrivateAttr(None) _has_been_warned: bool = False _has_been_cached: bool = False @@ -188,6 +189,13 @@ def _compute_checksum(table): checksum = hash_fn.hexdigest() return checksum + @property + def zarr_root(self): + """Open the zarr archive in read-write mode if it is not already open.""" + if self._zarr_root is None: + self._zarr_root = zarr.open(self.zarr_archive, "a") + return self._zarr_root + @computed_field @property def n_rows(self) -> int: @@ -230,8 +238,7 @@ def get_data(self, row: int, col: str, adapters: Optional[List[Adapter]] = None) def _load(p: str, index: Union[int, slice]) -> np.ndarray: """Tiny helper function to reduce code repetition.""" - arr = zarr.open(p, mode="r") - arr = arr[index] + arr = self.zarr_root[p][index] if isinstance(index, slice): arr = tuple(arr) @@ -250,12 +257,11 @@ def _load(p: str, index: Union[int, slice]) -> np.ndarray: # In the case it is a pointer column, we need to load additional data into memory # We first check if the data has been downloaded to the cache. - path = self._get_cache_path(column=col, value=value) - if fs.exists(path): - return _load(path, index) + if self._has_been_cached: + return _load(value, index) # If it doesn't exist, we load from the original path and warn if not local - if not fs.is_local_path(value) and not self._has_been_warned: + if not fs.is_local_path(self.zarr_archive) and not self._has_been_warned: logger.warning( f"You're loading data from a remote location. " f"To speed up this process, consider caching the dataset first " @@ -323,19 +329,18 @@ def to_json(self, destination: str) -> str: fs.mkdir(destination, exist_ok=True) table_path = fs.join(destination, "table.parquet") dataset_path = fs.join(destination, "dataset.json") - pointer_dir = fs.join(destination, "data") + zarr_archive = fs.join(destination, "data.zarr") - # Save additional data - new_table = self._copy_and_update_pointers(pointer_dir, inplace=False) + # Copy over Zarr data to the destination + dest = zarr.open(zarr_archive, "w") + zarr.copy_all(source=self.zarr_root, dest=dest) # Lu: Avoid serilizing and sending None to hub app. serialized = self.model_dump(exclude={"cache_dir"}, exclude_none=True) serialized["table"] = table_path + serialized["zarr_archive"] = zarr_archive - # We need to recompute the checksum, as the pointer paths have changed - serialized["md5sum"] = self._compute_checksum(new_table) - - new_table.to_parquet(table_path) + self.table.to_parquet(table_path) with fsspec.open(dataset_path, "w") as f: json.dump(serialized, f) @@ -355,32 +360,15 @@ def cache(self, cache_dir: Optional[str] = None) -> str: if cache_dir is not None: self.cache_dir = cache_dir + self.to_json(self.cache_dir) + + if self.zarr_archive is not None: + self.zarr_archive = fs.join(self.cache_dir, "data.zarr") + if not self._has_been_cached: - self._copy_and_update_pointers(self.cache_dir, inplace=True) self._has_been_cached = True return self.cache_dir - def _get_cache_path(self, column: str, value: str) -> Optional[str]: - """ - Returns where the data _would be_ cached for any entry in the pointer columns, - or None if the column is not a pointer column. - """ - if not self.annotations[column].is_pointer: - return - - if value not in self._path_to_hash[column]: - h = md5(value.encode("utf-8")).hexdigest() - - value, _ = self._split_index_from_path(value) - ext = fs.get_extension(value) - dst = fs.join(self.cache_dir, column, f"{h}.{ext}") - - # The reason for caching the path is to speed-up retrieval. Hashing can be slow and with large - # datasets this could become a bottleneck. - self._path_to_hash[column][value] = dst - - return self._path_to_hash[column][value] - def size(self): return self.rows, self.n_columns @@ -402,39 +390,6 @@ def _split_index_from_path(self, path: str) -> Tuple[str, Optional[int]]: raise ValueError(f"Invalid index format: {index}") return path, index - def _copy_and_update_pointers( - self, save_dir: str, table: Optional[pd.DataFrame] = None, inplace: bool = False - ) -> pd.DataFrame: - """Copy and update the path in the table to the new destination""" - - def fn(path): - """Helper function that can be used within Pandas apply to copy and update all files""" - - # We copy the entire .zarr hierarchy - root = get_zarr_root(path) - if root is None: - raise NotImplementedError( - "Only the .zarr file format is currently supported for pointer columns" - ) - - # We could introduce name collisions here and thus use a hash of the original path for the destination - dst = fs.join(save_dir, f"{md5(root.encode('utf-8')).hexdigest()}.zarr") - robust_copy(root, dst) - - diff = os.path.relpath(path, root) - dst = fs.join(dst, diff) - return dst - - if table is None: - table = self.table - if not inplace: - table = self.table.copy(deep=True) - - for c in table.columns: - if self.annotations[c].is_pointer: - table[c] = table[c].apply(fn) - return table - def __getitem__(self, item): """Allows for indexing the dataset directly""" ret = self.table.loc[item] diff --git a/polaris/dataset/_factory.py b/polaris/dataset/_factory.py index aff4ba93..ddd90bc5 100644 --- a/polaris/dataset/_factory.py +++ b/polaris/dataset/_factory.py @@ -89,7 +89,11 @@ def zarr_root(self) -> zarr.Group: All data for a single dataset is expected to be stored in the same Zarr archive. """ if self._zarr_root is None: - self._zarr_root = zarr.open(self.zarr_root_path, "w") + # NOTE (cwognum): The DirectoryStore is the default store when calling zarr.open + # I nevertheless explicitly set it here to make it clear that this is a design decision. + # We could consider using different stores, such as the NestedDirectoryStore. + store = zarr.DirectoryStore(self.zarr_root_path) + self._zarr_root = zarr.open(store, "w") if not isinstance(self._zarr_root, zarr.Group): raise ValueError("The root of the zarr hierarchy should be a group") return self._zarr_root @@ -215,6 +219,7 @@ def build(self) -> Dataset: table=self._table, annotations=self._annotations, default_adapters=self._adapters, + zarr_archive=self.zarr_root_path, ) def reset(self, zarr_root_path: Optional[str] = None): diff --git a/polaris/dataset/converters/_base.py b/polaris/dataset/converters/_base.py index 8e3c64af..96b49691 100644 --- a/polaris/dataset/converters/_base.py +++ b/polaris/dataset/converters/_base.py @@ -17,7 +17,7 @@ def convert(self, path: str) -> FactoryProduct: raise NotImplementedError @staticmethod - def get_pointer(root: str, column: str, index: Union[int, slice]) -> str: + def get_pointer(column: str, index: Union[int, slice]) -> str: """ Creates a pointer. @@ -30,4 +30,4 @@ def get_pointer(root: str, column: str, index: Union[int, slice]) -> str: index_substr = f"{_INDEX_SEP}{index.start}:{index.stop}" else: index_substr = f"{_INDEX_SEP}{index}" - return f"{root}/{column}{index_substr}" + return f"{column}{index_substr}" diff --git a/polaris/dataset/converters/_sdf.py b/polaris/dataset/converters/_sdf.py index 76eab5cc..f78c072b 100644 --- a/polaris/dataset/converters/_sdf.py +++ b/polaris/dataset/converters/_sdf.py @@ -123,7 +123,7 @@ def _get_name(mol: dm.Mol): # Get the pointer path pointer_idx = f"{start}:{end}" if start != end else f"{start}" - pointer = self.get_pointer(factory.zarr_root_path, self.mol_column, pointer_idx) + pointer = self.get_pointer(self.mol_column, pointer_idx) # Get the single unique value per column for the group and append unique_values = [group[col].unique()[0] for col in df.columns] @@ -132,7 +132,7 @@ def _get_name(mol: dm.Mol): df = grouped else: - pointers = [self.get_pointer(factory.zarr_root_path, self.mol_column, i) for i in range(len(df))] + pointers = [self.get_pointer(self.mol_column, i) for i in range(len(df))] df[self.mol_column] = pd.Series(pointers) # Set the annotations diff --git a/polaris/dataset/converters/_zarr.py b/polaris/dataset/converters/_zarr.py index 7d26ea61..633b94bb 100644 --- a/polaris/dataset/converters/_zarr.py +++ b/polaris/dataset/converters/_zarr.py @@ -34,17 +34,15 @@ def convert(self, path: str, factory: "DatasetFactory") -> FactoryProduct: if v is not None: raise ValueError("The root of the zarr hierarchy should only contain arrays.") + # Copy to the source zarr, so everything is in one place + zarr.copy_all(source=src, dest=factory.zarr_root) + # Construct the table # Parse any group into a column data = defaultdict(dict) for col, arr in src.arrays(): - # Copy to the source zarr, so everything is in one place - dst = zarr.open_group("/".join([factory.zarr_root_path, col]), "w") - zarr.copy(arr, dst) - for i in range(len(arr)): - # In case all data is saved in a single array, we construct a path with an index suffix. - data[col][i] = self.get_pointer(path, arr.name, i) + data[col][i] = self.get_pointer(arr.name, i) # Construct the dataset table = pd.DataFrame(data) diff --git a/polaris/hub/polarisfs.py b/polaris/hub/polarisfs.py index 5412feed..9e924361 100644 --- a/polaris/hub/polarisfs.py +++ b/polaris/hub/polarisfs.py @@ -1,7 +1,8 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +import hashlib from datetime import datetime, timezone +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + import fsspec -import hashlib from polaris.utils.errors import PolarisHubError from polaris.utils.types import TimeoutTypes diff --git a/tests/conftest.py b/tests/conftest.py index 42df3b5e..f4206aac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,7 +59,7 @@ def test_dataset(test_data, test_org_owner): table=test_data, name="test-dataset", source="https://www.example.com", - annotations={"expt": ColumnAnnotation(is_pointer=False, user_attributes={"unit": "kcal/mol"})}, + annotations={"expt": ColumnAnnotation(user_attributes={"unit": "kcal/mol"})}, tags=["tagA", "tagB"], user_attributes={"attributeA": "valueA", "attributeB": "valueB"}, owner=test_org_owner, diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 9eab85e2..83079e34 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -43,17 +43,17 @@ def test_load_data(tmp_path, with_slice, with_caching): arr = np.random.random((100, 100)) tmpdir = str(tmp_path) - path = fs.join(tmpdir, "data.zarr") + zarr_path = fs.join(tmpdir, "data.zarr") - root = zarr.open(path, "w") + root = zarr.open(zarr_path, "w") root.array("A", data=arr) - path = f"{path}/A#0:5" if with_slice else f"{path}/A#0" + path = "A#0:5" if with_slice else "A#0" table = pd.DataFrame({"A": [path]}, index=[0]) - dataset = Dataset(table=table, annotations={"A": {"is_pointer": True}}) + dataset = Dataset(table=table, annotations={"A": {"is_pointer": True}}, zarr_archive=zarr_path) if with_caching: - dataset.cache(tmpdir) + dataset.cache(fs.join(tmpdir, "cache")) data = dataset.get_data(row=0, col="A") @@ -164,8 +164,6 @@ def test_dataset_caching(zarr_archive, tmpdir): assert original_dataset == cached_dataset cache_dir = cached_dataset.cache(tmpdir.join("cached").strpath) - for i in range(len(cached_dataset)): - assert cached_dataset.table.loc[i, "A"].startswith(cache_dir) - assert cached_dataset.table.loc[i, "B"].startswith(cache_dir) + assert cached_dataset.zarr_archive.startswith(cache_dir) assert _equality_test(cached_dataset, original_dataset) From 8d018a02d31df3766fb6ec1bf2320e7e0cafa3ae Mon Sep 17 00:00:00 2001 From: cwognum Date: Mon, 25 Mar 2024 17:14:36 -0400 Subject: [PATCH 2/6] Update tutorials --- docs/tutorials/dataset_factory.ipynb | 45 +++++++++------------------- docs/tutorials/dataset_zarr.ipynb | 40 +++++++++++-------------- 2 files changed, 32 insertions(+), 53 deletions(-) diff --git a/docs/tutorials/dataset_factory.ipynb b/docs/tutorials/dataset_factory.ipynb index dcf2bd35..931999a1 100644 --- a/docs/tutorials/dataset_factory.ipynb +++ b/docs/tutorials/dataset_factory.ipynb @@ -74,14 +74,6 @@ "id": "0776f067-d01b-4b7c-89f6-a3c817f934fb", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to find the pandas get_adjustment() function to patch\n", - "Failed to patch pandas - PandasTools will have limited functionality\n" - ] - }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAZJUlEQVR4nO3daVRUV7428KcoJsEogzKoqEGNA6IROlmixAEFkTAognMwU5t4jencmNtqnAhxwE7U+LbRqFfTGLEJgwSnqKgdlwja7dAK6DXIKCJDREQJFFXUeT8U7YCaCHUGlef3SXdV7f2vtWo97HPOPvuoBEEAERG1lInSBRARPdsYo0RERmGMEhEZhTFKRGQUxigRkVEYo0RERjFVugAi2Wm1OHQIly5BENC3L/z8YG6udE30DFNx3Si1Ljk5CAxERQX+8AcAOH0a9vbYvRt9+ypdGT2rGKPUmuh0GDgQtrbYvx/t2gFAdTUCAlBejuxsmJkpXR89k3hulFqTAwdw8SLWrm3MUADt2mHdOuTkYO9eRSujZxhjlFqTjAy0b49XXnmg0dMT9vbIyFCoJnrmMUapNamshLPzI9o7d8aNG7JXQ88Jxii1JlZWqKx8RPuNG2jbVvZq6DnBGKXWxN0dFRVNJ55lZbh+He7uCtVEzzzGKLUmISFo2xbR0Q80rlgBa2uEhipUEz3zuPyeWhNbW2zZgunTUVgIX1+oVEhNxa5d2L4ddnZKF0fPKq4bpdbh+HFoNBg9GgDOnMHXXyMrC4KA/v0xe3bjUnyiFmGMUisgCPD0xLlziIvDpElKV0PPG54bpVYgORnnzsHZGcHBAKDVIi1N6Zro+cEYpeedICAqCgAWLUKbNgAQE4PXXsOsWcrWRc8NxigpQxCE2tpaOUZKTMT583BxwTvvAIBWixUrAGDECDlGp1aAMUoK2LNnj6ur65IlSyQfSa/HsmUAsHAhLCwAYOtW5OfDzQ3h4ZKPTq0DY5QUYG9vX1BQ8P333+v1emlHio/HhQvo2hVvvQUA9fVYtQoAIiNhwh8/iYO/JFKAl5dX9+7dr169mp6eLuEwDQ2NZ0UXL27cmHnLFhQUoH9/LrYnETFGSQEqlWrixIkA4uLiJBwmLg6XLqF7d0REAIBG03j/UlQUp6IkIv6YSBmTJ08GEB8fr9PpJBmgoaHxrOjSpY1T0U2bUFyMQYMwbpwkI1JrxRglZQwaNKhv374VFRX/+Mc/JBkgNhb/93/o2RPTpwNAXR3+8hcAiIyESiXJiNRaMUZJMdId1+t0ujJDt4sXw9QUADZuxLVr8PBAUJDow1ErxxglxUyZMgXArl27NBqNuD3v2LHD6ccfvwwMxLRpAFBXhy+/BICoKE5FSXSMUVJM7969X375ZRsbh59+KhSxW61W+/nnnwPoNGUK1GoANf/7vygpwauv4vXXRRyIyIAxSkp6883UgoLLMTEvidhnTExMXl7eSy+9NGnSJAA1NTU9ly1b/uqrtYYrTkRiY4ySksaP76BSISUFNTXidKjValesWAEgKipKrVYDWL9+fWlZ2T61uo2vrzhjED2IMUpK6toVXl749VfRHm+8bdu2/Pz8fv36hYeHA6ipqVmzZg2AKMM6fCIJMEZJYYb9P0W5XK/VaqOjowFERkaamJgAWLduXXl5+dChQ0cbNmwmkgC3bSaFlZaiSxeYmqK0FDY2RnW1YcOG2bNnu7m5XbhwwcTE5M6dO66urhUVFUePHh05cqRI9RI1xdkoKczJCSNGQKPBDz8Y1Y9Go1m5ciWAqKgow1R07dq1FRUV3t7ezFCSFGOUlCfKcf3mzZuLi4vd3d3HjRsH4NatW1999RWAZbxATxJjjJLywsJgbo4jR1Be3sIe6urqVq1aBaBv374zZswoLS1du3ZtZWXlqFGjhg8fLmatRA/hA5ZJeba2mD4d1tZowS4ltbW1J06c+PLLL69duwYgPj7ezMysuLj4/PnzAOTYGZpaPV5iIuUVFyMjA6++im7dGlv0eiQlYcgQdO786I/k5t7Yty923759x44du3svaYcOHXx9fePi4tRqtU6n8/PzO3jwoCzfgFo1zkZJeadOYeJEvPoqMjIaNwLV6TBxIpKTH4jRhgZkZGDvXhw+jPLy6qtX/wTAxMTE09Nz9OjRgYGBQ4cOValURUVFJ06cALB48WJlvg+1MoxReiqYmCA/H1u34o9/bPpSaSn278f+/UhNRXV1Y6Ot7Yt//ONHr73mMXbs2A4dOtz//oiIiBMnTri4uHh7e8tSO7V2PKgn5SUlYfJkrFmDyEhcugQHB9TXw8ICycnYuROJibj7I3V3R0AAAgIwZEjjBngPu3nzprOzs06nKy4udnJyku1bUKvFK/X0tJg1C05O+OSTBxo7d4alJUaPxldfoaAAFy4gOhrDhj02QwHY2tqOHTu2oaHh+++/l7pmIjBG6elhaoqNG7FjB44evde4aBFu3EBqKv70p3sXoH7XtGnTAMTGxkpQJlFTjFF6igwbhilT8MEH0GobW+zt0aZNs/sJDAxs37795culP//8i7gVEj2MMUpPl9WrUVKCTZuM6sTS0vL998/X1xfFxnb4/XcTGYcxSk8XJydERcH4Gzh9fbvV1SE2FryGSlJjjNJTZ/ZsdO9ubCcjR6JzZ+Tm4l//EqEkot/AGCXlOThg1Kh7/1WrsXEjRo+Gg0PL+zQxadzxhNeZSGpcN0rK++tfERYGZ2eRuz17Fp6ecHDAtWu/tUCKyEicjZLCfvoJH34IT0/U14vcs4cH3NxQXo4jR8TpsLS0NDU1VZy+6DnCGCWFffopAHzwAczNxe988mTA6OP6/Pz8devW+fr6uri4hIaG1tXViVIbPTd4qENKSk5GRgYcHDBnjiT9T52K5cuhUjX7g4IgnD59Ojk5OSUl5eLFi4bGNm3ajBo1qrKyslOnTiIXSs8yxigppqEBhj2Yli7FCy9IMoSTE3JyHtgmSqvFnTuwsXl0tmq1+Okn/PTTupiYLwwbmAKws7MLDAwcN27cmDFjrKysJCmUnmWMUVLM9u3IzsaLL+Ldd6Ua4r//G5s347vvMH16Y8uBAwgOxq+/PnBz1K+/4sgRJCRgzx5UVWH48IZr16517drV398/MDDQ39/fzMxMqhLp2ccYJWXU1+PzzwFg2TJJzoreZWmJuXPx+uuwtW36Unk5du/GDz/gyBHcPeE5YADGjJm+Zs0IDw8PCcui5whjlJSxfj3y8+Hu3ngVSDojRqCkBPPnP3CDaVUV/PyQng69HgDUarz2GkJCMH48XF0BOABGrFmlVoYxSgq4cwerVgFAdHTjdvfSUavx9dcYPhwzZmDIkMZGGxuUlMDcHN7eCAzExInir1ql1oMxSgr44guUl8PbGwEBcgzn7Y2wMMyahTNn7jUmJ8PVFW3bylEAPd+4bpTkVlGBtWsBIDpavkHXrEF+Pv7613stAwYwQ0kcjFGS24YNuzt2rA8KwtCh8g3auTMiIxEVhYoK+QalVoL31JOsCgoK+vTpIwjqc+fy+vVzlHq4997DtWvYuxcAdDp4eKCuDjk5TRc8ERmDs1GS1ZIlSzQazZQp4TJkaBOmptiwAVeuyDwsPf8YoySfrKys2NhYc3PzJUuWKFKAtzfeeEORkel5xhgl+Xz66ad6vX7WrFmurq4yDKfX4+23G4/o74qJgSDwiJ7ExBglmZw6dWrv3r1t27ZdsGCBPCMmJGDwYLzzjjyjUevFGCWZzJ8/XxCEuXPnOjrKcVa0oQGRkQAweLAMo1Grxiv1JIfY2Njp06d36NAhNze3Xbt2Moy4dSvefRe9eiE7G9xXhCTFu5hIQkVFRcnJyQkJCSdOnACwcOFCeTK0vh7LlwPAZ58xQ0lynI2S+C5fvrxr166kpKQz99196eDgUFRUZGFhIUMB69bho4/Qvz/On5f8nn0izkZJPJmZuw8dWvi3v2VlZRka2rVr5+/vf+zYsbKyslWrVsmToTU1WLkSAJYvZ4aSHPgrI6NlZyMyEv36YcCAl1JSsrKybG1t33jjjfj4+JKSEj8/v8rKyo4dO74h14rNdetQVoZXXkFQkDwDUmvH2Si1iF6PkyeRlIRdu1BQ0Njo6NjN0/PQ4sUjR440/c8TjauqqrRabX19vVqtlqGuqiqsXg0A0dEteQQTUQswRuk3VVdj505cuACdDn36YPJkdOqE/Hx4e6OkpPE9Xbpg/HhMmABv7zZqte99n75165adnZ2ZmdmtW7cuXbrUt29fqev94gtUVmLYMPj4SD0UUSNeYqLHy8zEmDGwtMSoUTA3R1oacnMRHw9/f3TpAnNzhIQgPBxDhjQ5B3nz5s09e/YkJCSkpqZqNBpD47JlyxYuXChpvRUV6NEDt28jPR1eXpIORXQPY5Qeo6EBbm5wcsLBgzBcGhIEzJyJhATk5KC+/oHnbRqUleGHH+oPHGi/f39dfT0AtVo9fPjw3r17b9y4cdCgQWfPnpW05KVLr0RF9QwORkqKpOMQPYAxSo9x+DB8fZvO66qq4OyMVavw4Yf3GouLsX8/9uzBgQPQ6QB88PLL/7a2Dg8PnzRpkpOTk0ajcXBwqK6uvnLlSo8ePSSqt6io6KWXXurff9i33+5zd+diUZIPz43SY5w5A1NTvPLKA402NnB1bXwWR34+EhOxaxdOnYLhj7GlJQICMGHC/wsKMrnvOZwWFhavv/763//+9+Tk5E8++USieqOiojQaTe/eHZmhJDMueKLHqK6GnR1MH/pD6+iIW7cAYPly/PnPOHkSlpYIDERMDMrKkJKCiAiTh55lHBoaCiApKUmiYnNycmJiYtRqtVJb8FFrxtkoPUb79rhxAzpd0yQtK0PXrgAwdSrq6jBhAvz9f3fjuYCAAGtr61OnTl29etXFxUX0YpcsWaLT6WbOnNm7d2/ROyf6bZyN0mN4eKChAdnZDzSWlyM3F56eAODjgx07MH78k2zeaWVl5efnJwhCigRXfzIzM+Pj4y0tLRctWiR650S/izFKjzFyJHr1wqefGq4aNVq6FBYWmDKlBf1NmDAB0hzX390NWop5LtHv4pV6erwzZ+DvDycnjB0Lc3McO4bTpxEXh5CQFnR2+/ZtBwcHrVZbUlLi4OAgVo3//Oc/Bw8ebG1tfeXKFXl2MiVqgrNRejxPT1y6hDfewLVryMmBjw8uXmxZhgJ44YUXfHx8GhoaxD2uX7BggSAIH330ETOUlMLZKMln27Zt77zzzpgxYw4cOCBKh4cPH/b19bWxscnLy7N9aHkAkTw4GyX5hISEmJqaHj16tLKyUpQOFy9eDGDevHnMUFIQY5TkY29vP3z4cK1Wu7fJ4zqb7+zZsxERESdPnnRycpozZ44o5RG1DGOUZGXk9frs7OzIyMjevXt7enp+99139vb2wcHB1tbWotZI1Dxcfk+yCg0NnTNnzsGDB6urq5/wuUyCIJw8eTIxMTEpKamwsNDQ2KlTJ1dX17S0tDZ85DwpjbNRkpWjo6OXl5dGo/nxxx9/+516vT4tLW3+/Pm9evUaMmTImjVrCgsLu3Tp8uGHH6amphYWFr7//vsAysvLZSmc6LE4GyW5hYaGpqWlJSUlTZo06eFX9Xp9enp6QkJCYmJiyX92hnZxcRk/fnx4ePjQoUNV/9nU3rDCqaysTLbKiR6JC55IblevXu3WrVubNm0qKiqsrKwMjQ0NDRkZGQkJCQkJCdevXzc0duvWLSQkpEl63pWZmTlgwAA3N7e7T9AjUgRnoyQ3FxcXT0/P06dPHzp0KCgoyJCe8fHxpaWlhjd07949ODj4kelZUVHRsWNHw78Nt0JxNkqK42yUFBAdHb1gwYKePXtWVlbeXUPap0+fsLCwsLCwgQMHNnl/UVFRcnJyQkLCyZMni4qKOnXqBKChocHCwkKv19fX15s+vKEfkVz44yMFDBs2zNnZOTc3VxCEfv36BQUFBQYGent7N3lbXl5eYmJiYmLi6dOnDX/vra2tMzMzDTGqVqvt7e3Ly8t/+eUXJycnBb4GEQDGKCkiIyPj+vXrHh4esbGxffr0afJqQUFBSkpKQkJCenq6IT3btGkzatSo8PDw0NDQtm3b3n2no6NjeXl5WVkZY5QUxBglBWzfvh3AkiVL7s/QvLw8w/NET5w4YWixsrLy8fEJDw+fMGHCI9fYOzo6ZmZm8vQoKYsxSnI7d+7chQsX7O3tx44da2hJSkpaunRp9n+2iLa1tQ0ODg4LC/P19bUwPJT0MdzdV5SVRVZW9pe8aKLHY4yS3GJiYgBMmzbN3Nzc0KJSqbKzs21tbQMDA8PDw8eMGXP3pd/zSmYmrl2TrFaiJ8AYJVnpdLq4uDgAM2bMuNvo7+9/8OBBHx+f5l5wN2wxymN6UhZjlGS1b9++srIyNzc3Dw+Pu42GJzW1oDfDJvq8HZSUxXvqSVaGI/o333xTlN44G6WnAWOU5FNZWbl//35TU9Np06aJ0iFjlJ4GjFGSz86dOzUajZ+fn7OzsygdGg7qGaOkLMYoycdwRH//xSUjOThApUJ5OXhLMymI99STTC5evOjm5ta+ffvr16+LuNeyrS2qqnDjBuzsxOqSqHl4pZ5ksnNnr6FD84cPzxB3v3pHR1RVoayMMUqK4WyU5KDXo1s3FBcjPR1eXmL2fOQITE3xhz+AD2QipXA2SnJITUVxMXr1wuDB4nRYV4eICLi5YenSe42ffYbOnfHuu+IMQfSEeImJ5BATAwAzZuChPexbSKdDQgIiI3H48L3GY8dw9qw4/RM9OcYoSa66GikpUKkwdarIPXt5YfZsaDQid0vULIxRktz33+PXX+HjgxdfFLnn//kfVFcjOlrkbomahTFKkrt7RC+6du2wciWio5GTI37nRE+IMUrSys9HejqsrTF+vCT9z5gBT0/8139J0jnRk2CMkrS+/RaCgPBw3Pfsj5a4devR7SoVvvkGx45h1y6j+idqMcYoSUgQEBsLGH1En52NXr2wadOjX+3fH7Nn489/Rn29UaMQtQxjlCR07Bjy8tCtG4YNa3knBQXw80NFBQ4deuy98599htpaZGS0fBSiFmOMkoRSUgAgIgImLf2h/fIL/P1RUoIRIxAb+9hlp+3aYfVq6PUtHIXIGLyLiSS0ejUCA9G7dws/fvs2/P1x+TIGDEByMiwtAWDvXiQmYsMGbNqE+5/NPHkytFrx11QR/S7eU09iCgqCRoPduxsjD8Bf/oKcHGzZ0uyu6usRFIRDh9CjB9LSYHgQfVoa/PxQW4u//U2SFVRELcCDehJTZiZSU7Fy5b2Wq1fx88/N7kevx/TpOHQInTohNbUxQ7OyEByM2lrMnMkMpacIY5REFhCA6GhcumRUJ0uX5iQkwMYGP/7YeJyenw8/P9y8iZAQfP21KJUSiYMxSiIbMwajRmHWrJbvSL9o0aLly/uMHv3PPXswYAAAVFRg7Fhcv46RIxEXh2Y+hplIWoxREt+aNcjIaFwx2lwbNmxYvny5iYlq1qxib28AqK5uvNA0cCB27bp31pXoKcEYJfH16YOPP8bcuaiqat4Hd+7cOWfOHJVKtXnz5tDQUAD19QgLw9mz6NkTBw/CxkaSgomMwRglSSxeDCsrREU14yOHDx9+66239Hr9F1988fbbbwNoaMC0aUhNbbzQZHicMtHThjFK4qisfOC/VlZYswbr19+7TP/ba+NPnTo1bty4+vr6+fPnz507F4AgCEuW7P/hB9ja4uBBdO8uTd1ERmOMkrFqazF/Pvr2RWnpA+3jx8PPD4cOAUB5Ofr1Q1zco3vIzs4OCAioqamJiIhYsWKFoXHhwoUrVrw+ePCqvXvRv7+k34DIKIxRMkpaGtzdsWoVqqqQnt701a+/hpUVAGzYgMuXMWUKZs5ETU3TtyUlJVVWVo4bN27btm0qlQrA+vXrV65caWZmtmCB+5AhMnwPIiMIRC1SWyvMmyeo1QIg9O8vnD0rCIJw/LhQVPTA286dE/71L0GvFzZtEqysBEB48UXhxImmvW3fvr22ttbw7x07dpiYmKhUqm+//Vb670FkLMYotcT588LAgQIgmJoK8+YJdXVP9KnMTGHAAAEQzMyE1asrdTrdw+/Zu3evmZkZgNWrV4tcNJE0GKPUPFqtEB0tmJsLgNCjh3D8ePM+XlcnzJsnmJo2DBrk4+XllZube/+rJ0+etLa2BrBw4UIxiyaSEmOUmiErS/D0FABBpRJmzhTu3GlhP0eP5jo7OwOwsbGJi4szNGZmZtrZ2QGYMWOGXq8XrWgiiTFG6YnodLqVK1cOGHAHEFxdhWPHjO2woqIiJCTEcII+PDw8KyvLxcUFQHBwsFarFaNkIplwozz6fT///PObb76ZkZHRq9d4X9+kVatURj5Y6a7Nmzd//PHHNTU1arW6oaFh5MiR+/fvt+T9nvRMYYzSbxEEYcuWLYakc3Jy2rJlS2BgoLhD5OXlTZ06NTs7297e/t///rcN7/ekZw1jlB6rsLDw7bffPnr0KIDw8PBvvvnGcO5SdHfu3CksLOzZs6eFhYUU/RNJijFKj7Z9+/YPPvjg9u3bDg4O33zzzXiJHjNP9Ozjxo3UVGlp6Xvvvbd7924AYWFhGzdu7NChg9JFET29GKP0gOPHj4eEhNy8edPOzm79+vVTpkxRuiKipx0P6ukBVVVV7u7ubm5uW7du7dy5s9LlED0DGKPU1NWrVw1LOInoSTBGiYiMwo3yiIiMwhglIjIKY5SIyCiMUSIiozBGiYiM8v8BBq3eN4zITYIAAB+5elRYdHJka2l0UEtMIHJka2l0IDIwMjMuMDkuNQAAeJyFmWdUFGvw5hlyjkOOZgREFCPIW4MoCJjjvYqKCiYUEBVFFBEMiGSQHBQQJaigZPCtVxGzYsSMCYmSRJDMztzzP7t79sv2lzrT3TPn13Weqnqqp4NerRPiHwpC/+eQ/5/P/hxxAzN+FBGVcDYURBFx5zmCyPm/TvwXRf/fKGnwXxT+3/f9zw/9/67LCHGEOMLOwiI7hUVEnUXFdgqLiTuLSxgKi0s6S0oJSUo7S8sYCkvLOsvKCYlxnGVFnbnC/C+Kcfi3i4mLS0rLioqpC40VEprOP8v572EqmySZt5gjE5eNxu7cCFSbqM+sMq/TqrmdNNncit17bwFlS6tI/EMOy65OgHsqkWQodg++9vlIso4a4TzrVJz0XRx32R/Be9NC6IIjlTgjvxVL++Rw6agqc/5WQFtW7AL9s0VommOGwg+7wfO1Fkv6xYXYfXFgKt5Ge9ZGQx9T4dG4elqquRM66Fly8XQnCV33B0otVaFxPwWOAFd+i7Q4c4txYGs3x+LCB8GYfVSPBZ6upEYSb6jNG8Lm8aZCpWgduTlfmLWqnwdVjzRib7ML6abXZFzFePzYkYxfT0ng9rSjGJZ7hhYFlmHSuHb88VkGTe247PHSh3SM1j7QXVqIU5Ks0fbpIPDKNVimixScKE6AwaVttIZ3DuouqPFy332ng6d3gs/EOPLStpU4Lu+BLbdU4fnmW8ARpF1+mbEsE+c5sPtTQ7BrWzyGR+iw0vQ0um+RLM7bMZfJ/OsAn7YnE3eFQVRYlQbBq3eRpq3bcNGbb6T6xgwMkU7EzdliGPLXF7VXn6VbHSmO6rbgugdyuKlRjZ14e4UWrN8OojklaHFZH/9Z0QWPWnWYlz8XDh2Igy7tXhrcFgfLXijzPOoaaOcVZ7juHkMa6nvImTNt8MNSB17uR+CICGjdzkiznyqOTHd5GLpujMAqKX322L2Iyi9/R2uDCRtSMgX7Yz9JrbUwm+IYDltDUsnhwx5YlInkis8UjDdJwXJpGdzQGYDNR4/Q/LwyHLe4Bz+86KFXB5TZ0vR7tC3PEy4q30SPibY4yXwAngSps9L3XHinFwfcyb/pho8RQC1VeVVlw1RawwvE4v3Io2cfyKPZw7A0RRFKn+YDR1RAy9snxQ46OrJn9yPxjVE4brumzwyiblKDAx/oP6GEVd2cDtu4n8mvRmE2/n00HA1JIrZG7li/6BHp6TPCMywVffwkMarhGHJ7j1N1tTL8496B3ieGqVugCpsleocG3/KCA92FWCvDw9SXQ/AwRZOlFMrDqyPxsIs7QG1MIuCCDZfn6dlOrb7tgZoZwcTN6gtZrtcPb7xVIF6iGDhiAtqGfAm2L3oJWz4xFC+YRuI2Zz02pfwWLYUmml41j8m/mwL705+SkyuFWNulKNgwJphoTtqFby/fI1LJZqhUmYQqnnK42CgArdqO05SIChxd3YlJWT3UYJ0qizz9kF7bsA9y8SbKN89Fn4kjkOGqzvTVZcDkWgK483X71SwUHI9yeQ4ddTR6yAVKesNI2clGYq79B2rWqMLyOeXAERfQ5ipJsMC/jmzt4SjcdyEM33fqshI1SmvVvtDoS/PYgY9mcP3SC+LuJMTutMSARFo0eTvNBbPya8isvMn451Qiv8qk8N7nE+h14xR1DalEZc9O3BYui+nSqsxg3xPq/sUdhrwLcV434NC8fhh212Dt+yUB3yRB2uku+qAsBDb1qfI2wUc62Xo7zIu9SNobm8lz79/Qv4kL5q78KpMQ0L5PlWYi2o5MaXEQpqnGYUqtFutYkEt/Zsqgd94cFtfCA+X8dLJ12wAmc5KhpmA/oWZbMKLuLSEHp2OCUhyWbpTGgi5fND7jT2e9RFya2Yp9raN0iYcme3Api+oYekDRrzI0Fx+HFq//wGmeHrNPUoOvGfFgcG6AeifGAK9FhdeYy0H/RS6Q2htHKnkdpDmsEyTvK4P+otvAkRTQnn0kwZJtF7Om3gjUsIvETI4um72vkrLk39Sj3oKtMpoDH7sryePsYSRj4mHZwSByWMMVF4W+IpvfmeAd2USMyJHDSfE+WKl4hFqYIu5QbkaXN3+o2HMNlm9yjRbU7wPyvRgTOWa46nQvaBjrsKIyJVC34Xewyx20iK+wS6+4vNrFX6nwMlcIYOdJf3UXOQhdMKlIA1Ja+T1BSkC7+rEE65y8hAU9jMR9Y6JwMFOPSYsW01H3EdoSPY9ViVrCzjf5xGm6EOMqJ4HkBn8SxXbissbPpG7nFFTUTsF+fUmUVT+E3pfP0KrVlbg/+ydOduunZrfUWUJECfV/7gH5q0rRfuEU1PPqgz/rdJh3nywM98eDXckv+vpxGHzo5fLc279R14mbYW1vLLH6t5e8kGwDekUTxu25AxxpAa3dJSl27LADe/MsHC8URqKSiAEbO3yBLs/opHLtVmyZsjVEWt0lY7kc5jQ3EQauBJHWDXtwKP8tseyYihvOpaGPiBgGLfTFupbj9JlvOR56+gsfL5bBehkuuxxfTEUeeYJvTBFum2yK3S4DoF+pyaxdFaD2zXn4PaGDXpoTDhdGVXhPhXpoyQ5XmBR+hrzObieveN3Qt1UbPi/kV5mMgNZhujQT+2jPfm+IRPFXUdiwQp/Vz0qlwcnD1NDBit3InA9vE8rI6tdC7GRJCizuDCBv7u3Ggt315K+IGdq6pOK7j0K4qNQHZ5w+QuumlqECtOK41bIodEuF5QwV0gp7L0icW4hNUWb4TrMPHpprsYDdymC6NwGk2r/StKxQUFVU5fmf7aR9G7bA/JUhROxDP9k+0gFm9voQ8JpfZbICWj8xeda+yoE9uxWMhSHxWLBXj62Oj6XRfrI4vsGSda9whHsHr5AJR4VYy9NUsFc8Qy7dc0Mvmy/kR64ZdtqmoVIRB3WKvDAlI4RG11SgdG4TTlqjgLErVBl3wXkK/Dr3V+LnvJCL9wN+g7KMHlN6MA4uOsTCqZMc/Ho3GZ6Kq/BORcrgh/5dcKg7kJTe/EpiZvXA08/q4EdKgCMnoLVOl2LVDxxY8/ho3MWLxgkf9Nimvks03kQYbUbnsfNpNjBG9CrR8hNiXzJTodTQj0jp7MJm42bSxZmKdS4pGBoujBMCvdHzqw9doF+BcVua8OU6WWxMUmUr716l22S84G1/IW4MMsRhTh9MeKnF4iqUQeddAthBHfUVDQOTAVVeukU7zb63Ca5tjCCir0ZJ5PtfYDYyBu4jA47ATMoHKMqxnB327HJUEOq4xuH8NdrMjl2lpWVieG+qJfu6eCak9VWQ9b1CbIliLOT0RJPmjF34NOcduSM/HX/+jMNjSRJYoeiP3Lbt1Ph5GfpaduGqv+10zCcVtjvwKp0RfgDqFhXitVtT8YDuABhc12QmQTqgkhEH43Z00KeesSD9UIVX6CKE/cke8PboaXJpoJFUlA7Am89KYDvAn2UC2yv/slyMtRU7sMjWc6gYw59npTps4rNL9M+LZlqtYsneH+VBcmA1mTRmFDedToa2zGAypdgVZ0V9ILeyTZBXk4AZ02RQ84AfXis4TM83MJSvakV68je1fKTJ/lEqpbVaHvBTqBRx7RSUqOyDVx7azPaILBSoJkOjqRD2CUVDrJkqz/hYHdUo3whOJ9NISdtHsia0FfJmKcBf2UrgKApopR5Ks0Vt9mxPVyi+t4vCLl891mJ2kWad6qTWslbMR5MHZ548ILWMw+YkJEJ1azTZyXNDCas60hdpgrOXpGDHBA4WXfbBS17+tMe6BO89akXxP0IYk6/McmNLqFs/P7fHizHz+0y8d78fjo1qsev7VCBvcRz4v+qkpD0SjhxQ4ZVP7aZSzrtgs3o06XH6SebV9ILFKVWYcIDfE5QEtOvkpNgR/UUsYn4I/usRiQsjdJlkYhTdPomD2s8s2cSZS+C3bwEJDh/B9kOpIHIhlLgscMG9478SNQ1jPCKUhGOzxFB/hQ/mfYqkd2wYXoptQeF9CjgcrcG4m7NpWLQr5P3hu3R9Q3Qtbofx4QZM/IE67N8cC2lSorjSPxY+NSjxFlwVQbXDO2Dbm1hSLfadZN3shCAnGRivw3fjygLafDsJluqyiC0vC0FtzzAM2KjLUt5n0COjzfT6TSt2vG8e8CbVkGxtEZa/PQmkTYNIhOl+/CD3hrxZaIq3ryein540lo31xTzuFtpoLqiyXyi7v56qNqoyOc8i+s3zIPg0FKLfR3M0XjIIUwo02eO9qiDtmgSTT7TTWIiCK99UeUL32+nTuo0AswOJYVQt2bmvA5btkYLp9XzdqghojXZLsbO/7JnJxSh8WxmFlY902YXym7Suc4B+JPPYi1dzQCWxkkw/JMQ+xCbAkbyzZHH2LvSp+kgiXkzBTauT8B8PSTT4fgiHQ/3oVF45jkS04KsOCdTu5bIF16/Tz2aeEL2gEBVGpuITkb+gsEOLvfNWBoXhOEh+0UQfKkXB8imqvPs2DXSPnCvUTgolxhe7iUV9F0R368PzKxXA4Qpoi86KsUYXe5ZeHYUqxqE47KDHsp3y6NqLzdT9rRWbEW8FbBEjtjs4LCQ4Ef41DSa7ZXejx+PPZLTFGJ2y+ZvOUSmsNjiOcdxQ+tiO4rm3rfg6qY+mH9FkoebV9Jr6TsikxWh20xyLHf/C141arK1CAkyWxMGcZZ3U6u5ZMBrL5V23/kV7zTeDU0EUKTjWSSx626BqnzqseMH3CaoCWkVPcTbnlQPbujoSsy6FY7WxAdMxiafrrg5TOS0rduusAwS+ukqmyXCY/6QMgKoT5NpJN7zY1k3EYo0wcHManpwihI/93fHz12A69wffa7V/x3tHuGhwTZVpt+fRq4EeUDKlDBP+mYQe23sgpkeXrWuRhiGjONhZ0Unp0Qj4oMTlBel9pCu2OcHhujgSdHaI7KtuBr8n+jA1oAo4agJaMlucBXOWsJUbonE3hGLvE32mr3eL6t35Tq9tJmz47hToKn9OAseLsLis88CVSyZC9W743qaWjD42ROn8VJT3l8FN7YfQJ9afqk6oQPn2VvwyKItzDFTZDPFbtKHCC8qHirAzdyaeXjQIbzma7Gu9PGTlJsCC8e3UelwoWIuo8eLutNDqf13hpVw4+XriJ/kypgvGeqlCtkMZcNQFtCfaJZmxxGL2+FoE6gdF4UCiLnPJqqRb3YfoqgPzmJryNAj4eouo8Cfv3SmxMOFmCGm5uBsdsmpJ0TpT9DBMwnu2shgj5IvprYfo2oxKDGttQc0NbTTPXp1ZOuTRHEsv6Nl7E88dMkc/flcq36rF9jI1kJFJhIcJP+kabjRInFLj7elrpAfnbwf3smCS3N5BNvb8BnMvTXj8ia9bDQGt41xJpnbanl15GoLXgiKx8aYum7kxhx5U76V7cyzZ0r/8KlOqInciR1Gbv5tuGRtGPozbjvuNXhH9TWbo/plP+0sac1b54iP0oRb1Faj2pRUHjw5RmxJVtsGmlOZOPwCLykvxbvE0tEwfAKNv2oyzTQEuqcSD9c4hKtYfA+XzVXhOP75S9VMuED0hnGwO+UYqnnZBepwCvBbm91tNAW0mV5rpJjuwFf+EIe8Lv4ON6DMp5VyakfeS2gUAez84DVZnNJFyb1EWkRQFR6uyiMVDL1SNfU4szhijcXYqyqwUw2P/HsP2XbuoBN9rtRt2o4UxBwelFdkTmfu0YtAHHPEGBoYuwlnjR+CFgzpbeUERwufFQ9aTLrrzYDicu6nKu6jYS7O63WFWTiDRHXxP0rcPwvtkOdB6UwAcLQFtXoMky9xqx2T8wvGARjhK1uizhVtD6Np/e2jSYmBtS+1AZsEdUtUuwrpZGuyvOUeSph/GiWe6iGa8ES7NSUX1/X00zPwomgRF0UK7EvQZ+YU3eiRx7hgu+3fFHVr2yQ2SJUvwy5tpeJDXB64OWuzuB1noE4+F2Om/ac63MDAnKrzLtl20b+ZG+F5yiATsbSIDX9og4IIazNbjezBtAa1WhjjTXOPIHsuGYZxQBI5O0WV32wupwo8e+uzDPBa+dxYcv3aLDNZw2PY5CVBjFUpS/XbjROvP5JivIdYJJeCd3RIofuogGnLdqdvsckzMbMSrOt10+nlV5tiUR/0O+kBQXSFeeT4TRwb7QMlfm5mVKsDKdYkQHtBOr2+NhFV5XJ6N0Ud6d9lmGG8aSpa/byQuju2wwFIRPOv5PkFHQCuySZxFfF7E/haFoZB+GKaf1WOrHqfQlJSfVKHFkv20WgiLKx4QczaCnsPJUOscQRba7sAdn7+TGL3JaNWchMtyRHDenMMYs+sE1WYV+FK5FfMTVXCgicuuzqik3zZ5QMZQKTp/mobHuH0guVyHkYVSECwbB6NxQ3TGsRAwXMbfImN76cf8baDVmUzcA1tIm3Q7TL+kBDK+fDeuK6CtWS7BTE45sqKgCJSSj0CvWzpM17KSwoIeOu6FJZs+aToEhlKS0sHf0PX4/XFONDnz2hVzfN+R03ON8KpfAsYoyeLpvoMoX7CXrllTibNfNOOy+8L43U2Nif24ShOOHwDVV0V4rs4URV37wbxEiynVKUFHcgJw7Trp2mnRMNVZlRdx+gvdtmwnLEmJI/eKO8je6t9Qm64BEbV8Wj0BbdywJIuycGQ/jobiIYNIDJfSZzUkk94o7KKFawmbeQagLrKaiL8UZqFFSfDSN4I8affAsAs/yNBjE5TvTMFrs0TwmcUBrLE4QKMjirH8Nt/VLONg3msVJh98gxoGHwS7zkLc02iOPVcGwLNKiykbK8GLcwng96WLzjGKAINGLi9X5jeVi90C6U5BZFtQPcl+1QFzLPkb+qZS4OgLaNfHS7Azt+3Y8W+hKP8oDEuu67KpLecp16efmmZYsakvbEFzVyl5tInDku1SIOthKOl8y5+8pQ3kSdlktJiehNGZwmgtdhjD3P3oinOVeHF8C/LmyqGmrSrb4VFCpTd4Qf6OCjxTbIrT9/bBi1+67MwsWYiNiYUCDQ5OroiAEUMVnp1WHy085QwzWiJI3LgfRNSgHV7+kgO17XzdGghof2yWYdv67dmE0mDksRj8wN8djhYVUzcpIex2m8f+0TKHaeZ3iPNlYba25DxsbIkjJ/k7b8fpWhK9ywTDDsZjcbs40nfHkWO3ne4NqsA2hS6cW9tLJ7tzmdadcqo0sg8CtUvwm+xsfBrWD1sfazGNu8owLT4elC2EMW5NLDxtVea1TvtNyY7dcDE+mPS1viWbu3rBUUMaGlYWAWeMgDbZUJwZqy5isy+GY9yrUDzD1WWetVdoeuUvusXQit0VmgdXd1aTJ0IiTGxFMnTeiST6np5493EzcYswQgOvJJwYKoY5LUfx8usT1ONcBf4Q/4UJkb+po6E6U75QSh8J7QejpkLckjAXvaf+hdn1fCU0yECSUyzov2qhlp9DYfIyZZ7cvM/03oktYHwziJhYdpLZ67ug6qEqtK7k+9uxAtrRZHH2R9We/W05i/6PgrH9jD5rfJpMxbhPaL+DNau8Mhfc5FpJ6koxZrLhPLTZJpMXH3wwvOEFmWVngpJKKSg9SwJzegLwQak7nb2sFFPru7HiUjfVvK3EfIc+ULkAbyg3KcY/rTbY0M/hzeSqMxc9MWhbEQtGBSN0x6wweOOvwltU2UD3znCBC6u9ya+CGlJd+AeO7ReBS3vzgTNOQLvophAL0F/C3v0bi0knTuJotgEba15ML7kW0+NiPAZ6c6B+cQMJUhdm1w7FwxPJUDKJ64EHf74g9+8YoW1xKgqJK2PX2iPIDh+g5zMq8GN4M05X7qYax9TZEdF3dPmhozBNtATDr9ih05AQL3m7Jtsq1ktGpiRDYFUrdfA7CeOmqfECR5BeCVsNB9zPE3HvH8S2uwHMqAysvc7P7fj/lJAgwXpXOrKCxBgsuh2G51z12dNxN+kdj2Zact6KYcMcWHHoHlnYy2EJ0xJgxbR4ss90N+4arCM7qg2xxCsVE3sl8N72AxiqeIRWD1fgzhVNeLREFs9xVZlaRj4tWLMfEtRKcHKEKfqE98PyHi1mVMuFt88SwffzL9p5ORzqX6rxhsJ66OB0V9g/J4o0FTUT2wudUDaiCd9E+a5mgoB2728x9mSnA7u5OAJD/4RitJsu63+dTQvke+mRsfOYUZYltEgUERfhUazjz7Kc96dJzI4dOND1mVi9NEL1XYn4aJEUztfyxW0vj1NOOmKJZTMWnpBF3w41dtK2lM6c4QHdE0rxVpExLlk0AE4vtdnMMmlwsUyEA1F99KRTGNhRLi/gdgNNX+QEBjppZJPHD7I7vAVgphz8tGbAmSig1ZCQYk+zHFnT+CgsLotAhWB9Vse5QnO0/9JHFoRNMydgklRBzl4QZrtZEqxSjCBRph6oENhEOt1NMGtrGu4IE0V7My88tiqQrugrQaf8JrytLoPi2Vx24lwB7Z64H6KlS/DHtcko6T4IShVazHIFF6K3JIL37Vb65Xs4SE5S41273EM1wBkefT3L93ktxOhZO8yMVwcHN35uJwloTTdIsWNL7Bg5H4paB0KxeboukzPLpS1vX1BOEWGVhjNhslYLmbxdhNE1UdBqWEAq1N1Rr+85Cdk8Eb8+SMTy02L4asYxjGjcRQuSizHvzG+0LPxDt0UpsZljKqjbdm/ImHsT4zsX4AnjfqifoMFeHVKF9K3nIby+h66vjoVdUsq8kYM/KHuyH7KKTpD1Im/Jl0sjkC8pCUWRN4Aj+C9O3nCTKFtjY8+aLcPx+cMgXBKvx9aPyaQ9s1/Tqb+B6URZguKr1+TNemE2kHMehK5EklVSHhikV0MmpRlh+e9EpBflcMc/x5B17Kde4ypxWLcN8/w5+FhTlRWZ3qXfPh0AuRnFuMbLGp84DUDKTi12u1ACNFISIDB6kLYah8I2UOXVTGigLpGbwNo+mkx2eUlGSAfsPSoJtTf4s2zyf+9vR8WZ8gpHlqgUjAuaQ3FLoR4LCk6k8PwX3fSYMN5ja2gPriLxY4TZj/cJ8DY0lCyLdEcni88kIWEqFo2moN9zCfx76TAaWXrTc9llmExb8OONAfr0C5cFnLhDXZIPgc+5coyVmY331o/AQIMOezNTBlyfJcDc6UKY5R4JX3VVeZ0nW+nh2I2wjD8vJ/a/JdlC7TDwRxpMi/m0RgLah/nSDOpt2ca60/hrQSwqntJkAZ2htNJFC4OuW7CF3HUg6xZIVL1H8PzaTNh26AhRmr4Dr93oJYHVplgnHoft5WJ4rcQdqfFJ2tx2G8+Vf8NipwGq4qTN7lacpOVH3CDGGvHqFzmcNdoGbrfGscVi48BtJBZu5oni6+o0kMxV5sVclsDVIc7gmXeWlJ9tIrYOrRAdqwRJk/gdzFhAW6onzfbGOrDCWUGYuz4ac7brMqvbGXRSBQfVagkT9yXg9KGMPBcWZjPnx4FbVBjBR55Y1f+e0MxpGLc3CR9ISeNMk+P4Quc4XR5WiQWtbWgt2UY3jlVjUY9u0X8qPKCnvRwPXjNCoy/DUPRdm7034cK9awkgM1UUu6/EQa29Cm9+cje1cd0Kec6niGJuDTGe8Bsut0jC9bhC4JgIaOVSRVn1Z0cmNBCGe/j+9nWIPjvyMJbm2kqg4woeE165DGboJBDuCmEmrpcBM0W3koecg/jcpoPYlxijr1Iafvgjjv8ud8OHGEqfGFH0kKrDdHExzFfRYudks2jceHcIFaP48aMaGhj0wd3T+qxztizUhMVC3IcBOjEqAkx/q/DCbVrp8Jrl4KrpTz7FtBOVuq+wtk4N0ByBM+U/JdSLMfLNjv02i0bRR2fRZ54BSw3MoMfevaSuodbsuJYF5M7+Qv42iLC/D2Nh6ZxkcjnrEEYa15GR1ROx9HwaunYJ491jvnhnTQgNGCnC1TfaMP75INVBFUaePaC8ie6Q8PMGjiuzQZs5A/DHXYOdTJeE7QHnYergLzonPhg8vLi8w6otNF5tC6iOO0pcg+uIbdAfcDdXA7M0/qZjKqBdIi3MgnExe9d3Hme9P4MhfQZMpbyShsW/pLll1mz9QyN4JP2WFO0WZXuGYqDCIpYssPHCE1VfiHHFBJw5mow9RVJoJOONWa7b6dCeYhT3bsIY114qXqfCzua8pOTdcfgx+yZKytvjuGAOz7ZSnakocKCQpUBz5ifqqHgKmjrVebdL7tAS43WwzSSUvLnUSmZINUPwHhUoceY7xqkC2mlTxJmwtiOb2BaMkbfCccJzXXZPLZkaNIqgnR9hHuvtQdolnfhxOMxhdgpMdfAirRruaDrxK7npPxXH1Sfh4BIZVBjvgddkDtAXW25hbvw3VKv8Q02a1JlV6BWafNEb/prewlrLMSj/bRDe5PId/zZFsBqOh7ZjI7TGKQJknnB5gfuF0Er7X/jmHEKq/qkn8zc0gYeEPDzI4FeZ2X9vP5rE2JeIRcz9SCSWDwVj6joDljwaS8cr1NP75/lK6FoAq0/eJ0t2ijLvb0nwjRtD8p288eTcX2RcliHG8H3CmD0iePP0QWzFEzTZoBSX327EQ1W91PAxlznWUDrP4wDs+qcE88Pm4qbDA9BzTJtxn0nDad94sOR204mpYWA2wuVdLP1BtS5uAKPM48R+5w/y3OsXTDsrDxYpZcCZJqCdWSzBmpY4souGYZgzJQLdsvVYxp48CvHdVOYFYXcXzAHDpCpyaECYVU9IgFvaEURhqScqb/1OMn+b4t41KVjzVgIPS3jje3Sl73JL0P9TE77N+U5fX+Uyy2xKzY74gFFpMTomm+OwtBAvoEWT3b4iC1PuJ0FT+R9qYx0KxtJqPOWaTlr5dwMc3XSOtHE/kV1+v6CjTRFevikGtf8F1vHABLVkwC0AAAHGelRYdE1PTCByZGtpdCAyMDIzLjA5LjUAAHicfVNLjtswDN37FLpADP7Ez3KSDIpBMQ7Qpr1D970/SirNSLMZyzQk4pF6fKS3Vs+P6/c/f9vHw9dtaw2+eCOi/WYA2N5bbdr59dvb0S73l/PTc7n9Ou4/G0rDnjG5PmNf7rf3pwfbpZ1oRw6C3k68oxGBtxPuKO4gM5jaMdwMJF5B4moA7QQ7C0Nf7uHKCrtoYGBBoytZktmNwr1PpGTSjHcUhQICWVA03AmQZQH2TAk7OXOXyh0kaJEuIVddaOrjbtUe6U7CxMqq5euk7j6h9oCGmTrXjiyZWAWJZZE4od5u5XZBTlErV5duWpSzeIMla5ROpYkJcRIMRAEdSdFJbCIR/nMNgLAsOiCT0tAWg1AXKI76e9Ljnkj05CEPkSl4uR4pqSagI6T2SQ9c+6hORMWWLiEP8QNSPypxkwdXk7JvZitPycuLnQfRYEGREVmQW+R9H8DX4/ppuh7zdr4d1zlvtWjOlKTxnBspm8NRq88RyEPT2WhJs9lMSvPZMMljzKZg2Sq9lAMXgak+SIuQMjy8CIbjI4syRWLGFMdE9FWRtf46P//V3G//AL/ZwTizd7A3AAABBnpUWHRTTUlMRVMgcmRraXQgMjAyMy4wOS41AAB4nCWQza3DQAiEW3lHW1pby/AvK6cUkCJ8dwUpPrDvhviGYeD90L29PvuNe3vuB+/92d77Ta/P33c75mk5Z/qgM6eojwOnUAJXIRYX8JhnEsm0cVCxgDZLdwseVcE5yBd0SFxVhBBTM9XytDZlYaceNNM0aTnY2JcKFhFXWQWzSrcSQp61WhBmUqyWyHKa8OTKi0ks7SiWlNQo1eA15MgI7STEE7qOClu7KscsgiKJcfBJDlDHIVkZctY+lAnqM6xj+bv71R+KbKKBLEkdHJ50dYO8pOURTDpWJiRHzZDSzFH3z7B/Iia+f396aFLjBFvx3AAAAABJRU5ErkJggg==", @@ -90,7 +82,7 @@ "my_propertymy_value" ], "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -205,7 +197,7 @@ "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAZI0lEQVR4nO3de1hU1d4H8O8w3DHlYggaat4V0YRTjyhlgiAaCCKopGLlm+Xr8fR27Bw1FDnkBctL9qqlnkxMjEAkDK947PCGICcvKaAVISgoDigoSTAwM/v9Y0gDxYTZe7bK9/OXrplZ6zfPM8+Xvfdae22FIAggIqK2MpG7ACKiRxtjlIjIIIxRIiKDMEaJiAzCGCUiMghjlIjIIKZyF0BkdA0NOHwY589DEDBwIPz8YG4ud030CFNw3Si1LwUFCAhARQX+9CcAOHECDg7YuxcDB8pdGT2qGKPUnmg0GDoUdnbYvx8dOwJAdTXGj0d5OfLzYWYmd330SOK1UWpPDh7EuXNYt64xQwF07Ij161FQgLQ0WSujRxhjlNqT7Gx06oRnn23S6OEBBwdkZ8tUEz3yGKPUnlRWwtn5Hu3duuH6daNXQ48Jxii1J9bWqKy8R/v16+jQwejV0GOCMUrtiZsbKipQVtakUaVCWRnc3GSqiR55jFFqT4KC0KEDli9v0rhiBWxsEBIiU030yOPye2pP7OywdSumT4dKBV9fKBRIT0dKCnbsgL293MXRo4rrRql9+PZbqNUYMwYATp7Exo3Iy4MgYPBgzJ3buBSfqE0Yo9QOCAI8PHD6NBISMGWK3NXQ44bXRqkdSEnB6dNwdsaECQDQ0IDMTLlroscHY5Qed4KAmBgAWLwYVlYAEBeH55/HnDny1kWPDcYoyUMQhNraWmOMtHs3zpyBiwtmzQKAhgasWAEAL75ojNGpHWCMkgy+/vrrXr16RUVFST6STodlywAgMhIWFgDw6acoKoKrK8LCJB+d2gfGKMnAwcGhuLj4yy+/1Ol00o6UmIizZ9G9O159FQDq67FqFQBER8OEP34SB39JJANPT8+ePXuWlJRkZWVJOIxW23hVdMmSxo2Zt25FcTEGD+ZiexIRY5RkoFAoJk+eDCAhIUHCYRIScP48evZERAQAqNWIjQWAmBgeipKI+GMieUydOhVAYmKiRqORZACttvGq6NKljYeimzejtBTDhiE4WJIRqb1ijJI8hg0bNnDgwIqKim+++UaSAeLj8cMP6NMH06cDQF0d3n8fAKKjoVBIMiK1V4xRko105/UajUal73bJEpiaAsDHH+PyZbi7IzBQ9OGonWOMkmzCw8MB7NmzR61Wi9vzzp07nQ4cWB0QgGnTAKCuDqtXA0BMDA9FSXSMUZJN//79n3nmGVtbx3//+6KI3TY0NLz33nsAuoaHQ6kEUPPPf+LKFTz3HF56ScSBiPQYoySnV15JLy7+MS6un4h9xsXFXbhwoV+/flOmTAFQU1PTZ9my5c89V6ufcSISG2OU5DRxYmeFAqmpqKkRp8OGhoYVK1YAiImJUSqVADZs2HBVpdqnVFr5+oozBlFTjFGSU/fu8PTEr7+K9njjbdu2FRUVubq6hoWFAaipqVm7di2AGP06fCIJMEZJZvr9P0WZrq+vr4+NjQUQHR1tYmICYP369eXl5SNHjhyj37CZSALctplkdvUqnnoKpqa4ehW2tgZ1tWnTprlz5w4ePPjMmTMmJia3bt3q1atXRUXF0aNHR48eLVK9RM3xaJRk5uSEF1+EWo2vvjKoH7VavXLlSgAxMTH6Q9F169ZVVFR4eXkxQ0lSjFGSnyjn9Vu2bCktLR02bFhwcDCAmzdvfvjhhwCWcYKeJMYYJfmFhsLcHP/6F8rL29hDXV3dqlWrAPTr1y8iIuLq1avr1q2rrKz08fEZNWqUmLUS3YUPWCb52dlh+nTY2KANu5TU1tYeO3Zs9erVly9fBvDll1+amZmVlpaeOXMGgDF2hqZ2j1NMJL/SUmRn47nn0KNHY4tOh+RkjBiBbt3u/ZHCwuv79sXv27cvIyPj9r2knTt39vX1TUhIUCqVGo3Gz8/v0KFDRvkG1K7xaJTkl5ODyZPx3HPIzm7cCFSjweTJSElpEqNaLbKzkZaGI0dQXl5dUvIWABMTEw8PjzFjxgQEBIwcOVKhUFy8eFG/G/SSJUvk+T7UzjBG6aFgYoKiIvzzn5g9u/lLV69i/37s34/0dFRXNzba2T39+uv/8/zz7uPGjevcufPv3x8REZGVldW9e3cvLy+j1E7tHU/qSX7JyZg6FWvXIjoa58/D0RH19bCwQEoKdu3C7t24/SN1c8P48Rg/HiNGNG6Ad7eqqipnZ2eNRlNaWurk5GS0b0HtFmfq6WExZw6cnfHOO00au3WDpSXGjMGHH6K4GGfPIjYWL7zQYoYCsLOz8/f312q1iYmJUtdMBMYoPTxMTfHxx9i5E//6153GxYtx/TrS0/HWW3cmoP7QtGnTAMTHx0tQJlFzjFF6iDz/PF5+GfPmoaGhscXBAVZWre4nMDCwU6dOP/xQVlBwXdwKie7GGKWHy+rVuHIFmzcb1ImlpeWbb56tr78UH+8gUl1ELWKM0sPFyQkxMTD8Bs4xY7rX1WHnTnAOlaTGGKWHzty56NnT0E68vdGtGwoL8d13IpREdB+MUZKfoyN8fO78V6nEpk0YMwaOjm3v08SkcccTzjOR1LhulOS3YQMmTYKzs8jdnjoFDw84OuLy5fstkCIyEI9GSWYZGZg3Dx4ed2bnxeLuDldXlJc3WUFliLKysvT0dHH6oscIY5RktmgRAMydCzMz8TufOhUw+Ly+qKho/fr1vr6+3bt3DwkJqaurE6U2emzwVIfklJKC7Gw4OuIvf5Gk/5dfxvLlUCha/UFBEE6cOJGSkpKamnru3Dl9o5WVlY+PT2VlZdeuXUUulB5ljFGSjVYL/R5MUVF44glJhnByQkFBk22iGhpw6xZsbe+drQ0NyMjAv/+9Pi5udWlpqb7R3t4+ICAgKCjI39/f2tpakkLpUcYYJdns2IH8fDz9NF5/Xaoh3n4bW7bg888xfXpjy8GDmDABv/7a5Oao2locOYK0NHz1FcrLMWqUtrS01MXFZdy4cQEBAWPHjjU3N5eqRHr0MUZJHvX1eO89AFi2DJJmlKUl5s/HSy/Bzq75S+Xl2LsXqak4cgS3L3i6uWHs2Olr1oxyd3dXtOFyALU/jFGSx8aNKCqCm1vjLJB0XnwRV65g4cImN5jeuAE/P2RlQacDAKUSXl4IDkZwMHr3BuAIGLBmldoZxijJ4NYtxMYCQGxs43b30lEqsW4dfH0xcyZGjGhstLXFlSswN4eXFwICMHmy+KtWqf1gjJIMPvgA5eXw8sL48cYYztsboaGYMwcnT95pTElBr17o0MEYBdDjjetGydgqKrBuHYDGA1LjWLsWRUX43/+90zJkCDOUxMEYJWPbtGnvk0/WBwZi5EjjDdqtG5YuRUwMrl0z3qDUTvCeejKq4uLiAQMGCILy9OkLgwZ1kXq4N97A5ctISwMAjQbDhkGtRkFB8wVPRIbg0SgZVVRUlFqtDg8PM0KGNqN/SMnPPxt5WHr8MUbJePLy8uLj483NzaOiomQpwMsLM2bIMjI9zhijZDzvvvuuTqebM2dOr169jDCcIOC11xrP6G+Li4Mg8IyexMQYJSPJyclJS0vr0KHDIv2eTtJLSsLw4Zg1yzijUfvFGCUjWbhwoSAI8+fP79LFGFdFtVpERwOAp6cRRqN2jTP1ZAzx8fHTp0/v3LlzYWFhx44djTDitm2YNQt9++LcOW59T9Li74skdOnSpZSUlKSkpGPHjgGIjIw0TobW1zc+W/Qf/2CGkuR4NEri+/HHH/fs2ZOcnHzyd3dfOjo6Xrp0ycLCwggFfPQR3noLbm74/nvJ79kn4l9qEk9u7t7DhyO3b8/Ly9M3dOzY0d/fPyMjQ6VSrVq1yjgZWlODlSsBYPlyZigZA39lZLD8fERHY9AgDBnSLzU1Ly/Pzs5uxowZiYmJV65c8fPzq6ysfPLJJ2cYa8XmRx/h6lU8+ywCAowzILV3PBqlNhEEHD+O5GQkJ6O4uLHR0bGHh8fhJUtGjx5t+tslyRs3bjQ0NNTX1yuVSiPUdfMmVq8GgNjYtjyCiagNGKN0X9XV+OILnD0LrRb9+mHqVHTtiuJieHnh8uXG93TrhpAQTJoELy8rpdL3d5++efOmvb29mZnZzZs3z58/P3DgQKnr/eADVFbCxwfe3lIPRdSIU0zUstxcjB0LS0v4+MDcHJmZKCxEYiL8/eHiAlNTBAcjLAwjRjS7BllVVfX1118nJSWlp6er1Wp947JlyyIjIyWt99o19OqFX35BVhaXi5LxMEapBVotBg9Gly44dAj6qSH9zZUpKSgogFqNp55q/hGVCl99VX/wYKf9++vq6wEolcpRo0b179//448/HjZs2KlTpyQteenSwpiY3hMmIDVV0nGImmCMUguOHIGvb/PjOpUKPXrg/febPFe+ogIHDiApCQcPQqMB8OdnnvnexiYsLGzKlClOTk5qtdrR0bG6uvrnn3/u3bu3RPWWlJT069fP1fWFzz5Lc3Mzk2gUorvx2ii14ORJmJri2WebNHbpgt69G5/FUVTUOMWUkwP9H2NLS4wfj0mTPgoMNPndczgtLCxeeumlL774IiUl5Z133pGo3piYmLq6uv79OzNDyci44IlaUF0Ne/t73APUpQtu3gSA5cvxt7/h+HFYWiIgAHFxUKmQmoqICJO7nmUcEhICIDk5WaJiCwoKtm/frlQq5dqCj9ozHo1SCzp1wvXr0GiaJ6lKhe7dASA8HLW1mDQJ/v6wtr5/Z+PHj7exscnJySkpKXFxcRG92KioKI1G8/rrr/fv31/0zonuj0ej1AJ3d2i1yM9v0lhejsJCeHgAgI8P4uMREvKHGQrA2traz89PEIRUCWZ/cnNzExMTLS0tlyxZInrnRH+IMUotGD0affvi3Xf1s0aNoqNhYYHw8Db0N2nSJEhzXh8ZGanT6d58800pjnOJ/hBn6qllJ09i7Fg4O2PcOJibIyMDJ04gIQFBQW3o7JdffnF0dGxoaLhy5Yqjo6NYNf7nP/8ZPny4tbV1YWGhcXYyJWqGR6PUMg8P/PADZszA5csoKIC3N86da1uGAnjiiSe8vb21Wq245/WLFi0SBOHtt99mhpJceDRKxrNt27ZZs2aNHTv24MGDonR45MgRX19fW1vbCxcu2N21PIDIOHg0SsYTFBRkamp69OjRyspKUTrUzyktWLCAGUoyYoyS8Tg4OIwaNaqhoSGt2eM6W+/06dMRERHHjx93cnKaN2+eKOURtQ1jlIzKwPn6/Pz86OjoAQMGuLu7f/755507dw4MDLSxsRG1RqLW4fJ7MqqQkJB58+YdOnSourr6AZ/LJAhCTk7O7t27k5OTi3/b29TZ2bl3796ZmZnWD7BqlUhSPBolo+rSpYunp6darT5w4MD936nT6U6ePBkdHd23b19PT881a9YUFxc/9dRTs2fP3rt376VLl958800AKpXKKIUTtYhHo2RsISEhmZmZycnJU6ZMuftVnU6XlZWVlJSUnJx8+bedoV1cXCZOnBgWFjZixAiT3/Y21a9wKi8vN1rlRPfEBU9kbCUlJT169LCysqqoqLh9Sq7VarOzs5OSkpKSksrKyvSNPXr0CAoKCgsLGzlypOKuR4Lk5uYOGTLE1dX19hP0iGTBo1EyNhcXFw8PjxMnThw+fDgwMFCfnomJiVevXtW/oWfPnhMmTLhnel67dq1z5876f+uPRnlST7Lj0SjJIDY2dtGiRX369KmsrLy9hnTAgAGhoaGhoaFDhw5t9v6SkpI9e/YkJSXl5ORcvHixa9euAHQ6nYWFhVarVavVZmbcY5Rkw6NRksELL7zg7OxcWFgoCMKgQYMCAwMDAgK8vLyave3ChQvJycm7d+/+7rvv9H/vra2tz549q49RExMTBwcHlUp17do1Z2dnGb4GEQDGKMkiOzu7rKzM3d09Pj5+wIABzV4tLi5OTU1NSkrKysrSp6eVlZWPj09YWNjEiROfeOKJ2+/s0qWLSqVSqVSMUZIRY5RksGPHDgBRUVG/z9ALFy7onyd67NgxfYu1tbW3t3dYWFhISEiHDh3u7ke/UxQvj5K8GKNkbKdPnz579qyDg8O4ceP0LcnJyUuXLs3/bYtoOzu7wMDA0NBQPz8/C/1DSVswZMgKlSq6qmqw5EUTtYwxSsYWFxcHYNq0aebm5voWhUKRn59vZ2cXEBAQFhY2duzY2y/9kWdzc1FaKlmtRA+AMUpGpdFoEhISAMycOfN2o7+//6FDh7y9vU3vfoLefem3GOU5PcmLMUpGtW/fPpVK5erq6u7ufrtR/6SmNvSm30Sf9zGRvHhPPRmV/oz+lVdeEaU3Ho3Sw4AxSsZTWVm5f/9+U1PTadOmidIhY5QeBoxRMp5du3ap1Wo/Pz+xlnnqT+oZoyQvxigZj/6M/veTSwZydIRCgYoK6HRidUnUarynnozk3Llzrq6unTp1Kisrs7KyEqtbe3tUVeHaNTg4iNUlUetwpp6MZNeuviNHFo0alS1ihgLo0gVVVVCpGKMkGx6NkjHodOjRA6WlyMqCp6eYPR89CqUSf/oT+EAmkguPRskY0tNRWoq+fTF8uDgd1tUhIgKurli69E7jP/6Bbt3wX/8lzhBED4hTTGQMcXEAMHMm7trDvo00GiQlIToaR47caczIwKlT4vRP9OAYoyS56mqkpkKhwMsvi9yzpyfmzoVaLXK3RK3CGCXJJSbi118xejSeflrknv/2N1RXIzZW5G6JWoUxSpK7fUYvuo4dsXIlYmNRUCB+50QPiDFK0ioqwrFjsLFBSIgk/c+cCQ8P/Pd/S9I50YNgjJK0tm+HICA0FPfavb4Vbt68d7tCgU8+QUYG9uwxqH+iNmOMkoQEATt3Agaf0efno29fbN5871cHD8af/4y//x319QaNQtQ2jFGS0P/9Hy5cQI8eGDWq7Z0UF8PPDxUVOHwYLd0sEh2N2lpkZ7d9FKI2Y4yShFJTAWDGDJi09Yd27Rr8/XHlCl58EfHxLS477dgRa9ZwgxKSB+9iIgmtXo2AAPTt28aP//IL/P3x448YMgQpKbC0BIC0NOzejU2bsHkzfv9s5qlT0dAg/poqoj/Ee+pJTIGBUKuxd29j5AF4/30UFGDr1lZ3VV+PwEAcPozevZGZCScnAMjMhJ8famuxfbskK6iI2oAn9SSm3Fykp2PlyjstJSX46adW96PTYfp0HD6Mrl2Rnt6YoXl5mDABtbWYPZsZSg8RxiiJbNw4xMbi/HmDOlm6tCApCba2OHCg8Ty9qAh+fqiqQlAQNm4UpVIicTBGSWT+/vDxwZw5Lc6q/6HFixcvXz5gzJj/fP01hgwBgIoKjBuHsjKMHo2EBLTyMcxE0mKMkvjWrkV2duOK0dbatGnT8uXLTUwUc+aUenkBQHV140TT0KHYs+fOVVeihwRjlMQ3YADmz8c776CqqnUf3LVr17x58xQKxZYtW0JCQgDU1yM0FKdOoU8fHDoEW1tJCiYyBGOUJLF4MWxsEBPTio8cOXLk1Vdf1el0H3zwwWuvvQZAq8W0aUhPb5xo0j9OmehhwxglcVRWNvmvtTXWrsXGjXem6e+/Nj4nJyc4OLi+vn7hwoXz588HIAhCVNT+r76CnR0OHULPntLUTWQwxigZqrYWCxdi4EBcvdqkPTgYfn44fBgAyssxaBASEu7dQ35+/vjx42tqaiIiIlasWKFvjIyMXLHipeHDV6WlYfBgSb8BkUEYo2SQzEy4uWHVKty4gays5q9u3AhrawDYtAk//ojwcMyejZqa5m9LTk6urKwMDg7etm2bQqEAsGHDhpUrV5qZmS1a5DZihBG+B5EBBKI2qa0VFiwQlEoBEAYPFk6dEgRB+PZb4dKlJm87fVr47jtBpxM2bxasrQVAePpp4dix5r3t2LGjtrZW/++dO3eamJgoFIrPPvtM+u9BZCjGKLXFmTPC0KECIJiaCgsWCHV1D/Sp3FxhyBABEMzMhDVrqjQazd3vSUtLMzMzA7BmzRqRiyaSBmOUWqehQYiNFczNBUDo3Vv49tvWfbyuTliwQDA11Q4b5uPp6VlYWPj7V48fP25jYwMgMjJSzKKJpMQYpVbIyxM8PARAUCiE2bOFW7fa2M8331xwdnYGYGtr+8UXX+gbc3Nz7e3tAcycOVOn04lWNJHEGKP0QDQaTWxs7JAhtwChVy8hI8PQDisqKoKDg/UX6MPCwnJzc11cXABMmDChoaFBjJKJjIQb5dEf++mnn1555ZXs7Oy+fYPHjNnz/vsKAx+sdNuWLVv++te/1tTUKJVKrVY7evTo/fv3W/J+T3qkMEbpfgRB2Lp1qz7pnJyctm7dGhAQIO4QRUVF4eHh+fn5Dg4O33//vS3v96RHDWOUWnTx4sXXXnvt6NGjAMLCwj755BP9tUvR3bp16+LFi3369LGwsJCifyJJMUbp3pKSkt54442qqipHR8dPPvlk4sSJcldE9JDiXUzUnEqlCgoKmjx5clVVVWhoaF5eHjOU6D64/y01kZmZGRQUVFlZaW9vv2HDhvDwcLkrInrY8aSemrhx44abm5urq+unn37arVs3ucshegQwRqm5kpIS/RJOInoQjFEiIoNwiomIyCCMUSIigzBGiYgMwhglIjIIY5SIyCD/D9+G3rhq5bBLAAABTnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAA+KL+BkY0hAyTAyMzOoAFiMEMFmBkRAmCaBZ3mgNBMaBoZmQkq4GZgZGBkYmBi5mBiZmFgYeVgYmVjYGPnYGLjYODgZODgYuDi5mDi4mHg4WVgZWTgYWEQYQJqZGUEKmdlY+Pg4mFhFd8EMgqKGfiWv+A4EMzqfeAh9+T9qasm7JdQkz+wae76fb+tPuxjNbE9sOuWlf2P4MN2sicZDxgfmWl/TnKiXfyMnP0T627b/arT2h/tNG//60ds+3u8qvY36/fse1i1Z/+O9a/3u/zi3a/3X/RA1r2N+5oDM+2ntG8Fmm+w//3Jz/Y6V6QOeL8SsZcsnm5vzfh2n334ZPt9B4Udls1+su+DWIZ93K5OuwWdH+yuhX2xf28hat9UvM9eDACM1GEAYrR3BQAAAaF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9U0tOxDAM3fcUvgCR7TgfL5kZhBCiI8HAHdhzf2EnHZJsaOsqcZ+d52d3A7/eL6/fP/B3xcu2AeA/j6rCV0TE7Q18Aaen55cdzrfH091zvn7utw8gAUoWY/eKfbxd3+4egjM8cKCojAgPMVDhtqJAUitOwQx7c0dsAA5Sc/EVhijOaECjZ8UgWQ+ophwdEAqrzkixpBZfKXUgcvHvFBgpzsBkKTFwjdKPVBZuGYVrmYG5n51zxl4Hx3zEJM7L4aVDtZSjDi6x9iApnGZohau7qzRWnitJ7pSt+IWAuk6uSZFGUInkSEqVZyThwVWRWtGK96RCukKp1Z8qlYakGrkTkcxLUcRG1QCJPKfRw3pUJ5KXmig28RVdKhfX1WmSZl06b5N0buxqO8lYcG+S1LKc/bRflunq83a67pcxb37zmCnbQBxzYxuQMRxklsYIsFkejSazMprJZnU0jGyroynkNkvfHDQJzP4inoSk5omTYNReMinjJEZM6og0KzLX7/v7v2rr7RdbH7+0RVgL8gAAAOF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJwlT0uuxDAIu8pbtlIaBUP4qOoq+86F5vAPMiuC7dhmvbSed+FZtI7nc77HOn8PrL/vcaETB9rFnQw5qZO43zV55I4urtau0Vn4ziEaG46p3EY3RNxJOs1CBywadQziROEs9TUgSK3ArSxUtYLAutkJjYLDbAfB2IsXw6wiLsRbN0UrI4tsGxaTdA0i2XJybPcYlBVi/NRCgawynSxRckbbVyDuXCdRS8fhO1lEZ2pjZMHsodH2vX6Xndc2HXWfuMX5/QcEX0W59Lht5AAAAABJRU5ErkJggg==", "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -248,7 +240,7 @@ "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAZI0lEQVR4nO3de1hU1d4H8O8w3DHlYggaat4V0YRTjyhlgiAaCCKopGLlm+Xr8fR27Bw1FDnkBctL9qqlnkxMjEAkDK947PCGICcvKaAVISgoDigoSTAwM/v9Y0gDxYTZe7bK9/OXrplZ6zfPM8+Xvfdae22FIAggIqK2MpG7ACKiRxtjlIjIIIxRIiKDMEaJiAzCGCUiMghjlIjIIKZyF0BkdA0NOHwY589DEDBwIPz8YG4ud030CFNw3Si1LwUFCAhARQX+9CcAOHECDg7YuxcDB8pdGT2qGKPUnmg0GDoUdnbYvx8dOwJAdTXGj0d5OfLzYWYmd330SOK1UWpPDh7EuXNYt64xQwF07Ij161FQgLQ0WSujRxhjlNqT7Gx06oRnn23S6OEBBwdkZ8tUEz3yGKPUnlRWwtn5Hu3duuH6daNXQ48Jxii1J9bWqKy8R/v16+jQwejV0GOCMUrtiZsbKipQVtakUaVCWRnc3GSqiR55jFFqT4KC0KEDli9v0rhiBWxsEBIiU030yOPye2pP7OywdSumT4dKBV9fKBRIT0dKCnbsgL293MXRo4rrRql9+PZbqNUYMwYATp7Exo3Iy4MgYPBgzJ3buBSfqE0Yo9QOCAI8PHD6NBISMGWK3NXQ44bXRqkdSEnB6dNwdsaECQDQ0IDMTLlroscHY5Qed4KAmBgAWLwYVlYAEBeH55/HnDny1kWPDcYoyUMQhNraWmOMtHs3zpyBiwtmzQKAhgasWAEAL75ojNGpHWCMkgy+/vrrXr16RUVFST6STodlywAgMhIWFgDw6acoKoKrK8LCJB+d2gfGKMnAwcGhuLj4yy+/1Ol00o6UmIizZ9G9O159FQDq67FqFQBER8OEP34SB39JJANPT8+ePXuWlJRkZWVJOIxW23hVdMmSxo2Zt25FcTEGD+ZiexIRY5RkoFAoJk+eDCAhIUHCYRIScP48evZERAQAqNWIjQWAmBgeipKI+GMieUydOhVAYmKiRqORZACttvGq6NKljYeimzejtBTDhiE4WJIRqb1ijJI8hg0bNnDgwIqKim+++UaSAeLj8cMP6NMH06cDQF0d3n8fAKKjoVBIMiK1V4xRko105/UajUal73bJEpiaAsDHH+PyZbi7IzBQ9OGonWOMkmzCw8MB7NmzR61Wi9vzzp07nQ4cWB0QgGnTAKCuDqtXA0BMDA9FSXSMUZJN//79n3nmGVtbx3//+6KI3TY0NLz33nsAuoaHQ6kEUPPPf+LKFTz3HF56ScSBiPQYoySnV15JLy7+MS6un4h9xsXFXbhwoV+/flOmTAFQU1PTZ9my5c89V6ufcSISG2OU5DRxYmeFAqmpqKkRp8OGhoYVK1YAiImJUSqVADZs2HBVpdqnVFr5+oozBlFTjFGSU/fu8PTEr7+K9njjbdu2FRUVubq6hoWFAaipqVm7di2AGP06fCIJMEZJZvr9P0WZrq+vr4+NjQUQHR1tYmICYP369eXl5SNHjhyj37CZSALctplkdvUqnnoKpqa4ehW2tgZ1tWnTprlz5w4ePPjMmTMmJia3bt3q1atXRUXF0aNHR48eLVK9RM3xaJRk5uSEF1+EWo2vvjKoH7VavXLlSgAxMTH6Q9F169ZVVFR4eXkxQ0lSjFGSnyjn9Vu2bCktLR02bFhwcDCAmzdvfvjhhwCWcYKeJMYYJfmFhsLcHP/6F8rL29hDXV3dqlWrAPTr1y8iIuLq1avr1q2rrKz08fEZNWqUmLUS3YUPWCb52dlh+nTY2KANu5TU1tYeO3Zs9erVly9fBvDll1+amZmVlpaeOXMGgDF2hqZ2j1NMJL/SUmRn47nn0KNHY4tOh+RkjBiBbt3u/ZHCwuv79sXv27cvIyPj9r2knTt39vX1TUhIUCqVGo3Gz8/v0KFDRvkG1K7xaJTkl5ODyZPx3HPIzm7cCFSjweTJSElpEqNaLbKzkZaGI0dQXl5dUvIWABMTEw8PjzFjxgQEBIwcOVKhUFy8eFG/G/SSJUvk+T7UzjBG6aFgYoKiIvzzn5g9u/lLV69i/37s34/0dFRXNzba2T39+uv/8/zz7uPGjevcufPv3x8REZGVldW9e3cvLy+j1E7tHU/qSX7JyZg6FWvXIjoa58/D0RH19bCwQEoKdu3C7t24/SN1c8P48Rg/HiNGNG6Ad7eqqipnZ2eNRlNaWurk5GS0b0HtFmfq6WExZw6cnfHOO00au3WDpSXGjMGHH6K4GGfPIjYWL7zQYoYCsLOz8/f312q1iYmJUtdMBMYoPTxMTfHxx9i5E//6153GxYtx/TrS0/HWW3cmoP7QtGnTAMTHx0tQJlFzjFF6iDz/PF5+GfPmoaGhscXBAVZWre4nMDCwU6dOP/xQVlBwXdwKie7GGKWHy+rVuHIFmzcb1ImlpeWbb56tr78UH+8gUl1ELWKM0sPFyQkxMTD8Bs4xY7rX1WHnTnAOlaTGGKWHzty56NnT0E68vdGtGwoL8d13IpREdB+MUZKfoyN8fO78V6nEpk0YMwaOjm3v08SkcccTzjOR1LhulOS3YQMmTYKzs8jdnjoFDw84OuLy5fstkCIyEI9GSWYZGZg3Dx4ed2bnxeLuDldXlJc3WUFliLKysvT0dHH6oscIY5RktmgRAMydCzMz8TufOhUw+Ly+qKho/fr1vr6+3bt3DwkJqaurE6U2emzwVIfklJKC7Gw4OuIvf5Gk/5dfxvLlUCha/UFBEE6cOJGSkpKamnru3Dl9o5WVlY+PT2VlZdeuXUUulB5ljFGSjVYL/R5MUVF44glJhnByQkFBk22iGhpw6xZsbe+drQ0NyMjAv/+9Pi5udWlpqb7R3t4+ICAgKCjI39/f2tpakkLpUcYYJdns2IH8fDz9NF5/Xaoh3n4bW7bg888xfXpjy8GDmDABv/7a5Oao2locOYK0NHz1FcrLMWqUtrS01MXFZdy4cQEBAWPHjjU3N5eqRHr0MUZJHvX1eO89AFi2DJJmlKUl5s/HSy/Bzq75S+Xl2LsXqak4cgS3L3i6uWHs2Olr1oxyd3dXtOFyALU/jFGSx8aNKCqCm1vjLJB0XnwRV65g4cImN5jeuAE/P2RlQacDAKUSXl4IDkZwMHr3BuAIGLBmldoZxijJ4NYtxMYCQGxs43b30lEqsW4dfH0xcyZGjGhstLXFlSswN4eXFwICMHmy+KtWqf1gjJIMPvgA5eXw8sL48cYYztsboaGYMwcnT95pTElBr17o0MEYBdDjjetGydgqKrBuHYDGA1LjWLsWRUX43/+90zJkCDOUxMEYJWPbtGnvk0/WBwZi5EjjDdqtG5YuRUwMrl0z3qDUTvCeejKq4uLiAQMGCILy9OkLgwZ1kXq4N97A5ctISwMAjQbDhkGtRkFB8wVPRIbg0SgZVVRUlFqtDg8PM0KGNqN/SMnPPxt5WHr8MUbJePLy8uLj483NzaOiomQpwMsLM2bIMjI9zhijZDzvvvuuTqebM2dOr169jDCcIOC11xrP6G+Li4Mg8IyexMQYJSPJyclJS0vr0KHDIv2eTtJLSsLw4Zg1yzijUfvFGCUjWbhwoSAI8+fP79LFGFdFtVpERwOAp6cRRqN2jTP1ZAzx8fHTp0/v3LlzYWFhx44djTDitm2YNQt9++LcOW59T9Li74skdOnSpZSUlKSkpGPHjgGIjIw0TobW1zc+W/Qf/2CGkuR4NEri+/HHH/fs2ZOcnHzyd3dfOjo6Xrp0ycLCwggFfPQR3noLbm74/nvJ79kn4l9qEk9u7t7DhyO3b8/Ly9M3dOzY0d/fPyMjQ6VSrVq1yjgZWlODlSsBYPlyZigZA39lZLD8fERHY9AgDBnSLzU1Ly/Pzs5uxowZiYmJV65c8fPzq6ysfPLJJ2cYa8XmRx/h6lU8+ywCAowzILV3PBqlNhEEHD+O5GQkJ6O4uLHR0bGHh8fhJUtGjx5t+tslyRs3bjQ0NNTX1yuVSiPUdfMmVq8GgNjYtjyCiagNGKN0X9XV+OILnD0LrRb9+mHqVHTtiuJieHnh8uXG93TrhpAQTJoELy8rpdL3d5++efOmvb29mZnZzZs3z58/P3DgQKnr/eADVFbCxwfe3lIPRdSIU0zUstxcjB0LS0v4+MDcHJmZKCxEYiL8/eHiAlNTBAcjLAwjRjS7BllVVfX1118nJSWlp6er1Wp947JlyyIjIyWt99o19OqFX35BVhaXi5LxMEapBVotBg9Gly44dAj6qSH9zZUpKSgogFqNp55q/hGVCl99VX/wYKf9++vq6wEolcpRo0b179//448/HjZs2KlTpyQteenSwpiY3hMmIDVV0nGImmCMUguOHIGvb/PjOpUKPXrg/febPFe+ogIHDiApCQcPQqMB8OdnnvnexiYsLGzKlClOTk5qtdrR0bG6uvrnn3/u3bu3RPWWlJT069fP1fWFzz5Lc3Mzk2gUorvx2ii14ORJmJri2WebNHbpgt69G5/FUVTUOMWUkwP9H2NLS4wfj0mTPgoMNPndczgtLCxeeumlL774IiUl5Z133pGo3piYmLq6uv79OzNDyci44IlaUF0Ne/t73APUpQtu3gSA5cvxt7/h+HFYWiIgAHFxUKmQmoqICJO7nmUcEhICIDk5WaJiCwoKtm/frlQq5dqCj9ozHo1SCzp1wvXr0GiaJ6lKhe7dASA8HLW1mDQJ/v6wtr5/Z+PHj7exscnJySkpKXFxcRG92KioKI1G8/rrr/fv31/0zonuj0ej1AJ3d2i1yM9v0lhejsJCeHgAgI8P4uMREvKHGQrA2traz89PEIRUCWZ/cnNzExMTLS0tlyxZInrnRH+IMUotGD0affvi3Xf1s0aNoqNhYYHw8Db0N2nSJEhzXh8ZGanT6d58800pjnOJ/hBn6qllJ09i7Fg4O2PcOJibIyMDJ04gIQFBQW3o7JdffnF0dGxoaLhy5Yqjo6NYNf7nP/8ZPny4tbV1YWGhcXYyJWqGR6PUMg8P/PADZszA5csoKIC3N86da1uGAnjiiSe8vb21Wq245/WLFi0SBOHtt99mhpJceDRKxrNt27ZZs2aNHTv24MGDonR45MgRX19fW1vbCxcu2N21PIDIOHg0SsYTFBRkamp69OjRyspKUTrUzyktWLCAGUoyYoyS8Tg4OIwaNaqhoSGt2eM6W+/06dMRERHHjx93cnKaN2+eKOURtQ1jlIzKwPn6/Pz86OjoAQMGuLu7f/755507dw4MDLSxsRG1RqLW4fJ7MqqQkJB58+YdOnSourr6AZ/LJAhCTk7O7t27k5OTi3/b29TZ2bl3796ZmZnWD7BqlUhSPBolo+rSpYunp6darT5w4MD936nT6U6ePBkdHd23b19PT881a9YUFxc/9dRTs2fP3rt376VLl958800AKpXKKIUTtYhHo2RsISEhmZmZycnJU6ZMuftVnU6XlZWVlJSUnJx8+bedoV1cXCZOnBgWFjZixAiT3/Y21a9wKi8vN1rlRPfEBU9kbCUlJT169LCysqqoqLh9Sq7VarOzs5OSkpKSksrKyvSNPXr0CAoKCgsLGzlypOKuR4Lk5uYOGTLE1dX19hP0iGTBo1EyNhcXFw8PjxMnThw+fDgwMFCfnomJiVevXtW/oWfPnhMmTLhnel67dq1z5876f+uPRnlST7Lj0SjJIDY2dtGiRX369KmsrLy9hnTAgAGhoaGhoaFDhw5t9v6SkpI9e/YkJSXl5ORcvHixa9euAHQ6nYWFhVarVavVZmbcY5Rkw6NRksELL7zg7OxcWFgoCMKgQYMCAwMDAgK8vLyave3ChQvJycm7d+/+7rvv9H/vra2tz549q49RExMTBwcHlUp17do1Z2dnGb4GEQDGKMkiOzu7rKzM3d09Pj5+wIABzV4tLi5OTU1NSkrKysrSp6eVlZWPj09YWNjEiROfeOKJ2+/s0qWLSqVSqVSMUZIRY5RksGPHDgBRUVG/z9ALFy7onyd67NgxfYu1tbW3t3dYWFhISEiHDh3u7ke/UxQvj5K8GKNkbKdPnz579qyDg8O4ceP0LcnJyUuXLs3/bYtoOzu7wMDA0NBQPz8/C/1DSVswZMgKlSq6qmqw5EUTtYwxSsYWFxcHYNq0aebm5voWhUKRn59vZ2cXEBAQFhY2duzY2y/9kWdzc1FaKlmtRA+AMUpGpdFoEhISAMycOfN2o7+//6FDh7y9vU3vfoLefem3GOU5PcmLMUpGtW/fPpVK5erq6u7ufrtR/6SmNvSm30Sf9zGRvHhPPRmV/oz+lVdeEaU3Ho3Sw4AxSsZTWVm5f/9+U1PTadOmidIhY5QeBoxRMp5du3ap1Wo/Pz+xlnnqT+oZoyQvxigZj/6M/veTSwZydIRCgYoK6HRidUnUarynnozk3Llzrq6unTp1Kisrs7KyEqtbe3tUVeHaNTg4iNUlUetwpp6MZNeuviNHFo0alS1ihgLo0gVVVVCpGKMkGx6NkjHodOjRA6WlyMqCp6eYPR89CqUSf/oT+EAmkguPRskY0tNRWoq+fTF8uDgd1tUhIgKurli69E7jP/6Bbt3wX/8lzhBED4hTTGQMcXEAMHMm7trDvo00GiQlIToaR47caczIwKlT4vRP9OAYoyS56mqkpkKhwMsvi9yzpyfmzoVaLXK3RK3CGCXJJSbi118xejSeflrknv/2N1RXIzZW5G6JWoUxSpK7fUYvuo4dsXIlYmNRUCB+50QPiDFK0ioqwrFjsLFBSIgk/c+cCQ8P/Pd/S9I50YNgjJK0tm+HICA0FPfavb4Vbt68d7tCgU8+QUYG9uwxqH+iNmOMkoQEATt3Agaf0efno29fbN5871cHD8af/4y//x319QaNQtQ2jFGS0P/9Hy5cQI8eGDWq7Z0UF8PPDxUVOHwYLd0sEh2N2lpkZ7d9FKI2Y4yShFJTAWDGDJi09Yd27Rr8/XHlCl58EfHxLS477dgRa9ZwgxKSB+9iIgmtXo2AAPTt28aP//IL/P3x448YMgQpKbC0BIC0NOzejU2bsHkzfv9s5qlT0dAg/poqoj/Ee+pJTIGBUKuxd29j5AF4/30UFGDr1lZ3VV+PwEAcPozevZGZCScnAMjMhJ8famuxfbskK6iI2oAn9SSm3Fykp2PlyjstJSX46adW96PTYfp0HD6Mrl2Rnt6YoXl5mDABtbWYPZsZSg8RxiiJbNw4xMbi/HmDOlm6tCApCba2OHCg8Ty9qAh+fqiqQlAQNm4UpVIicTBGSWT+/vDxwZw5Lc6q/6HFixcvXz5gzJj/fP01hgwBgIoKjBuHsjKMHo2EBLTyMcxE0mKMkvjWrkV2duOK0dbatGnT8uXLTUwUc+aUenkBQHV140TT0KHYs+fOVVeihwRjlMQ3YADmz8c776CqqnUf3LVr17x58xQKxZYtW0JCQgDU1yM0FKdOoU8fHDoEW1tJCiYyBGOUJLF4MWxsEBPTio8cOXLk1Vdf1el0H3zwwWuvvQZAq8W0aUhPb5xo0j9OmehhwxglcVRWNvmvtTXWrsXGjXem6e+/Nj4nJyc4OLi+vn7hwoXz588HIAhCVNT+r76CnR0OHULPntLUTWQwxigZqrYWCxdi4EBcvdqkPTgYfn44fBgAyssxaBASEu7dQ35+/vjx42tqaiIiIlasWKFvjIyMXLHipeHDV6WlYfBgSb8BkUEYo2SQzEy4uWHVKty4gays5q9u3AhrawDYtAk//ojwcMyejZqa5m9LTk6urKwMDg7etm2bQqEAsGHDhpUrV5qZmS1a5DZihBG+B5EBBKI2qa0VFiwQlEoBEAYPFk6dEgRB+PZb4dKlJm87fVr47jtBpxM2bxasrQVAePpp4dix5r3t2LGjtrZW/++dO3eamJgoFIrPPvtM+u9BZCjGKLXFmTPC0KECIJiaCgsWCHV1D/Sp3FxhyBABEMzMhDVrqjQazd3vSUtLMzMzA7BmzRqRiyaSBmOUWqehQYiNFczNBUDo3Vv49tvWfbyuTliwQDA11Q4b5uPp6VlYWPj7V48fP25jYwMgMjJSzKKJpMQYpVbIyxM8PARAUCiE2bOFW7fa2M8331xwdnYGYGtr+8UXX+gbc3Nz7e3tAcycOVOn04lWNJHEGKP0QDQaTWxs7JAhtwChVy8hI8PQDisqKoKDg/UX6MPCwnJzc11cXABMmDChoaFBjJKJjIQb5dEf++mnn1555ZXs7Oy+fYPHjNnz/vsKAx+sdNuWLVv++te/1tTUKJVKrVY7evTo/fv3W/J+T3qkMEbpfgRB2Lp1qz7pnJyctm7dGhAQIO4QRUVF4eHh+fn5Dg4O33//vS3v96RHDWOUWnTx4sXXXnvt6NGjAMLCwj755BP9tUvR3bp16+LFi3369LGwsJCifyJJMUbp3pKSkt54442qqipHR8dPPvlk4sSJcldE9JDiXUzUnEqlCgoKmjx5clVVVWhoaF5eHjOU6D64/y01kZmZGRQUVFlZaW9vv2HDhvDwcLkrInrY8aSemrhx44abm5urq+unn37arVs3ucshegQwRqm5kpIS/RJOInoQjFEiIoNwiomIyCCMUSIigzBGiYgMwhglIjIIY5SIyCD/D9+G3rhq5bBLAAABTnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAA+KL+BkY0hAyTAyMzOoAFiMEMFmBkRAmCaBZ3mgNBMaBoZmQkq4GZgZGBkYmBi5mBiZmFgYeVgYmVjYGPnYGLjYODgZODgYuDi5mDi4mHg4WVgZWTgYWEQYQJqZGUEKmdlY+Pg4mFhFd8EMgqKGfiWv+A4EMzqfeAh9+T9qasm7JdQkz+wae76fb+tPuxjNbE9sOuWlf2P4MN2sicZDxgfmWl/TnKiXfyMnP0T627b/arT2h/tNG//60ds+3u8qvY36/fse1i1Z/+O9a/3u/zi3a/3X/RA1r2N+5oDM+2ntG8Fmm+w//3Jz/Y6V6QOeL8SsZcsnm5vzfh2n334ZPt9B4Udls1+su+DWIZ93K5OuwWdH+yuhX2xf28hat9UvM9eDACM1GEAYrR3BQAAAaF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9U0tOxDAM3fcUvgCR7TgfL5kZhBCiI8HAHdhzf2EnHZJsaOsqcZ+d52d3A7/eL6/fP/B3xcu2AeA/j6rCV0TE7Q18Aaen55cdzrfH091zvn7utw8gAUoWY/eKfbxd3+4egjM8cKCojAgPMVDhtqJAUitOwQx7c0dsAA5Sc/EVhijOaECjZ8UgWQ+ophwdEAqrzkixpBZfKXUgcvHvFBgpzsBkKTFwjdKPVBZuGYVrmYG5n51zxl4Hx3zEJM7L4aVDtZSjDi6x9iApnGZohau7qzRWnitJ7pSt+IWAuk6uSZFGUInkSEqVZyThwVWRWtGK96RCukKp1Z8qlYakGrkTkcxLUcRG1QCJPKfRw3pUJ5KXmig28RVdKhfX1WmSZl06b5N0buxqO8lYcG+S1LKc/bRflunq83a67pcxb37zmCnbQBxzYxuQMRxklsYIsFkejSazMprJZnU0jGyroynkNkvfHDQJzP4inoSk5omTYNReMinjJEZM6og0KzLX7/v7v2rr7RdbH7+0RVgL8gAAAOF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJwlT0uuxDAIu8pbtlIaBUP4qOoq+86F5vAPMiuC7dhmvbSed+FZtI7nc77HOn8PrL/vcaETB9rFnQw5qZO43zV55I4urtau0Vn4ziEaG46p3EY3RNxJOs1CBywadQziROEs9TUgSK3ArSxUtYLAutkJjYLDbAfB2IsXw6wiLsRbN0UrI4tsGxaTdA0i2XJybPcYlBVi/NRCgawynSxRckbbVyDuXCdRS8fhO1lEZ2pjZMHsodH2vX6Xndc2HXWfuMX5/QcEX0W59Lht5AAAAABJRU5ErkJggg==", "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -286,7 +278,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-03-14 15:33:36.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" + "\u001b[32m2024-03-25 17:13:50.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" ] } ], @@ -329,7 +321,7 @@ "my_propertymy_value" ], "text/plain": [ - "" + "" ] }, "execution_count": 11, @@ -376,18 +368,15 @@ " \n", " 0\n", " CN1C=NC2=C1C(=O)N(C)C(=O)N2C\n", - " /home/cas/.cache/polaris-tutorials/003/data2.z...\n", + " molecule#0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " smiles \\\n", - "0 CN1C=NC2=C1C(=O)N(C)C(=O)N2C \n", - "\n", - " molecule \n", - "0 /home/cas/.cache/polaris-tutorials/003/data2.z... " + " smiles molecule\n", + "0 CN1C=NC2=C1C(=O)N(C)C(=O)N2C molecule#0" ] }, "execution_count": 12, @@ -419,8 +408,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-03-14 15:33:36.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n", - "\u001b[32m2024-03-14 15:33:36.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" + "\u001b[32m2024-03-25 17:13:50.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n", + "\u001b[32m2024-03-25 17:13:50.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" ] } ], @@ -476,23 +465,17 @@ " \n", " 0\n", " my_value\n", - " /home/cas/.cache/polaris-tutorials/003/data3.z...\n", + " molecule1#0\n", " CN1C=NC2=C1C(=O)N(C)C(=O)N2C\n", - " /home/cas/.cache/polaris-tutorials/003/data3.z...\n", + " molecule2#0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " my_property molecule1 \\\n", - "0 my_value /home/cas/.cache/polaris-tutorials/003/data3.z... \n", - "\n", - " smiles \\\n", - "0 CN1C=NC2=C1C(=O)N(C)C(=O)N2C \n", - "\n", - " molecule2 \n", - "0 /home/cas/.cache/polaris-tutorials/003/data3.z... " + " my_property molecule1 smiles molecule2\n", + "0 my_value molecule1#0 CN1C=NC2=C1C(=O)N(C)C(=O)N2C molecule2#0" ] }, "execution_count": 14, diff --git a/docs/tutorials/dataset_zarr.ipynb b/docs/tutorials/dataset_zarr.ipynb index 09211aa4..1dc5be73 100644 --- a/docs/tutorials/dataset_zarr.ipynb +++ b/docs/tutorials/dataset_zarr.ipynb @@ -129,8 +129,8 @@ "source": [ "# For performance reasons, Polaris expects all data related to a column to be saved in a single Zarr array. \n", "# To index a specific element in that array, the pointer path can have a suffix to specify the index. \n", - "train_path = f\"{base_path}/{inp_col_name}#0\"\n", - "test_path = f\"{base_path}/{inp_col_name}#1\"" + "train_path = f\"{inp_col_name}#0\"\n", + "test_path = f\"{inp_col_name}#1\"" ] }, { @@ -176,6 +176,8 @@ " # To indicate that we are dealing with a pointer column here,\n", " # we need to annotate the column.\n", " annotations={\"images\": ColumnAnnotation(is_pointer=True)},\n", + " # We also need to specify the path to the root of the Zarr archive\n", + " zarr_archive=base_path,\n", ")" ] }, @@ -190,7 +192,7 @@ "tags": [] }, "source": [ - "Note how the table does not contain the image data, but rather stores a path. " + "Note how the table does not contain the image data, but rather stores a path relative to the root of the Zarr. " ] }, { @@ -208,7 +210,7 @@ { "data": { "text/plain": [ - "'/home/cas/.cache/polaris-tutorials/002/data.zarr/images#0'" + "'images#0'" ] }, "execution_count": 7, @@ -360,12 +362,12 @@ "\n", "Which will get parsed into a table like: \n", "\n", - "| column_a |\n", - "| ------------------------------------ |\n", - "| /path/to/root.zarr/column_a/array#1 |\n", - "| /path/to/root.zarr/column_a/array#2 |\n", - "| ... |\n", - "| /path/to/root.zarr/column_a/array#N |\n", + "| column_a |\n", + "| ----------------- |\n", + "| column_a/array#1 |\n", + "| column_a/array#2 |\n", + "| ... |\n", + "| column_a/array#N |\n", "\n", "
\n", "

Note

\n", @@ -411,17 +413,10 @@ "id": "3c7c11ac", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A\n" - ] - }, { "data": { "text/plain": [ - "'/home/cas/.cache/polaris-tutorials/002/zarr/data.zarr//images#0'" + "'/images#0'" ] }, "execution_count": 13, @@ -531,7 +526,7 @@ { "data": { "text/html": [ - "
nameNone
description
tags
user_attributes
ownerNone
default_adaptersNone
md5sum6ef8d23737aafcbf82c421e7f99e1d95
readme
annotations
images
is_pointerTrue
modalityUNKNOWN
descriptionNone
user_attributes
dtypeobject
sourceNone
licenseNone
curation_referenceNone
cache_dir/home/cas/.cache/polaris/datasets/None/6ef8d23737aafcbf82c421e7f99e1d95
artifact_idNone
n_rows1000
n_columns1
" + "
nameNone
description
tags
user_attributes
ownerNone
default_adapters
zarr_archive/home/cas/.cache/polaris-tutorials/002/json/data.zarr
md5sum3874f5ec0a215c52bf468b7be5f09fc7
readme
annotations
images
is_pointerTrue
modalityUNKNOWN
descriptionNone
user_attributes
dtypeobject
sourceNone
licenseNone
curation_referenceNone
cache_dir/home/cas/.cache/polaris/datasets/None/3874f5ec0a215c52bf468b7be5f09fc7
artifact_idNone
n_rows1000
n_columns1
" ], "text/plain": [ "{\n", @@ -540,8 +535,9 @@ " \"tags\": [],\n", " \"user_attributes\": {},\n", " \"owner\": null,\n", - " \"default_adapters\": null,\n", - " \"md5sum\": \"6ef8d23737aafcbf82c421e7f99e1d95\",\n", + " \"default_adapters\": {},\n", + " \"zarr_archive\": \"/home/cas/.cache/polaris-tutorials/002/json/data.zarr\",\n", + " \"md5sum\": \"3874f5ec0a215c52bf468b7be5f09fc7\",\n", " \"readme\": \"\",\n", " \"annotations\": {\n", " \"images\": {\n", @@ -555,7 +551,7 @@ " \"source\": null,\n", " \"license\": null,\n", " \"curation_reference\": null,\n", - " \"cache_dir\": \"/home/cas/.cache/polaris/datasets/None/6ef8d23737aafcbf82c421e7f99e1d95\",\n", + " \"cache_dir\": \"/home/cas/.cache/polaris/datasets/None/3874f5ec0a215c52bf468b7be5f09fc7\",\n", " \"artifact_id\": null,\n", " \"n_rows\": 1000,\n", " \"n_columns\": 1\n", From e6400018627416cb0935a45849ee253d61b252a9 Mon Sep 17 00:00:00 2001 From: cwognum Date: Mon, 25 Mar 2024 18:02:46 -0400 Subject: [PATCH 3/6] Added the basic flow for uploading Zarr datasets to the Hub --- polaris/dataset/_dataset.py | 58 +++++++++++++++++------------ polaris/hub/client.py | 73 ++++++++++++++++++++++--------------- polaris/hub/polarisfs.py | 3 ++ 3 files changed, 80 insertions(+), 54 deletions(-) diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index 6c7a618f..8e6fb40b 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -1,5 +1,4 @@ import json -import os.path from hashlib import md5 from typing import Dict, List, Optional, Tuple, Union @@ -20,7 +19,7 @@ from polaris._artifact import BaseArtifactModel from polaris.dataset._adapters import Adapter from polaris.dataset._column import ColumnAnnotation -from polaris.hub.settings import PolarisHubSettings +from polaris.hub.polarisfs import PolarisFileSystem from polaris.utils import fs from polaris.utils.constants import DEFAULT_CACHE_DIR from polaris.utils.dict2html import dict2html @@ -87,6 +86,7 @@ class Dataset(BaseArtifactModel): # Private attributes _zarr_root: Optional[zarr.Group] = PrivateAttr(None) + _client = PrivateAttr(None) # Optional[PolarisHubClient] _has_been_warned: bool = False _has_been_cached: bool = False @@ -189,11 +189,29 @@ def _compute_checksum(table): checksum = hash_fn.hexdigest() return checksum + @property + def client(self): + """The Polaris Hub client used to interact with the Polaris Hub.""" + + # Import it here to prevent circular imports + from polaris.hub.client import PolarisHubClient + + if self._client is None: + self._client = PolarisHubClient() + return self._client + @property def zarr_root(self): """Open the zarr archive in read-write mode if it is not already open.""" + if not any(anno.is_pointer for anno in self.annotations.values()): + return None if self._zarr_root is None: - self._zarr_root = zarr.open(self.zarr_archive, "a") + options = {} + if PolarisFileSystem.protocol in self.zarr_archive: + options["client"] = self.client + options["dataset_owner"] = self.owner + options["dataset_name"] = self.name + self._zarr_root = zarr.open(self.zarr_archive, "a", storage_options=options) return self._zarr_root @computed_field @@ -271,27 +289,13 @@ def _load(p: str, index: Union[int, slice]) -> np.ndarray: return _load(value, index) def upload_to_hub( - self, - env_file: Optional[Union[str, os.PathLike]] = None, - settings: Optional[PolarisHubSettings] = None, - cache_auth_token: bool = True, - access: Optional[AccessType] = "private", - owner: Optional[Union[HubOwner, str]] = None, - **kwargs: dict, + self, access: Optional[AccessType] = "private", owner: Optional[Union[HubOwner, str]] = None ): """ Very light, convenient wrapper around the [`PolarisHubClient.upload_dataset`][polaris.hub.client.PolarisHubClient.upload_dataset] method. """ - from polaris.hub.client import PolarisHubClient - - with PolarisHubClient( - env_file=env_file, - settings=settings, - cache_auth_token=cache_auth_token, - **kwargs, - ) as client: - return client.upload_dataset(self, access=access, owner=owner) + self.client.upload_dataset(self, access=access, owner=owner) @classmethod def from_json(cls, path: str): @@ -331,14 +335,15 @@ def to_json(self, destination: str) -> str: dataset_path = fs.join(destination, "dataset.json") zarr_archive = fs.join(destination, "data.zarr") - # Copy over Zarr data to the destination - dest = zarr.open(zarr_archive, "w") - zarr.copy_all(source=self.zarr_root, dest=dest) - # Lu: Avoid serilizing and sending None to hub app. serialized = self.model_dump(exclude={"cache_dir"}, exclude_none=True) serialized["table"] = table_path - serialized["zarr_archive"] = zarr_archive + + # Copy over Zarr data to the destination + if self.zarr_root is not None: + dest = zarr.open(zarr_archive, "w") + zarr.copy_all(source=self.zarr_root, dest=dest) + serialized["zarr_archive"] = zarr_archive self.table.to_parquet(table_path) with fsspec.open(dataset_path, "w") as f: @@ -441,3 +446,8 @@ def __eq__(self, other): if not isinstance(other, Dataset): return False return self.md5sum == other.md5sum + + def __del__(self): + """Close the connection of the client""" + if self._client is not None: + self._client.close() diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 2aff2cb7..6bfbec8e 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -152,6 +152,25 @@ def _base_request_to_hub(self, url: str, method: str, **kwargs): return response + def _normalize_owner( + self, + artifact_owner: Optional[Union[str, HubOwner]] = None, + parameter_owner: Optional[Union[str, HubOwner]] = None, + ) -> HubOwner: + """ + Normalize the owner of an artifact to a `HubOwner` instance. + The parameter owner takes precedence over the artifact owner. + """ + if parameter_owner is not None: + artifact_owner = parameter_owner + + if artifact_owner is None: + raise ValueError( + "Either specify the `owner` attribute for the artifact or pass the `owner` parameter." + ) + + return artifact_owner if isinstance(artifact_owner, HubOwner) else HubOwner(slug=artifact_owner) + # ========================= # Overrides # ========================= @@ -443,19 +462,11 @@ def upload_results( Args: results: The results to upload. access: Grant public or private access to result - owner: Which Hub user or organization owns the artifact. - Optional if and only if the `benchmark.owner` attribute is set. + owner: Which Hub user or organization owns the artifact. Takes precedence over `results.owner`. """ # Get the serialized model data-structure - - if results.owner is None: - if owner is None: - raise ValueError( - "The `owner` argument must be specified if the `results.owner` attribute is not set." - ) - results.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner) - + results.owner = self._normalize_owner(results.owner, owner) result_json = results.model_dump(by_alias=True, exclude_none=True) # Make a request to the hub @@ -498,24 +509,22 @@ def upload_dataset( tuple with (connect_timeout, write_timeout). The type of the the timout parameter comes from `httpx`. Since datasets can get large, it might be needed to increase the write timeout for larger datasets. See also: https://www.python-httpx.org/advanced/#timeout-configuration - owner: Which Hub user or organization owns the artifact. - Optional if and only if the `benchmark.owner` attribute is set. + owner: Which Hub user or organization owns the artifact. Takes precedence over `dataset.owner`. """ - if dataset.owner is None: - if owner is None: - raise ValueError( - "The `owner` argument must be specified if the `dataset.owner` attribute is not set." - ) - dataset.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner) - # Get the serialized data-model - # We exclude the table as it handled separately and the cache_dir as it is user-specific + # We exclude the table as it handled separately and we exclude the cache_dir as it is user-specific + dataset.owner = self._normalize_owner(dataset.owner, owner) dataset_json = dataset.model_dump(exclude={"cache_dir", "table"}, exclude_none=True, by_alias=True) - # Uploading a dataset is a two-step process. + # We will save the Zarr archive to the Hub as well + zarr_fname = "data.zarr" + dataset_json["zarrArchive"] = f"{PolarisFileSystem.protocol}://{zarr_fname}" + + # Uploading a dataset is a three-step process. # 1. Upload the dataset meta data to the hub and prepare the hub to receive the parquet file # 2. Upload the parquet file to the hub + # 3. Upload the associated Zarr archive # TODO: Revert step 1 in case step 2 fails - Is this needed? Or should this be taken care of by the hub? # Write the parquet file directly to a buffer @@ -568,6 +577,17 @@ def upload_dataset( else: hub_response.raise_for_status() + # Step 3: Upload any associated Zarr archive + if dataset.zarr_root is not None: + source = dataset.zarr_root + dest = self.open_zarr_file( + owner=dataset.owner, + name=dataset.name, + path=zarr_fname, + mode="w", + ) + zarr.copy_all(source=source, dest=dest) + logger.success( "Your dataset has been successfully uploaded to the Hub. " f"View it here: {urljoin(self.settings.hub_url, f'datasets/{dataset.owner}/{dataset.name}')}" @@ -600,18 +620,11 @@ def upload_benchmark( Args: benchmark: The benchmark to upload. access: Grant public or private access to result - owner: Which Hub user or organization owns the artifact. - Optional if and only if the `benchmark.owner` attribute is set. + owner: Which Hub user or organization owns the artifact. Takes precedence over `benchmark.owner`. """ - if benchmark.owner is None: - if owner is None: - raise ValueError( - "The `owner` argument must be specified if the `benchmark.owner` attribute is not set." - ) - benchmark.owner = owner if isinstance(owner, HubOwner) else HubOwner(slug=owner) - # Get the serialized data-model # We exclude the dataset as we expect it to exist on the hub already. + benchmark.owner = self._normalize_owner(benchmark.owner, owner) benchmark_json = benchmark.model_dump(exclude={"dataset"}, exclude_none=True, by_alias=True) benchmark_json["datasetArtifactId"] = benchmark.dataset.artifact_id benchmark_json["access"] = access diff --git a/polaris/hub/polarisfs.py b/polaris/hub/polarisfs.py index 9e924361..a74e9def 100644 --- a/polaris/hub/polarisfs.py +++ b/polaris/hub/polarisfs.py @@ -200,3 +200,6 @@ def pipe_file( timeout=timeout, ) response.raise_for_status() + + +fsspec.register_implementation("polarisfs", PolarisFileSystem) From 0b7a2448089e9d44e68d0f33ecf0438ccd956a06 Mon Sep 17 00:00:00 2001 From: cwognum Date: Tue, 26 Mar 2024 13:11:53 -0400 Subject: [PATCH 4/6] New flow to use Zarr Datasets through the Hub --- polaris/benchmark/_base.py | 2 +- polaris/dataset/_dataset.py | 67 +++-- polaris/dataset/converters/_zarr.py | 2 +- polaris/hub/client.py | 28 +- polaris/hub/polarisfs.py | 15 +- polaris/loader/load.py | 2 +- polaris/utils/fs.py | 395 ---------------------------- polaris/utils/io.py | 145 ---------- tests/conftest.py | 2 +- tests/test_dataset.py | 2 +- 10 files changed, 68 insertions(+), 592 deletions(-) delete mode 100644 polaris/utils/fs.py delete mode 100644 polaris/utils/io.py diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py index b3170a52..0d4f12ed 100644 --- a/polaris/benchmark/_base.py +++ b/polaris/benchmark/_base.py @@ -6,6 +6,7 @@ import fsspec import numpy as np import pandas as pd +from datamol.utils import fs from pydantic import ( Field, FieldValidationInfo, @@ -20,7 +21,6 @@ from polaris.dataset import Dataset, Subset from polaris.evaluate import BenchmarkResults, Metric, ResultsType from polaris.hub.settings import PolarisHubSettings -from polaris.utils import fs from polaris.utils.context import tmp_attribute_change from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidBenchmarkError, PolarisChecksumError diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index 8e6fb40b..797f0ef3 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import zarr +from datamol.utils import fs from loguru import logger from pydantic import ( Field, @@ -20,7 +21,6 @@ from polaris.dataset._adapters import Adapter from polaris.dataset._column import ColumnAnnotation from polaris.hub.polarisfs import PolarisFileSystem -from polaris.utils import fs from polaris.utils.constants import DEFAULT_CACHE_DIR from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidDatasetError, PolarisChecksumError @@ -203,15 +203,26 @@ def client(self): @property def zarr_root(self): """Open the zarr archive in read-write mode if it is not already open.""" - if not any(anno.is_pointer for anno in self.annotations.values()): + if self.zarr_archive is None or not any(anno.is_pointer for anno in self.annotations.values()): return None + + saved_on_hub = PolarisFileSystem.is_polarisfs_path(self.zarr_archive) + saved_remote = saved_on_hub or not fs.is_local_path(self.zarr_archive) + + if saved_remote and not self._has_been_warned: + logger.warning( + f"You're loading data from a remote location. " + f"To speed up this process, consider caching the dataset first " + f"using {self.__class__.__name__}.cache()" + ) + self._has_been_warned = True + + # We open the archive in read-only mode if it is saved on the Hub if self._zarr_root is None: - options = {} - if PolarisFileSystem.protocol in self.zarr_archive: - options["client"] = self.client - options["dataset_owner"] = self.owner - options["dataset_name"] = self.name - self._zarr_root = zarr.open(self.zarr_archive, "a", storage_options=options) + if saved_on_hub: + self._zarr_root = self.client.open_zarr_file(self.owner, self.name, self.zarr_archive, "r+") + else: + self._zarr_root = zarr.open(self.zarr_archive, "r+") return self._zarr_root @computed_field @@ -254,39 +265,25 @@ def get_data(self, row: int, col: str, adapters: Optional[List[Adapter]] = None) adapters = adapters or self.default_adapters - def _load(p: str, index: Union[int, slice]) -> np.ndarray: - """Tiny helper function to reduce code repetition.""" - arr = self.zarr_root[p][index] - - if isinstance(index, slice): - arr = tuple(arr) - - adapter = adapters.get(col) - if adapter is not None: - arr = adapter(arr) - - return arr - + # If not a pointer, we can just return here value = self.table.loc[row, col] if not self.annotations[col].is_pointer: return value - value, index = self._split_index_from_path(value) + # Load the data from the Zarr archive + path, index = self._split_index_from_path(value) + arr = self.zarr_root[path][index] - # In the case it is a pointer column, we need to load additional data into memory - # We first check if the data has been downloaded to the cache. - if self._has_been_cached: - return _load(value, index) + # Change to tuple if a slice + if isinstance(index, slice): + arr = tuple(arr) - # If it doesn't exist, we load from the original path and warn if not local - if not fs.is_local_path(self.zarr_archive) and not self._has_been_warned: - logger.warning( - f"You're loading data from a remote location. " - f"To speed up this process, consider caching the dataset first " - f"using {self.__class__.__name__}.cache()" - ) - self._has_been_warned = True - return _load(value, index) + # Adapt the input + adapter = adapters.get(col) + if adapter is not None: + arr = adapter(arr) + + return arr def upload_to_hub( self, access: Optional[AccessType] = "private", owner: Optional[Union[HubOwner, str]] = None diff --git a/polaris/dataset/converters/_zarr.py b/polaris/dataset/converters/_zarr.py index 633b94bb..5ed706d0 100644 --- a/polaris/dataset/converters/_zarr.py +++ b/polaris/dataset/converters/_zarr.py @@ -42,7 +42,7 @@ def convert(self, path: str, factory: "DatasetFactory") -> FactoryProduct: data = defaultdict(dict) for col, arr in src.arrays(): for i in range(len(arr)): - data[col][i] = self.get_pointer(arr.name, i) + data[col][i] = self.get_pointer(arr.name.removeprefix("/"), i) # Construct the dataset table = pd.DataFrame(data) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 6bfbec8e..4acf72e1 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -16,6 +16,7 @@ from authlib.integrations.base_client.errors import InvalidTokenError, MissingTokenError from authlib.integrations.httpx_client import OAuth2Client, OAuthError from authlib.oauth2.client import OAuth2Client as _OAuth2Client +from datamol.utils import fs from httpx import HTTPStatusError from httpx._types import HeaderTypes, URLTypes from loguru import logger @@ -29,8 +30,8 @@ from polaris.evaluate import BenchmarkResults from polaris.hub.polarisfs import PolarisFileSystem from polaris.hub.settings import PolarisHubSettings -from polaris.utils import fs from polaris.utils.constants import DEFAULT_CACHE_DIR +from polaris.utils.context import tmp_attribute_change from polaris.utils.errors import PolarisHubError, PolarisUnauthorizedError from polaris.utils.types import AccessType, HubOwner, IOMode, TimeoutTypes @@ -379,6 +380,7 @@ def open_zarr_file( try: store = zarr.storage.FSStore(path, fs=polaris_fs) return zarr.open(store, mode=mode) + except Exception as e: raise PolarisHubError("Error opening Zarr store") from e @@ -511,6 +513,9 @@ def upload_dataset( See also: https://www.python-httpx.org/advanced/#timeout-configuration owner: Which Hub user or organization owns the artifact. Takes precedence over `dataset.owner`. """ + # Normalize timeout + if timeout is None: + timeout = self.settings.default_timeout # Get the serialized data-model # We exclude the table as it handled separately and we exclude the cache_dir as it is user-specific @@ -548,6 +553,7 @@ def upload_dataset( "access": access, **dataset_json, }, + timeout=timeout, ) # Step 2: Upload the parquet file @@ -558,6 +564,7 @@ def upload_dataset( headers={ "Content-type": "application/vnd.apache.parquet", }, + timeout=timeout, ) if hub_response.status_code == 307: @@ -579,14 +586,17 @@ def upload_dataset( # Step 3: Upload any associated Zarr archive if dataset.zarr_root is not None: - source = dataset.zarr_root - dest = self.open_zarr_file( - owner=dataset.owner, - name=dataset.name, - path=zarr_fname, - mode="w", - ) - zarr.copy_all(source=source, dest=dest) + with tmp_attribute_change(self.settings, "default_timeout", timeout): + # Copy the Zarr archive to the hub + # This does not copy the consolidated data + dest = self.open_zarr_file( + owner=dataset.owner, + name=dataset.name, + path=zarr_fname, + mode="w", + ) + logger.info("Copying Zarr archive to the Hub. This may take a while.") + zarr.copy_all(source=dataset.zarr_root, dest=dest, log=logger.info) logger.success( "Your dataset has been successfully uploaded to the Hub. " diff --git a/polaris/hub/polarisfs.py b/polaris/hub/polarisfs.py index a74e9def..8d0efa9d 100644 --- a/polaris/hub/polarisfs.py +++ b/polaris/hub/polarisfs.py @@ -55,6 +55,18 @@ def __init__( self.prefix = f"dataset/{dataset_owner}/{dataset_name}/" self.base_path = f"/storage/{self.prefix.rstrip('/')}" + @staticmethod + def is_polarisfs_path(path: str) -> bool: + """Check if the given path is a PolarisFS path. + + Args: + path: The path to check. + + Returns: + True if the path is a PolarisFS path; otherwise, False. + """ + return path.startswith(f"{PolarisFileSystem.protocol}://") + def ls( self, path: str, @@ -200,6 +212,3 @@ def pipe_file( timeout=timeout, ) response.raise_for_status() - - -fsspec.register_implementation("polarisfs", PolarisFileSystem) diff --git a/polaris/loader/load.py b/polaris/loader/load.py index 4f4d10c8..5bebf291 100644 --- a/polaris/loader/load.py +++ b/polaris/loader/load.py @@ -1,6 +1,7 @@ import json import fsspec +from datamol.utils import fs from polaris.benchmark._definitions import ( MultiTaskBenchmarkSpecification, @@ -8,7 +9,6 @@ ) from polaris.dataset import Dataset, create_dataset_from_file from polaris.hub.client import PolarisHubClient -from polaris.utils import fs def load_dataset(path: str, verify_checksum: bool = True) -> Dataset: diff --git a/polaris/utils/fs.py b/polaris/utils/fs.py deleted file mode 100644 index 13cb95c4..00000000 --- a/polaris/utils/fs.py +++ /dev/null @@ -1,395 +0,0 @@ -""" -The `fs` module makes it easier to work with all type of path (the ones supported by `fsspec`). -""" - -import hashlib -import io -import os -import pathlib -from typing import List, Optional, Union - -import fsspec -import fsspec.utils -from datamol.utils import parallelized - - -def _import_tqdm(): - try: - from tqdm.auto import tqdm - - return tqdm - except ImportError: - return None - - -def get_mapper(path: Union[str, os.PathLike]): - """Get the fsspec mapper. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - return fsspec.get_mapper(str(path)) - - -def get_basename(path: Union[str, os.PathLike]): - """Get the basename of a file or a folder. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - path = str(path) - mapper = get_mapper(path) - clean_path = path.rstrip(mapper.fs.sep) - return str(clean_path).split(mapper.fs.sep)[-1] - - -def get_extension(path: Union[str, os.PathLike]): - """Get the extension of a file. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - basename = get_basename(path) - return basename.split(".")[-1] - - -def exists(path: Union[str, os.PathLike, fsspec.core.OpenFile, io.IOBase]): - """Check whether a file or a directory exists. - - Important: File-like object always exists. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - return is_file(path) or is_dir(path) - - -def is_file(path: Union[str, os.PathLike, fsspec.core.OpenFile, io.IOBase]): - """Check whether a file exists. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - if isinstance(path, fsspec.core.OpenFile): - return path.fs.isfile(path.path) - - elif isinstance(path, (str, os.PathLike)): - mapper = get_mapper(str(path)) - return mapper.fs.isfile(str(path)) - - else: - return False - - -def is_dir(path: Union[str, os.PathLike, fsspec.core.OpenFile, io.IOBase]): - """Check whether a file exists. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - if isinstance(path, fsspec.core.OpenFile): - return path.fs.isdir(path.path) - - elif isinstance(path, (str, os.PathLike)): - mapper = get_mapper(str(path)) - return mapper.fs.isdir(str(path)) - - else: - return False - - -def get_protocol(path: Union[str, os.PathLike], fs: Optional[fsspec.AbstractFileSystem] = None): - """Return the name of the path protocol. - - Args: - path: a path supported by `fsspec` such as local, s3, gcs, etc. - """ - - if fs is None: - fs = get_mapper(path).fs - - protocol = fs.protocol # type: ignore - - if "s3" in protocol: - return "s3" - elif "gs" in protocol: - return "gs" - elif isinstance(protocol, (tuple, list)): - return protocol[0] - return protocol - - -def is_local_path(path: Union[str, os.PathLike]): - """Check whether a path is local.""" - return get_protocol(str(path)) == "file" - - -def join(*paths: str): - """Join paths together. The first element determine the - filesystem to use (and so the separator. - - Args: - *paths: a list of paths supported by `fsspec` such as local, s3, gcs, etc. - """ - _paths = [str(path).rstrip("/") for path in paths] - source_path = _paths[0] - fs = get_mapper(source_path).fs - full_path = fs.sep.join(_paths) - return full_path - - -def get_size(file: Union[str, os.PathLike, io.IOBase, fsspec.core.OpenFile]) -> Optional[int]: - """Get the size of a file given its path. Return None if the - size can't be retrieved. - """ - - if isinstance(file, io.IOBase) and hasattr(file, "name"): - fs_local = fsspec.filesystem("file") - file_size = fs_local.size(getattr(file, "name")) - - elif isinstance(file, (str, os.PathLike)): - fs = get_mapper(str(file)).fs - file_size = fs.size(str(file)) - - elif isinstance(file, fsspec.core.OpenFile): - file_size = file.fs.size(file.path) - - else: - file_size = None - - return file_size - - -def copy_file( - source: Union[str, pathlib.Path, io.IOBase, fsspec.core.OpenFile], - destination: Union[str, pathlib.Path, io.IOBase, fsspec.core.OpenFile], - chunk_size: Optional[int] = None, - force: bool = False, - progress: bool = False, - leave_progress: bool = True, -): - """Copy one file to another location across different filesystem (local, S3, GCS, etc). - - Args: - source: path or file-like object to copy from. - destination: path or file-like object to copy to. - chunk_size: the chunk size to use. If progress is enabled the chunk - size is `None`, it is set to 1MB (1024 * 1024). - force: whether to overwrite the destination file if it exists. - progress: whether to display a progress bar. - leave_progress: whether to hide the progress bar once the copy is done. - """ - - if progress and chunk_size is None: - chunk_size = 1024 * 1024 - - if isinstance(source, (str, os.PathLike)): - source_file = fsspec.open(str(source), "rb") - else: - source_file = source - - if isinstance(destination, (str, os.PathLike)): - # adapt the file mode of the destination depending on the source file. - destination_mode = "wb" - if hasattr(source_file, "mode"): - destination_mode = "wb" if "b" in getattr(source_file, "mode") else "w" - elif isinstance(source_file, io.BytesIO): - destination_mode = "wb" - elif isinstance(source_file, io.StringIO): - destination_mode = "w" - - destination_file = fsspec.open(str(destination), destination_mode) - else: - destination_file = destination - - if not is_file(source_file): # type: ignore - raise ValueError(f"The file being copied does not exist or is not a file: {source}") - - if not force and is_file(destination_file): # type: ignore - raise ValueError(f"The destination file to copy already exists: {destination}") - - with source_file as source_stream: - with destination_file as destination_stream: - if chunk_size is None: - # copy without chunks - destination_stream.write(source_stream.read()) # type: ignore - - else: - # copy with chunks - - # determine the size of the source file - source_size = None - if progress: - source_size = get_size(source) - - pbar = None - if progress: - tqdm = _import_tqdm() - - if tqdm is None: - raise ImportError( - "If the progress bar is enabled, you must have `tqdm` " - "installed: `conda install tqdm`." - ) - else: - # init progress bar - pbar = tqdm( - total=source_size, - leave=leave_progress, - disable=not progress, - unit="B", - unit_divisor=1024, - unit_scale=True, - ) - - # start the loop - while True: - data = source_stream.read(chunk_size) # type: ignore - if not data: - break - destination_stream.write(data) # type: ignore - - if pbar is not None: - pbar.update(chunk_size) - - if pbar is not None: - pbar.close() - - -def mkdir(dir_path: Union[str, os.PathLike], exist_ok: bool = False): - """Create a directory. - - Args: - dir_path: The path of the directory to create. - exist_ok: Whether to ignore the error if the directory - already exists. - """ - fs = get_mapper(str(dir_path)).fs - fs.mkdirs(str(dir_path), exist_ok=exist_ok) - - -def glob(path: str, detail: bool = False, **kwargs) -> List[str]: - """Find files by glob-matching. - - Args: - path: A glob-style path. - """ - # Get the list of paths - fs = get_mapper(path).fs - paths = fs.glob(path, detail=detail, **kwargs) - paths = [fsspec.utils._unstrip_protocol(d, fs) for d in paths] - return paths - - -def copy_dir( - source: Union[str, pathlib.Path], - destination: Union[str, pathlib.Path], - force: bool = False, - progress: bool = False, - leave_progress: bool = True, - file_progress: bool = False, - file_leave_progress: bool = False, - chunk_size: Optional[int] = None, -): - """Copy one directory to another location across different filesystem (local, S3, GCS, etc). - - Note that if both FS from source and destination are the same, progress won't be shown. - - Args: - source: Path to the source directory. - destination: Path to the destination directory. - chunk_size: the chunk size to use. If progress is enabled the chunk - size is `None`, it is set to 2048. - force: whether to overwrite the destination directory if it exists. - progress: Whether to display a progress bar. - leave_progress: Whether to hide the progress bar once the copy is done. - file_progress: Whether to display a progress bar for each file. - file_leave_progress: Whether to hide the progress bar once a file copy is done. - chunk_size: See `po.utils.fs.copy_file`. - """ - - source = str(source) - destination = str(destination) - - source_fs = get_mapper(source).fs - destination_fs = get_mapper(destination).fs - - # Sanity check - if not is_dir(source): - raise ValueError(f"The directory being copied does not exist or is not a directory: {source}") - - if not force and is_dir(destination): - raise ValueError(f"The destination folder to copy already exists: {destination}") - - # If both fs are the same then we just rely on the internal `copy` method - # which is much faster. - if destination_fs.__class__ == source_fs.__class__: - source_fs.copy(source, destination, recursive=True) - return - - # Get all input paths with details - # NOTE(hadim): we could have use `.glob(..., detail=True)` here but that API is inconsistent - # between the backends resulting in different object types being returned (dict, list, etc). - detailed_paths = source_fs.find(source, withdirs=True, detail=True) - detailed_paths = list(detailed_paths.values()) - - # Get list of input types - input_types = [d["type"] for d in detailed_paths] - - # Get list of input path + add protocol if needed - input_paths = [d["name"] for d in detailed_paths] - input_paths = [fsspec.utils._unstrip_protocol(p, source_fs) for p in input_paths] - - # Build all the output paths - output_paths: List[str] = fsspec.utils.other_paths(input_paths, destination) # type: ignore - - def _copy_source_to_destination(input_path, input_type, output_path): - # A directory - if input_type == "directory": - destination_fs.mkdir(output_path) - - # A file - else: - copy_file( - input_path, - output_path, - force=force, - progress=file_progress, - leave_progress=file_leave_progress, - chunk_size=chunk_size, - ) - - # Copy source files/directories to destination in parallel - parallelized( - _copy_source_to_destination, - inputs_list=list(zip(input_paths, input_types, output_paths)), - arg_type="args", - progress=progress, - tqdm_kwargs=dict(leave=leave_progress), - scheduler="threads", - ) - - -def hash_file(file: Union[str, os.PathLike, io.BytesIO, io.IOBase], chunk_size: int = 4096): - """Return the md5 hash of a file.""" - - md5 = hashlib.md5() - - if isinstance(file, (io.BytesIO, io.TextIOBase, io.BufferedIOBase)): - sentinel = b"" - if isinstance(file, io.TextIOBase): - sentinel = "" - - for block in iter(lambda: file.read(chunk_size), sentinel): - if isinstance(block, str): - block = block.encode() - - md5.update(block) - file.seek(0) - - elif is_file(file): - with fsspec.open(file, "rb") as f: - for block in iter(lambda: f.read(chunk_size), b""): - md5.update(block) - - return md5.hexdigest() diff --git a/polaris/utils/io.py b/polaris/utils/io.py deleted file mode 100644 index 56d49a62..00000000 --- a/polaris/utils/io.py +++ /dev/null @@ -1,145 +0,0 @@ -import os.path -import uuid -from typing import Optional - -import filelock -import fsspec -from loguru import logger -from tenacity import Retrying -from tenacity.stop import stop_after_attempt -from tenacity.wait import wait_fixed - -from polaris.utils import fs -from polaris.utils.constants import DEFAULT_CACHE_DIR -from polaris.utils.errors import PolarisChecksumError - - -def create_filelock(lock_name: str, cache_dir_path: str = DEFAULT_CACHE_DIR): - """Create an empty lock file into `cache_dir_path/locks/lock_name`""" - lock_path = fs.join(cache_dir_path, "_lock_files", lock_name) - with fsspec.open(lock_path, "w", auto_mkdir=True): - pass - return filelock.FileLock(lock_path) - - -def robust_copy( - source_path: str, - destination_path: str, - md5sum: Optional[str] = None, - max_retries: int = 5, - wait_after_try: int = 2, - progress: bool = True, - leave_progress: bool = True, - chunk_size: int = 2048, -): - if not fs.is_file(source_path) and get_zarr_root(source_path) is None: - raise ValueError(f"{source_path} is a directory and not part of a .zarr hierarchy!") - - if md5sum is None and fs.is_file(source_path): - # NOTE (cwognum): This effectively means we will not check the checksum of .zarr files. - # The reason being that I'm not sure how to effectively compute a checksum for a .zarr - md5sum = fs.hash_file(source_path) - - artifact_cache_lock = create_filelock(f"artifact_version_{md5sum or uuid.uuid4()}.lock") - - def log_failure(retry_state): - logger.warning( - f"""Downloading the artifact from {source_path} to {destination_path} failed. """ - f"""Retrying attempt {retry_state.attempt_number}/{max_retries} """ - f"""after a sleeping period of {wait_after_try} seconds.""" - ) - - # This context manager will lock any process that try to download the same file. Only one process - # will be able to download the artifact and all the other ones will be waiting at that line. - # Once the lock is released the other processes will call `download_with_checksum` but the download will - # not happen since the artifact file will already exist and its checksum will be correct. - with artifact_cache_lock: - # This loop will retry downloading the artifact for multiple attempts. Downloading an artifact - # might fail for multiple reasons such as disk IO failures or network failures. The checksum logic - # and the retry mechanism together allow to be resilient in case of intermitent failures. - for attempt in Retrying( - reraise=True, - stop=stop_after_attempt(max_retries), - after=log_failure, - wait=wait_fixed(wait_after_try), - ): - with attempt: - # The checksum logic will only validate an artifact download if its checksum matches - # the excepted one. If not then it will be deleted and the download will happen again - # until it succeeds (or until the number of attemps have been reached). - download_with_checksum( - source_path=source_path, - destination_path=destination_path, - md5sum=md5sum, - progress=progress, - leave_progress=leave_progress, - chunk_size=chunk_size, - ) - - return destination_path - - -def download_with_checksum( - source_path: str, - destination_path: str, - md5sum: Optional[str], - progress: bool = False, - leave_progress: bool = True, - chunk_size: int = 2048, -): - """Download an artifact from the bucket to a cache path while checking for its md5sum given a true md5sum. - - Args: - source_path: The path to the artifact in the bucket. - destination_path: The path of the artifact in the local cache. - md5sum: The true md5sum to check against. If None, no checksum is performed but a warning is logged. - progress: whether to display a progress bar. - leave_progress: whether to hide the progress bar once the copy is done. - chunk_size: the chunk size for the download. - """ - - # Download the artifact if not already in the cache. - if not fs.exists(destination_path): - if fs.is_dir(source_path): - fs.copy_dir( - source_path, - destination_path, - progress=progress, - leave_progress=leave_progress, - chunk_size=chunk_size, - ) - - else: - fs.copy_file( - source_path, - destination_path, - progress=progress, - leave_progress=leave_progress, - chunk_size=chunk_size, - ) - - # Check the cached artifact has the correct md5sum - if md5sum is not None: - cache_md5sum = fs.hash_file(destination_path) - if cache_md5sum != md5sum: - file_system = fs.get_mapper(destination_path).fs - file_system.delete(destination_path) - - raise PolarisChecksumError( - f"""The destination artifact at {destination_path} has a different md5sum ({cache_md5sum})""" - f"""than the expected artifact md5sum ({md5sum}). The destination artifact has been deleted. """ - ) - - -def get_zarr_root(path): - """ - Recursive function to find the root of a .zarr file. - Finds the highest level directory that has the .zarr extension. - """ - if os.path.dirname(path) == path: - # We reached the root of the filesystem - return - root = get_zarr_root(os.path.dirname(path)) - if root is None and fs.get_extension(path) == "zarr": - root = path - return root diff --git a/tests/conftest.py b/tests/conftest.py index f4206aac..8874e473 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,13 +2,13 @@ import numpy as np import pytest import zarr +from datamol.utils import fs from polaris.benchmark import ( MultiTaskBenchmarkSpecification, SingleTaskBenchmarkSpecification, ) from polaris.dataset import ColumnAnnotation, Dataset -from polaris.utils import fs from polaris.utils.types import HubOwner, License diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 83079e34..b6e11700 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -2,11 +2,11 @@ import pandas as pd import pytest import zarr +from datamol.utils import fs from pydantic import ValidationError from polaris.dataset import Dataset, create_dataset_from_file from polaris.loader import load_dataset -from polaris.utils import fs from polaris.utils.errors import PolarisChecksumError From 73385d44480e8c6b70beb71a238b66961fc0d427 Mon Sep 17 00:00:00 2001 From: cwognum Date: Tue, 26 Mar 2024 13:17:20 -0400 Subject: [PATCH 5/6] renamed zarr_archive to zarr_root_path --- docs/tutorials/dataset_factory.ipynb | 14 +++++++------- docs/tutorials/dataset_zarr.ipynb | 12 ++++++------ polaris/dataset/_dataset.py | 20 ++++++++++---------- polaris/dataset/_factory.py | 2 +- polaris/hub/client.py | 5 ++--- tests/test_dataset.py | 4 ++-- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/docs/tutorials/dataset_factory.ipynb b/docs/tutorials/dataset_factory.ipynb index 931999a1..cc5fef18 100644 --- a/docs/tutorials/dataset_factory.ipynb +++ b/docs/tutorials/dataset_factory.ipynb @@ -82,7 +82,7 @@ "my_propertymy_value" ], "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -197,7 +197,7 @@ "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAZI0lEQVR4nO3de1hU1d4H8O8w3DHlYggaat4V0YRTjyhlgiAaCCKopGLlm+Xr8fR27Bw1FDnkBctL9qqlnkxMjEAkDK947PCGICcvKaAVISgoDigoSTAwM/v9Y0gDxYTZe7bK9/OXrplZ6zfPM8+Xvfdae22FIAggIqK2MpG7ACKiRxtjlIjIIIxRIiKDMEaJiAzCGCUiMghjlIjIIKZyF0BkdA0NOHwY589DEDBwIPz8YG4ud030CFNw3Si1LwUFCAhARQX+9CcAOHECDg7YuxcDB8pdGT2qGKPUnmg0GDoUdnbYvx8dOwJAdTXGj0d5OfLzYWYmd330SOK1UWpPDh7EuXNYt64xQwF07Ij161FQgLQ0WSujRxhjlNqT7Gx06oRnn23S6OEBBwdkZ8tUEz3yGKPUnlRWwtn5Hu3duuH6daNXQ48Jxii1J9bWqKy8R/v16+jQwejV0GOCMUrtiZsbKipQVtakUaVCWRnc3GSqiR55jFFqT4KC0KEDli9v0rhiBWxsEBIiU030yOPye2pP7OywdSumT4dKBV9fKBRIT0dKCnbsgL293MXRo4rrRql9+PZbqNUYMwYATp7Exo3Iy4MgYPBgzJ3buBSfqE0Yo9QOCAI8PHD6NBISMGWK3NXQ44bXRqkdSEnB6dNwdsaECQDQ0IDMTLlroscHY5Qed4KAmBgAWLwYVlYAEBeH55/HnDny1kWPDcYoyUMQhNraWmOMtHs3zpyBiwtmzQKAhgasWAEAL75ojNGpHWCMkgy+/vrrXr16RUVFST6STodlywAgMhIWFgDw6acoKoKrK8LCJB+d2gfGKMnAwcGhuLj4yy+/1Ol00o6UmIizZ9G9O159FQDq67FqFQBER8OEP34SB39JJANPT8+ePXuWlJRkZWVJOIxW23hVdMmSxo2Zt25FcTEGD+ZiexIRY5RkoFAoJk+eDCAhIUHCYRIScP48evZERAQAqNWIjQWAmBgeipKI+GMieUydOhVAYmKiRqORZACttvGq6NKljYeimzejtBTDhiE4WJIRqb1ijJI8hg0bNnDgwIqKim+++UaSAeLj8cMP6NMH06cDQF0d3n8fAKKjoVBIMiK1V4xRko105/UajUal73bJEpiaAsDHH+PyZbi7IzBQ9OGonWOMkmzCw8MB7NmzR61Wi9vzzp07nQ4cWB0QgGnTAKCuDqtXA0BMDA9FSXSMUZJN//79n3nmGVtbx3//+6KI3TY0NLz33nsAuoaHQ6kEUPPPf+LKFTz3HF56ScSBiPQYoySnV15JLy7+MS6un4h9xsXFXbhwoV+/flOmTAFQU1PTZ9my5c89V6ufcSISG2OU5DRxYmeFAqmpqKkRp8OGhoYVK1YAiImJUSqVADZs2HBVpdqnVFr5+oozBlFTjFGSU/fu8PTEr7+K9njjbdu2FRUVubq6hoWFAaipqVm7di2AGP06fCIJMEZJZvr9P0WZrq+vr4+NjQUQHR1tYmICYP369eXl5SNHjhyj37CZSALctplkdvUqnnoKpqa4ehW2tgZ1tWnTprlz5w4ePPjMmTMmJia3bt3q1atXRUXF0aNHR48eLVK9RM3xaJRk5uSEF1+EWo2vvjKoH7VavXLlSgAxMTH6Q9F169ZVVFR4eXkxQ0lSjFGSnyjn9Vu2bCktLR02bFhwcDCAmzdvfvjhhwCWcYKeJMYYJfmFhsLcHP/6F8rL29hDXV3dqlWrAPTr1y8iIuLq1avr1q2rrKz08fEZNWqUmLUS3YUPWCb52dlh+nTY2KANu5TU1tYeO3Zs9erVly9fBvDll1+amZmVlpaeOXMGgDF2hqZ2j1NMJL/SUmRn47nn0KNHY4tOh+RkjBiBbt3u/ZHCwuv79sXv27cvIyPj9r2knTt39vX1TUhIUCqVGo3Gz8/v0KFDRvkG1K7xaJTkl5ODyZPx3HPIzm7cCFSjweTJSElpEqNaLbKzkZaGI0dQXl5dUvIWABMTEw8PjzFjxgQEBIwcOVKhUFy8eFG/G/SSJUvk+T7UzjBG6aFgYoKiIvzzn5g9u/lLV69i/37s34/0dFRXNzba2T39+uv/8/zz7uPGjevcufPv3x8REZGVldW9e3cvLy+j1E7tHU/qSX7JyZg6FWvXIjoa58/D0RH19bCwQEoKdu3C7t24/SN1c8P48Rg/HiNGNG6Ad7eqqipnZ2eNRlNaWurk5GS0b0HtFmfq6WExZw6cnfHOO00au3WDpSXGjMGHH6K4GGfPIjYWL7zQYoYCsLOz8/f312q1iYmJUtdMBMYoPTxMTfHxx9i5E//6153GxYtx/TrS0/HWW3cmoP7QtGnTAMTHx0tQJlFzjFF6iDz/PF5+GfPmoaGhscXBAVZWre4nMDCwU6dOP/xQVlBwXdwKie7GGKWHy+rVuHIFmzcb1ImlpeWbb56tr78UH+8gUl1ELWKM0sPFyQkxMTD8Bs4xY7rX1WHnTnAOlaTGGKWHzty56NnT0E68vdGtGwoL8d13IpREdB+MUZKfoyN8fO78V6nEpk0YMwaOjm3v08SkcccTzjOR1LhulOS3YQMmTYKzs8jdnjoFDw84OuLy5fstkCIyEI9GSWYZGZg3Dx4ed2bnxeLuDldXlJc3WUFliLKysvT0dHH6oscIY5RktmgRAMydCzMz8TufOhUw+Ly+qKho/fr1vr6+3bt3DwkJqaurE6U2emzwVIfklJKC7Gw4OuIvf5Gk/5dfxvLlUCha/UFBEE6cOJGSkpKamnru3Dl9o5WVlY+PT2VlZdeuXUUulB5ljFGSjVYL/R5MUVF44glJhnByQkFBk22iGhpw6xZsbe+drQ0NyMjAv/+9Pi5udWlpqb7R3t4+ICAgKCjI39/f2tpakkLpUcYYJdns2IH8fDz9NF5/Xaoh3n4bW7bg888xfXpjy8GDmDABv/7a5Oao2locOYK0NHz1FcrLMWqUtrS01MXFZdy4cQEBAWPHjjU3N5eqRHr0MUZJHvX1eO89AFi2DJJmlKUl5s/HSy/Bzq75S+Xl2LsXqak4cgS3L3i6uWHs2Olr1oxyd3dXtOFyALU/jFGSx8aNKCqCm1vjLJB0XnwRV65g4cImN5jeuAE/P2RlQacDAKUSXl4IDkZwMHr3BuAIGLBmldoZxijJ4NYtxMYCQGxs43b30lEqsW4dfH0xcyZGjGhstLXFlSswN4eXFwICMHmy+KtWqf1gjJIMPvgA5eXw8sL48cYYztsboaGYMwcnT95pTElBr17o0MEYBdDjjetGydgqKrBuHYDGA1LjWLsWRUX43/+90zJkCDOUxMEYJWPbtGnvk0/WBwZi5EjjDdqtG5YuRUwMrl0z3qDUTvCeejKq4uLiAQMGCILy9OkLgwZ1kXq4N97A5ctISwMAjQbDhkGtRkFB8wVPRIbg0SgZVVRUlFqtDg8PM0KGNqN/SMnPPxt5WHr8MUbJePLy8uLj483NzaOiomQpwMsLM2bIMjI9zhijZDzvvvuuTqebM2dOr169jDCcIOC11xrP6G+Li4Mg8IyexMQYJSPJyclJS0vr0KHDIv2eTtJLSsLw4Zg1yzijUfvFGCUjWbhwoSAI8+fP79LFGFdFtVpERwOAp6cRRqN2jTP1ZAzx8fHTp0/v3LlzYWFhx44djTDitm2YNQt9++LcOW59T9Li74skdOnSpZSUlKSkpGPHjgGIjIw0TobW1zc+W/Qf/2CGkuR4NEri+/HHH/fs2ZOcnHzyd3dfOjo6Xrp0ycLCwggFfPQR3noLbm74/nvJ79kn4l9qEk9u7t7DhyO3b8/Ly9M3dOzY0d/fPyMjQ6VSrVq1yjgZWlODlSsBYPlyZigZA39lZLD8fERHY9AgDBnSLzU1Ly/Pzs5uxowZiYmJV65c8fPzq6ysfPLJJ2cYa8XmRx/h6lU8+ywCAowzILV3PBqlNhEEHD+O5GQkJ6O4uLHR0bGHh8fhJUtGjx5t+tslyRs3bjQ0NNTX1yuVSiPUdfMmVq8GgNjYtjyCiagNGKN0X9XV+OILnD0LrRb9+mHqVHTtiuJieHnh8uXG93TrhpAQTJoELy8rpdL3d5++efOmvb29mZnZzZs3z58/P3DgQKnr/eADVFbCxwfe3lIPRdSIU0zUstxcjB0LS0v4+MDcHJmZKCxEYiL8/eHiAlNTBAcjLAwjRjS7BllVVfX1118nJSWlp6er1Wp947JlyyIjIyWt99o19OqFX35BVhaXi5LxMEapBVotBg9Gly44dAj6qSH9zZUpKSgogFqNp55q/hGVCl99VX/wYKf9++vq6wEolcpRo0b179//448/HjZs2KlTpyQteenSwpiY3hMmIDVV0nGImmCMUguOHIGvb/PjOpUKPXrg/febPFe+ogIHDiApCQcPQqMB8OdnnvnexiYsLGzKlClOTk5qtdrR0bG6uvrnn3/u3bu3RPWWlJT069fP1fWFzz5Lc3Mzk2gUorvx2ii14ORJmJri2WebNHbpgt69G5/FUVTUOMWUkwP9H2NLS4wfj0mTPgoMNPndczgtLCxeeumlL774IiUl5Z133pGo3piYmLq6uv79OzNDyci44IlaUF0Ne/t73APUpQtu3gSA5cvxt7/h+HFYWiIgAHFxUKmQmoqICJO7nmUcEhICIDk5WaJiCwoKtm/frlQq5dqCj9ozHo1SCzp1wvXr0GiaJ6lKhe7dASA8HLW1mDQJ/v6wtr5/Z+PHj7exscnJySkpKXFxcRG92KioKI1G8/rrr/fv31/0zonuj0ej1AJ3d2i1yM9v0lhejsJCeHgAgI8P4uMREvKHGQrA2traz89PEIRUCWZ/cnNzExMTLS0tlyxZInrnRH+IMUotGD0affvi3Xf1s0aNoqNhYYHw8Db0N2nSJEhzXh8ZGanT6d58800pjnOJ/hBn6qllJ09i7Fg4O2PcOJibIyMDJ04gIQFBQW3o7JdffnF0dGxoaLhy5Yqjo6NYNf7nP/8ZPny4tbV1YWGhcXYyJWqGR6PUMg8P/PADZszA5csoKIC3N86da1uGAnjiiSe8vb21Wq245/WLFi0SBOHtt99mhpJceDRKxrNt27ZZs2aNHTv24MGDonR45MgRX19fW1vbCxcu2N21PIDIOHg0SsYTFBRkamp69OjRyspKUTrUzyktWLCAGUoyYoyS8Tg4OIwaNaqhoSGt2eM6W+/06dMRERHHjx93cnKaN2+eKOURtQ1jlIzKwPn6/Pz86OjoAQMGuLu7f/755507dw4MDLSxsRG1RqLW4fJ7MqqQkJB58+YdOnSourr6AZ/LJAhCTk7O7t27k5OTi3/b29TZ2bl3796ZmZnWD7BqlUhSPBolo+rSpYunp6darT5w4MD936nT6U6ePBkdHd23b19PT881a9YUFxc/9dRTs2fP3rt376VLl958800AKpXKKIUTtYhHo2RsISEhmZmZycnJU6ZMuftVnU6XlZWVlJSUnJx8+bedoV1cXCZOnBgWFjZixAiT3/Y21a9wKi8vN1rlRPfEBU9kbCUlJT169LCysqqoqLh9Sq7VarOzs5OSkpKSksrKyvSNPXr0CAoKCgsLGzlypOKuR4Lk5uYOGTLE1dX19hP0iGTBo1EyNhcXFw8PjxMnThw+fDgwMFCfnomJiVevXtW/oWfPnhMmTLhnel67dq1z5876f+uPRnlST7Lj0SjJIDY2dtGiRX369KmsrLy9hnTAgAGhoaGhoaFDhw5t9v6SkpI9e/YkJSXl5ORcvHixa9euAHQ6nYWFhVarVavVZmbcY5Rkw6NRksELL7zg7OxcWFgoCMKgQYMCAwMDAgK8vLyave3ChQvJycm7d+/+7rvv9H/vra2tz549q49RExMTBwcHlUp17do1Z2dnGb4GEQDGKMkiOzu7rKzM3d09Pj5+wIABzV4tLi5OTU1NSkrKysrSp6eVlZWPj09YWNjEiROfeOKJ2+/s0qWLSqVSqVSMUZIRY5RksGPHDgBRUVG/z9ALFy7onyd67NgxfYu1tbW3t3dYWFhISEiHDh3u7ke/UxQvj5K8GKNkbKdPnz579qyDg8O4ceP0LcnJyUuXLs3/bYtoOzu7wMDA0NBQPz8/C/1DSVswZMgKlSq6qmqw5EUTtYwxSsYWFxcHYNq0aebm5voWhUKRn59vZ2cXEBAQFhY2duzY2y/9kWdzc1FaKlmtRA+AMUpGpdFoEhISAMycOfN2o7+//6FDh7y9vU3vfoLefem3GOU5PcmLMUpGtW/fPpVK5erq6u7ufrtR/6SmNvSm30Sf9zGRvHhPPRmV/oz+lVdeEaU3Ho3Sw4AxSsZTWVm5f/9+U1PTadOmidIhY5QeBoxRMp5du3ap1Wo/Pz+xlnnqT+oZoyQvxigZj/6M/veTSwZydIRCgYoK6HRidUnUarynnozk3Llzrq6unTp1Kisrs7KyEqtbe3tUVeHaNTg4iNUlUetwpp6MZNeuviNHFo0alS1ihgLo0gVVVVCpGKMkGx6NkjHodOjRA6WlyMqCp6eYPR89CqUSf/oT+EAmkguPRskY0tNRWoq+fTF8uDgd1tUhIgKurli69E7jP/6Bbt3wX/8lzhBED4hTTGQMcXEAMHMm7trDvo00GiQlIToaR47caczIwKlT4vRP9OAYoyS56mqkpkKhwMsvi9yzpyfmzoVaLXK3RK3CGCXJJSbi118xejSeflrknv/2N1RXIzZW5G6JWoUxSpK7fUYvuo4dsXIlYmNRUCB+50QPiDFK0ioqwrFjsLFBSIgk/c+cCQ8P/Pd/S9I50YNgjJK0tm+HICA0FPfavb4Vbt68d7tCgU8+QUYG9uwxqH+iNmOMkoQEATt3Agaf0efno29fbN5871cHD8af/4y//x319QaNQtQ2jFGS0P/9Hy5cQI8eGDWq7Z0UF8PPDxUVOHwYLd0sEh2N2lpkZ7d9FKI2Y4yShFJTAWDGDJi09Yd27Rr8/XHlCl58EfHxLS477dgRa9ZwgxKSB+9iIgmtXo2AAPTt28aP//IL/P3x448YMgQpKbC0BIC0NOzejU2bsHkzfv9s5qlT0dAg/poqoj/Ee+pJTIGBUKuxd29j5AF4/30UFGDr1lZ3VV+PwEAcPozevZGZCScnAMjMhJ8famuxfbskK6iI2oAn9SSm3Fykp2PlyjstJSX46adW96PTYfp0HD6Mrl2Rnt6YoXl5mDABtbWYPZsZSg8RxiiJbNw4xMbi/HmDOlm6tCApCba2OHCg8Ty9qAh+fqiqQlAQNm4UpVIicTBGSWT+/vDxwZw5Lc6q/6HFixcvXz5gzJj/fP01hgwBgIoKjBuHsjKMHo2EBLTyMcxE0mKMkvjWrkV2duOK0dbatGnT8uXLTUwUc+aUenkBQHV140TT0KHYs+fOVVeihwRjlMQ3YADmz8c776CqqnUf3LVr17x58xQKxZYtW0JCQgDU1yM0FKdOoU8fHDoEW1tJCiYyBGOUJLF4MWxsEBPTio8cOXLk1Vdf1el0H3zwwWuvvQZAq8W0aUhPb5xo0j9OmehhwxglcVRWNvmvtTXWrsXGjXem6e+/Nj4nJyc4OLi+vn7hwoXz588HIAhCVNT+r76CnR0OHULPntLUTWQwxigZqrYWCxdi4EBcvdqkPTgYfn44fBgAyssxaBASEu7dQ35+/vjx42tqaiIiIlasWKFvjIyMXLHipeHDV6WlYfBgSb8BkUEYo2SQzEy4uWHVKty4gays5q9u3AhrawDYtAk//ojwcMyejZqa5m9LTk6urKwMDg7etm2bQqEAsGHDhpUrV5qZmS1a5DZihBG+B5EBBKI2qa0VFiwQlEoBEAYPFk6dEgRB+PZb4dKlJm87fVr47jtBpxM2bxasrQVAePpp4dix5r3t2LGjtrZW/++dO3eamJgoFIrPPvtM+u9BZCjGKLXFmTPC0KECIJiaCgsWCHV1D/Sp3FxhyBABEMzMhDVrqjQazd3vSUtLMzMzA7BmzRqRiyaSBmOUWqehQYiNFczNBUDo3Vv49tvWfbyuTliwQDA11Q4b5uPp6VlYWPj7V48fP25jYwMgMjJSzKKJpMQYpVbIyxM8PARAUCiE2bOFW7fa2M8331xwdnYGYGtr+8UXX+gbc3Nz7e3tAcycOVOn04lWNJHEGKP0QDQaTWxs7JAhtwChVy8hI8PQDisqKoKDg/UX6MPCwnJzc11cXABMmDChoaFBjJKJjIQb5dEf++mnn1555ZXs7Oy+fYPHjNnz/vsKAx+sdNuWLVv++te/1tTUKJVKrVY7evTo/fv3W/J+T3qkMEbpfgRB2Lp1qz7pnJyctm7dGhAQIO4QRUVF4eHh+fn5Dg4O33//vS3v96RHDWOUWnTx4sXXXnvt6NGjAMLCwj755BP9tUvR3bp16+LFi3369LGwsJCifyJJMUbp3pKSkt54442qqipHR8dPPvlk4sSJcldE9JDiXUzUnEqlCgoKmjx5clVVVWhoaF5eHjOU6D64/y01kZmZGRQUVFlZaW9vv2HDhvDwcLkrInrY8aSemrhx44abm5urq+unn37arVs3ucshegQwRqm5kpIS/RJOInoQjFEiIoNwiomIyCCMUSIigzBGiYgMwhglIjIIY5SIyCD/D9+G3rhq5bBLAAABTnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAA+KL+BkY0hAyTAyMzOoAFiMEMFmBkRAmCaBZ3mgNBMaBoZmQkq4GZgZGBkYmBi5mBiZmFgYeVgYmVjYGPnYGLjYODgZODgYuDi5mDi4mHg4WVgZWTgYWEQYQJqZGUEKmdlY+Pg4mFhFd8EMgqKGfiWv+A4EMzqfeAh9+T9qasm7JdQkz+wae76fb+tPuxjNbE9sOuWlf2P4MN2sicZDxgfmWl/TnKiXfyMnP0T627b/arT2h/tNG//60ds+3u8qvY36/fse1i1Z/+O9a/3u/zi3a/3X/RA1r2N+5oDM+2ntG8Fmm+w//3Jz/Y6V6QOeL8SsZcsnm5vzfh2n334ZPt9B4Udls1+su+DWIZ93K5OuwWdH+yuhX2xf28hat9UvM9eDACM1GEAYrR3BQAAAaF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9U0tOxDAM3fcUvgCR7TgfL5kZhBCiI8HAHdhzf2EnHZJsaOsqcZ+d52d3A7/eL6/fP/B3xcu2AeA/j6rCV0TE7Q18Aaen55cdzrfH091zvn7utw8gAUoWY/eKfbxd3+4egjM8cKCojAgPMVDhtqJAUitOwQx7c0dsAA5Sc/EVhijOaECjZ8UgWQ+ophwdEAqrzkixpBZfKXUgcvHvFBgpzsBkKTFwjdKPVBZuGYVrmYG5n51zxl4Hx3zEJM7L4aVDtZSjDi6x9iApnGZohau7qzRWnitJ7pSt+IWAuk6uSZFGUInkSEqVZyThwVWRWtGK96RCukKp1Z8qlYakGrkTkcxLUcRG1QCJPKfRw3pUJ5KXmig28RVdKhfX1WmSZl06b5N0buxqO8lYcG+S1LKc/bRflunq83a67pcxb37zmCnbQBxzYxuQMRxklsYIsFkejSazMprJZnU0jGyroynkNkvfHDQJzP4inoSk5omTYNReMinjJEZM6og0KzLX7/v7v2rr7RdbH7+0RVgL8gAAAOF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJwlT0uuxDAIu8pbtlIaBUP4qOoq+86F5vAPMiuC7dhmvbSed+FZtI7nc77HOn8PrL/vcaETB9rFnQw5qZO43zV55I4urtau0Vn4ziEaG46p3EY3RNxJOs1CBywadQziROEs9TUgSK3ArSxUtYLAutkJjYLDbAfB2IsXw6wiLsRbN0UrI4tsGxaTdA0i2XJybPcYlBVi/NRCgawynSxRckbbVyDuXCdRS8fhO1lEZ2pjZMHsodH2vX6Xndc2HXWfuMX5/QcEX0W59Lht5AAAAABJRU5ErkJggg==", "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -240,7 +240,7 @@ "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAZI0lEQVR4nO3de1hU1d4H8O8w3DHlYggaat4V0YRTjyhlgiAaCCKopGLlm+Xr8fR27Bw1FDnkBctL9qqlnkxMjEAkDK947PCGICcvKaAVISgoDigoSTAwM/v9Y0gDxYTZe7bK9/OXrplZ6zfPM8+Xvfdae22FIAggIqK2MpG7ACKiRxtjlIjIIIxRIiKDMEaJiAzCGCUiMghjlIjIIKZyF0BkdA0NOHwY589DEDBwIPz8YG4ud030CFNw3Si1LwUFCAhARQX+9CcAOHECDg7YuxcDB8pdGT2qGKPUnmg0GDoUdnbYvx8dOwJAdTXGj0d5OfLzYWYmd330SOK1UWpPDh7EuXNYt64xQwF07Ij161FQgLQ0WSujRxhjlNqT7Gx06oRnn23S6OEBBwdkZ8tUEz3yGKPUnlRWwtn5Hu3duuH6daNXQ48Jxii1J9bWqKy8R/v16+jQwejV0GOCMUrtiZsbKipQVtakUaVCWRnc3GSqiR55jFFqT4KC0KEDli9v0rhiBWxsEBIiU030yOPye2pP7OywdSumT4dKBV9fKBRIT0dKCnbsgL293MXRo4rrRql9+PZbqNUYMwYATp7Exo3Iy4MgYPBgzJ3buBSfqE0Yo9QOCAI8PHD6NBISMGWK3NXQ44bXRqkdSEnB6dNwdsaECQDQ0IDMTLlroscHY5Qed4KAmBgAWLwYVlYAEBeH55/HnDny1kWPDcYoyUMQhNraWmOMtHs3zpyBiwtmzQKAhgasWAEAL75ojNGpHWCMkgy+/vrrXr16RUVFST6STodlywAgMhIWFgDw6acoKoKrK8LCJB+d2gfGKMnAwcGhuLj4yy+/1Ol00o6UmIizZ9G9O159FQDq67FqFQBER8OEP34SB39JJANPT8+ePXuWlJRkZWVJOIxW23hVdMmSxo2Zt25FcTEGD+ZiexIRY5RkoFAoJk+eDCAhIUHCYRIScP48evZERAQAqNWIjQWAmBgeipKI+GMieUydOhVAYmKiRqORZACttvGq6NKljYeimzejtBTDhiE4WJIRqb1ijJI8hg0bNnDgwIqKim+++UaSAeLj8cMP6NMH06cDQF0d3n8fAKKjoVBIMiK1V4xRko105/UajUal73bJEpiaAsDHH+PyZbi7IzBQ9OGonWOMkmzCw8MB7NmzR61Wi9vzzp07nQ4cWB0QgGnTAKCuDqtXA0BMDA9FSXSMUZJN//79n3nmGVtbx3//+6KI3TY0NLz33nsAuoaHQ6kEUPPPf+LKFTz3HF56ScSBiPQYoySnV15JLy7+MS6un4h9xsXFXbhwoV+/flOmTAFQU1PTZ9my5c89V6ufcSISG2OU5DRxYmeFAqmpqKkRp8OGhoYVK1YAiImJUSqVADZs2HBVpdqnVFr5+oozBlFTjFGSU/fu8PTEr7+K9njjbdu2FRUVubq6hoWFAaipqVm7di2AGP06fCIJMEZJZvr9P0WZrq+vr4+NjQUQHR1tYmICYP369eXl5SNHjhyj37CZSALctplkdvUqnnoKpqa4ehW2tgZ1tWnTprlz5w4ePPjMmTMmJia3bt3q1atXRUXF0aNHR48eLVK9RM3xaJRk5uSEF1+EWo2vvjKoH7VavXLlSgAxMTH6Q9F169ZVVFR4eXkxQ0lSjFGSnyjn9Vu2bCktLR02bFhwcDCAmzdvfvjhhwCWcYKeJMYYJfmFhsLcHP/6F8rL29hDXV3dqlWrAPTr1y8iIuLq1avr1q2rrKz08fEZNWqUmLUS3YUPWCb52dlh+nTY2KANu5TU1tYeO3Zs9erVly9fBvDll1+amZmVlpaeOXMGgDF2hqZ2j1NMJL/SUmRn47nn0KNHY4tOh+RkjBiBbt3u/ZHCwuv79sXv27cvIyPj9r2knTt39vX1TUhIUCqVGo3Gz8/v0KFDRvkG1K7xaJTkl5ODyZPx3HPIzm7cCFSjweTJSElpEqNaLbKzkZaGI0dQXl5dUvIWABMTEw8PjzFjxgQEBIwcOVKhUFy8eFG/G/SSJUvk+T7UzjBG6aFgYoKiIvzzn5g9u/lLV69i/37s34/0dFRXNzba2T39+uv/8/zz7uPGjevcufPv3x8REZGVldW9e3cvLy+j1E7tHU/qSX7JyZg6FWvXIjoa58/D0RH19bCwQEoKdu3C7t24/SN1c8P48Rg/HiNGNG6Ad7eqqipnZ2eNRlNaWurk5GS0b0HtFmfq6WExZw6cnfHOO00au3WDpSXGjMGHH6K4GGfPIjYWL7zQYoYCsLOz8/f312q1iYmJUtdMBMYoPTxMTfHxx9i5E//6153GxYtx/TrS0/HWW3cmoP7QtGnTAMTHx0tQJlFzjFF6iDz/PF5+GfPmoaGhscXBAVZWre4nMDCwU6dOP/xQVlBwXdwKie7GGKWHy+rVuHIFmzcb1ImlpeWbb56tr78UH+8gUl1ELWKM0sPFyQkxMTD8Bs4xY7rX1WHnTnAOlaTGGKWHzty56NnT0E68vdGtGwoL8d13IpREdB+MUZKfoyN8fO78V6nEpk0YMwaOjm3v08SkcccTzjOR1LhulOS3YQMmTYKzs8jdnjoFDw84OuLy5fstkCIyEI9GSWYZGZg3Dx4ed2bnxeLuDldXlJc3WUFliLKysvT0dHH6oscIY5RktmgRAMydCzMz8TufOhUw+Ly+qKho/fr1vr6+3bt3DwkJqaurE6U2emzwVIfklJKC7Gw4OuIvf5Gk/5dfxvLlUCha/UFBEE6cOJGSkpKamnru3Dl9o5WVlY+PT2VlZdeuXUUulB5ljFGSjVYL/R5MUVF44glJhnByQkFBk22iGhpw6xZsbe+drQ0NyMjAv/+9Pi5udWlpqb7R3t4+ICAgKCjI39/f2tpakkLpUcYYJdns2IH8fDz9NF5/Xaoh3n4bW7bg888xfXpjy8GDmDABv/7a5Oao2locOYK0NHz1FcrLMWqUtrS01MXFZdy4cQEBAWPHjjU3N5eqRHr0MUZJHvX1eO89AFi2DJJmlKUl5s/HSy/Bzq75S+Xl2LsXqak4cgS3L3i6uWHs2Olr1oxyd3dXtOFyALU/jFGSx8aNKCqCm1vjLJB0XnwRV65g4cImN5jeuAE/P2RlQacDAKUSXl4IDkZwMHr3BuAIGLBmldoZxijJ4NYtxMYCQGxs43b30lEqsW4dfH0xcyZGjGhstLXFlSswN4eXFwICMHmy+KtWqf1gjJIMPvgA5eXw8sL48cYYztsboaGYMwcnT95pTElBr17o0MEYBdDjjetGydgqKrBuHYDGA1LjWLsWRUX43/+90zJkCDOUxMEYJWPbtGnvk0/WBwZi5EjjDdqtG5YuRUwMrl0z3qDUTvCeejKq4uLiAQMGCILy9OkLgwZ1kXq4N97A5ctISwMAjQbDhkGtRkFB8wVPRIbg0SgZVVRUlFqtDg8PM0KGNqN/SMnPPxt5WHr8MUbJePLy8uLj483NzaOiomQpwMsLM2bIMjI9zhijZDzvvvuuTqebM2dOr169jDCcIOC11xrP6G+Li4Mg8IyexMQYJSPJyclJS0vr0KHDIv2eTtJLSsLw4Zg1yzijUfvFGCUjWbhwoSAI8+fP79LFGFdFtVpERwOAp6cRRqN2jTP1ZAzx8fHTp0/v3LlzYWFhx44djTDitm2YNQt9++LcOW59T9Li74skdOnSpZSUlKSkpGPHjgGIjIw0TobW1zc+W/Qf/2CGkuR4NEri+/HHH/fs2ZOcnHzyd3dfOjo6Xrp0ycLCwggFfPQR3noLbm74/nvJ79kn4l9qEk9u7t7DhyO3b8/Ly9M3dOzY0d/fPyMjQ6VSrVq1yjgZWlODlSsBYPlyZigZA39lZLD8fERHY9AgDBnSLzU1Ly/Pzs5uxowZiYmJV65c8fPzq6ysfPLJJ2cYa8XmRx/h6lU8+ywCAowzILV3PBqlNhEEHD+O5GQkJ6O4uLHR0bGHh8fhJUtGjx5t+tslyRs3bjQ0NNTX1yuVSiPUdfMmVq8GgNjYtjyCiagNGKN0X9XV+OILnD0LrRb9+mHqVHTtiuJieHnh8uXG93TrhpAQTJoELy8rpdL3d5++efOmvb29mZnZzZs3z58/P3DgQKnr/eADVFbCxwfe3lIPRdSIU0zUstxcjB0LS0v4+MDcHJmZKCxEYiL8/eHiAlNTBAcjLAwjRjS7BllVVfX1118nJSWlp6er1Wp947JlyyIjIyWt99o19OqFX35BVhaXi5LxMEapBVotBg9Gly44dAj6qSH9zZUpKSgogFqNp55q/hGVCl99VX/wYKf9++vq6wEolcpRo0b179//448/HjZs2KlTpyQteenSwpiY3hMmIDVV0nGImmCMUguOHIGvb/PjOpUKPXrg/febPFe+ogIHDiApCQcPQqMB8OdnnvnexiYsLGzKlClOTk5qtdrR0bG6uvrnn3/u3bu3RPWWlJT069fP1fWFzz5Lc3Mzk2gUorvx2ii14ORJmJri2WebNHbpgt69G5/FUVTUOMWUkwP9H2NLS4wfj0mTPgoMNPndczgtLCxeeumlL774IiUl5Z133pGo3piYmLq6uv79OzNDyci44IlaUF0Ne/t73APUpQtu3gSA5cvxt7/h+HFYWiIgAHFxUKmQmoqICJO7nmUcEhICIDk5WaJiCwoKtm/frlQq5dqCj9ozHo1SCzp1wvXr0GiaJ6lKhe7dASA8HLW1mDQJ/v6wtr5/Z+PHj7exscnJySkpKXFxcRG92KioKI1G8/rrr/fv31/0zonuj0ej1AJ3d2i1yM9v0lhejsJCeHgAgI8P4uMREvKHGQrA2traz89PEIRUCWZ/cnNzExMTLS0tlyxZInrnRH+IMUotGD0affvi3Xf1s0aNoqNhYYHw8Db0N2nSJEhzXh8ZGanT6d58800pjnOJ/hBn6qllJ09i7Fg4O2PcOJibIyMDJ04gIQFBQW3o7JdffnF0dGxoaLhy5Yqjo6NYNf7nP/8ZPny4tbV1YWGhcXYyJWqGR6PUMg8P/PADZszA5csoKIC3N86da1uGAnjiiSe8vb21Wq245/WLFi0SBOHtt99mhpJceDRKxrNt27ZZs2aNHTv24MGDonR45MgRX19fW1vbCxcu2N21PIDIOHg0SsYTFBRkamp69OjRyspKUTrUzyktWLCAGUoyYoyS8Tg4OIwaNaqhoSGt2eM6W+/06dMRERHHjx93cnKaN2+eKOURtQ1jlIzKwPn6/Pz86OjoAQMGuLu7f/755507dw4MDLSxsRG1RqLW4fJ7MqqQkJB58+YdOnSourr6AZ/LJAhCTk7O7t27k5OTi3/b29TZ2bl3796ZmZnWD7BqlUhSPBolo+rSpYunp6darT5w4MD936nT6U6ePBkdHd23b19PT881a9YUFxc/9dRTs2fP3rt376VLl958800AKpXKKIUTtYhHo2RsISEhmZmZycnJU6ZMuftVnU6XlZWVlJSUnJx8+bedoV1cXCZOnBgWFjZixAiT3/Y21a9wKi8vN1rlRPfEBU9kbCUlJT169LCysqqoqLh9Sq7VarOzs5OSkpKSksrKyvSNPXr0CAoKCgsLGzlypOKuR4Lk5uYOGTLE1dX19hP0iGTBo1EyNhcXFw8PjxMnThw+fDgwMFCfnomJiVevXtW/oWfPnhMmTLhnel67dq1z5876f+uPRnlST7Lj0SjJIDY2dtGiRX369KmsrLy9hnTAgAGhoaGhoaFDhw5t9v6SkpI9e/YkJSXl5ORcvHixa9euAHQ6nYWFhVarVavVZmbcY5Rkw6NRksELL7zg7OxcWFgoCMKgQYMCAwMDAgK8vLyave3ChQvJycm7d+/+7rvv9H/vra2tz549q49RExMTBwcHlUp17do1Z2dnGb4GEQDGKMkiOzu7rKzM3d09Pj5+wIABzV4tLi5OTU1NSkrKysrSp6eVlZWPj09YWNjEiROfeOKJ2+/s0qWLSqVSqVSMUZIRY5RksGPHDgBRUVG/z9ALFy7onyd67NgxfYu1tbW3t3dYWFhISEiHDh3u7ke/UxQvj5K8GKNkbKdPnz579qyDg8O4ceP0LcnJyUuXLs3/bYtoOzu7wMDA0NBQPz8/C/1DSVswZMgKlSq6qmqw5EUTtYwxSsYWFxcHYNq0aebm5voWhUKRn59vZ2cXEBAQFhY2duzY2y/9kWdzc1FaKlmtRA+AMUpGpdFoEhISAMycOfN2o7+//6FDh7y9vU3vfoLefem3GOU5PcmLMUpGtW/fPpVK5erq6u7ufrtR/6SmNvSm30Sf9zGRvHhPPRmV/oz+lVdeEaU3Ho3Sw4AxSsZTWVm5f/9+U1PTadOmidIhY5QeBoxRMp5du3ap1Wo/Pz+xlnnqT+oZoyQvxigZj/6M/veTSwZydIRCgYoK6HRidUnUarynnozk3Llzrq6unTp1Kisrs7KyEqtbe3tUVeHaNTg4iNUlUetwpp6MZNeuviNHFo0alS1ihgLo0gVVVVCpGKMkGx6NkjHodOjRA6WlyMqCp6eYPR89CqUSf/oT+EAmkguPRskY0tNRWoq+fTF8uDgd1tUhIgKurli69E7jP/6Bbt3wX/8lzhBED4hTTGQMcXEAMHMm7trDvo00GiQlIToaR47caczIwKlT4vRP9OAYoyS56mqkpkKhwMsvi9yzpyfmzoVaLXK3RK3CGCXJJSbi118xejSeflrknv/2N1RXIzZW5G6JWoUxSpK7fUYvuo4dsXIlYmNRUCB+50QPiDFK0ioqwrFjsLFBSIgk/c+cCQ8P/Pd/S9I50YNgjJK0tm+HICA0FPfavb4Vbt68d7tCgU8+QUYG9uwxqH+iNmOMkoQEATt3Agaf0efno29fbN5871cHD8af/4y//x319QaNQtQ2jFGS0P/9Hy5cQI8eGDWq7Z0UF8PPDxUVOHwYLd0sEh2N2lpkZ7d9FKI2Y4yShFJTAWDGDJi09Yd27Rr8/XHlCl58EfHxLS477dgRa9ZwgxKSB+9iIgmtXo2AAPTt28aP//IL/P3x448YMgQpKbC0BIC0NOzejU2bsHkzfv9s5qlT0dAg/poqoj/Ee+pJTIGBUKuxd29j5AF4/30UFGDr1lZ3VV+PwEAcPozevZGZCScnAMjMhJ8famuxfbskK6iI2oAn9SSm3Fykp2PlyjstJSX46adW96PTYfp0HD6Mrl2Rnt6YoXl5mDABtbWYPZsZSg8RxiiJbNw4xMbi/HmDOlm6tCApCba2OHCg8Ty9qAh+fqiqQlAQNm4UpVIicTBGSWT+/vDxwZw5Lc6q/6HFixcvXz5gzJj/fP01hgwBgIoKjBuHsjKMHo2EBLTyMcxE0mKMkvjWrkV2duOK0dbatGnT8uXLTUwUc+aUenkBQHV140TT0KHYs+fOVVeihwRjlMQ3YADmz8c776CqqnUf3LVr17x58xQKxZYtW0JCQgDU1yM0FKdOoU8fHDoEW1tJCiYyBGOUJLF4MWxsEBPTio8cOXLk1Vdf1el0H3zwwWuvvQZAq8W0aUhPb5xo0j9OmehhwxglcVRWNvmvtTXWrsXGjXem6e+/Nj4nJyc4OLi+vn7hwoXz588HIAhCVNT+r76CnR0OHULPntLUTWQwxigZqrYWCxdi4EBcvdqkPTgYfn44fBgAyssxaBASEu7dQ35+/vjx42tqaiIiIlasWKFvjIyMXLHipeHDV6WlYfBgSb8BkUEYo2SQzEy4uWHVKty4gays5q9u3AhrawDYtAk//ojwcMyejZqa5m9LTk6urKwMDg7etm2bQqEAsGHDhpUrV5qZmS1a5DZihBG+B5EBBKI2qa0VFiwQlEoBEAYPFk6dEgRB+PZb4dKlJm87fVr47jtBpxM2bxasrQVAePpp4dix5r3t2LGjtrZW/++dO3eamJgoFIrPPvtM+u9BZCjGKLXFmTPC0KECIJiaCgsWCHV1D/Sp3FxhyBABEMzMhDVrqjQazd3vSUtLMzMzA7BmzRqRiyaSBmOUWqehQYiNFczNBUDo3Vv49tvWfbyuTliwQDA11Q4b5uPp6VlYWPj7V48fP25jYwMgMjJSzKKJpMQYpVbIyxM8PARAUCiE2bOFW7fa2M8331xwdnYGYGtr+8UXX+gbc3Nz7e3tAcycOVOn04lWNJHEGKP0QDQaTWxs7JAhtwChVy8hI8PQDisqKoKDg/UX6MPCwnJzc11cXABMmDChoaFBjJKJjIQb5dEf++mnn1555ZXs7Oy+fYPHjNnz/vsKAx+sdNuWLVv++te/1tTUKJVKrVY7evTo/fv3W/J+T3qkMEbpfgRB2Lp1qz7pnJyctm7dGhAQIO4QRUVF4eHh+fn5Dg4O33//vS3v96RHDWOUWnTx4sXXXnvt6NGjAMLCwj755BP9tUvR3bp16+LFi3369LGwsJCifyJJMUbp3pKSkt54442qqipHR8dPPvlk4sSJcldE9JDiXUzUnEqlCgoKmjx5clVVVWhoaF5eHjOU6D64/y01kZmZGRQUVFlZaW9vv2HDhvDwcLkrInrY8aSemrhx44abm5urq+unn37arVs3ucshegQwRqm5kpIS/RJOInoQjFEiIoNwiomIyCCMUSIigzBGiYgMwhglIjIIY5SIyCD/D9+G3rhq5bBLAAABTnpUWHRyZGtpdFBLTCByZGtpdCAyMDIzLjA5LjUAAHice79v7T0GIOBnQAA+KL+BkY0hAyTAyMzOoAFiMEMFmBkRAmCaBZ3mgNBMaBoZmQkq4GZgZGBkYmBi5mBiZmFgYeVgYmVjYGPnYGLjYODgZODgYuDi5mDi4mHg4WVgZWTgYWEQYQJqZGUEKmdlY+Pg4mFhFd8EMgqKGfiWv+A4EMzqfeAh9+T9qasm7JdQkz+wae76fb+tPuxjNbE9sOuWlf2P4MN2sicZDxgfmWl/TnKiXfyMnP0T627b/arT2h/tNG//60ds+3u8qvY36/fse1i1Z/+O9a/3u/zi3a/3X/RA1r2N+5oDM+2ntG8Fmm+w//3Jz/Y6V6QOeL8SsZcsnm5vzfh2n334ZPt9B4Udls1+su+DWIZ93K5OuwWdH+yuhX2xf28hat9UvM9eDACM1GEAYrR3BQAAAaF6VFh0TU9MIHJka2l0IDIwMjMuMDkuNQAAeJx9U0tOxDAM3fcUvgCR7TgfL5kZhBCiI8HAHdhzf2EnHZJsaOsqcZ+d52d3A7/eL6/fP/B3xcu2AeA/j6rCV0TE7Q18Aaen55cdzrfH091zvn7utw8gAUoWY/eKfbxd3+4egjM8cKCojAgPMVDhtqJAUitOwQx7c0dsAA5Sc/EVhijOaECjZ8UgWQ+ophwdEAqrzkixpBZfKXUgcvHvFBgpzsBkKTFwjdKPVBZuGYVrmYG5n51zxl4Hx3zEJM7L4aVDtZSjDi6x9iApnGZohau7qzRWnitJ7pSt+IWAuk6uSZFGUInkSEqVZyThwVWRWtGK96RCukKp1Z8qlYakGrkTkcxLUcRG1QCJPKfRw3pUJ5KXmig28RVdKhfX1WmSZl06b5N0buxqO8lYcG+S1LKc/bRflunq83a67pcxb37zmCnbQBxzYxuQMRxklsYIsFkejSazMprJZnU0jGyroynkNkvfHDQJzP4inoSk5omTYNReMinjJEZM6og0KzLX7/v7v2rr7RdbH7+0RVgL8gAAAOF6VFh0U01JTEVTIHJka2l0IDIwMjMuMDkuNQAAeJwlT0uuxDAIu8pbtlIaBUP4qOoq+86F5vAPMiuC7dhmvbSed+FZtI7nc77HOn8PrL/vcaETB9rFnQw5qZO43zV55I4urtau0Vn4ziEaG46p3EY3RNxJOs1CBywadQziROEs9TUgSK3ArSxUtYLAutkJjYLDbAfB2IsXw6wiLsRbN0UrI4tsGxaTdA0i2XJybPcYlBVi/NRCgawynSxRckbbVyDuXCdRS8fhO1lEZ2pjZMHsodH2vX6Xndc2HXWfuMX5/QcEX0W59Lht5AAAAABJRU5ErkJggg==", "text/plain": [ - "" + "" ] }, "execution_count": 9, @@ -278,7 +278,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-03-25 17:13:50.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" + "\u001b[32m2024-03-26 13:16:43.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" ] } ], @@ -321,7 +321,7 @@ "my_propertymy_value" ], "text/plain": [ - "" + "" ] }, "execution_count": 11, @@ -408,8 +408,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-03-25 17:13:50.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n", - "\u001b[32m2024-03-25 17:13:50.590\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" + "\u001b[32m2024-03-26 13:16:43.938\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n", + "\u001b[32m2024-03-26 13:16:43.945\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpolaris.dataset._factory\u001b[0m:\u001b[36mregister_converter\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mYou are overwriting the converter for the sdf extension.\u001b[0m\n" ] } ], diff --git a/docs/tutorials/dataset_zarr.ipynb b/docs/tutorials/dataset_zarr.ipynb index 1dc5be73..44ebf3d8 100644 --- a/docs/tutorials/dataset_zarr.ipynb +++ b/docs/tutorials/dataset_zarr.ipynb @@ -177,7 +177,7 @@ " # we need to annotate the column.\n", " annotations={\"images\": ColumnAnnotation(is_pointer=True)},\n", " # We also need to specify the path to the root of the Zarr archive\n", - " zarr_archive=base_path,\n", + " zarr_root_path=base_path,\n", ")" ] }, @@ -416,7 +416,7 @@ { "data": { "text/plain": [ - "'/images#0'" + "'images#0'" ] }, "execution_count": 13, @@ -526,7 +526,7 @@ { "data": { "text/html": [ - "
nameNone
description
tags
user_attributes
ownerNone
default_adapters
zarr_archive/home/cas/.cache/polaris-tutorials/002/json/data.zarr
md5sum3874f5ec0a215c52bf468b7be5f09fc7
readme
annotations
images
is_pointerTrue
modalityUNKNOWN
descriptionNone
user_attributes
dtypeobject
sourceNone
licenseNone
curation_referenceNone
cache_dir/home/cas/.cache/polaris/datasets/None/3874f5ec0a215c52bf468b7be5f09fc7
artifact_idNone
n_rows1000
n_columns1
" + "
nameNone
description
tags
user_attributes
ownerNone
default_adapters
zarr_root_path/home/cas/.cache/polaris-tutorials/002/json/data.zarr
md5sum5488b4909fd67d3208624288e720e1b8
readme
annotations
images
is_pointerTrue
modalityUNKNOWN
descriptionNone
user_attributes
dtypeobject
sourceNone
licenseNone
curation_referenceNone
cache_dir/home/cas/.cache/polaris/datasets/None/5488b4909fd67d3208624288e720e1b8
artifact_idNone
n_rows1000
n_columns1
" ], "text/plain": [ "{\n", @@ -536,8 +536,8 @@ " \"user_attributes\": {},\n", " \"owner\": null,\n", " \"default_adapters\": {},\n", - " \"zarr_archive\": \"/home/cas/.cache/polaris-tutorials/002/json/data.zarr\",\n", - " \"md5sum\": \"3874f5ec0a215c52bf468b7be5f09fc7\",\n", + " \"zarr_root_path\": \"/home/cas/.cache/polaris-tutorials/002/json/data.zarr\",\n", + " \"md5sum\": \"5488b4909fd67d3208624288e720e1b8\",\n", " \"readme\": \"\",\n", " \"annotations\": {\n", " \"images\": {\n", @@ -551,7 +551,7 @@ " \"source\": null,\n", " \"license\": null,\n", " \"curation_reference\": null,\n", - " \"cache_dir\": \"/home/cas/.cache/polaris/datasets/None/3874f5ec0a215c52bf468b7be5f09fc7\",\n", + " \"cache_dir\": \"/home/cas/.cache/polaris/datasets/None/5488b4909fd67d3208624288e720e1b8\",\n", " \"artifact_id\": null,\n", " \"n_rows\": 1000,\n", " \"n_columns\": 1\n", diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index 797f0ef3..5200b2de 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -49,7 +49,7 @@ class Dataset(BaseArtifactModel): path to a `.parquet` file or a `pandas.DataFrame`. default_adapters: The adapters that the Dataset recommends to use by default to change the format of the data for specific columns. - zarr_archive: The data for any pointer column should be saved in the Zarr archive this path points to. + zarr_root_path: The data for any pointer column should be saved in the Zarr archive this path points to. md5sum: The checksum is used to verify the version of the dataset specification. If specified, it will raise an error if the specified checksum doesn't match the computed checksum. readme: Markdown text that can be used to provide a formatted description of the dataset. @@ -71,7 +71,7 @@ class Dataset(BaseArtifactModel): # Data table: Union[pd.DataFrame, str] default_adapters: Dict[str, Adapter] = Field(default_factory=dict) - zarr_archive: Optional[str] = None + zarr_root_path: Optional[str] = None md5sum: Optional[str] = None # Additional meta-data @@ -203,11 +203,11 @@ def client(self): @property def zarr_root(self): """Open the zarr archive in read-write mode if it is not already open.""" - if self.zarr_archive is None or not any(anno.is_pointer for anno in self.annotations.values()): + if self.zarr_root_path is None or not any(anno.is_pointer for anno in self.annotations.values()): return None - saved_on_hub = PolarisFileSystem.is_polarisfs_path(self.zarr_archive) - saved_remote = saved_on_hub or not fs.is_local_path(self.zarr_archive) + saved_on_hub = PolarisFileSystem.is_polarisfs_path(self.zarr_root_path) + saved_remote = saved_on_hub or not fs.is_local_path(self.zarr_root_path) if saved_remote and not self._has_been_warned: logger.warning( @@ -220,9 +220,9 @@ def zarr_root(self): # We open the archive in read-only mode if it is saved on the Hub if self._zarr_root is None: if saved_on_hub: - self._zarr_root = self.client.open_zarr_file(self.owner, self.name, self.zarr_archive, "r+") + self._zarr_root = self.client.open_zarr_file(self.owner, self.name, self.zarr_root_path, "r+") else: - self._zarr_root = zarr.open(self.zarr_archive, "r+") + self._zarr_root = zarr.open(self.zarr_root_path, "r+") return self._zarr_root @computed_field @@ -340,7 +340,7 @@ def to_json(self, destination: str) -> str: if self.zarr_root is not None: dest = zarr.open(zarr_archive, "w") zarr.copy_all(source=self.zarr_root, dest=dest) - serialized["zarr_archive"] = zarr_archive + serialized["zarr_root_path"] = zarr_archive self.table.to_parquet(table_path) with fsspec.open(dataset_path, "w") as f: @@ -364,8 +364,8 @@ def cache(self, cache_dir: Optional[str] = None) -> str: self.to_json(self.cache_dir) - if self.zarr_archive is not None: - self.zarr_archive = fs.join(self.cache_dir, "data.zarr") + if self.zarr_root_path is not None: + self.zarr_root_path = fs.join(self.cache_dir, "data.zarr") if not self._has_been_cached: self._has_been_cached = True diff --git a/polaris/dataset/_factory.py b/polaris/dataset/_factory.py index ddd90bc5..b6dd48e3 100644 --- a/polaris/dataset/_factory.py +++ b/polaris/dataset/_factory.py @@ -219,7 +219,7 @@ def build(self) -> Dataset: table=self._table, annotations=self._annotations, default_adapters=self._adapters, - zarr_archive=self.zarr_root_path, + zarr_root_path=self.zarr_root_path, ) def reset(self, zarr_root_path: Optional[str] = None): diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 4acf72e1..9f7e2292 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -523,8 +523,7 @@ def upload_dataset( dataset_json = dataset.model_dump(exclude={"cache_dir", "table"}, exclude_none=True, by_alias=True) # We will save the Zarr archive to the Hub as well - zarr_fname = "data.zarr" - dataset_json["zarrArchive"] = f"{PolarisFileSystem.protocol}://{zarr_fname}" + dataset_json["zarrRootPath"] = f"{PolarisFileSystem.protocol}://data.zarr" # Uploading a dataset is a three-step process. # 1. Upload the dataset meta data to the hub and prepare the hub to receive the parquet file @@ -592,7 +591,7 @@ def upload_dataset( dest = self.open_zarr_file( owner=dataset.owner, name=dataset.name, - path=zarr_fname, + path=dataset_json["zarrRootPath"], mode="w", ) logger.info("Copying Zarr archive to the Hub. This may take a while.") diff --git a/tests/test_dataset.py b/tests/test_dataset.py index b6e11700..26da2eb8 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -50,7 +50,7 @@ def test_load_data(tmp_path, with_slice, with_caching): path = "A#0:5" if with_slice else "A#0" table = pd.DataFrame({"A": [path]}, index=[0]) - dataset = Dataset(table=table, annotations={"A": {"is_pointer": True}}, zarr_archive=zarr_path) + dataset = Dataset(table=table, annotations={"A": {"is_pointer": True}}, zarr_root_path=zarr_path) if with_caching: dataset.cache(fs.join(tmpdir, "cache")) @@ -164,6 +164,6 @@ def test_dataset_caching(zarr_archive, tmpdir): assert original_dataset == cached_dataset cache_dir = cached_dataset.cache(tmpdir.join("cached").strpath) - assert cached_dataset.zarr_archive.startswith(cache_dir) + assert cached_dataset.zarr_root_path.startswith(cache_dir) assert _equality_test(cached_dataset, original_dataset) From 046eb83dbacb9e00db2ca788329c1450909355be Mon Sep 17 00:00:00 2001 From: cwognum Date: Tue, 26 Mar 2024 13:28:40 -0400 Subject: [PATCH 6/6] Fixed error in docs --- polaris/dataset/converters/_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polaris/dataset/converters/_base.py b/polaris/dataset/converters/_base.py index 96b49691..5ca31b37 100644 --- a/polaris/dataset/converters/_base.py +++ b/polaris/dataset/converters/_base.py @@ -22,7 +22,6 @@ def get_pointer(column: str, index: Union[int, slice]) -> str: Creates a pointer. Args: - root: The root path of the zarr hierarchy. column: The name of the column. Each column has its own group in the root. index: The index or slice of the pointer. """