From a03ab681c2b5c9bc6a414a3a69279d1b351bf341 Mon Sep 17 00:00:00 2001 From: Pavel Hurwicz Date: Sat, 22 Apr 2023 14:12:59 -0400 Subject: [PATCH] hover config usage: hover.config -> hover.. -> other modules --- hover/__init__.py | 76 ++++++++++++----------- hover/config_constants.py | 59 ++++++++++++++++++ hover/core/dataset.py | 16 +++-- hover/core/explorer/base.py | 18 +++--- hover/core/explorer/feature.py | 4 +- hover/core/explorer/local_config.py | 12 +++- hover/core/local_config.py | 13 +++- hover/core/neural.py | 11 ++-- hover/core/representation/local_config.py | 10 +++ hover/core/representation/manifold.py | 4 +- hover/core/representation/reduction.py | 18 ++---- hover/module_config.py | 14 +++-- hover/recipes/local_config.py | 9 +++ hover/recipes/subroutine.py | 20 +++--- hover/utils/bokeh_helper/__init__.py | 5 +- hover/utils/bokeh_helper/local_config.py | 10 +++ 16 files changed, 202 insertions(+), 97 deletions(-) create mode 100644 hover/config_constants.py create mode 100644 hover/recipes/local_config.py diff --git a/hover/__init__.py b/hover/__init__.py index 0b3fd522..c5aa900f 100644 --- a/hover/__init__.py +++ b/hover/__init__.py @@ -1,20 +1,26 @@ """ Module root where constants get configured. 
""" -import re +from .config_constants import ( + ConfigSection, + ConfigKey, + Validator, + Preprocessor, +) from flexmod import AutolockedConfigValue, Config, ConfigIndex from bokeh.palettes import Turbo256 + config = ConfigIndex( [ Config( - "io", + ConfigSection.IO, [ AutolockedConfigValue( - "data_save_dir", + ConfigKey.DATA_SAVE_DIR, "The directory path for saving labeled data.", ".", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), ], ), @@ -22,103 +28,103 @@ "visual", [ AutolockedConfigValue( - "abstain_hexcolor", + ConfigKey.ABSTAIN_HEXCOLOR, "Hex code of RGB color.", "#dcdcdc", - validation=lambda x: bool(re.match(r"^\#[0-9a-fA-F]{6}$", x)), + validation=Validator.is_hex_color, ), AutolockedConfigValue( - "bokeh_palette", + ConfigKey.BOKEH_PALETTE, "The bokeh color palette to use for plotting. This should be a list of hex color codes.", Turbo256, - validation=lambda x: hasattr(x, "__iter__"), + validation=Validator.is_iterable, ), AutolockedConfigValue( - "bokeh_palette_usage", + ConfigKey.BOKEH_PALETTE_USAGE, "Specify how colors from the palette should be chosen when there are fewer categories than colors. 
This needs to be 'iterate' or 'linspace'", "linspace", - validation=lambda x: x in ["iterate", "linspace"], + validation=Validator.is_supported_traversal_mode, ), AutolockedConfigValue( - "table_img_style", + ConfigKey.TABLE_IMG_STYLE, "HTML style of images shown in selection tables.", "max-height: 100%; max-width: 100%; object-fit: contain", - preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x), + preprocessor=Preprocessor.remove_quote_at_ends, ), AutolockedConfigValue( - "tooltip_img_style", + ConfigKey.TOOLTIP_IMG_STYLE, "HTML style of images shown in mouse-over-data-point tooltips.", "float: left; margin: 2px 2px 2px 2px; width: 60px; height: 60px;", - preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x), + preprocessor=Preprocessor.remove_quote_at_ends, ), ], ), Config( - "data.embedding", + ConfigSection.DATA_EMBEDDING, [ AutolockedConfigValue( - "default_reduction_method", + ConfigKey.DEFAULT_REDUCTION_METHOD, "Default method for dimensionality reduction. Currently either 'umap' or 'ivis'.", "umap", - validation=lambda x: x in ["umap", "ivis"], + validation=Validator.is_supported_dimensionality_reduction, ), ], ), Config( - "data.columns", + ConfigSection.DATA_COLUMNS, [ AutolockedConfigValue( - "encoded_label_key", + ConfigKey.ENCODED_LABEL_KEY, "The column name for the encoded label.", "label_encoded", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "dataset_subset_field", + ConfigKey.DATASET_SUBSET_FIELD, "The column name for dataset subsets.", "SUBSET", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "embedding_field_prefix", + ConfigKey.EMBEDDING_FIELD_PREFIX, "The prefix of column names for embedding coordinates.", "embed_", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "source_color_field", + ConfigKey.SOURCE_COLOR_FIELD, "The column name for plotted data point color.", 
"__COLOR__", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "source_alpha_field", + ConfigKey.SOURCE_ALPHA_FIELD, "The column name for plotted data point color alpha (opacity).", "__ALPHA__", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "search_score_field", + ConfigKey.SEARCH_SCORE_FIELD, "The column name for data points' score from search widgets.", "__SEARCH_SCORE__", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), ], ), Config( - "data.values", + ConfigSection.DATA_VALUES, [ AutolockedConfigValue( - "abstain_decoded", + ConfigKey.ABSTAIN_DECODED, "The placeholder label indicating 'no label yet'.", "ABSTAIN", - validation=lambda x: isinstance(x, str), + validation=Validator.is_str, ), AutolockedConfigValue( - "abstain_encoded", + ConfigKey.ABSTAIN_ENCODED, "The encoded value of 'no label yet' which should almost always be -1, never 0 or positive.", -1, - validation=lambda x: isinstance(x, int) and x < 0, + validation=Validator.is_negative_int, ), ], ), diff --git a/hover/config_constants.py b/hover/config_constants.py new file mode 100644 index 00000000..c9f1aadc --- /dev/null +++ b/hover/config_constants.py @@ -0,0 +1,59 @@ +import re + + +class ConfigSection: + IO = "io" + VISUAL = "visual" + DATA_EMBEDDING = "data.embedding" + DATA_COLUMNS = "data.columns" + DATA_VALUES = "data.values" + + +class ConfigKey: + DATA_SAVE_DIR = "data_save_dir" + ABSTAIN_HEXCOLOR = "abstain_hexcolor" + BOKEH_PALETTE = "bokeh_palette" + BOKEH_PALETTE_USAGE = "bokeh_palette_usage" + TABLE_IMG_STYLE = "table_img_style" + TOOLTIP_IMG_STYLE = "tooltip_img_style" + DEFAULT_REDUCTION_METHOD = "default_reduction_method" + ENCODED_LABEL_KEY = "encoded_label_key" + DATASET_SUBSET_FIELD = "dataset_subset_field" + EMBEDDING_FIELD_PREFIX = "embedding_field_prefix" + SOURCE_COLOR_FIELD = "source_color_field" + SOURCE_ALPHA_FIELD = "source_alpha_field" + 
SEARCH_SCORE_FIELD = "search_score_field" + ABSTAIN_DECODED = "abstain_decoded" + ABSTAIN_ENCODED = "abstain_encoded" + + +class Validator: + @staticmethod + def is_hex_color(x): + return bool(re.match(r"^\#[0-9a-fA-F]{6}$", x)) + + @staticmethod + def is_iterable(x): + return hasattr(x, "__iter__") + + @staticmethod + def is_supported_dimensionality_reduction(x): + return x in ["umap", "ivis"] + + @staticmethod + def is_supported_traversal_mode(x): + return x in ["iterate", "linspace"] + + @staticmethod + def is_str(x): + return isinstance(x, str) + + @staticmethod + def is_negative_int(x): + return isinstance(x, int) and x < 0 + + +class Preprocessor: + @staticmethod + def remove_quote_at_ends(x): + return re.sub(r"(^[\'\"]|[\'\"]$)", "", x) diff --git a/hover/core/dataset.py b/hover/core/dataset.py index b73c0b8d..c71c662d 100644 --- a/hover/core/dataset.py +++ b/hover/core/dataset.py @@ -11,7 +11,6 @@ - loading data for training models """ import os -import hover import numpy as np import pandas as pd from tqdm import tqdm @@ -35,6 +34,7 @@ dataset_default_sel_table_kwargs, COLOR_GLYPH_TEMPLATE, DATASET_SUBSET_FIELD, + DEFAULT_REDUCTION_METHOD, embedding_field, ) @@ -161,6 +161,12 @@ def burner(d): self.dfs[_key] = _df + def subset(self, key): + """ + ???+ note "Return the DataFrame by reference for the given subset." + """ + return self.dfs[key] + def copy(self): """ ???+ note "Create another instance, copying over the data entries." @@ -760,7 +766,9 @@ def vectorizer_lookup(self): def vectorizer_lookup(self, *args, **kwargs): self._fail("assigning vectorizer lookup by reference is forbidden.") - def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs): + def compute_nd_embedding( + self, vectorizer, method=DEFAULT_REDUCTION_METHOD, dimension=2, **kwargs + ): """ ???+ note "Get embeddings in n-dimensional space and return the dimensionality reducer." 
Reference: [`DimensionalityReducer`](https://github.com/phurwicz/hover/blob/main/hover/core/representation/reduction.py) @@ -774,8 +782,6 @@ def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs): """ from hover.core.representation.reduction import DimensionalityReducer - if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] # register the vectorizer for scenarios that may need it self.vectorizer_lookup[dimension] = vectorizer @@ -846,7 +852,7 @@ def compute_2d_embedding(self, vectorizer, method=None, **kwargs): | `**kwargs` | | kwargs for `DimensionalityReducer` | """ reducer = self.compute_nd_embedding( - vectorizer, method=None, dimension=2, **kwargs + vectorizer, method=method or DEFAULT_REDUCTION_METHOD, dimension=2, **kwargs ) return reducer diff --git a/hover/core/explorer/base.py b/hover/core/explorer/base.py index 11843236..b9b2228c 100644 --- a/hover/core/explorer/base.py +++ b/hover/core/explorer/base.py @@ -1,7 +1,6 @@ """ ???+ note "Base class(es) for ALL explorer implementations." 
""" -import pandas as pd from abc import ABC, abstractmethod from collections import OrderedDict, defaultdict from bokeh.events import SelectionGeometry @@ -345,13 +344,15 @@ def _setup_dfs(self, df_dict, copy=False): expected_not_supplied = expected_keys.difference(supplied_keys) for _key in supplied_not_expected: - self._warn( - f"{self.__class__.__name__}.__init__(): got unexpected df key {_key}" + self._fail( + f"expected df keys {list(expected_keys)}, got unexpected df key {_key}" ) for _key in expected_not_supplied: - self._warn( - f"{self.__class__.__name__}.__init__(): missing expected df key {_key}" - ) + self._fail(f"expected df keys {list(expected_keys)}, missing df key {_key}") + # raise an exception if the supplied keys and expected keys are any different + assert ( + not supplied_not_expected and not expected_not_supplied + ), "df key mismatch" # assign df with column checks self.dfs = dict() @@ -372,11 +373,6 @@ def _setup_dfs(self, df_dict, copy=False): _df[_col] = _default self.dfs[_key] = _df.copy() if copy else _df - # expected dfs must be present - for _key in expected_not_supplied: - _df = pd.DataFrame(columns=list(mandatory_col_to_default.keys())) - self.dfs[_key] = _df - def _setup_sources(self): """ ???+ note "Create, **(not update)**, `ColumnDataSource` objects." diff --git a/hover/core/explorer/feature.py b/hover/core/explorer/feature.py index c61ce31e..f2c31e3e 100644 --- a/hover/core/explorer/feature.py +++ b/hover/core/explorer/feature.py @@ -2,11 +2,11 @@ ???+ note "Intermediate classes based on the main feature." 
""" import re -import hover import numpy as np from functools import lru_cache from bokeh.models import TextInput, Slider from .base import BokehBaseExplorer +from .local_config import TOOLTIP_IMG_STYLE class BokehForText(BokehBaseExplorer): @@ -200,7 +200,7 @@ class BokehForImage(BokehForUrlToVector): MANDATORY_COLUMNS = [PRIMARY_FEATURE, "label"] TOOLTIP_KWARGS = { "label": {"label": "Label"}, - "image": {"image": hover.config["visual"]["tooltip_img_style"]}, + "image": {"image": TOOLTIP_IMG_STYLE}, "coords": True, "index": True, } diff --git a/hover/core/explorer/local_config.py b/hover/core/explorer/local_config.py index 16e7c7d1..50b639df 100644 --- a/hover/core/explorer/local_config.py +++ b/hover/core/explorer/local_config.py @@ -1,5 +1,11 @@ import hover +from hover.config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) -SOURCE_COLOR_FIELD = hover.config["data.columns"]["source_color_field"] -SOURCE_ALPHA_FIELD = hover.config["data.columns"]["source_alpha_field"] -SEARCH_SCORE_FIELD = hover.config["data.columns"]["search_score_field"] +SOURCE_COLOR_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_COLOR_FIELD] +SOURCE_ALPHA_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_ALPHA_FIELD] +SEARCH_SCORE_FIELD = hover.config[Section.DATA_COLUMNS][Key.SEARCH_SCORE_FIELD] + +TOOLTIP_IMG_STYLE = hover.config[Section.VISUAL][Key.TOOLTIP_IMG_STYLE] diff --git a/hover/core/local_config.py b/hover/core/local_config.py index 49a56ddb..0787ad61 100644 --- a/hover/core/local_config.py +++ b/hover/core/local_config.py @@ -1,5 +1,9 @@ import re import hover +from hover.config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) from bokeh.models import ( Div, TableColumn, @@ -8,7 +12,10 @@ ) -DATASET_SUBSET_FIELD = hover.config["data.columns"]["dataset_subset_field"] +DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][ + Key.DEFAULT_REDUCTION_METHOD +] +DATASET_SUBSET_FIELD = 
hover.config[Section.DATA_COLUMNS][Key.DATASET_SUBSET_FIELD] COLOR_GLYPH_TEMPLATE = """

@@ -16,7 +23,7 @@

""" -EMBEDDING_FIELD_PREFIX = hover.config["data.columns"]["embedding_field_prefix"] +EMBEDDING_FIELD_PREFIX = hover.config[Section.DATA_COLUMNS][Key.EMBEDDING_FIELD_PREFIX] EMBEDDING_FIELD_REGEX = r"\d+d_\d+$" @@ -53,7 +60,7 @@ def dataset_default_sel_table_columns(feature_key): template="""<%= value %>""" ) elif feature_key == "image": - style = hover.config["visual"]["table_img_style"] + style = hover.config[Section.VISUAL][Key.TABLE_IMG_STYLE] # width is easily adjustable on the UI, no need to make configurable here feature_col_kwargs["width"] = 200 feature_col_kwargs["formatter"] = HTMLTemplateFormatter( diff --git a/hover/core/neural.py b/hover/core/neural.py index 0eafbe83..44e5bbfa 100644 --- a/hover/core/neural.py +++ b/hover/core/neural.py @@ -4,7 +4,6 @@ `torch`-based template classes for implementing neural nets that work the most smoothly with `hover`. """ import os -import hover import numpy as np import torch import torch.nn.functional as F @@ -15,6 +14,7 @@ from hover.core import Loggable from hover.utils.metrics import classification_accuracy from hover.utils.misc import current_time +from .local_config import DEFAULT_REDUCTION_METHOD class BaseVectorNet(Loggable): @@ -325,7 +325,11 @@ def predict_proba(self, inps): return probs def manifold_trajectory( - self, inps, method=None, reducer_kwargs=None, spline_kwargs=None + self, + inps, + method=DEFAULT_REDUCTION_METHOD, + reducer_kwargs=None, + spline_kwargs=None, ): """ ???+ note "Compute a propagation trajectory of the dataset manifold through the neural net." 
@@ -346,9 +350,6 @@ def manifold_trajectory( from hover.core.representation.manifold import LayerwiseManifold from hover.core.representation.trajectory import manifold_spline - if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] - reducer_kwargs = reducer_kwargs or {} spline_kwargs = spline_kwargs or {} diff --git a/hover/core/representation/local_config.py b/hover/core/representation/local_config.py index 15ba1b5f..93be1e0b 100644 --- a/hover/core/representation/local_config.py +++ b/hover/core/representation/local_config.py @@ -1,6 +1,16 @@ +import hover +from hover.config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) + KWARG_TRANSLATOR = { "dimension": { "umap": "n_components", "ivis": "embedding_dims", }, } + +DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][ + Key.DEFAULT_REDUCTION_METHOD +] diff --git a/hover/core/representation/manifold.py b/hover/core/representation/manifold.py index a08f1bdf..cea3462c 100644 --- a/hover/core/representation/manifold.py +++ b/hover/core/representation/manifold.py @@ -2,11 +2,11 @@ Manifold similarity measures for any collection of sequences of vectors. Can be useful for improved interpretability of neural nets. 
""" -import hover from tqdm import tqdm from scipy.spatial import procrustes from hover.core import Loggable from .reduction import DimensionalityReducer +from .local_config import DEFAULT_REDUCTION_METHOD class LayerwiseManifold(Loggable): @@ -64,7 +64,7 @@ def unfold(self, method=None, **kwargs): :type method: str """ if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] + method = DEFAULT_REDUCTION_METHOD # default kwargs should fix random state and seed # so that randomness does not introduce disparity diff --git a/hover/core/representation/reduction.py b/hover/core/representation/reduction.py index 7d1655fd..11909312 100644 --- a/hover/core/representation/reduction.py +++ b/hover/core/representation/reduction.py @@ -5,10 +5,9 @@ Icing on the cake: unify the syntax across different kinds of reducers. """ -import hover import numpy as np from hover.core import Loggable -from .local_config import KWARG_TRANSLATOR +from .local_config import KWARG_TRANSLATOR, DEFAULT_REDUCTION_METHOD class DimensionalityReducer(Loggable): @@ -22,7 +21,7 @@ def __init__(self, array): self.reference_array = array @staticmethod - def create_reducer(method=None, *args, **kwargs): + def create_reducer(method=DEFAULT_REDUCTION_METHOD, *args, **kwargs): """ ???+ note "Handle kwarg translation and dynamic imports." @@ -32,9 +31,6 @@ def create_reducer(method=None, *args, **kwargs): | `*args` | | forwarded to the reducer | | `**kwargs` | | translated and forwarded | """ - if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] - if method == "umap": import umap @@ -57,7 +53,7 @@ def create_reducer(method=None, *args, **kwargs): reducer = reducer_cls(*args, **translated_kwargs) return reducer - def fit_transform(self, method=None, *args, **kwargs): + def fit_transform(self, method=DEFAULT_REDUCTION_METHOD, *args, **kwargs): """ ???+ note "Fit and transform an array and store the reducer." 
| Param | Type | Description | @@ -66,15 +62,12 @@ def fit_transform(self, method=None, *args, **kwargs): | `*args` | | forwarded to the reducer | | `**kwargs` | | forwarded to the reducer | """ - if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] - reducer = DimensionalityReducer.create_reducer(method=method, *args, **kwargs) embedding = reducer.fit_transform(self.reference_array) setattr(self, method, reducer) return embedding - def transform(self, array, method=None): + def transform(self, array, method=DEFAULT_REDUCTION_METHOD): """ ???+ note "Transform an array with a already-fitted reducer." | Param | Type | Description | @@ -82,9 +75,6 @@ def transform(self, array, method=None): | `array` | `np.ndarray` | the array to transform | | `method` | `str` | `"umap"` or `"ivis"` | """ - if method is None: - method = hover.config["data.embedding"]["default_reduction_method"] - assert isinstance(array, np.ndarray), f"Expected np.ndarray, got {type(array)}" # edge case: array is too small if array.shape[0] < 1: diff --git a/hover/module_config.py b/hover/module_config.py index dff6c60b..adbf74e1 100644 --- a/hover/module_config.py +++ b/hover/module_config.py @@ -1,12 +1,16 @@ import hover +from .config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) # constants for the abstain mechanism -ABSTAIN_DECODED = hover.config["data.values"]["abstain_decoded"] -ABSTAIN_ENCODED = hover.config["data.values"]["abstain_encoded"] -ABSTAIN_HEXCOLOR = hover.config["visual"]["abstain_hexcolor"] +ABSTAIN_DECODED = hover.config[Section.DATA_VALUES][Key.ABSTAIN_DECODED] +ABSTAIN_ENCODED = hover.config[Section.DATA_VALUES][Key.ABSTAIN_ENCODED] +ABSTAIN_HEXCOLOR = hover.config[Section.VISUAL][Key.ABSTAIN_HEXCOLOR] # constants for label encoding mechanism -ENCODED_LABEL_KEY = hover.config["data.columns"]["encoded_label_key"] +ENCODED_LABEL_KEY = hover.config[Section.DATA_COLUMNS][Key.ENCODED_LABEL_KEY] # constants for saving work 
-DATA_SAVE_DIR = hover.config["io"]["data_save_dir"] +DATA_SAVE_DIR = hover.config[Section.IO][Key.DATA_SAVE_DIR] diff --git a/hover/recipes/local_config.py b/hover/recipes/local_config.py new file mode 100644 index 00000000..7e3b5eb7 --- /dev/null +++ b/hover/recipes/local_config.py @@ -0,0 +1,9 @@ +import hover +from hover.config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) + +DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][ + Key.DEFAULT_REDUCTION_METHOD +] diff --git a/hover/recipes/subroutine.py b/hover/recipes/subroutine.py index 1f88b555..d5334bdd 100644 --- a/hover/recipes/subroutine.py +++ b/hover/recipes/subroutine.py @@ -7,11 +7,11 @@ """ import re import numpy as np -import hover import hover.core.explorer as hovex from bokeh.layouts import row, column from bokeh.models import Button from rich.console import Console +from .local_config import DEFAULT_REDUCTION_METHOD EXPLORER_CATALOG = { @@ -112,7 +112,7 @@ def standard_annotator(dataset, **kwargs): annotator.activate_search() annotator.plot() - # subscribe for df updates + # subscribe for dataset updates dataset.subscribe_update_push(annotator, {_k: _k for _k in subsets}) # annotators can commit to a dataset @@ -152,7 +152,7 @@ def standard_finder(dataset, **kwargs): finder.activate_search() finder.plot() - # subscribe for df updates + # subscribe for dataset updates dataset.subscribe_update_push(finder, {_k: _k for _k in subsets}) return finder @@ -269,7 +269,7 @@ def retrain_vecnet(): vecnet.auto_adjust_setup(dataset.classes) train_loader = vecnet.prepare_loader(dataset, "train", smoothing_coeff=0.2) - if dataset.dfs["dev"].shape[0] > 0: + if dataset.subset("dev").shape[0] > 0: dev_loader = vecnet.prepare_loader(dataset, "dev") else: dataset._warn("dev set is empty, borrowing train set for validation.") @@ -287,33 +287,33 @@ def update_softlabel_plot(): use_subsets = ("raw", "train", "dev") inps = [] for _key in use_subsets: - 
inps.extend(dataset.dfs[_key][feature_key].tolist()) + inps.extend(dataset.subset(_key)[feature_key].tolist()) probs = vecnet.predict_proba(inps) labels = [dataset.label_decoder[_val] for _val in probs.argmax(axis=-1)] scores = probs.max(axis=-1).tolist() traj_arr, _, _ = vecnet.manifold_trajectory( inps, - method=hover.config["data.embedding"]["default_reduction_method"], + method=DEFAULT_REDUCTION_METHOD, reducer_kwargs=dict(dimension=manifold_dim), spline_kwargs=dict(points_per_step=5), ) offset = 0 for _key in use_subsets: - _length = dataset.dfs[_key].shape[0] + _length = dataset.subset(_key).shape[0] # skip subset if empty if _length == 0: continue _slice = slice(offset, offset + _length) - dataset.dfs[_key]["pred_label"] = labels[_slice] - dataset.dfs[_key]["pred_score"] = scores[_slice] + dataset.subset(_key)["pred_label"] = labels[_slice] + dataset.subset(_key)["pred_score"] = scores[_slice] for i, _col in enumerate(manifold_traj_cols): # all steps, selected slice _traj = traj_arr[:, _slice, i] # selected slice, all steps _traj = list(np.swapaxes(_traj, 0, 1)) - dataset.dfs[_key][f"{_col}_traj"] = _traj + dataset.subset(_key)[f"{_col}_traj"] = _traj offset += _length diff --git a/hover/utils/bokeh_helper/__init__.py b/hover/utils/bokeh_helper/__init__.py index b2c2b497..9727f2cc 100644 --- a/hover/utils/bokeh_helper/__init__.py +++ b/hover/utils/bokeh_helper/__init__.py @@ -18,6 +18,8 @@ TOOLTIP_LABEL_TEMPLATE, TOOLTIP_COORDS_DIV, TOOLTIP_INDEX_DIV, + BOKEH_PALETTE_USAGE, + BOKEH_PALETTE, ) @@ -29,8 +31,7 @@ def auto_label_color(labels): use_labels.discard(module_config.ABSTAIN_DECODED) use_labels = sorted(use_labels, reverse=False) - palette = hover.config["visual"]["bokeh_palette"] - usage = hover.config["visual"]["bokeh_palette_usage"] + palette, usage = BOKEH_PALETTE, BOKEH_PALETTE_USAGE nlabels, ncolors = len(use_labels), len(palette) assert nlabels <= ncolors, f"Too many labels to support (max at {len(palette)})" diff --git 
a/hover/utils/bokeh_helper/local_config.py b/hover/utils/bokeh_helper/local_config.py index 78fb5991..3d622034 100644 --- a/hover/utils/bokeh_helper/local_config.py +++ b/hover/utils/bokeh_helper/local_config.py @@ -1,3 +1,13 @@ +import hover +from hover.config_constants import ( + ConfigSection as Section, + ConfigKey as Key, +) + + +BOKEH_PALETTE = hover.config[Section.VISUAL][Key.BOKEH_PALETTE] +BOKEH_PALETTE_USAGE = hover.config[Section.VISUAL][Key.BOKEH_PALETTE_USAGE] + TOOLTIP_TEXT_TEMPLATE = """