From a03ab681c2b5c9bc6a414a3a69279d1b351bf341 Mon Sep 17 00:00:00 2001
From: Pavel Hurwicz <pavelhurwicz@gmail.com>
Date: Sat, 22 Apr 2023 14:12:59 -0400
Subject: [PATCH] hover config usage: hover.config ->
 hover.<module>.<module_config/local_config> -> other modules

---
 hover/__init__.py                         | 76 ++++++++++++-----------
 hover/config_constants.py                 | 59 ++++++++++++++++++
 hover/core/dataset.py                     | 16 +++--
 hover/core/explorer/base.py               | 18 +++---
 hover/core/explorer/feature.py            |  4 +-
 hover/core/explorer/local_config.py       | 12 +++-
 hover/core/local_config.py                | 13 +++-
 hover/core/neural.py                      | 11 ++--
 hover/core/representation/local_config.py | 10 +++
 hover/core/representation/manifold.py     |  4 +-
 hover/core/representation/reduction.py    | 18 ++----
 hover/module_config.py                    | 14 +++--
 hover/recipes/local_config.py             |  9 +++
 hover/recipes/subroutine.py               | 20 +++---
 hover/utils/bokeh_helper/__init__.py      |  5 +-
 hover/utils/bokeh_helper/local_config.py  | 10 +++
 16 files changed, 202 insertions(+), 97 deletions(-)
 create mode 100644 hover/config_constants.py
 create mode 100644 hover/recipes/local_config.py
diff --git a/hover/__init__.py b/hover/__init__.py
index 0b3fd522..c5aa900f 100644
--- a/hover/__init__.py
+++ b/hover/__init__.py
@@ -1,20 +1,26 @@
 """
 Module root where constants get configured.
 """
-import re
+from .config_constants import (
+    ConfigSection,
+    ConfigKey,
+    Validator,
+    Preprocessor,
+)
 from flexmod import AutolockedConfigValue, Config, ConfigIndex
 from bokeh.palettes import Turbo256
 
+
 config = ConfigIndex(
     [
         Config(
-            "io",
+            ConfigSection.IO,
             [
                 AutolockedConfigValue(
-                    "data_save_dir",
+                    ConfigKey.DATA_SAVE_DIR,
                     "The directory path for saving labeled data.",
                     ".",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
             ],
         ),
@@ -22,103 +28,103 @@
-            "visual",
+            ConfigSection.VISUAL,
             [
                 AutolockedConfigValue(
-                    "abstain_hexcolor",
+                    ConfigKey.ABSTAIN_HEXCOLOR,
                     "Hex code of RGB color.",
                     "#dcdcdc",
-                    validation=lambda x: bool(re.match(r"^\#[0-9a-fA-F]{6}$", x)),
+                    validation=Validator.is_hex_color,
                 ),
                 AutolockedConfigValue(
-                    "bokeh_palette",
+                    ConfigKey.BOKEH_PALETTE,
                     "The bokeh color palette to use for plotting. This should be a list of hex color codes.",
                     Turbo256,
-                    validation=lambda x: hasattr(x, "__iter__"),
+                    validation=Validator.is_iterable,
                 ),
                 AutolockedConfigValue(
-                    "bokeh_palette_usage",
+                    ConfigKey.BOKEH_PALETTE_USAGE,
                     "Specify how colors from the palette should be chosen when there are fewer categories than colors. This needs to be 'iterate' or 'linspace'",
                     "linspace",
-                    validation=lambda x: x in ["iterate", "linspace"],
+                    validation=Validator.is_supported_traversal_mode,
                 ),
                 AutolockedConfigValue(
-                    "table_img_style",
+                    ConfigKey.TABLE_IMG_STYLE,
                     "HTML style of images shown in selection tables.",
                     "max-height: 100%; max-width: 100%; object-fit: contain",
-                    preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x),
+                    preprocessor=Preprocessor.remove_quote_at_ends,
                 ),
                 AutolockedConfigValue(
-                    "tooltip_img_style",
+                    ConfigKey.TOOLTIP_IMG_STYLE,
                     "HTML style of images shown in mouse-over-data-point tooltips.",
                     "float: left; margin: 2px 2px 2px 2px; width: 60px; height: 60px;",
-                    preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x),
+                    preprocessor=Preprocessor.remove_quote_at_ends,
                 ),
             ],
         ),
         Config(
-            "data.embedding",
+            ConfigSection.DATA_EMBEDDING,
             [
                 AutolockedConfigValue(
-                    "default_reduction_method",
+                    ConfigKey.DEFAULT_REDUCTION_METHOD,
                     "Default method for dimensionality reduction. Currently either 'umap' or 'ivis'.",
                     "umap",
-                    validation=lambda x: x in ["umap", "ivis"],
+                    validation=Validator.is_supported_dimensionality_reduction,
                 ),
             ],
         ),
         Config(
-            "data.columns",
+            ConfigSection.DATA_COLUMNS,
             [
                 AutolockedConfigValue(
-                    "encoded_label_key",
+                    ConfigKey.ENCODED_LABEL_KEY,
                     "The column name for the encoded label.",
                     "label_encoded",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "dataset_subset_field",
+                    ConfigKey.DATASET_SUBSET_FIELD,
                     "The column name for dataset subsets.",
                     "SUBSET",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "embedding_field_prefix",
+                    ConfigKey.EMBEDDING_FIELD_PREFIX,
                     "The prefix of column names for embedding coordinates.",
                     "embed_",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "source_color_field",
+                    ConfigKey.SOURCE_COLOR_FIELD,
                     "The column name for plotted data point color.",
                     "__COLOR__",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "source_alpha_field",
+                    ConfigKey.SOURCE_ALPHA_FIELD,
                     "The column name for plotted data point color alpha (opacity).",
                     "__ALPHA__",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "search_score_field",
+                    ConfigKey.SEARCH_SCORE_FIELD,
                     "The column name for data points' score from search widgets.",
                     "__SEARCH_SCORE__",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
             ],
         ),
         Config(
-            "data.values",
+            ConfigSection.DATA_VALUES,
             [
                 AutolockedConfigValue(
-                    "abstain_decoded",
+                    ConfigKey.ABSTAIN_DECODED,
                     "The placeholder label indicating 'no label yet'.",
                     "ABSTAIN",
-                    validation=lambda x: isinstance(x, str),
+                    validation=Validator.is_str,
                 ),
                 AutolockedConfigValue(
-                    "abstain_encoded",
+                    ConfigKey.ABSTAIN_ENCODED,
                     "The encoded value of 'no label yet' which should almost always be -1, never 0 or positive.",
                     -1,
-                    validation=lambda x: isinstance(x, int) and x < 0,
+                    validation=Validator.is_negative_int,
                 ),
             ],
         ),
diff --git a/hover/config_constants.py b/hover/config_constants.py
new file mode 100644
index 00000000..c9f1aadc
--- /dev/null
+++ b/hover/config_constants.py
@@ -0,0 +1,59 @@
+import re
+
+
+class ConfigSection:  # section names used to index hover.config
+    IO = "io"
+    VISUAL = "visual"
+    DATA_EMBEDDING = "data.embedding"
+    DATA_COLUMNS = "data.columns"
+    DATA_VALUES = "data.values"  # distinct section; must not alias DATA_COLUMNS
+
+
+class ConfigKey:  # option names looked up inside a ConfigSection of hover.config
+    DATA_SAVE_DIR = "data_save_dir"
+    ABSTAIN_HEXCOLOR = "abstain_hexcolor"
+    BOKEH_PALETTE = "bokeh_palette"
+    BOKEH_PALETTE_USAGE = "bokeh_palette_usage"
+    TABLE_IMG_STYLE = "table_img_style"
+    TOOLTIP_IMG_STYLE = "tooltip_img_style"
+    DEFAULT_REDUCTION_METHOD = "default_reduction_method"
+    ENCODED_LABEL_KEY = "encoded_label_key"
+    DATASET_SUBSET_FIELD = "dataset_subset_field"
+    EMBEDDING_FIELD_PREFIX = "embedding_field_prefix"
+    SOURCE_COLOR_FIELD = "source_color_field"
+    SOURCE_ALPHA_FIELD = "source_alpha_field"
+    SEARCH_SCORE_FIELD = "search_score_field"
+    ABSTAIN_DECODED = "abstain_decoded"
+    ABSTAIN_ENCODED = "abstain_encoded"
+
+
+class Validator:  # predicates passed as `validation=` callbacks to config values
+    @staticmethod
+    def is_hex_color(x):
+        return bool(re.match(r"^\#[0-9a-fA-F]{6}$", x))
+
+    @staticmethod
+    def is_iterable(x):
+        return hasattr(x, "__iter__")
+
+    @staticmethod
+    def is_supported_dimensionality_reduction(x):
+        return x in ["umap", "ivis"]  # exact match: consumers compare with ==
+
+    @staticmethod
+    def is_supported_traversal_mode(x):
+        return x in ["iterate", "linspace"]  # exact match, same as the old lambda
+
+    @staticmethod
+    def is_str(x):
+        return isinstance(x, str)
+
+    @staticmethod
+    def is_negative_int(x):
+        return isinstance(x, int) and x < 0
+
+
+class Preprocessor:  # value transforms passed as `preprocessor=` to config values
+    @staticmethod
+    def remove_quote_at_ends(x):
+        return re.sub(r"(^[\'\"]|[\'\"]$)", "", x)  # strip one quote char per end
diff --git a/hover/core/dataset.py b/hover/core/dataset.py
index b73c0b8d..c71c662d 100644
--- a/hover/core/dataset.py
+++ b/hover/core/dataset.py
@@ -11,7 +11,6 @@
     -   loading data for training models
 """
 import os
-import hover
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
@@ -35,6 +34,7 @@
     dataset_default_sel_table_kwargs,
     COLOR_GLYPH_TEMPLATE,
     DATASET_SUBSET_FIELD,
+    DEFAULT_REDUCTION_METHOD,
     embedding_field,
 )
 
@@ -161,6 +161,12 @@ def burner(d):
 
             self.dfs[_key] = _df
 
+    def subset(self, key):
+        """
+        ???+ note "Return the DataFrame by reference for the given subset."
+        """
+        return self.dfs[key]
+
     def copy(self):
         """
         ???+ note "Create another instance, copying over the data entries."
@@ -760,7 +766,9 @@ def vectorizer_lookup(self):
     def vectorizer_lookup(self, *args, **kwargs):
         self._fail("assigning vectorizer lookup by reference is forbidden.")
 
-    def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs):
+    def compute_nd_embedding(
+        self, vectorizer, method=DEFAULT_REDUCTION_METHOD, dimension=2, **kwargs
+    ):
         """
         ???+ note "Get embeddings in n-dimensional space and return the dimensionality reducer."
             Reference: [`DimensionalityReducer`](https://github.com/phurwicz/hover/blob/main/hover/core/representation/reduction.py)
@@ -774,8 +782,6 @@ def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs):
         """
         from hover.core.representation.reduction import DimensionalityReducer
 
-        if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
         # register the vectorizer for scenarios that may need it
         self.vectorizer_lookup[dimension] = vectorizer
 
@@ -846,7 +852,7 @@ def compute_2d_embedding(self, vectorizer, method=None, **kwargs):
             | `**kwargs`   |            | kwargs for `DimensionalityReducer` |
         """
         reducer = self.compute_nd_embedding(
-            vectorizer, method=None, dimension=2, **kwargs
+            vectorizer, method=method or DEFAULT_REDUCTION_METHOD, dimension=2, **kwargs
         )
         return reducer
 
diff --git a/hover/core/explorer/base.py b/hover/core/explorer/base.py
index 11843236..b9b2228c 100644
--- a/hover/core/explorer/base.py
+++ b/hover/core/explorer/base.py
@@ -1,7 +1,6 @@
 """
 ???+ note "Base class(es) for ALL explorer implementations."
 """
-import pandas as pd
 from abc import ABC, abstractmethod
 from collections import OrderedDict, defaultdict
 from bokeh.events import SelectionGeometry
@@ -345,13 +344,15 @@ def _setup_dfs(self, df_dict, copy=False):
         expected_not_supplied = expected_keys.difference(supplied_keys)
 
         for _key in supplied_not_expected:
-            self._warn(
-                f"{self.__class__.__name__}.__init__(): got unexpected df key {_key}"
+            self._fail(
+                f"expected df keys {list(expected_keys)}, got unexpected df key {_key}"
             )
         for _key in expected_not_supplied:
-            self._warn(
-                f"{self.__class__.__name__}.__init__(): missing expected df key {_key}"
-            )
+            self._fail(f"expected df keys {list(expected_keys)}, missing df key {_key}")
+        # raise an exception if the supplied keys and expected keys are any different
+        assert (
+            not supplied_not_expected and not expected_not_supplied
+        ), "df key mismatch"
 
         # assign df with column checks
         self.dfs = dict()
@@ -372,11 +373,6 @@ def _setup_dfs(self, df_dict, copy=False):
                     _df[_col] = _default
             self.dfs[_key] = _df.copy() if copy else _df
 
-        # expected dfs must be present
-        for _key in expected_not_supplied:
-            _df = pd.DataFrame(columns=list(mandatory_col_to_default.keys()))
-            self.dfs[_key] = _df
-
     def _setup_sources(self):
         """
         ???+ note "Create, **(not update)**, `ColumnDataSource` objects."
diff --git a/hover/core/explorer/feature.py b/hover/core/explorer/feature.py
index c61ce31e..f2c31e3e 100644
--- a/hover/core/explorer/feature.py
+++ b/hover/core/explorer/feature.py
@@ -2,11 +2,11 @@
 ???+ note "Intermediate classes based on the main feature."
 """
 import re
-import hover
 import numpy as np
 from functools import lru_cache
 from bokeh.models import TextInput, Slider
 from .base import BokehBaseExplorer
+from .local_config import TOOLTIP_IMG_STYLE
 
 
 class BokehForText(BokehBaseExplorer):
@@ -200,7 +200,7 @@ class BokehForImage(BokehForUrlToVector):
     MANDATORY_COLUMNS = [PRIMARY_FEATURE, "label"]
     TOOLTIP_KWARGS = {
         "label": {"label": "Label"},
-        "image": {"image": hover.config["visual"]["tooltip_img_style"]},
+        "image": {"image": TOOLTIP_IMG_STYLE},
         "coords": True,
         "index": True,
     }
diff --git a/hover/core/explorer/local_config.py b/hover/core/explorer/local_config.py
index 16e7c7d1..50b639df 100644
--- a/hover/core/explorer/local_config.py
+++ b/hover/core/explorer/local_config.py
@@ -1,5 +1,11 @@
 import hover
+from hover.config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
 
-SOURCE_COLOR_FIELD = hover.config["data.columns"]["source_color_field"]
-SOURCE_ALPHA_FIELD = hover.config["data.columns"]["source_alpha_field"]
-SEARCH_SCORE_FIELD = hover.config["data.columns"]["search_score_field"]
+SOURCE_COLOR_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_COLOR_FIELD]
+SOURCE_ALPHA_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_ALPHA_FIELD]
+SEARCH_SCORE_FIELD = hover.config[Section.DATA_COLUMNS][Key.SEARCH_SCORE_FIELD]
+
+TOOLTIP_IMG_STYLE = hover.config[Section.VISUAL][Key.TOOLTIP_IMG_STYLE]
diff --git a/hover/core/local_config.py b/hover/core/local_config.py
index 49a56ddb..0787ad61 100644
--- a/hover/core/local_config.py
+++ b/hover/core/local_config.py
@@ -1,5 +1,9 @@
 import re
 import hover
+from hover.config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
 from bokeh.models import (
     Div,
     TableColumn,
@@ -8,7 +12,10 @@
 )
 
 
-DATASET_SUBSET_FIELD = hover.config["data.columns"]["dataset_subset_field"]
+DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][
+    Key.DEFAULT_REDUCTION_METHOD
+]
+DATASET_SUBSET_FIELD = hover.config[Section.DATA_COLUMNS][Key.DATASET_SUBSET_FIELD]
 
 COLOR_GLYPH_TEMPLATE = """
 <p style="color:<%= value %>;">
@@ -16,7 +23,7 @@
 </p>
 """
 
-EMBEDDING_FIELD_PREFIX = hover.config["data.columns"]["embedding_field_prefix"]
+EMBEDDING_FIELD_PREFIX = hover.config[Section.DATA_COLUMNS][Key.EMBEDDING_FIELD_PREFIX]
 EMBEDDING_FIELD_REGEX = r"\d+d_\d+$"
 
 
@@ -53,7 +60,7 @@ def dataset_default_sel_table_columns(feature_key):
             template="""<span href="#" data-toggle="tooltip" title="<%= value %>"><%= value %></span>"""
         )
     elif feature_key == "image":
-        style = hover.config["visual"]["table_img_style"]
+        style = hover.config[Section.VISUAL][Key.TABLE_IMG_STYLE]
         # width is easily adjustable on the UI, no need to make configurable here
         feature_col_kwargs["width"] = 200
         feature_col_kwargs["formatter"] = HTMLTemplateFormatter(
diff --git a/hover/core/neural.py b/hover/core/neural.py
index 0eafbe83..44e5bbfa 100644
--- a/hover/core/neural.py
+++ b/hover/core/neural.py
@@ -4,7 +4,6 @@
     `torch`-based template classes for implementing neural nets that work the most smoothly with `hover`.
 """
 import os
-import hover
 import numpy as np
 import torch
 import torch.nn.functional as F
@@ -15,6 +14,7 @@
 from hover.core import Loggable
 from hover.utils.metrics import classification_accuracy
 from hover.utils.misc import current_time
+from .local_config import DEFAULT_REDUCTION_METHOD
 
 
 class BaseVectorNet(Loggable):
@@ -325,7 +325,11 @@ def predict_proba(self, inps):
         return probs
 
     def manifold_trajectory(
-        self, inps, method=None, reducer_kwargs=None, spline_kwargs=None
+        self,
+        inps,
+        method=DEFAULT_REDUCTION_METHOD,
+        reducer_kwargs=None,
+        spline_kwargs=None,
     ):
         """
         ???+ note "Compute a propagation trajectory of the dataset manifold through the neural net."
@@ -346,9 +350,6 @@ def manifold_trajectory(
         from hover.core.representation.manifold import LayerwiseManifold
         from hover.core.representation.trajectory import manifold_spline
 
-        if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
-
         reducer_kwargs = reducer_kwargs or {}
         spline_kwargs = spline_kwargs or {}
 
diff --git a/hover/core/representation/local_config.py b/hover/core/representation/local_config.py
index 15ba1b5f..93be1e0b 100644
--- a/hover/core/representation/local_config.py
+++ b/hover/core/representation/local_config.py
@@ -1,6 +1,16 @@
+import hover
+from hover.config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
+
 KWARG_TRANSLATOR = {
     "dimension": {
         "umap": "n_components",
         "ivis": "embedding_dims",
     },
 }
+
+DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][
+    Key.DEFAULT_REDUCTION_METHOD
+]
diff --git a/hover/core/representation/manifold.py b/hover/core/representation/manifold.py
index a08f1bdf..cea3462c 100644
--- a/hover/core/representation/manifold.py
+++ b/hover/core/representation/manifold.py
@@ -2,11 +2,11 @@
 Manifold similarity measures for any collection of sequences of vectors.
 Can be useful for improved interpretability of neural nets.
 """
-import hover
 from tqdm import tqdm
 from scipy.spatial import procrustes
 from hover.core import Loggable
 from .reduction import DimensionalityReducer
+from .local_config import DEFAULT_REDUCTION_METHOD
 
 
 class LayerwiseManifold(Loggable):
@@ -64,7 +64,7 @@ def unfold(self, method=None, **kwargs):
         :type method: str
         """
         if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
+            method = DEFAULT_REDUCTION_METHOD
 
         # default kwargs should fix random state and seed
         # so that randomness does not introduce disparity
diff --git a/hover/core/representation/reduction.py b/hover/core/representation/reduction.py
index 7d1655fd..11909312 100644
--- a/hover/core/representation/reduction.py
+++ b/hover/core/representation/reduction.py
@@ -5,10 +5,9 @@
 
     Icing on the cake: unify the syntax across different kinds of reducers.
 """
-import hover
 import numpy as np
 from hover.core import Loggable
-from .local_config import KWARG_TRANSLATOR
+from .local_config import KWARG_TRANSLATOR, DEFAULT_REDUCTION_METHOD
 
 
 class DimensionalityReducer(Loggable):
@@ -22,7 +21,7 @@ def __init__(self, array):
         self.reference_array = array
 
     @staticmethod
-    def create_reducer(method=None, *args, **kwargs):
+    def create_reducer(method=DEFAULT_REDUCTION_METHOD, *args, **kwargs):
         """
         ???+ note "Handle kwarg translation and dynamic imports."
 
@@ -32,9 +31,6 @@ def create_reducer(method=None, *args, **kwargs):
             | `*args`    |        | forwarded to the reducer |
             | `**kwargs` |        | translated and forwarded |
         """
-        if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
-
         if method == "umap":
             import umap
 
@@ -57,7 +53,7 @@ def create_reducer(method=None, *args, **kwargs):
         reducer = reducer_cls(*args, **translated_kwargs)
         return reducer
 
-    def fit_transform(self, method=None, *args, **kwargs):
+    def fit_transform(self, method=DEFAULT_REDUCTION_METHOD, *args, **kwargs):
         """
         ???+ note "Fit and transform an array and store the reducer."
             | Param      | Type   | Description              |
@@ -66,15 +62,12 @@ def fit_transform(self, method=None, *args, **kwargs):
             | `*args`    |        | forwarded to the reducer |
             | `**kwargs` |        | forwarded to the reducer |
         """
-        if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
-
         reducer = DimensionalityReducer.create_reducer(method=method, *args, **kwargs)
         embedding = reducer.fit_transform(self.reference_array)
         setattr(self, method, reducer)
         return embedding
 
-    def transform(self, array, method=None):
+    def transform(self, array, method=DEFAULT_REDUCTION_METHOD):
         """
         ???+ note "Transform an array with a already-fitted reducer."
             | Param      | Type         | Description              |
@@ -82,9 +75,6 @@ def transform(self, array, method=None):
             | `array`    | `np.ndarray` | the array to transform   |
             | `method`   | `str`        | `"umap"` or `"ivis"`     |
         """
-        if method is None:
-            method = hover.config["data.embedding"]["default_reduction_method"]
-
         assert isinstance(array, np.ndarray), f"Expected np.ndarray, got {type(array)}"
         # edge case: array is too small
         if array.shape[0] < 1:
diff --git a/hover/module_config.py b/hover/module_config.py
index dff6c60b..adbf74e1 100644
--- a/hover/module_config.py
+++ b/hover/module_config.py
@@ -1,12 +1,16 @@
 import hover
+from .config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
 
 # constants for the abstain mechanism
-ABSTAIN_DECODED = hover.config["data.values"]["abstain_decoded"]
-ABSTAIN_ENCODED = hover.config["data.values"]["abstain_encoded"]
-ABSTAIN_HEXCOLOR = hover.config["visual"]["abstain_hexcolor"]
+ABSTAIN_DECODED = hover.config[Section.DATA_VALUES][Key.ABSTAIN_DECODED]
+ABSTAIN_ENCODED = hover.config[Section.DATA_VALUES][Key.ABSTAIN_ENCODED]
+ABSTAIN_HEXCOLOR = hover.config[Section.VISUAL][Key.ABSTAIN_HEXCOLOR]
 
 # constants for label encoding mechanism
-ENCODED_LABEL_KEY = hover.config["data.columns"]["encoded_label_key"]
+ENCODED_LABEL_KEY = hover.config[Section.DATA_COLUMNS][Key.ENCODED_LABEL_KEY]
 
 # constants for saving work
-DATA_SAVE_DIR = hover.config["io"]["data_save_dir"]
+DATA_SAVE_DIR = hover.config[Section.IO][Key.DATA_SAVE_DIR]
diff --git a/hover/recipes/local_config.py b/hover/recipes/local_config.py
new file mode 100644
index 00000000..7e3b5eb7
--- /dev/null
+++ b/hover/recipes/local_config.py
@@ -0,0 +1,9 @@
+import hover
+from hover.config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
+
+DEFAULT_REDUCTION_METHOD = hover.config[Section.DATA_EMBEDDING][
+    Key.DEFAULT_REDUCTION_METHOD
+]
diff --git a/hover/recipes/subroutine.py b/hover/recipes/subroutine.py
index 1f88b555..d5334bdd 100644
--- a/hover/recipes/subroutine.py
+++ b/hover/recipes/subroutine.py
@@ -7,11 +7,11 @@
 """
 import re
 import numpy as np
-import hover
 import hover.core.explorer as hovex
 from bokeh.layouts import row, column
 from bokeh.models import Button
 from rich.console import Console
+from .local_config import DEFAULT_REDUCTION_METHOD
 
 
 EXPLORER_CATALOG = {
@@ -112,7 +112,7 @@ def standard_annotator(dataset, **kwargs):
     annotator.activate_search()
     annotator.plot()
 
-    # subscribe for df updates
+    # subscribe for dataset updates
     dataset.subscribe_update_push(annotator, {_k: _k for _k in subsets})
 
     # annotators can commit to a dataset
@@ -152,7 +152,7 @@ def standard_finder(dataset, **kwargs):
     finder.activate_search()
     finder.plot()
 
-    # subscribe for df updates
+    # subscribe for dataset updates
     dataset.subscribe_update_push(finder, {_k: _k for _k in subsets})
     return finder
 
@@ -269,7 +269,7 @@ def retrain_vecnet():
         vecnet.auto_adjust_setup(dataset.classes)
 
         train_loader = vecnet.prepare_loader(dataset, "train", smoothing_coeff=0.2)
-        if dataset.dfs["dev"].shape[0] > 0:
+        if dataset.subset("dev").shape[0] > 0:
             dev_loader = vecnet.prepare_loader(dataset, "dev")
         else:
             dataset._warn("dev set is empty, borrowing train set for validation.")
@@ -287,33 +287,33 @@ def update_softlabel_plot():
         use_subsets = ("raw", "train", "dev")
         inps = []
         for _key in use_subsets:
-            inps.extend(dataset.dfs[_key][feature_key].tolist())
+            inps.extend(dataset.subset(_key)[feature_key].tolist())
 
         probs = vecnet.predict_proba(inps)
         labels = [dataset.label_decoder[_val] for _val in probs.argmax(axis=-1)]
         scores = probs.max(axis=-1).tolist()
         traj_arr, _, _ = vecnet.manifold_trajectory(
             inps,
-            method=hover.config["data.embedding"]["default_reduction_method"],
+            method=DEFAULT_REDUCTION_METHOD,
             reducer_kwargs=dict(dimension=manifold_dim),
             spline_kwargs=dict(points_per_step=5),
         )
 
         offset = 0
         for _key in use_subsets:
-            _length = dataset.dfs[_key].shape[0]
+            _length = dataset.subset(_key).shape[0]
             # skip subset if empty
             if _length == 0:
                 continue
             _slice = slice(offset, offset + _length)
-            dataset.dfs[_key]["pred_label"] = labels[_slice]
-            dataset.dfs[_key]["pred_score"] = scores[_slice]
+            dataset.subset(_key)["pred_label"] = labels[_slice]
+            dataset.subset(_key)["pred_score"] = scores[_slice]
             for i, _col in enumerate(manifold_traj_cols):
                 # all steps, selected slice
                 _traj = traj_arr[:, _slice, i]
                 # selected slice, all steps
                 _traj = list(np.swapaxes(_traj, 0, 1))
-                dataset.dfs[_key][f"{_col}_traj"] = _traj
+                dataset.subset(_key)[f"{_col}_traj"] = _traj
 
             offset += _length
 
diff --git a/hover/utils/bokeh_helper/__init__.py b/hover/utils/bokeh_helper/__init__.py
index b2c2b497..9727f2cc 100644
--- a/hover/utils/bokeh_helper/__init__.py
+++ b/hover/utils/bokeh_helper/__init__.py
@@ -18,6 +18,8 @@
     TOOLTIP_LABEL_TEMPLATE,
     TOOLTIP_COORDS_DIV,
     TOOLTIP_INDEX_DIV,
+    BOKEH_PALETTE_USAGE,
+    BOKEH_PALETTE,
 )
 
 
@@ -29,8 +31,7 @@ def auto_label_color(labels):
     use_labels.discard(module_config.ABSTAIN_DECODED)
     use_labels = sorted(use_labels, reverse=False)
 
-    palette = hover.config["visual"]["bokeh_palette"]
-    usage = hover.config["visual"]["bokeh_palette_usage"]
+    palette, usage = BOKEH_PALETTE, BOKEH_PALETTE_USAGE
     nlabels, ncolors = len(use_labels), len(palette)
     assert nlabels <= ncolors, f"Too many labels to support (max at {len(palette)})"
 
diff --git a/hover/utils/bokeh_helper/local_config.py b/hover/utils/bokeh_helper/local_config.py
index 78fb5991..3d622034 100644
--- a/hover/utils/bokeh_helper/local_config.py
+++ b/hover/utils/bokeh_helper/local_config.py
@@ -1,3 +1,13 @@
+import hover
+from hover.config_constants import (
+    ConfigSection as Section,
+    ConfigKey as Key,
+)
+
+
+BOKEH_PALETTE = hover.config[Section.VISUAL][Key.BOKEH_PALETTE]
+BOKEH_PALETTE_USAGE = hover.config[Section.VISUAL][Key.BOKEH_PALETTE_USAGE]
+
 TOOLTIP_TEXT_TEMPLATE = """
     <div style="word-wrap: break-word; width: 95%; text-overflow: ellipsis; line-height: 90%">
         <span style="font-size: 11px;">