Skip to content

Commit

Permalink
hover config usage: hover.config -> hover.<module>.<module_config/local_config> -> other modules
Browse files Browse the repository at this point in the history
  • Loading branch information
phurwicz committed Apr 22, 2023
1 parent c65fff2 commit a03ab68
Show file tree
Hide file tree
Showing 16 changed files with 202 additions and 97 deletions.
76 changes: 41 additions & 35 deletions hover/__init__.py
Original file line number Diff line number Diff line change
@@ -1,124 +1,130 @@
"""
Module root where constants get configured.
"""
import re
from .config_constants import (
ConfigSection,
ConfigKey,
Validator,
Preprocessor,
)
from flexmod import AutolockedConfigValue, Config, ConfigIndex
from bokeh.palettes import Turbo256


config = ConfigIndex(
[
Config(
"io",
ConfigSection.IO,
[
AutolockedConfigValue(
"data_save_dir",
ConfigKey.DATA_SAVE_DIR,
"The directory path for saving labeled data.",
".",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
],
),
Config(
"visual",
[
AutolockedConfigValue(
"abstain_hexcolor",
ConfigKey.ABSTAIN_HEXCOLOR,
"Hex code of RGB color.",
"#dcdcdc",
validation=lambda x: bool(re.match(r"^\#[0-9a-fA-F]{6}$", x)),
validation=Validator.is_hex_color,
),
AutolockedConfigValue(
"bokeh_palette",
ConfigKey.BOKEH_PALETTE,
"The bokeh color palette to use for plotting. This should be a list of hex color codes.",
Turbo256,
validation=lambda x: hasattr(x, "__iter__"),
validation=Validator.is_iterable,
),
AutolockedConfigValue(
"bokeh_palette_usage",
ConfigKey.BOKEH_PALETTE_USAGE,
"Specify how colors from the palette should be chosen when there are fewer categories than colors. This needs to be 'iterate' or 'linspace'",
"linspace",
validation=lambda x: x in ["iterate", "linspace"],
validation=Validator.is_supported_traversal_mode,
),
AutolockedConfigValue(
"table_img_style",
ConfigKey.TABLE_IMG_STYLE,
"HTML style of images shown in selection tables.",
"max-height: 100%; max-width: 100%; object-fit: contain",
preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x),
preprocessor=Preprocessor.remove_quote_at_ends,
),
AutolockedConfigValue(
"tooltip_img_style",
ConfigKey.TOOLTIP_IMG_STYLE,
"HTML style of images shown in mouse-over-data-point tooltips.",
"float: left; margin: 2px 2px 2px 2px; width: 60px; height: 60px;",
preprocessor=lambda x: re.sub(r"(^[\'\"]|[\'\"]$)", "", x),
preprocessor=Preprocessor.remove_quote_at_ends,
),
],
),
Config(
"data.embedding",
ConfigSection.DATA_EMBEDDING,
[
AutolockedConfigValue(
"default_reduction_method",
ConfigKey.DEFAULT_REDUCTION_METHOD,
"Default method for dimensionality reduction. Currently either 'umap' or 'ivis'.",
"umap",
validation=lambda x: x in ["umap", "ivis"],
validation=Validator.is_supported_dimensionality_reduction,
),
],
),
Config(
"data.columns",
ConfigSection.DATA_COLUMNS,
[
AutolockedConfigValue(
"encoded_label_key",
ConfigKey.ENCODED_LABEL_KEY,
"The column name for the encoded label.",
"label_encoded",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"dataset_subset_field",
ConfigKey.DATASET_SUBSET_FIELD,
"The column name for dataset subsets.",
"SUBSET",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"embedding_field_prefix",
ConfigKey.EMBEDDING_FIELD_PREFIX,
"The prefix of column names for embedding coordinates.",
"embed_",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"source_color_field",
ConfigKey.SOURCE_COLOR_FIELD,
"The column name for plotted data point color.",
"__COLOR__",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"source_alpha_field",
ConfigKey.SOURCE_ALPHA_FIELD,
"The column name for plotted data point color alpha (opacity).",
"__ALPHA__",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"search_score_field",
ConfigKey.SEARCH_SCORE_FIELD,
"The column name for data points' score from search widgets.",
"__SEARCH_SCORE__",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
],
),
Config(
"data.values",
ConfigSection.DATA_VALUES,
[
AutolockedConfigValue(
"abstain_decoded",
ConfigKey.ABSTAIN_DECODED,
"The placeholder label indicating 'no label yet'.",
"ABSTAIN",
validation=lambda x: isinstance(x, str),
validation=Validator.is_str,
),
AutolockedConfigValue(
"abstain_encoded",
ConfigKey.ABSTAIN_ENCODED,
"The encoded value of 'no label yet' which should almost always be -1, never 0 or positive.",
-1,
validation=lambda x: isinstance(x, int) and x < 0,
validation=Validator.is_negative_int,
),
],
),
Expand Down
59 changes: 59 additions & 0 deletions hover/config_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import re


class ConfigSection:
    """
    Names of the sections in the hover config index.

    Each attribute is the string that addresses one section, e.g.
    `hover.config[ConfigSection.DATA_COLUMNS]`.
    """

    IO = "io"
    VISUAL = "visual"
    DATA_EMBEDDING = "data.embedding"
    DATA_COLUMNS = "data.columns"
    # Must differ from DATA_COLUMNS: the values section (abstain labels) is
    # registered separately from the columns section (field names), so a
    # shared name would make the two sections collide.
    DATA_VALUES = "data.values"


class ConfigKey:
    """
    Names of the individual values stored in the hover config.

    Every attribute holds the lowercase form of its own name, so the
    constant and the string it stands for never diverge.
    """

    # file output
    DATA_SAVE_DIR = "data_save_dir"

    # plot / table appearance
    ABSTAIN_HEXCOLOR = "abstain_hexcolor"
    BOKEH_PALETTE = "bokeh_palette"
    BOKEH_PALETTE_USAGE = "bokeh_palette_usage"
    TABLE_IMG_STYLE = "table_img_style"
    TOOLTIP_IMG_STYLE = "tooltip_img_style"

    # embedding computation
    DEFAULT_REDUCTION_METHOD = "default_reduction_method"

    # column / field names
    ENCODED_LABEL_KEY = "encoded_label_key"
    DATASET_SUBSET_FIELD = "dataset_subset_field"
    EMBEDDING_FIELD_PREFIX = "embedding_field_prefix"
    SOURCE_COLOR_FIELD = "source_color_field"
    SOURCE_ALPHA_FIELD = "source_alpha_field"
    SEARCH_SCORE_FIELD = "search_score_field"

    # placeholder label values
    ABSTAIN_DECODED = "abstain_decoded"
    ABSTAIN_ENCODED = "abstain_encoded"


class Validator:
    """
    Predicates used to validate config values.

    Every method takes a single candidate value and returns a bool. A value
    of the wrong type is reported as invalid (`False`) rather than raising,
    so a caller can treat the result uniformly.
    """

    @staticmethod
    def is_hex_color(x):
        """Return True if `x` is a string of the form `#RRGGBB` (hex digits)."""
        # fullmatch instead of match + "$": "$" also matches just before a
        # trailing newline, which would let "#dcdcdc\n" through.
        return isinstance(x, str) and bool(re.fullmatch(r"\#[0-9a-fA-F]{6}", x))

    @staticmethod
    def is_iterable(x):
        """Return True if `x` exposes `__iter__`."""
        return hasattr(x, "__iter__")

    @staticmethod
    def is_supported_dimensionality_reduction(x):
        """Return True if `x` is 'umap' or 'ivis', compared case-insensitively."""
        # the isinstance guard keeps non-strings from failing on .lower()
        return isinstance(x, str) and x.lower() in ("umap", "ivis")

    @staticmethod
    def is_supported_traversal_mode(x):
        """Return True if `x` is 'iterate' or 'linspace', compared case-insensitively."""
        # the isinstance guard keeps non-strings from failing on .lower()
        return isinstance(x, str) and x.lower() in ("iterate", "linspace")

    @staticmethod
    def is_str(x):
        """Return True if `x` is a `str` instance."""
        return isinstance(x, str)

    @staticmethod
    def is_negative_int(x):
        """Return True if `x` is an `int` strictly below zero."""
        return isinstance(x, int) and x < 0


class Preprocessor:
    """Transformations applied to raw config values before they are stored."""

    @staticmethod
    def remove_quote_at_ends(x):
        """
        Drop a single or double quote character sitting at either end of `x`.

        At most one quote is removed per end, and the two ends are handled
        independently, so the quotes need not match each other.
        """
        quote_at_either_end = r"(^[\'\"]|[\'\"]$)"
        unquoted = re.sub(quote_at_either_end, "", x)
        return unquoted
16 changes: 11 additions & 5 deletions hover/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
- loading data for training models
"""
import os
import hover
import numpy as np
import pandas as pd
from tqdm import tqdm
Expand All @@ -35,6 +34,7 @@
dataset_default_sel_table_kwargs,
COLOR_GLYPH_TEMPLATE,
DATASET_SUBSET_FIELD,
DEFAULT_REDUCTION_METHOD,
embedding_field,
)

Expand Down Expand Up @@ -161,6 +161,12 @@ def burner(d):

self.dfs[_key] = _df

def subset(self, key):
    """
    ???+ note "Look up the DataFrame held for one subset, without copying it."

        | Param | Type  | Description                          |
        | :---- | :---- | :----------------------------------- |
        | `key` |       | key of the subset inside `self.dfs`  |

        The object stored in `self.dfs` is handed back as-is, so changes made
        to it by the caller are visible through this dataset as well.
    """
    subset_df = self.dfs[key]
    return subset_df

def copy(self):
"""
???+ note "Create another instance, copying over the data entries."
Expand Down Expand Up @@ -760,7 +766,9 @@ def vectorizer_lookup(self):
def vectorizer_lookup(self, *args, **kwargs):
self._fail("assigning vectorizer lookup by reference is forbidden.")

def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs):
def compute_nd_embedding(
self, vectorizer, method=DEFAULT_REDUCTION_METHOD, dimension=2, **kwargs
):
"""
???+ note "Get embeddings in n-dimensional space and return the dimensionality reducer."
Reference: [`DimensionalityReducer`](https://github.com/phurwicz/hover/blob/main/hover/core/representation/reduction.py)
Expand All @@ -774,8 +782,6 @@ def compute_nd_embedding(self, vectorizer, method=None, dimension=2, **kwargs):
"""
from hover.core.representation.reduction import DimensionalityReducer

if method is None:
method = hover.config["data.embedding"]["default_reduction_method"]
# register the vectorizer for scenarios that may need it
self.vectorizer_lookup[dimension] = vectorizer

Expand Down Expand Up @@ -846,7 +852,7 @@ def compute_2d_embedding(self, vectorizer, method=None, **kwargs):
| `**kwargs` | | kwargs for `DimensionalityReducer` |
"""
reducer = self.compute_nd_embedding(
vectorizer, method=None, dimension=2, **kwargs
vectorizer, method=method, dimension=2, **kwargs
)
return reducer

Expand Down
18 changes: 7 additions & 11 deletions hover/core/explorer/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
???+ note "Base class(es) for ALL explorer implementations."
"""
import pandas as pd
from abc import ABC, abstractmethod
from collections import OrderedDict, defaultdict
from bokeh.events import SelectionGeometry
Expand Down Expand Up @@ -345,13 +344,15 @@ def _setup_dfs(self, df_dict, copy=False):
expected_not_supplied = expected_keys.difference(supplied_keys)

for _key in supplied_not_expected:
self._warn(
f"{self.__class__.__name__}.__init__(): got unexpected df key {_key}"
self._fail(
f"expected df keys {list(expected_keys)}, got unexpected df key {_key}"
)
for _key in expected_not_supplied:
self._warn(
f"{self.__class__.__name__}.__init__(): missing expected df key {_key}"
)
self._fail(f"expected df keys {list(expected_keys)}, missing df key {_key}")
# raise an exception if the supplied keys and expected keys are any different
assert (
not supplied_not_expected and not expected_not_supplied
), "df key mismatch"

# assign df with column checks
self.dfs = dict()
Expand All @@ -372,11 +373,6 @@ def _setup_dfs(self, df_dict, copy=False):
_df[_col] = _default
self.dfs[_key] = _df.copy() if copy else _df

# expected dfs must be present
for _key in expected_not_supplied:
_df = pd.DataFrame(columns=list(mandatory_col_to_default.keys()))
self.dfs[_key] = _df

def _setup_sources(self):
"""
???+ note "Create, **(not update)**, `ColumnDataSource` objects."
Expand Down
4 changes: 2 additions & 2 deletions hover/core/explorer/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
???+ note "Intermediate classes based on the main feature."
"""
import re
import hover
import numpy as np
from functools import lru_cache
from bokeh.models import TextInput, Slider
from .base import BokehBaseExplorer
from .local_config import TOOLTIP_IMG_STYLE


class BokehForText(BokehBaseExplorer):
Expand Down Expand Up @@ -200,7 +200,7 @@ class BokehForImage(BokehForUrlToVector):
MANDATORY_COLUMNS = [PRIMARY_FEATURE, "label"]
TOOLTIP_KWARGS = {
"label": {"label": "Label"},
"image": {"image": hover.config["visual"]["tooltip_img_style"]},
"image": {"image": TOOLTIP_IMG_STYLE},
"coords": True,
"index": True,
}
12 changes: 9 additions & 3 deletions hover/core/explorer/local_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import hover
from hover.config_constants import (
ConfigSection as Section,
ConfigKey as Key,
)

SOURCE_COLOR_FIELD = hover.config["data.columns"]["source_color_field"]
SOURCE_ALPHA_FIELD = hover.config["data.columns"]["source_alpha_field"]
SEARCH_SCORE_FIELD = hover.config["data.columns"]["search_score_field"]
SOURCE_COLOR_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_COLOR_FIELD]
SOURCE_ALPHA_FIELD = hover.config[Section.DATA_COLUMNS][Key.SOURCE_ALPHA_FIELD]
SEARCH_SCORE_FIELD = hover.config[Section.DATA_COLUMNS][Key.SEARCH_SCORE_FIELD]

TOOLTIP_IMG_STYLE = hover.config[Section.VISUAL][Key.TOOLTIP_IMG_STYLE]
Loading

0 comments on commit a03ab68

Please sign in to comment.