-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support polars as DataFrame engine (#67)
and adjust all tests and doc scripts
- Loading branch information
Showing 45 changed files with 1,778 additions and 340 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
dataset.dfs["raw"].head(5) | ||
dataset.dfs["raw"]().head(5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
# each subset can be accessed as its own DataFrame | ||
dataset.dfs["raw"].head(5) | ||
dataset.dfs["raw"]().head(5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
text = dataset.dfs["raw"].loc[0, "text"] | ||
text = dataset.dfs["raw"]().loc[0, "text"] | ||
vec = vectorizer(text) | ||
print(f"Text: {text}") | ||
print(f"Vector shape: {vec.shape}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
# what we did adds 'embed_2d_0' and 'embed_2d_1' columns to the DataFrames in dataset.dfs | ||
dataset.dfs["raw"].head(5) | ||
dataset.dfs["raw"]().head(5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
dataset._callback_update_selection(dataset.dfs["raw"].loc[:10]) | ||
dataset._callback_update_selection(dataset.dfs["raw"][:10]) | ||
|
||
show(dataset.sel_table, notebook_url=notebook_url) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import re | ||
|
||
|
||
class ConfigSection:
    """String constants identifying configuration sections.

    Dotted values such as ``"data.embedding"`` look like nested section
    paths; how they are resolved is up to the code that consumes them
    (not visible here -- confirm against the config loader).
    """

    IO = "io"
    BACKEND = "backend"
    VISUAL = "visual"

    DATA_EMBEDDING = "data.embedding"
    DATA_COLUMNS = "data.columns"
    DATA_VALUES = "data.values"
|
||
|
||
class ConfigKey:
    """String constants identifying individual configuration keys.

    Each attribute's value is the lower-case form of its own name.  Which
    section a key belongs to is decided by the code that uses these
    constants (not visible here).
    """

    DATA_SAVE_DIR = "data_save_dir"
    DATAFRAME_LIBRARY = "dataframe_library"

    ABSTAIN_HEXCOLOR = "abstain_hexcolor"
    BOKEH_PALETTE = "bokeh_palette"
    BOKEH_PALETTE_USAGE = "bokeh_palette_usage"
    TABLE_IMG_STYLE = "table_img_style"
    TOOLTIP_IMG_STYLE = "tooltip_img_style"

    DEFAULT_REDUCTION_METHOD = "default_reduction_method"

    ENCODED_LABEL_KEY = "encoded_label_key"
    DATASET_SUBSET_FIELD = "dataset_subset_field"
    EMBEDDING_FIELD_PREFIX = "embedding_field_prefix"
    SOURCE_COLOR_FIELD = "source_color_field"
    SOURCE_ALPHA_FIELD = "source_alpha_field"
    SEARCH_SCORE_FIELD = "search_score_field"

    ABSTAIN_DECODED = "abstain_decoded"
    ABSTAIN_ENCODED = "abstain_encoded"
|
||
|
||
class Validator:
    """Collection of static predicates, each taking one value and returning a bool."""

    # "#" followed by exactly six hexadecimal digits; used with ``fullmatch``
    # so no anchors are needed.
    _HEX_COLOR_PATTERN = re.compile(r"#[0-9a-fA-F]{6}")

    @staticmethod
    def is_hex_color(x):
        """Return True if ``x`` is a string of the form ``#RRGGBB``.

        Non-string values yield False instead of raising ``TypeError``.
        The whole string must match: a ``$`` anchor would also accept a
        trailing newline (e.g. ``"#ffffff\\n"``), so ``fullmatch`` is used.
        """
        if not isinstance(x, str):
            return False
        return Validator._HEX_COLOR_PATTERN.fullmatch(x) is not None

    @staticmethod
    def is_iterable(x):
        """Return True if ``x`` exposes an ``__iter__`` attribute."""
        return hasattr(x, "__iter__")

    @staticmethod
    def is_iterable_of_hex_color(x):
        """Return True if ``x`` is iterable and every element is a hex color.

        An empty iterable yields True.  Checking stops at the first element
        that is not a hex color; note that a one-shot iterator passed in is
        consumed by this check.
        """
        if not Validator.is_iterable(x):
            return False
        return all(Validator.is_hex_color(item) for item in x)

    @staticmethod
    def is_supported_dataframe_library(x):
        """Return True if ``x`` equals ``"pandas"`` or ``"polars"``."""
        return x in ("pandas", "polars")

    @staticmethod
    def is_supported_dimensionality_reduction(x):
        """Return True if ``x`` equals ``"umap"`` or ``"ivis"``."""
        return x in ("umap", "ivis")

    @staticmethod
    def is_supported_traversal_mode(x):
        """Return True if ``x`` equals ``"iterate"`` or ``"linspace"``."""
        return x in ("iterate", "linspace")

    @staticmethod
    def is_str(x):
        """Return True if ``x`` is an instance of ``str``."""
        return isinstance(x, str)

    @staticmethod
    def is_negative_int(x):
        """Return True if ``x`` is an ``int`` strictly less than zero."""
        return isinstance(x, int) and x < 0
|
||
|
||
class Preprocessor:
    """Static helpers that transform a string value and return the result."""

    @staticmethod
    def remove_quote_at_ends(x):
        """Strip one single or double quote from the start and/or end of ``x``.

        At most one quote character is removed per end; quotes elsewhere in
        the string are left alone.
        """
        edge_quote = r"(^[\'\"]|[\'\"]$)"
        stripped = re.sub(edge_quote, "", x)
        return stripped

    @staticmethod
    def lower(x):
        """Return ``x.lower()``."""
        lowered = x.lower()
        return lowered
Oops, something went wrong.