Skip to content

Commit

Permalink
feat(eda): convert all plot functions to new UI
Browse files Browse the repository at this point in the history
  • Loading branch information
eutialia committed Nov 26, 2020
1 parent 905221b commit 36f8fa3
Show file tree
Hide file tree
Showing 14 changed files with 180 additions and 243 deletions.
85 changes: 55 additions & 30 deletions dataprep/eda/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,25 @@
loader=PackageLoader("dataprep", "eda/templates"),
)

# Visual types handled by the tab branch of Container.__init__: that branch
# prepends a "Stats" entry to the context's meta list when tabledata is present,
# and picks "tab_base_corr.html" when the type is "correlation_impact".
TAB_VISUAL_TYPES = {
"missing_impact_1v1",
"missing_impact",
"categorical_column",
"numerical_column",
"datetime_column",
"cat_and_num_cols",
"two_num_cols",
"two_cat_cols",
"dt_and_num_cols",
"dt_and_cat_cols",
"dt_cat_num_cols",
"correlation_impact",
"correlation_single_heatmaps",
"correlation_scatter",
}

# Visual types that Container.__init__ renders with the "grid_base.html" template.
GRID_VISUAL_TYPES = {"distribution_grid", "missing_impact_1vn"}


class Container:
"""
Expand All @@ -30,38 +49,13 @@ def __init__(
to_render: Dict[str, Any],
visual_type: str,
) -> None:
if visual_type in ("distribution_grid", "missing_impact_1vn"):
self.context = {
"resources": INLINE.render(),
"components": components(to_render.get("layout")),
"tabledata": to_render.get("tabledata"),
"overview_insights": to_render.get("overview_insights"),
"column_insights": to_render.get("column_insights"),
"meta": to_render.get("meta"),
"title": "DataPrep.EDA Report",
"rnd": random.randint(0, 99), # for multiple cells running in the same notebook
"container_width": f"{to_render['fig_width']*3}px",
"legend_labels": to_render.get("legend_labels"),
}
self.context = Context(**to_render)
setattr(self.context, "rnd", random.randint(0, 9999))
if visual_type in GRID_VISUAL_TYPES:
self.template_base = ENV_LOADER.get_template("grid_base.html")

elif "_column" in visual_type or visual_type in (
"missing_impact",
"missing_impact_1v1",
"correlation_impact",
):
# todo: param management
elif visual_type in TAB_VISUAL_TYPES:
if to_render.get("tabledata"):
to_render["meta"].insert(0, "Stats")
self.context = {
"resources": INLINE.render(),
"tabledata": to_render.get("tabledata"),
"insights": to_render.get("insights"),
"components": components(to_render.get("layout")),
"meta": to_render.get("meta"),
"title": "DataPrep.EDA Report",
"rnd": random.randint(100, 999), # for multiple cells running in the same notebook
}
self.context.meta.insert(0, "Stats") # type: ignore
if visual_type == "correlation_impact":
self.template_base = ENV_LOADER.get_template("tab_base_corr.html")
else:
Expand Down Expand Up @@ -116,3 +110,34 @@ def show_browser(self) -> None:
with open(tmpf.name, "w") as file:
file.write(self.template_base.render(context=self.context))
webbrowser.open_new_tab(f"file://{tmpf.name}")


class Context:
    """
    Define the context class that stores all the parameters needed by template engine.

    Every keyword argument becomes an attribute of the instance, except
    ``layout``, which is passed through ``components`` and stored as
    ``components``. ``title``, ``resources`` and ``container_width`` fall back
    to class-level defaults when they are not supplied.

    Looking up a parameter that was never supplied yields ``None`` (both via
    attribute access and via ``context[key]``), so templates can test for
    optional values without raising.

    The instance is a plain attribute bag, not read-only: callers may add or
    replace attributes after construction.
    """

    _title = "DataPrep.EDA Report"
    # Evaluated once, when the class body is executed, and shared by all instances.
    _resources = INLINE.render()
    _container_width = 650  # default width just in case nothing got passed in

    def __init__(self, **param: Any) -> None:
        """Populate the context from keyword arguments, on top of the defaults."""
        self.title = self._title
        self.resources = self._resources
        self.container_width = self._container_width

        for attr, value in param.items():
            if attr == "layout":
                # The raw layout is not stored; only its rendered components are.
                setattr(self, "components", components(value))
            else:
                setattr(self, attr, value)

    def __getitem__(self, key: str) -> Any:
        """Return the attribute named ``key``, or ``None`` when it is missing."""
        # getattr signals a missing attribute with AttributeError, never
        # KeyError, so use the default form instead of a try/except.
        return getattr(self, key, None)

    def __getattr__(self, attr: str) -> None:
        """
        Fallback for attributes that are not set: optional parameters read as ``None``.

        Raises
        ------
        AttributeError
            For dunder names, so protocol probes such as
            ``hasattr(obj, "__setstate__")`` or ``hasattr(obj, "__html__")``
            correctly report the hook as absent instead of receiving a
            non-callable ``None``.
        """
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            )
        return None
10 changes: 3 additions & 7 deletions dataprep/eda/correlation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import pandas as pd

from ..progress_bar import ProgressBar
from ..report import Report
from .compute import compute_correlation
from .render import render_correlation
from ..container import Container
Expand All @@ -24,7 +23,7 @@ def plot_correlation(
value_range: Optional[Tuple[float, float]] = None,
k: Optional[int] = None,
progress: bool = True,
) -> Union[Report, Container]:
) -> Container:
"""
This function is designed to calculate the correlation between columns
There are three functions: plot_correlation(df), plot_correlation(df, x)
Expand Down Expand Up @@ -67,9 +66,6 @@ def plot_correlation(
"""
with ProgressBar(minimum=1, disable=not progress):
itmdt = compute_correlation(df, x=x, y=y, value_range=value_range, k=k)
fig = render_correlation(itmdt)
to_render = render_correlation(itmdt)

if itmdt.visual_type == "correlation_impact" or "_column" in itmdt.visual_type:
return Container(fig, itmdt.visual_type)
else:
return Report(fig)
return Container(to_render, itmdt.visual_type)
28 changes: 18 additions & 10 deletions dataprep/eda/correlation/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@

def render_correlation(
itmdt: Intermediate,
plot_width: int = 500,
plot_height: int = 500,
plot_width: int = 400,
plot_height: int = 400,
palette: Optional[Sequence[str]] = None,
) -> Figure:
"""
Expand Down Expand Up @@ -154,6 +154,7 @@ def render_correlation_impact(
toolbar_location=None,
tooltips=tooltips,
background_fill_color="#fafafa",
title=" ",
)

tweak_figure(fig)
Expand All @@ -167,8 +168,8 @@ def render_correlation_impact(
fill_color={"field": "correlation", "transform": mapper},
line_color=None,
)

fig.add_layout(color_bar, "right")
fig.frame_width = plot_width
fig.add_layout(color_bar, "left")
tab = Panel(child=fig, title=method)
tabs.append(tab)

Expand All @@ -177,6 +178,7 @@ def render_correlation_impact(
"tabledata": itmdt["tabledata"],
"layout": [panel.child for panel in tabs],
"meta": [panel.title for panel in tabs],
"container_width": plot_width + 150,
}


Expand Down Expand Up @@ -233,7 +235,7 @@ def render_correlation_heatmaps(

def render_correlation_single_heatmaps(
itmdt: Intermediate, plot_width: int, plot_height: int, palette: Sequence[str]
) -> Tabs:
) -> Dict[str, Any]:
"""
Render correlation heatmaps, but with single column
"""
Expand Down Expand Up @@ -272,8 +274,11 @@ def render_correlation_single_heatmaps(
tab = Panel(child=fig, title=method)
tabs.append(tab)

tabs = Tabs(tabs=tabs)
return tabs
return {
"layout": [panel.child for panel in tabs],
"meta": [panel.title for panel in tabs],
"container_width": plot_width,
}


def create_color_mapper(palette: Sequence[str]) -> Tuple[LinearColorMapper, ColorBar]:
Expand All @@ -296,7 +301,7 @@ def create_color_mapper(palette: Sequence[str]) -> Tuple[LinearColorMapper, Colo
######### Scatter #########
def render_scatter(
itmdt: Intermediate, plot_width: int, plot_height: int, palette: Sequence[str]
) -> Figure:
) -> Dict[str, Any]:
"""
Render a scatter plot with a regression line and, possibly, the most influential points
"""
Expand All @@ -312,7 +317,6 @@ def render_scatter(
plot_width=plot_width,
plot_height=plot_height,
toolbar_location=None,
title=Title(text="Scatter Plot & Regression Line", align="center"),
tools=[],
x_axis_label=xcol,
y_axis_label=ycol,
Expand Down Expand Up @@ -350,7 +354,11 @@ def render_scatter(
)

fig.add_layout(legend, place="right")
return fig
return {
"layout": [fig],
"meta": ["Scatter Plot & Regression Line"],
"container_width": plot_width,
}


######### Interactions for report #########
Expand Down
2 changes: 1 addition & 1 deletion dataprep/eda/create_report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bokeh.resources import INLINE
from jinja2 import Environment, PackageLoader
from .formatter import format_report
from .io import Report
from .report import Report

__all__ = ["create_report"]

Expand Down
8 changes: 5 additions & 3 deletions dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,13 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
res["has_missing"] = True
itmdt = completions["miss"](data["miss"])

rndrd = render_missing(itmdt)["layout"]
rndrd = render_missing(itmdt)
figs.clear()
for fig in rndrd:
for fig in rndrd["layout"]:
fig.sizing_mode = "stretch_width"
fig.title = Title(text=tab.title, align="center", text_font_size="20px")
fig.title = Title(
text=rndrd["meta"][rndrd["layout"].index(fig)], align="center", text_font_size="20px"
)
figs.append(fig)
res["missing"] = components(figs)

Expand Down
File renamed without changes.
10 changes: 3 additions & 7 deletions dataprep/eda/distribution/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from ..container import Container
from ..dtypes import DTypeDef
from ..progress_bar import ProgressBar
from ..report import Report
from .compute import compute
from .render import render

Expand Down Expand Up @@ -39,7 +38,7 @@ def plot(
tile_size: Optional[float] = None,
dtype: Optional[DTypeDef] = None,
progress: bool = True,
) -> Union[Report, Container]:
) -> Container:
"""Generates plots for exploratory data analysis.
If no columns are specified, the distribution of
Expand Down Expand Up @@ -168,8 +167,5 @@ def plot(
value_range=value_range,
dtype=dtype,
)
figure = render(intermediate, yscale=yscale, tile_size=tile_size)
if intermediate.visual_type == "distribution_grid" or "_column" in intermediate.visual_type:
return Container(figure, intermediate.visual_type)
else:
return Report(figure)
to_render = render(intermediate, yscale=yscale, tile_size=tile_size)
return Container(to_render, intermediate.visual_type)

0 comments on commit 36f8fa3

Please sign in to comment.