Skip to content

Commit

Permalink
feat(eda.plot): Redesigned layout for plot(df, x)
Browse files Browse the repository at this point in the history
  • Loading branch information
eutialia committed Sep 11, 2020
1 parent d878b85 commit 04c7fd5
Show file tree
Hide file tree
Showing 16 changed files with 563 additions and 396 deletions.
Binary file modified assets/plot(df).gif
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/plot(df,x)_cat.gif
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/plot(df,x)_num.gif
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 35 additions & 24 deletions dataprep/eda/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import sys
import webbrowser
import random
from tempfile import NamedTemporaryFile
from typing import List, Dict, Union, Tuple
from typing import Any, Dict
from bokeh.io import output_notebook
from bokeh.embed import components
from bokeh.models import LayoutDOM
from bokeh.resources import INLINE
from jinja2 import Environment, PackageLoader
from .utils import is_notebook
Expand All @@ -25,28 +25,39 @@ class Container:
This class creates a customized Container object for the plot(df) function.
"""

def __init__(
self,
to_render: Dict[
str,
Union[
List[str],
List[LayoutDOM],
Tuple[Dict[str, str], Dict[str, str]],
Dict[int, List[str]],
],
],
) -> None:
self.context = {
"resources": INLINE.render(),
"components": components(to_render["layout"]),
"tabledata": to_render["tabledata"],
"overview_insights": to_render["overview_insights"],
"column_insights": to_render["column_insights"],
"meta": to_render["meta"],
"title": "DataPrep.EDA Report",
}
self.template_base = ENV_LOADER.get_template("base.html")
def __init__(self, to_render: Dict[str, Any], visual_type: str) -> None:
    """
    Build the template context and select the base template for a visual type.

    Parameters
    ----------
    to_render
        Rendered output to display. Must provide "layout", "tabledata" and
        "meta"; additionally "overview_insights" and "column_insights" for
        the distribution grid, or "insights" for a single-column view.
        This mapping and its values are not modified.
    visual_type
        Either "distribution_grid" or any name containing "_column".

    Raises
    ------
    TypeError
        If ``visual_type`` is neither of the supported kinds.
    """
    if visual_type == "distribution_grid":
        self.context = {
            "resources": INLINE.render(),
            "components": components(to_render["layout"]),
            "tabledata": to_render["tabledata"],
            "overview_insights": to_render["overview_insights"],
            "column_insights": to_render["column_insights"],
            "meta": to_render["meta"],
            "title": "DataPrep.EDA Report",
            # for multiple cells running in the same notebook
            "rnd": random.randint(0, 99),
        }
        self.template_base = ENV_LOADER.get_template("grid_base.html")

    elif "_column" in visual_type:
        # todo: param management
        # Prepend "Stats" on a fresh list rather than inserting into the
        # caller's list, so building another Container from the same
        # ``to_render`` does not accumulate duplicate "Stats" entries.
        meta = ["Stats", *to_render["meta"]]
        self.context = {
            "resources": INLINE.render(),
            "tabledata": to_render["tabledata"],
            "insights": to_render["insights"],
            "components": components(to_render["layout"]),
            "meta": meta,
            "title": "DataPrep.EDA Report",
            # for multiple cells running in the same notebook
            "rnd": random.randint(100, 999),
        }
        self.template_base = ENV_LOADER.get_template("univariate_base.html")
    else:
        raise TypeError(f"Unsupported Visual Type: {visual_type}.")

def save(self, filename: str) -> None:
"""
Expand Down
14 changes: 9 additions & 5 deletions dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def format_report(


def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
# pylint: disable=too-many-statements
"""
Format basic version.
Expand Down Expand Up @@ -104,20 +105,23 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
itmdt = Intermediate(
col=col, data=data[col], visual_type="numerical_column"
)
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
stats = format_num_stats(data[col])
elif is_dtype(detect_dtype(df[col]), Nominal()):
itmdt = Intermediate(
col=col, data=data[col], visual_type="categorical_column"
)
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
stats = format_cat_stats(
data[col]["stats"], data[col]["len_stats"], data[col]["letter_stats"]
)
figs: List[Figure] = []
for tab in rndrd.tabs[1:]:
fig = tab.child.children[0]
fig.title = Title(text=tab.title, align="center")
for tab in rndrd:
try:
fig = tab.children[0]
except AttributeError:
fig = tab
# fig.title = Title(text=tab.title, align="center")
figs.append(fig)
res["variables"][col] = {
"tabledata": stats,
Expand Down
10 changes: 5 additions & 5 deletions dataprep/eda/create_report/templates/styles.html
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@
margin-left: auto;
margin-right: auto;
margin-bottom: 70px;
max-width: 1200px;
max-width: 1320px;
}

.section-variable p {
Expand Down Expand Up @@ -377,7 +377,7 @@
margin: unset !important;
}

@media screen and (max-width: 1024px) {
@media screen and (max-width: 1320px) {
h1.tb-title {
max-width: 850px;
}
Expand All @@ -387,19 +387,19 @@
}

.section {
max-width: 850px;
max-width: 975px;
}

.var-container>.var-title {
flex: 2 1 10%;
}

.var-container>.tb-container {
flex: 2 1 350px;
flex: 2 1 400px;
}

.var-toggle {
width: 70px;
width: 100px;
}

.vp-plot-categorical {
Expand Down
12 changes: 6 additions & 6 deletions dataprep/eda/create_report/templates/variables.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ <h2 class="tb-title">{{ key }}</h2>
<div class="tb-container">
<div class="tb-{{ value.col_type }}">
<table class="rp-table">
{% for h, d in value.tabledata[0].items() %}
{% for h, d in value.tabledata['Overview'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand Down Expand Up @@ -49,7 +49,7 @@ <h2 class="tb-title">{{ key }}</h2>
<div>
<h4 class="tb-title">Quantile Statistics</h4>
<table class="rp-table">
{% for h, d in value.tabledata[1].items() %}
{% for h, d in value.tabledata['Quantile Statistics'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand All @@ -60,7 +60,7 @@ <h4 class="tb-title">Quantile Statistics</h3>
<div>
<h4 class="tb-title">Descriptive Statistics</h4>
<table class="rp-table">
{% for h, d in value.tabledata[2].items() %}
{% for h, d in value.tabledata['Descriptive Statistics'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand All @@ -75,7 +75,7 @@ <h4 class="tb-title">Descriptive Statistics</h3>
<div>
<h4 class="tb-title">Length</h4>
<table class="rp-table">
{% for h, d in value.tabledata[1].items() %}
{% for h, d in value.tabledata['Length'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand All @@ -86,7 +86,7 @@ <h4 class="tb-title">Length</h3>
<div>
<h4 class="tb-title">Sample</h4>
<table class="rp-table">
{% for h, d in value.tabledata[2].items() %}
{% for h, d in value.tabledata['Sample'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand All @@ -97,7 +97,7 @@ <h4 class="tb-title">Sample</h3>
<div>
<h4 class="tb-title">Letter</h4>
<table class="rp-table">
{% for h, d in value.tabledata[3].items() %}
{% for h, d in value.tabledata['Letter'].items() %}
<tr>
<th>{{ h }}</th>
<td>{{ d }}</td>
Expand Down
7 changes: 5 additions & 2 deletions dataprep/eda/distribution/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ def plot(
dtype=dtype,
)
figure = render(intermediate, yscale=yscale, tile_size=tile_size)
if intermediate.visual_type == "distribution_grid":
return Container(figure)
if (
intermediate.visual_type == "distribution_grid"
or "_column" in intermediate.visual_type
):
return Container(figure, intermediate.visual_type)
else:
return Report(figure)
5 changes: 3 additions & 2 deletions dataprep/eda/distribution/compute/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def compute_overview(
stats=ov_stats,
column_insights=col_insights,
overview_insights=_insight_pagination(ov_insights),
ov_insights=ov_insights,
visual_type="distribution_grid",
)

Expand Down Expand Up @@ -446,9 +447,9 @@ def _insight_pagination(ins: List[Dict[str, str]]) -> Dict[int, List[Dict[str, s
# sort the insights based on the list ins_order
ins.sort(key=lambda x: ins_order.index(list(x.keys())[0]))
# paginate the sorted insights
page_count = int(np.ceil(len(ins) / 11))
page_count = int(np.ceil(len(ins) / 10))
paginated_ins: Dict[int, List[Dict[str, str]]] = {}
for i in range(1, page_count + 1):
paginated_ins[i] = ins[(i - 1) * 11 : i * 11]
paginated_ins[i] = ins[(i - 1) * 10 : i * 10]

return paginated_ins

0 comments on commit 04c7fd5

Please sign in to comment.