feat(eda.plot): Redesigned layout for plot(df, x)

sfu-db · Sep 11, 2020 · 04c7fd5
1 parent d878b85
commit 04c7fd5
Show file tree

Hide file tree

Showing 16 changed files with 563 additions and 396 deletions.
diff --git a/assets/plot(df).gif b/assets/plot(df).gif
diff --git a/assets/plot(df,x)_cat.gif b/assets/plot(df,x)_cat.gif
diff --git a/assets/plot(df,x)_num.gif b/assets/plot(df,x)_num.gif
diff --git a/dataprep/eda/container.py b/dataprep/eda/container.py
@@ -4,11 +4,11 @@
 
 import sys
 import webbrowser
+import random
 from tempfile import NamedTemporaryFile
-from typing import List, Dict, Union, Tuple
+from typing import Any, Dict
 from bokeh.io import output_notebook
 from bokeh.embed import components
-from bokeh.models import LayoutDOM
 from bokeh.resources import INLINE
 from jinja2 import Environment, PackageLoader
 from .utils import is_notebook
@@ -25,28 +25,39 @@ class Container:
     This class creates a customized Container object for the plot(df) function.
     """
 
-    def __init__(
-        self,
-        to_render: Dict[
-            str,
-            Union[
-                List[str],
-                List[LayoutDOM],
-                Tuple[Dict[str, str], Dict[str, str]],
-                Dict[int, List[str]],
-            ],
-        ],
-    ) -> None:
-        self.context = {
-            "resources": INLINE.render(),
-            "components": components(to_render["layout"]),
-            "tabledata": to_render["tabledata"],
-            "overview_insights": to_render["overview_insights"],
-            "column_insights": to_render["column_insights"],
-            "meta": to_render["meta"],
-            "title": "DataPrep.EDA Report",
-        }
-        self.template_base = ENV_LOADER.get_template("base.html")
+    def __init__(self, to_render: Dict[str, Any], visual_type: str,) -> None:
+        if visual_type == "distribution_grid":
+            self.context = {
+                "resources": INLINE.render(),
+                "components": components(to_render["layout"]),
+                "tabledata": to_render["tabledata"],
+                "overview_insights": to_render["overview_insights"],
+                "column_insights": to_render["column_insights"],
+                "meta": to_render["meta"],
+                "title": "DataPrep.EDA Report",
+                "rnd": random.randint(
+                    0, 99
+                ),  # for multiple cells running in the same notebook
+            }
+            self.template_base = ENV_LOADER.get_template("grid_base.html")
+
+        elif "_column" in visual_type:
+            # TODO: param management; insert() below mutates the caller's "meta" list
+            to_render["meta"].insert(0, "Stats")
+            self.context = {
+                "resources": INLINE.render(),
+                "tabledata": to_render["tabledata"],
+                "insights": to_render["insights"],
+                "components": components(to_render["layout"]),
+                "meta": to_render["meta"],
+                "title": "DataPrep.EDA Report",
+                "rnd": random.randint(
+                    100, 999
+                ),  # for multiple cells running in the same notebook
+            }
+            self.template_base = ENV_LOADER.get_template("univariate_base.html")
+        else:
+            raise TypeError(f"Unsupported Visual Type: {visual_type}.")
 
     def save(self, filename: str) -> None:
         """

diff --git a/dataprep/eda/create_report/formatter.py b/dataprep/eda/create_report/formatter.py
@@ -63,6 +63,7 @@ def format_report(
 
 
 def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
+    # pylint: disable=too-many-statements
     """
     Format basic version.
 
@@ -104,20 +105,23 @@ def format_basic(df: dd.DataFrame) -> Dict[str, Any]:
             itmdt = Intermediate(
                 col=col, data=data[col], visual_type="numerical_column"
             )
-            rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
+            rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
             stats = format_num_stats(data[col])
         elif is_dtype(detect_dtype(df[col]), Nominal()):
             itmdt = Intermediate(
                 col=col, data=data[col], visual_type="categorical_column"
             )
-            rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)
+            rndrd = render(itmdt, plot_height_lrg=250, plot_width_lrg=280)["layout"]
             stats = format_cat_stats(
                 data[col]["stats"], data[col]["len_stats"], data[col]["letter_stats"]
             )
         figs: List[Figure] = []
-        for tab in rndrd.tabs[1:]:
-            fig = tab.child.children[0]
-            fig.title = Title(text=tab.title, align="center")
+        for tab in rndrd:
+            try:
+                fig = tab.children[0]
+            except AttributeError:
+                fig = tab
+            # fig.title = Title(text=tab.title, align="center")
             figs.append(fig)
         res["variables"][col] = {
             "tabledata": stats,

diff --git a/dataprep/eda/create_report/templates/styles.html b/dataprep/eda/create_report/templates/styles.html
@@ -160,7 +160,7 @@
         margin-left: auto;
         margin-right: auto;
         margin-bottom: 70px;
-        max-width: 1200px;
+        max-width: 1320px;
     }
 
     .section-variable p {
@@ -377,7 +377,7 @@
         margin: unset !important;
     }
 
-    @media screen and (max-width: 1024px) {
+    @media screen and (max-width: 1320px) {
         h1.tb-title {
             max-width: 850px;
         }
@@ -387,19 +387,19 @@
         }
 
         .section {
-            max-width: 850px;
+            max-width: 975px;
         }
 
         .var-container>.var-title {
             flex: 2 1 10%;
         }
 
         .var-container>.tb-container {
-            flex: 2 1 350px;
+            flex: 2 1 400px;
         }
 
         .var-toggle {
-            width: 70px;
+            width: 100px;
         }
 
         .vp-plot-categorical {

diff --git a/dataprep/eda/create_report/templates/variables.html b/dataprep/eda/create_report/templates/variables.html
@@ -20,7 +20,7 @@ <h2 class="tb-title">{{ key }}</h2>
                 <div class="tb-container">
                     <div class="tb-{{ value.col_type }}">
                         <table class="rp-table">
-                            {% for h, d in value.tabledata[0].items() %}
+                            {% for h, d in value.tabledata['Overview'].items() %}
                             <tr>
                                 <th>{{ h }}</th>
                                 <td>{{ d }}</td>
@@ -49,7 +49,7 @@ <h2 class="tb-title">{{ key }}</h2>
                     <div>
                         <h4 class="tb-title">Quantile Statistics</h3>
                             <table class="rp-table">
-                                {% for h, d in value.tabledata[1].items() %}
+                                {% for h, d in value.tabledata['Quantile Statistics'].items() %}
                                 <tr>
                                     <th>{{ h }}</th>
                                     <td>{{ d }}</td>
@@ -60,7 +60,7 @@ <h4 class="tb-title">Quantile Statistics</h3>
                     <div>
                         <h4 class="tb-title">Descriptive Statistics</h3>
                             <table class="rp-table">
-                                {% for h, d in value.tabledata[2].items() %}
+                                {% for h, d in value.tabledata['Descriptive Statistics'].items() %}
                                 <tr>
                                     <th>{{ h }}</th>
                                     <td>{{ d }}</td>
@@ -75,7 +75,7 @@ <h4 class="tb-title">Descriptive Statistics</h3>
                     <div>
                         <h4 class="tb-title">Length</h3>
                             <table class="rp-table">
-                                {% for h, d in value.tabledata[1].items() %}
+                                {% for h, d in value.tabledata['Length'].items() %}
                                 <tr>
                                     <th>{{ h }}</th>
                                     <td>{{ d }}</td>
@@ -86,7 +86,7 @@ <h4 class="tb-title">Length</h3>
                     <div>
                         <h4 class="tb-title">Sample</h3>
                             <table class="rp-table">
-                                {% for h, d in value.tabledata[2].items() %}
+                                {% for h, d in value.tabledata['Sample'].items() %}
                                 <tr>
                                     <th>{{ h }}</th>
                                     <td>{{ d }}</td>
@@ -97,7 +97,7 @@ <h4 class="tb-title">Sample</h3>
                     <div>
                         <h4 class="tb-title">Letter</h3>
                             <table class="rp-table">
-                                {% for h, d in value.tabledata[3].items() %}
+                                {% for h, d in value.tabledata['Letter'].items() %}
                                 <tr>
                                     <th>{{ h }}</th>
                                     <td>{{ d }}</td>

diff --git a/dataprep/eda/distribution/__init__.py b/dataprep/eda/distribution/__init__.py
@@ -169,7 +169,10 @@ def plot(
             dtype=dtype,
         )
     figure = render(intermediate, yscale=yscale, tile_size=tile_size)
-    if intermediate.visual_type == "distribution_grid":
-        return Container(figure)
+    if (
+        intermediate.visual_type == "distribution_grid"
+        or "_column" in intermediate.visual_type
+    ):
+        return Container(figure, intermediate.visual_type)
     else:
         return Report(figure)
diff --git a/dataprep/eda/distribution/compute/overview.py b/dataprep/eda/distribution/compute/overview.py
@@ -121,6 +121,7 @@ def compute_overview(
         stats=ov_stats,
         column_insights=col_insights,
         overview_insights=_insight_pagination(ov_insights),
+        ov_insights=ov_insights,
         visual_type="distribution_grid",
     )
 
@@ -446,9 +447,9 @@ def _insight_pagination(ins: List[Dict[str, str]]) -> Dict[int, List[Dict[str, s
     # sort the insights based on the list ins_order
     ins.sort(key=lambda x: ins_order.index(list(x.keys())[0]))
     # paginate the sorted insights
-    page_count = int(np.ceil(len(ins) / 11))
+    page_count = int(np.ceil(len(ins) / 10))
     paginated_ins: Dict[int, List[Dict[str, str]]] = {}
     for i in range(1, page_count + 1):
-        paginated_ins[i] = ins[(i - 1) * 11 : i * 11]
+        paginated_ins[i] = ins[(i - 1) * 10 : i * 10]
 
     return paginated_ins