Quick hack to make some Ibis tables work in show

mwouts committed on Jan 13, 2024
commit 6ed5160 · 1 parent a5d12fa
Show file tree

Hide file tree

Showing 7 changed files with 107 additions and 36 deletions.
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -5,7 +5,7 @@ ITables ChangeLog
 ----------------------
 
 **Added**
-- Added support for Ibis tables (#215)
+- Added support for Ibis tables ([#215](https://github.com/mwouts/itables/issues/215))
 
 
 1.6.3 (2023-12-10)

diff --git a/docs/polars_dataframes.md b/docs/polars_dataframes.md
@@ -21,7 +21,7 @@ dataframes are displayed nicely with the default `itables` settings.
 from itables import init_notebook_mode, show
 from itables.sample_dfs import get_dict_of_test_dfs
 
-dict_of_test_dfs = get_dict_of_test_dfs(polars=True)
+dict_of_test_dfs = get_dict_of_test_dfs(type="polars")
 init_notebook_mode(all_interactive=True)
 ```
 

diff --git a/itables/downsample.py b/itables/downsample.py
@@ -3,6 +3,14 @@
 
 import pandas as pd
 
+try:
+    from ibis.common.exceptions import ExpressionError
+except ImportError:
+
+    class ExpressionError(Exception):
+        pass
+
+
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 
@@ -11,18 +19,37 @@ def nbytes(df):
     try:
         return sum(x.values.nbytes for _, x in df.items())
     except AttributeError:
-        # Polars DataFrame
-        return df.estimated_size()
+        try:
+            # Polars DataFrame
+            return df.estimated_size()
+        except AttributeError:
+            # Ibis Table
+            # TODO: find a more direct way to estimate the size of the table
+            nrows = df.count().execute()
+            if not nrows:
+                return 0
+            return nrows * (nbytes(df.head(5).to_pandas()) / min(nrows, 5))
+
+
+def nrows(df):
+    try:
+        return len(df)
+    except TypeError:
+        # Pandas Styler
+        return len(df.index)
+    except ExpressionError:
+        # ibis table
+        return df.count().execute()
 
 
 def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
     """Return a subset of the dataframe that fits the limits"""
-    org_rows, org_columns, org_bytes = len(df), len(df.columns), nbytes(df)
+    org_rows, org_columns, org_bytes = nrows(df), len(df.columns), nbytes(df)
     df = _downsample(
         df, max_rows=max_rows, max_columns=max_columns, max_bytes=max_bytes
     )
 
-    if len(df) < org_rows or len(df.columns) < org_columns:
+    if nrows(df) < org_rows or len(df.columns) < org_columns:
         link = '<a href="https://mwouts.github.io/itables/downsampling.html">downsampled</a>'
         reasons = []
         if org_rows > max_rows > 0:
@@ -76,7 +103,7 @@ def shrink_towards_target_aspect_ratio(
 
 def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=None):
     """Implementation of downsample - may be called recursively"""
-    if len(df) > max_rows > 0:
+    if nrows(df) > max_rows > 0:
         second_half = max_rows // 2
         first_half = max_rows - second_half
         if second_half:
@@ -134,6 +161,10 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
             import polars as pl  # noqa
 
             df = pl.DataFrame({df.columns[0]: ["..."]})
-        return df
 
-    return df
+    try:
+        len(df)
+        return df
+    except ExpressionError:
+        # Ibis
+        return df.to_pandas()
diff --git a/itables/javascript.py b/itables/javascript.py
@@ -11,6 +11,8 @@
 import numpy as np
 import pandas as pd
 
+from .downsample import nrows
+
 try:
     import pandas.io.formats.style as pd_style
 except ImportError:
@@ -22,6 +24,12 @@
     # Define pl.Series as pd.Series
     import pandas as pl
 
+try:
+    import ibis.expr.types.relations as ibis_relations
+except ImportError:
+    ibis_relations = None
+
+
 from IPython.display import HTML, Javascript, display
 
 import itables.options as opt
@@ -102,6 +110,9 @@ def init_notebook_mode(
             pd_style.Styler._repr_html_ = _datatables_repr_
         pl.DataFrame._repr_html_ = _datatables_repr_
         pl.Series._repr_html_ = _datatables_repr_
+        pl.DataFrame._repr_html_ = _datatables_repr_
+        if ibis_relations is not None:
+            ibis_relations.Table._repr_html_ = _datatables_repr_
     else:
         pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML
         if pd_style is not None:
@@ -111,6 +122,9 @@ def init_notebook_mode(
             del pd.Series._repr_html_
         if hasattr(pl.Series, "_repr_html_"):
             del pl.Series._repr_html_
+        if ibis_relations is not None:
+            if hasattr(ibis_relations.Table, "_repr_html_"):
+                del ibis_relations.Table._repr_html_
 
     if not connected:
         display(Javascript(read_package_file("external/jquery.min.js")))
@@ -601,12 +615,7 @@ def _min_rows(kwargs):
 
 def _df_fits_in_one_page(df, kwargs):
     """Display just the table (not the search box, etc...) if the rows fit on one 'page'"""
-    try:
-        # Pandas DF or Style
-        return len(df.index) <= _min_rows(kwargs)
-    except AttributeError:
-        # Polars
-        return len(df) <= _min_rows(kwargs)
+    return nrows(df) <= _min_rows(kwargs)
 
 
 def safe_reset_index(df):

diff --git a/itables/sample_dfs.py b/itables/sample_dfs.py
@@ -1,12 +1,7 @@
 import math
 import string
 from datetime import datetime, timedelta
-
-try:
-    from functools import lru_cache
-except ImportError:
-    from functools32 import lru_cache
-
+from functools import lru_cache
 from itertools import cycle
 
 import numpy as np
@@ -105,7 +100,7 @@ def get_df_complex_index():
     return df
 
 
-def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
+def get_dict_of_test_dfs(N=100, M=100, type="pandas"):
     NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M))
 
     test_dfs = {
@@ -266,8 +261,10 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
             }
         ),
     }
+    if type == "pandas":
+        return test_dfs
 
-    if polars:
+    if type == "polars":
         import polars as pl
         import pyarrow as pa
 
@@ -279,23 +276,42 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
                 pass
         return polars_dfs
 
-    if ibis:
-        import ibis as ib
+    if type == "ibis_memtable":
+        import ibis
 
-        con = ib.pandas.connect(test_dfs)
         ibis_dfs = {}
-        for key in test_dfs:
+        for key, df in test_dfs.items():
+            # Ibis does not support tables with no columns
+            if not len(df.columns):
+                continue
+            try:
+                ibis_dfs[key] = ibis.memtable(df, name=key)
+            except (TypeError, ibis.common.exceptions.IbisInputError):
+                pass
+
+        return ibis_dfs
+
+    if type == "ibis_connect":
+        import ibis
+
+        con = ibis.pandas.connect(test_dfs)
+        ibis_dfs = {}
+        for key, df in test_dfs.items():
+            # Ibis does not support tables with no columns
+            if not len(df.columns):
+                continue
+
             try:
-                ibis_dfs[key] = con.table(key)
+                ibis_dfs[f"{key}_connect"] = con.table(key)
             except (TypeError, AttributeError):
                 pass
 
         return ibis_dfs
 
-    return test_dfs
+    raise NotImplementedError(type)
 
 
-def get_dict_of_test_series(polars=False):
+def get_dict_of_test_series(type="pandas"):
     series = {}
     for df_name, df in get_dict_of_test_dfs().items():
         if len(df.columns) > 6:
@@ -306,7 +322,10 @@ def get_dict_of_test_series(polars=False):
                 continue
             series["{}.{}".format(df_name, col)] = df[col]
 
-    if polars:
+    if type == "pandas":
+        return series
+
+    if type == "polars":
         import polars as pl
         import pyarrow as pa
 
@@ -325,7 +344,7 @@ def get_dict_of_test_series(polars=False):
 
         return polars_series
 
-    return series
+    raise NotImplementedError(type)
 
 
 @lru_cache()

diff --git a/tests/test_ibis.py b/tests/test_ibis.py
@@ -8,9 +8,21 @@
 except ImportError as e:
     pytest.skip(str(e), allow_module_level=True)
 
+# TODO Remove this (and find out how to evaluate count)
+ibis.options.interactive = True
+
+
+@pytest.mark.parametrize(
+    "name,df",
+    [(name, df) for name, df in get_dict_of_test_dfs(type="ibis_memtable").items()],
+)
+def test_show_ibis_memtable(name, df, use_to_html):
+    to_html_datatable(df, use_to_html)
+
 
 @pytest.mark.parametrize(
-    "name,df", [(name, df) for name, df in get_dict_of_test_dfs(ibis=True).items()]
+    "name,df",
+    [(name, df) for name, df in get_dict_of_test_dfs(type="ibis_connect").items()],
 )
-def test_show_ibis_df(name, df, use_to_html):
+def test_show_ibis_connect(name, df, use_to_html):
     to_html_datatable(df, use_to_html)
diff --git a/tests/test_polars.py b/tests/test_polars.py
@@ -10,14 +10,14 @@
 
 
 @pytest.mark.parametrize(
-    "name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()]
+    "name,x", [(name, x) for name, x in get_dict_of_test_series(type="polars").items()]
 )
 def test_show_polars_series(name, x, use_to_html):
     to_html_datatable(x, use_to_html)
 
 
 @pytest.mark.parametrize(
-    "name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()]
+    "name,df", [(name, df) for name, df in get_dict_of_test_dfs(type="polars").items()]
 )
 def test_show_polars_df(name, df, use_to_html):
     to_html_datatable(df, use_to_html)