Skip to content

Commit

Permalink
Quick hack to make some Ibis tables work in show
Browse files Browse the repository at this point in the history
  • Loading branch information
mwouts committed Jan 13, 2024
1 parent a5d12fa commit 6ed5160
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 36 deletions.
2 changes: 1 addition & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ITables ChangeLog
----------------------

**Added**
- Added support for Ibis tables (#215)
- Added support for Ibis tables ([#215](https://github.com/mwouts/itables/issues/215))


1.6.3 (2023-12-10)
Expand Down
2 changes: 1 addition & 1 deletion docs/polars_dataframes.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dataframes are displayed nicely with the default `itables` settings.
from itables import init_notebook_mode, show
from itables.sample_dfs import get_dict_of_test_dfs
dict_of_test_dfs = get_dict_of_test_dfs(polars=True)
dict_of_test_dfs = get_dict_of_test_dfs(type="polars")
init_notebook_mode(all_interactive=True)
```

Expand Down
45 changes: 38 additions & 7 deletions itables/downsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

import pandas as pd

try:
    from ibis.common.exceptions import ExpressionError
except ImportError:
    # Ibis could not be imported: define a stand-in so that
    # ``except ExpressionError`` clauses in this module remain valid.

    class ExpressionError(Exception):
        """Fallback exception type used when ibis cannot be imported."""


logging.basicConfig()
logger = logging.getLogger(__name__)

Expand All @@ -11,18 +19,37 @@ def nbytes(df):
try:
return sum(x.values.nbytes for _, x in df.items())
except AttributeError:
# Polars DataFrame
return df.estimated_size()
try:
# Polars DataFrame
return df.estimated_size()
except AttributeError:
# Ibis Table
# TODO: find a more direct way to estimate the size of the table
nrows = df.count().execute()
if not nrows:
return 0
return nrows * (nbytes(df.head(5).to_pandas()) / min(nrows, 5))


def nrows(df):
    """Return the number of rows of ``df``.

    ``len(df)`` is tried first. Two fallbacks are provided:

    - ``TypeError``: use ``len(df.index)`` (e.g. a Pandas Styler).
    - ``ExpressionError``: use ``df.count().execute()`` (e.g. an Ibis table).
    """
    try:
        row_count = len(df)
    except TypeError:
        # Pandas Styler
        row_count = len(df.index)
    except ExpressionError:
        # Ibis table
        row_count = df.count().execute()
    return row_count


def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
"""Return a subset of the dataframe that fits the limits"""
org_rows, org_columns, org_bytes = len(df), len(df.columns), nbytes(df)
org_rows, org_columns, org_bytes = nrows(df), len(df.columns), nbytes(df)
df = _downsample(
df, max_rows=max_rows, max_columns=max_columns, max_bytes=max_bytes
)

if len(df) < org_rows or len(df.columns) < org_columns:
if nrows(df) < org_rows or len(df.columns) < org_columns:
link = '<a href="https://mwouts.github.io/itables/downsampling.html">downsampled</a>'
reasons = []
if org_rows > max_rows > 0:
Expand Down Expand Up @@ -76,7 +103,7 @@ def shrink_towards_target_aspect_ratio(

def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=None):
"""Implementation of downsample - may be called recursively"""
if len(df) > max_rows > 0:
if nrows(df) > max_rows > 0:
second_half = max_rows // 2
first_half = max_rows - second_half
if second_half:
Expand Down Expand Up @@ -134,6 +161,10 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
import polars as pl # noqa

df = pl.DataFrame({df.columns[0]: ["..."]})
return df

return df
try:
len(df)
return df
except ExpressionError:
# Ibis
return df.to_pandas()
21 changes: 15 additions & 6 deletions itables/javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import numpy as np
import pandas as pd

from .downsample import nrows

try:
import pandas.io.formats.style as pd_style
except ImportError:
Expand All @@ -22,6 +24,12 @@
# Define pl.Series as pd.Series
import pandas as pl

try:
import ibis.expr.types.relations as ibis_relations
except ImportError:
ibis_relations = None


from IPython.display import HTML, Javascript, display

import itables.options as opt
Expand Down Expand Up @@ -102,6 +110,9 @@ def init_notebook_mode(
pd_style.Styler._repr_html_ = _datatables_repr_
pl.DataFrame._repr_html_ = _datatables_repr_
pl.Series._repr_html_ = _datatables_repr_
pl.DataFrame._repr_html_ = _datatables_repr_
if ibis_relations is not None:
ibis_relations.Table._repr_html_ = _datatables_repr_
else:
pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML
if pd_style is not None:
Expand All @@ -111,6 +122,9 @@ def init_notebook_mode(
del pd.Series._repr_html_
if hasattr(pl.Series, "_repr_html_"):
del pl.Series._repr_html_
if ibis_relations is not None:
if hasattr(ibis_relations.Table, "_repr_html_"):
del ibis_relations.Table._repr_html_

if not connected:
display(Javascript(read_package_file("external/jquery.min.js")))
Expand Down Expand Up @@ -601,12 +615,7 @@ def _min_rows(kwargs):

def _df_fits_in_one_page(df, kwargs):
"""Display just the table (not the search box, etc...) if the rows fit on one 'page'"""
try:
# Pandas DF or Style
return len(df.index) <= _min_rows(kwargs)
except AttributeError:
# Polars
return len(df) <= _min_rows(kwargs)
return nrows(df) <= _min_rows(kwargs)


def safe_reset_index(df):
Expand Down
53 changes: 36 additions & 17 deletions itables/sample_dfs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import math
import string
from datetime import datetime, timedelta

try:
from functools import lru_cache
except ImportError:
from functools32 import lru_cache

from functools import lru_cache
from itertools import cycle

import numpy as np
Expand Down Expand Up @@ -105,7 +100,7 @@ def get_df_complex_index():
return df


def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
def get_dict_of_test_dfs(N=100, M=100, type="pandas"):
NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M))

test_dfs = {
Expand Down Expand Up @@ -266,8 +261,10 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
}
),
}
if type == "pandas":
return test_dfs

if polars:
if type == "polars":
import polars as pl
import pyarrow as pa

Expand All @@ -279,23 +276,42 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False, ibis=False):
pass
return polars_dfs

if ibis:
import ibis as ib
if type == "ibis_memtable":
import ibis

con = ib.pandas.connect(test_dfs)
ibis_dfs = {}
for key in test_dfs:
for key, df in test_dfs.items():
# Ibis does not support tables with no columns
if not len(df.columns):
continue
try:
ibis_dfs[key] = ibis.memtable(df, name=key)
except (TypeError, ibis.common.exceptions.IbisInputError):
pass

return ibis_dfs

if type == "ibis_connect":
import ibis

con = ibis.pandas.connect(test_dfs)
ibis_dfs = {}
for key, df in test_dfs.items():
# Ibis does not support tables with no columns
if not len(df.columns):
continue

try:
ibis_dfs[key] = con.table(key)
ibis_dfs[f"{key}_connect"] = con.table(key)
except (TypeError, AttributeError):
pass

return ibis_dfs

return test_dfs
raise NotImplementedError(type)


def get_dict_of_test_series(polars=False):
def get_dict_of_test_series(type="pandas"):
series = {}
for df_name, df in get_dict_of_test_dfs().items():
if len(df.columns) > 6:
Expand All @@ -306,7 +322,10 @@ def get_dict_of_test_series(polars=False):
continue
series["{}.{}".format(df_name, col)] = df[col]

if polars:
if type == "pandas":
return series

if type == "polars":
import polars as pl
import pyarrow as pa

Expand All @@ -325,7 +344,7 @@ def get_dict_of_test_series(polars=False):

return polars_series

return series
raise NotImplementedError(type)


@lru_cache()
Expand Down
16 changes: 14 additions & 2 deletions tests/test_ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,21 @@
except ImportError as e:
pytest.skip(str(e), allow_module_level=True)

# TODO: Remove this once we know how to evaluate count() without interactive mode
ibis.options.interactive = True


@pytest.mark.parametrize(
    "name,df",
    list(get_dict_of_test_dfs(type="ibis_memtable").items()),
)
def test_show_ibis_memtable(name, df, use_to_html):
    """Check that ``to_html_datatable`` runs on each "ibis_memtable" sample table."""
    to_html_datatable(df, use_to_html)


@pytest.mark.parametrize(
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(ibis=True).items()]
"name,df",
[(name, df) for name, df in get_dict_of_test_dfs(type="ibis_connect").items()],
)
def test_show_ibis_df(name, df, use_to_html):
def test_show_ibis_connect(name, df, use_to_html):
to_html_datatable(df, use_to_html)
4 changes: 2 additions & 2 deletions tests/test_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@


@pytest.mark.parametrize(
"name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()]
"name,x", [(name, x) for name, x in get_dict_of_test_series(type="polars").items()]
)
def test_show_polars_series(name, x, use_to_html):
to_html_datatable(x, use_to_html)


@pytest.mark.parametrize(
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()]
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(type="polars").items()]
)
def test_show_polars_df(name, df, use_to_html):
to_html_datatable(df, use_to_html)

0 comments on commit 6ed5160

Please sign in to comment.