Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Ibis #217

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ jobs:
- python-version: "3.11"
pandas-version: pre
polars: true
- python-version: "3.11"
ibis: true
- python-version: "3.11"
uninstall_jinja2: true
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -85,6 +87,10 @@ jobs:
if: matrix.polars
run: pip install -e .[polars]

- name: Install Ibis
if: matrix.ibis
run: pip install -e .[ibis]

- name: Uninstall jinja2
if: matrix.uninstall_jinja2
run: pip uninstall jinja2 -y
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
ITables ChangeLog
=================

1.7.0-dev (2024-01-??)
----------------------

**Added**
- Added support for Ibis tables ([#215](https://github.com/mwouts/itables/issues/215))


1.6.3 (2023-12-10)
------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/polars_dataframes.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dataframes are displayed nicely with the default `itables` settings.
from itables import init_notebook_mode, show
from itables.sample_dfs import get_dict_of_test_dfs

dict_of_test_dfs = get_dict_of_test_dfs(polars=True)
dict_of_test_dfs = get_dict_of_test_dfs(type="polars")
init_notebook_mode(all_interactive=True)
```

Expand Down
10 changes: 3 additions & 7 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,17 @@ dependencies:
- python
- jupyter
- jupyterlab
- jupytext>=1.13.8
- markdown-it-py>=2.0
- jupytext
- nbconvert
- ipykernel
- pandas
- polars
- pyarrow
- ibis-duckdb
- pytest
- pytest-xdist
- pytest-cov
- pre-commit
- pylint
- flake8
- black
- isort
- pip
- setuptools
- twine
Expand All @@ -29,4 +25,4 @@ dependencies:
- shiny
- pip:
- world_bank_data
- jupyter_book>=0.12 # jupyter-book-0.12.2-pyhd8ed1ab_0 requires jupytext >=1.11.2,<1.12
- jupyter_book
17 changes: 0 additions & 17 deletions environment2.yml

This file was deleted.

45 changes: 38 additions & 7 deletions itables/downsample.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@

import pandas as pd

# Ibis is an optional dependency: use its ExpressionError when available,
# otherwise define a stand-in so that `except ExpressionError` clauses in
# this module remain valid.
try:
    from ibis.common.exceptions import ExpressionError
except ImportError:

    class ExpressionError(Exception):
        """Stand-in for ibis' ExpressionError when ibis cannot be imported."""


logging.basicConfig()
logger = logging.getLogger(__name__)

Expand All @@ -11,18 +19,37 @@ def nbytes(df):
try:
return sum(x.values.nbytes for _, x in df.items())
except AttributeError:
# Polars DataFrame
return df.estimated_size()
try:
# Polars DataFrame
return df.estimated_size()
except AttributeError:
# Ibis Table
# TODO: find a more direct way to estimate the size of the table
nrows = df.count().execute()
if not nrows:
return 0
return nrows * (nbytes(df.head(5).to_pandas()) / min(nrows, 5))


def nrows(df):
    """Return the number of rows of a table-like object.

    ``len(df)`` is tried first. If it raises ``TypeError`` (the case of a
    Pandas Styler), the length of ``df.index`` is used instead. If it raises
    ``ExpressionError`` (the case of an Ibis table), the count is obtained
    by executing ``df.count()``.
    """
    try:
        row_count = len(df)
    except TypeError:
        # Pandas Styler
        row_count = len(df.index)
    except ExpressionError:
        # Ibis table
        row_count = df.count().execute()
    return row_count


def downsample(df, max_rows=0, max_columns=0, max_bytes=0):
"""Return a subset of the dataframe that fits the limits"""
org_rows, org_columns, org_bytes = len(df), len(df.columns), nbytes(df)
org_rows, org_columns, org_bytes = nrows(df), len(df.columns), nbytes(df)
df = _downsample(
df, max_rows=max_rows, max_columns=max_columns, max_bytes=max_bytes
)

if len(df) < org_rows or len(df.columns) < org_columns:
if nrows(df) < org_rows or len(df.columns) < org_columns:
link = '<a href="https://mwouts.github.io/itables/downsampling.html">downsampled</a>'
reasons = []
if org_rows > max_rows > 0:
Expand Down Expand Up @@ -76,7 +103,7 @@ def shrink_towards_target_aspect_ratio(

def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=None):
"""Implementation of downsample - may be called recursively"""
if len(df) > max_rows > 0:
if nrows(df) > max_rows > 0:
second_half = max_rows // 2
first_half = max_rows - second_half
if second_half:
Expand Down Expand Up @@ -134,6 +161,10 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
import polars as pl # noqa

df = pl.DataFrame({df.columns[0]: ["..."]})
return df

return df
try:
len(df)
return df
except ExpressionError:
# Ibis
return df.to_pandas()
20 changes: 14 additions & 6 deletions itables/javascript.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import numpy as np
import pandas as pd

from .downsample import nrows

try:
import pandas.io.formats.style as pd_style
except ImportError:
Expand All @@ -22,6 +24,12 @@
# Define pl.Series as pd.Series
import pandas as pl

try:
import ibis.expr.types.relations as ibis_relations
except ImportError:
ibis_relations = None


from IPython.display import HTML, Javascript, display

import itables.options as opt
Expand Down Expand Up @@ -102,6 +110,8 @@ def init_notebook_mode(
pd_style.Styler._repr_html_ = _datatables_repr_
pl.DataFrame._repr_html_ = _datatables_repr_
pl.Series._repr_html_ = _datatables_repr_
if ibis_relations is not None:
ibis_relations.Table._repr_html_ = _datatables_repr_
else:
pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML
if pd_style is not None:
Expand All @@ -111,6 +121,9 @@ def init_notebook_mode(
del pd.Series._repr_html_
if hasattr(pl.Series, "_repr_html_"):
del pl.Series._repr_html_
if ibis_relations is not None:
if hasattr(ibis_relations.Table, "_repr_html_"):
del ibis_relations.Table._repr_html_

if not connected:
display(Javascript(read_package_file("external/jquery.min.js")))
Expand Down Expand Up @@ -601,12 +614,7 @@ def _min_rows(kwargs):

def _df_fits_in_one_page(df, kwargs):
"""Display just the table (not the search box, etc...) if the rows fit on one 'page'"""
try:
# Pandas DF or Style
return len(df.index) <= _min_rows(kwargs)
except AttributeError:
# Polars
return len(df) <= _min_rows(kwargs)
return nrows(df) <= _min_rows(kwargs)


def safe_reset_index(df):
Expand Down
56 changes: 44 additions & 12 deletions itables/sample_dfs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import math
import string
from datetime import datetime, timedelta

try:
from functools import lru_cache
except ImportError:
from functools32 import lru_cache

from functools import lru_cache
from itertools import cycle

import numpy as np
Expand Down Expand Up @@ -105,7 +100,7 @@ def get_df_complex_index():
return df


def get_dict_of_test_dfs(N=100, M=100, polars=False):
def get_dict_of_test_dfs(N=100, M=100, type="pandas"):
NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M))

test_dfs = {
Expand Down Expand Up @@ -266,8 +261,10 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False):
}
),
}
if type == "pandas":
return test_dfs

if polars:
if type == "polars":
import polars as pl
import pyarrow as pa

Expand All @@ -279,10 +276,42 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False):
pass
return polars_dfs

return test_dfs
if type == "ibis_memtable":
import ibis

ibis_dfs = {}
for key, df in test_dfs.items():
# Ibis does not support tables with no columns
if not len(df.columns):
continue
try:
ibis_dfs[key] = ibis.memtable(df, name=key)
except (TypeError, ibis.common.exceptions.IbisInputError):
pass

return ibis_dfs

if type == "ibis_connect":
import ibis

con = ibis.pandas.connect(test_dfs)
ibis_dfs = {}
for key, df in test_dfs.items():
# Ibis does not support tables with no columns
if not len(df.columns):
continue

try:
ibis_dfs[f"{key}_connect"] = con.table(key)
except (TypeError, AttributeError):
pass

return ibis_dfs

def get_dict_of_test_series(polars=False):
raise NotImplementedError(type)


def get_dict_of_test_series(type="pandas"):
series = {}
for df_name, df in get_dict_of_test_dfs().items():
if len(df.columns) > 6:
Expand All @@ -293,7 +322,10 @@ def get_dict_of_test_series(polars=False):
continue
series["{}.{}".format(df_name, col)] = df[col]

if polars:
if type == "pandas":
return series

if type == "polars":
import polars as pl
import pyarrow as pa

Expand All @@ -312,7 +344,7 @@ def get_dict_of_test_series(polars=False):

return polars_series

return series
raise NotImplementedError(type)


@lru_cache()
Expand Down
2 changes: 1 addition & 1 deletion itables/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""ITables' version number"""

__version__ = "1.6.3"
__version__ = "1.7.0-dev"
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@
},
tests_require=["pytest", "pytz"],
install_requires=["IPython", "pandas", "numpy"],
extras_require={"polars": ["polars", "pyarrow"]},
extras_require={
"polars": ["polars", "pyarrow"],
"ibis": ["ibis-framework[pandas]"],
},
license="MIT",
classifiers=[
"Development Status :: 5 - Production/Stable",
Expand Down
28 changes: 28 additions & 0 deletions tests/test_ibis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest

from itables import to_html_datatable
from itables.sample_dfs import get_dict_of_test_dfs

try:
import ibis # noqa
except ImportError as e:
pytest.skip(str(e), allow_module_level=True)

# TODO Remove this (and find out how to evaluate count)
ibis.options.interactive = True


@pytest.mark.parametrize(
    "name,df",
    list(get_dict_of_test_dfs(type="ibis_memtable").items()),
)
def test_show_ibis_memtable(name, df, use_to_html):
    """Render each Ibis memtable sample as an HTML datatable without error."""
    # NOTE(review): use_to_html is passed positionally as the second
    # argument - confirm that it lands on the intended parameter of
    # to_html_datatable.
    to_html_datatable(df, use_to_html)


@pytest.mark.parametrize(
    "name,df",
    list(get_dict_of_test_dfs(type="ibis_connect").items()),
)
def test_show_ibis_connect(name, df, use_to_html):
    """Render each Ibis connection-backed sample table as an HTML datatable without error."""
    # NOTE(review): use_to_html is passed positionally as the second
    # argument - confirm that it lands on the intended parameter of
    # to_html_datatable.
    to_html_datatable(df, use_to_html)
4 changes: 2 additions & 2 deletions tests/test_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@


@pytest.mark.parametrize(
"name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()]
"name,x", [(name, x) for name, x in get_dict_of_test_series(type="polars").items()]
)
def test_show_polars_series(name, x, use_to_html):
to_html_datatable(x, use_to_html)


@pytest.mark.parametrize(
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()]
"name,df", [(name, df) for name, df in get_dict_of_test_dfs(type="polars").items()]
)
def test_show_polars_df(name, df, use_to_html):
to_html_datatable(df, use_to_html)
Loading