diff --git a/src/arche/__init__.py b/src/arche/__init__.py
index 109f08f..75c11d6 100755
--- a/src/arche/__init__.py
+++ b/src/arche/__init__.py
@@ -5,11 +5,14 @@ SH_URL = "https://app.scrapinghub.com/p" # noqa
 
 
 from arche.arche import Arche
+from arche.tools import dataframe
 from arche.tools.schema import basic_json_schema
 import numpy as np
 import pandas as pd
 import plotly.io as pio
 
+pd.DataFrame._repr_html_ = dataframe._repr_html_
+
 pio.renderers.default = "notebook_connected+jupyterlab"
 
 __all__ = ["basic_json_schema", "Arche", "np", "pd"]
diff --git a/src/arche/tools/dataframe.py b/src/arche/tools/dataframe.py
new file mode 100644
index 0000000..1187ef6
--- /dev/null
+++ b/src/arche/tools/dataframe.py
@@ -0,0 +1,38 @@
+"""Helpers for rendering DataFrames as HTML with clickable URL cells."""
+import html
+import re
+
+# Matches strings that begin with an http:// or https:// scheme.
+_URL_RE = re.compile(r"^https?://")
+
+
+def make_urls_clickable(val):
+    """Wrap `val` in an HTML anchor if it is an http(s) URL string.
+
+    Args:
+        val: a cell value of any type.
+
+    Returns:
+        An `<a>` element linking to `val` when it is a string starting with
+        `http://` or `https://`; otherwise `val` unchanged.
+    """
+    if isinstance(val, str) and _URL_RE.search(val):
+        # Escape so the URL cannot break out of the href attribute or inject
+        # markup into the rendered table.
+        safe = html.escape(val, quote=True)
+        return f'<a href="{safe}">{safe}</a>'
+    return val
+
+
+def _repr_html_(self, *args, **kwargs):
+    """Render a DataFrame as HTML, showing URL cells as links.
+
+    Assigned to `pandas.DataFrame._repr_html_` by the `arche` package; the
+    arguments are forwarded to the Styler's render method.
+    """
+    styler = self.style.format(make_urls_clickable)
+    render = getattr(styler, "render", None)
+    if render is None:
+        # `Styler.render` was removed in pandas 2.0 in favour of `to_html`.
+        render = styler.to_html
+    return render(*args, **kwargs)
diff --git a/tests/tools/test_dataframe_html_output.py b/tests/tools/test_dataframe_html_output.py
new file mode 100644
index 0000000..c58e564
--- /dev/null
+++ b/tests/tools/test_dataframe_html_output.py
@@ -0,0 +1,26 @@
+import pandas as pd
+import pytest
+
+
+@pytest.fixture()
+def df_with_urls():
+    data = {"col1": [1, 2], "col2": ["http://foo.com", "https://bar.com"]}
+    return pd.DataFrame(data)
+
+
+def test_df_has_clickable_urls(df_with_urls):
+    html = df_with_urls._repr_html_()
+    assert '<a href="http://foo.com">http://foo.com</a>' in html
+    assert '<a href="https://bar.com">https://bar.com</a>' in html
+
+
+def test_derivaded_df_has_clickable_urls(df_with_urls):
+    html = df_with_urls.head()._repr_html_()
+    assert '<a href="http://foo.com">http://foo.com</a>' in html
+    assert '<a href="https://bar.com">https://bar.com</a>' in html
+
+
+def test_arche_df_does_not_add_links_if_no_url_found():
+    df = pd.DataFrame({"col1": [1, 2], "col2": ["foo", "bar"]})
+    html = df._repr_html_()
+    assert "<a href" not in html