Skip to content

Commit

Permalink
Wrap DataFrames in order to allow custom styling (closes #76)
Browse files Browse the repository at this point in the history
  • Loading branch information
tcurvelo committed Oct 20, 2019
1 parent 6be3d7a commit 5378208
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
9 changes: 5 additions & 4 deletions src/arche/readers/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from arche import SH_URL
from arche.tools import api
from arche.tools.dataframe import StylizedDataFrame
import numpy as np
import pandas as pd
from scrapinghub import ScrapinghubClient
Expand All @@ -22,11 +23,11 @@ def __len__(self) -> int:
return len(self.df)

@staticmethod
def process_df(df: pd.DataFrame) -> pd.DataFrame:
def process_df(df: pd.DataFrame) -> StylizedDataFrame:
# clean empty objects - mainly lists and dicts, but keep everything else
df = df.applymap(lambda x: x if x or isinstance(x, numbers.Real) else np.nan)
Items.categorize(df)
return df
return StylizedDataFrame(df, copy=True)

@staticmethod
def categorize(df: pd.DataFrame) -> pd.DataFrame:
Expand All @@ -47,7 +48,7 @@ def from_df(cls, df: pd.DataFrame):

@classmethod
def from_array(cls, iterable: RawItems):
return cls(raw=iterable, df=pd.DataFrame(list(iterable)))
return cls(raw=iterable, df=StylizedDataFrame(list(iterable)))


class CloudItems(Items):
Expand All @@ -62,7 +63,7 @@ def __init__(
self._limit: int = 0
self.filters = filters
raw = self.fetch_data()
df = pd.DataFrame(list(raw))
df = StylizedDataFrame(list(raw))
df.index = self.format_keys(df["_key"])
df.index.name = None
df = df.drop(columns=["_key", "_type"], errors="ignore")
Expand Down
15 changes: 15 additions & 0 deletions src/arche/tools/dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import html
import re

import pandas as pd


class StylizedDataFrame(pd.DataFrame):
    """A ``pd.DataFrame`` whose HTML output renders http(s) URLs as links.

    ``to_html`` is overridden to go through the pandas ``Styler`` so that every
    string cell starting with ``http://`` or ``https://`` becomes an
    ``<a target="_blank">`` anchor. All other cells are left as they are.
    """

    @property
    def _constructor(self):
        """Return the class pandas should use for frames derived from this one.

        pandas builds the result of most operations through ``_constructor``;
        overriding it keeps derived frames (e.g. the result of ``drop``) as
        ``StylizedDataFrame`` so the custom ``to_html`` is not silently lost.
        """
        return StylizedDataFrame

    def _make_urls_clickable(self, val):
        """Format a single cell value, wrapping http(s) URLs in an anchor tag.

        Args:
            val: the cell value; may be of any type.

        Returns:
            An HTML anchor string if ``val`` is a string starting with
            ``http://`` or ``https://``, otherwise ``val`` unchanged.
        """
        if isinstance(val, str) and val.startswith(("http://", "https://")):
            # The value is data, not markup: escape it so quotes or angle
            # brackets cannot break out of the attribute or the link text.
            safe = html.escape(val, quote=True)
            return f'<a target="_blank" href="{safe}">{safe}</a>'
        return val

    def to_html(self, *args, **kwargs):
        """Render the frame as HTML with clickable URLs.

        Positional and keyword arguments are forwarded to the ``Styler``
        rendering method, not to ``pd.DataFrame.to_html``.

        Returns:
            Whatever the ``Styler`` rendering method returns (the HTML string
            when no output buffer is given).
        """
        styler = self.style.format(self._make_urls_clickable)
        # Styler.render was deprecated in pandas 1.4 and removed in 2.0 in
        # favour of Styler.to_html; fall back to render on older versions.
        render = getattr(styler, "to_html", None) or styler.render
        return render(*args, **kwargs)
18 changes: 18 additions & 0 deletions tests/tools/test_stylizeddataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from arche.tools.dataframe import StylizedDataFrame
import pandas as pd


def test_stylized_df_make_urls_clickable():
    """URL cells become anchors only when rendered via StylizedDataFrame."""
    plain_df = pd.DataFrame(
        {"col1": [1, 2], "col2": ["http://foo.com", "https://bar.com"]}
    )
    # Baseline: the stock pandas renderer emits the URLs as plain text.
    assert "<a href=" not in plain_df.to_html()

    rendered = StylizedDataFrame(plain_df).to_html()
    expected_anchors = (
        '<a target="_blank" href="http://foo.com">http://foo.com</a>',
        '<a target="_blank" href="https://bar.com">https://bar.com</a>',
    )
    for anchor in expected_anchors:
        assert anchor in rendered


def test_stylized_df_does_not_add_links_if_no_url_found():
    """Cells that are not http(s) URLs must be rendered without anchor tags."""
    data = {"col1": [1, 2], "col2": ["foo", "bar"]}
    # Render through StylizedDataFrame: a plain pd.DataFrame never adds links,
    # so it cannot exercise the behaviour under test.
    html = StylizedDataFrame(data).to_html()
    # Generated anchors start with '<a target="_blank" href=', so check for
    # any anchor tag rather than the literal '<a href='.
    assert "<a " not in html

0 comments on commit 5378208

Please sign in to comment.