Skip to content

Commit

Permalink
html.py: updated doc, flake
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgp2 committed Apr 19, 2023
1 parent e39829c commit bd6ef67
Showing 1 changed file with 32 additions and 33 deletions.
65 changes: 32 additions & 33 deletions src/minifold/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def value_to_html(x: object) -> str:
Returns:
The corresponding HTML string.
"""
def str_to_html(s :str) -> str:
def str_to_html(s: str) -> str:
"""
Right-strips ``"."`` and ``","`` from a string.
Expand All @@ -57,7 +57,7 @@ def str_to_html(s :str) -> str:
ret = escape("%s" % x)
return ret

def html(s :str):
def html(s: str):
"""
Evaluates HTML code in a Jupyter Notebook.
Expand All @@ -79,18 +79,18 @@ def print_error(x: object):
print(str(x), file = sys.stderr)

def entries_to_html(
entries :list,
map_attribute_label :dict = None,
attributes :list = None,
keep_entry_if :callable = None
entries: list,
map_attribute_label: dict = None,
attributes: list = None,
keep_entry_if: callable = None
) -> str:
"""
Exports a list of dicts to HTML.
Args:
entries: A list of dicts
map_attribute_label: A dict {str : str} which maps each entry key with the column header
to display.
map_attribute_label: A ``dict{str: str}`` which maps each
entry key to the column header to display.
attributes: The subset of keys to display.
keep_entry_if: Callback used to filter entries.
Expand All @@ -112,22 +112,22 @@ def keep_entry_if(i, entry):
</table>
</div>
""" % {
"header" : "<tr>%s</tr>" % "".join(
"header": "<tr>%s</tr>" % "".join(
["<th>Index</th>"] + [
"<th style='text-align: left;white-space: nowrap;'>%s</th>"
% str(map_attribute_label.get(attribute, attribute))
for attribute in attributes
]
),
"rows" : "".join([
"rows": "".join([
"""
<tr>
<td>%(index)d</td>
%(values)s
</tr>
""" % {
"index" : i,
"values" : "".join([
"index": i,
"values": "".join([
"<td style='text-align: left;white-space: nowrap;'>%s</td>"
% value_to_html(entry.get(attribute))
for attribute in attributes
Expand All @@ -138,18 +138,18 @@ def keep_entry_if(i, entry):
]),
}

def connector_to_html(connector :Connector, **kwargs) -> str:
def connector_to_html(connector: Connector, **kwargs) -> str:
attributes = kwargs.get("attributes")
if not attributes:
attributes = connector.attributes(None)
entries = connector.query(Query(**kwargs))
return entries_to_html(entries, attributes=attributes)

def entry_to_html(entry :dict, map_attribute_label :dict = None, attributes :list = None) -> str:
def entry_to_html(entry: dict, map_attribute_label: dict = None, attributes: list = None) -> str:
return entries_to_html([entry], map_attribute_label, attributes)

# OBSOLETE
def dict_to_html(d :dict, attributes :list, map_attribute_label :dict = None) -> str:
def dict_to_html(d: dict, attributes: list, map_attribute_label: dict = None) -> str:
return entry_to_html(d, map_attribute_label, attributes)

# ----------------------------------------------------------------------------------
Expand All @@ -175,7 +175,7 @@ def dict_to_html(d :dict, attributes :list, map_attribute_label :dict = None) ->

def html_to_text(s_html: str, blacklist: set = None) -> str:
"""
Convert an HTML page to text, by discarding javascript and css related
Converts an HTML page to text by discarding the JavaScript and CSS related
to the site.
Args:
Expand Down Expand Up @@ -207,13 +207,13 @@ def html_to_text(s_html: str, blacklist: set = None) -> str:
}

# Based on https://gist.github.com/revotu/21d52bd20a073546983985ba3bf55deb
def remove_all_attrs_except_saving(soup, whitelist = None):
def remove_all_attrs_except_saving(soup :BeautifulSoup, whitelist: dict = None):
"""
Remove all attributes except some.
Removes all attributes except some.
Args:
soup: A BeautifulSoup instance, modified in place.
whitelist: A dict {tag : list(attr)} where tag is an HTML tag and attr
soup (BeautifulSoup): A BeautifulSoup instance, modified in place.
whitelist: A ``dict{tag : list(attr)}`` where tag is an HTML tag and attr
an HTML attribute.
"""
if whitelist is None:
Expand All @@ -231,13 +231,13 @@ def remove_all_attrs_except_saving(soup, whitelist = None):
tag.attrs[attr] = "#"
return soup

def remove_tags(soup, blacklist :set = None):
def remove_tags(soup: BeautifulSoup, blacklist: set = None):
"""
Remove some HTML tags.
Removes some HTML tags.
Args:
soup: A BeautifulSoup instance, modified in place.
blacklist: A list of str, where each str is an HTML tag.
soup (BeautifulSoup): A BeautifulSoup instance, modified in place.
blacklist (set): A set of str, where each str is an HTML tag.
"""
if blacklist is None:
blacklist = HTML_BLACKLIST_TAGS
Expand All @@ -250,19 +250,18 @@ def remove_tags(soup, blacklist :set = None):
break

def sanitize_html(
s_html :str,
blacklist :set = None,
remove_attrs :bool = True
s_html: str,
blacklist: set = None,
remove_attrs: bool = True
) -> str:
"""
Remove from an HTML string irrelevant HTML blocks and attributes.
!!! This function is SLOW so do not use it on large corpus!
Removes from an HTML string irrelevant HTML blocks and attributes.
Warning: This function is SLOW, so do not use it on a large corpus!
Args:
s_html: A str instance containing HTML.
blacklist: List of blacklisted HTML tags.
remove_attrs:
s_html (str): A str instance containing HTML.
blacklist (set): Set of blacklisted HTML tags.
remove_attrs (bool): Pass ``True`` to remove HTML tag attributes.
Returns:
The sanitized string.
Expand Down

0 comments on commit bd6ef67

Please sign in to comment.