From f31e3d9451cd6f6224d11c5deb7750dd9e3669b5 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Fri, 21 Feb 2014 22:13:42 +0000 Subject: [PATCH] adding string stripping for p- properties I'm not really happy with this code: this sort of thing should be abstracted by the HTML/XML library. Kartik has suggested we switch to BeautifulSoup. It was a bit hacky last time I looked, but if it reduces this kind of thing, I'd be in favour. --- mf2py/parser.py | 5 ++++- test/examples/string_stripping.html | 11 +++++++++++ test/test_parser.py | 5 +++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/examples/string_stripping.html diff --git a/mf2py/parser.py b/mf2py/parser.py index 0d4bb96..7b2555c 100644 --- a/mf2py/parser.py +++ b/mf2py/parser.py @@ -139,7 +139,10 @@ def parse_props(el, is_root_element=False): # TODO: parse for value-class here prop_name = prop[2:] prop_value = props.get(prop_name, []) - prop_value.append(el.firstChild.nodeValue) + # TODO: this is a goddamn horror show right here + text_value = " ".join(t.nodeValue for t in el.childNodes if t.nodeType == t.TEXT_NODE) + text_value = text_value.strip() + prop_value.append(text_value) if prop_value is not []: props[prop_name] = prop_value diff --git a/test/examples/string_stripping.html b/test/examples/string_stripping.html new file mode 100644 index 0000000..f4f7103 --- /dev/null +++ b/test/examples/string_stripping.html @@ -0,0 +1,11 @@ + + + + String Stripping example + + +
+ Tom Morris +
+ + diff --git a/test/test_parser.py b/test/test_parser.py index 23c06f6..de1c975 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -115,6 +115,11 @@ def test_backcompat(): result = parse_fixture("backcompat.html") assert set(result["items"][0]["type"]) == set(["h-card"]) +def test_string_strip(): + result = parse_fixture("string_stripping.html") + print result + assert result["items"][0]["properties"]["name"][0] == "Tom Morris" + if __name__ == '__main__': result = parse_fixture("nested_multiple_classnames.html") pprint(result)