Permalink
Browse files

NH: extract_text

  • Loading branch information...
1 parent 0022149 · commit 63ebf7858665b2ff9620baf1d79d744544cc924b · @jamesturk committed Apr 24, 2012
Showing with 10 additions and 1 deletion.
  1. +10 −1 openstates/nh/__init__.py
View
@@ -1,3 +1,6 @@
+from billy.fulltext import oyster_text
+import lxml.html
+
metadata = {
'abbreviation': 'nh',
'name': 'New Hampshire',
@@ -31,8 +34,14 @@ def session_list():
'//a[contains(@href, "Bill%20Status")]/text()')
return [zip.replace(' Bill Status Tables.zip', '') for zip in zips]
+@oyster_text
+def extract_text(oyster_doc, data):
+ doc = lxml.html.fromstring(data)
+ return doc.xpath('//html')[0].text_content()
+
document_class = dict(
AWS_PREFIX = 'documents/nh/',
update_mins = 7*24*60,
+ extract_text = extract_text,
onchanged = []
-)
+)

0 comments on commit 63ebf78

Please sign in to comment.