Skip to content
Browse files

MS: extract_text

  • Loading branch information...
1 parent a44442b commit 189751cd63df0087d9421d541c1736d9eaf20650 @jamesturk jamesturk committed May 1, 2012
Showing with 12 additions and 1 deletion.
  1. +12 −1 openstates/ms/__init__.py
View
13 openstates/ms/__init__.py
@@ -1,3 +1,6 @@
+import lxml.html
+from billy.fulltext import oyster_text
+
metadata = dict(
name='Mississippi',
abbreviation='ms',
@@ -90,8 +93,16 @@ def session_list():
return url_xpath('http://billstatus.ls.state.ms.us/sessions.htm',
'//a/text()')
@oyster_text
def extract_text(oyster_doc, data):
    """Extract plain text from an HTML document.

    ``data`` is parsed as HTML, and the text content of every ``<p>``
    element that is a following sibling of an ``<h2>`` is joined with
    single spaces.

    ``oyster_doc`` is accepted but not used by this function.
    NOTE(review): presumably the ``oyster_text`` decorator requires this
    two-argument signature -- confirm against billy.fulltext.
    """
    root = lxml.html.fromstring(data)
    paragraphs = root.xpath('//h2/following-sibling::p')
    return ' '.join([node.text_content() for node in paragraphs])
+
# Document-handling settings exported by this module; how each key is
# consumed is defined outside this file.
document_class = {
    'AWS_PREFIX': 'documents/ms/',
    # 30 * 24 * 60 == 43200, i.e. 30 days expressed in minutes.
    'update_mins': 30 * 24 * 60,
    # The text extractor defined above in this module.
    'extract_text': extract_text,
    'onchanged': [],
}

0 comments on commit 189751c

Please sign in to comment.
Something went wrong with that request. Please try again.