Skip to content

Commit

Permalink
MS: extract_text
Browse files: browse the repository at this point in the history
  • Loading branch information
jamesturk committed May 1, 2012
1 parent a44442b commit 189751c
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion openstates/ms/__init__.py
@@ -1,3 +1,6 @@
import lxml.html
from billy.fulltext import oyster_text

metadata = dict(
name='Mississippi',
abbreviation='ms',
Expand Down Expand Up @@ -90,8 +93,16 @@ def session_list():
return url_xpath('http://billstatus.ls.state.ms.us/sessions.htm',
'//a/text()')

@oyster_text
def extract_text(oyster_doc, data):
    """Return the joined text of each <p> that follows an <h2> sibling.

    ``data`` is parsed as HTML with ``lxml.html.fromstring``.  Every ``<p>``
    element matched by the XPath ``//h2/following-sibling::p`` contributes
    its ``text_content()``, and the pieces are joined with single spaces.

    ``oyster_doc`` is accepted but not used by this function.
    """
    tree = lxml.html.fromstring(data)
    paragraphs = tree.xpath('//h2/following-sibling::p')
    return ' '.join(node.text_content() for node in paragraphs)

# Document settings for this module; ``extract_text`` is the callable
# defined above in this file.
document_class = dict(
    AWS_PREFIX='documents/ms/',
    # 30 * 24 * 60 == 43200; presumably 30 days expressed in minutes, going
    # by the key name -- confirm against the consumer of this dict.
    update_mins=30 * 24 * 60,
    extract_text=extract_text,
    onchanged=[]
)

0 comments on commit 189751c

Please sign in to comment.