Skip to content
Browse files

CA: extract_text

  • Loading branch information...
1 parent 8f89a68 commit ff3dc52401d2c19233bb611544c8fc8a107ba90d @jamesturk jamesturk committed May 1, 2012
Showing with 9 additions and 1 deletion.
  1. +9 −1 openstates/ca/__init__.py
View
10 openstates/ca/__init__.py
@@ -1,4 +1,6 @@
import datetime
+import lxml.html
+from billy.fulltext import oyster_text
metadata = dict(
name='California',
@@ -100,8 +102,14 @@ def session_list():
]
return sessions
@oyster_text
def extract_text(oyster_doc, data):
    """Return the text of the first ``<div id="bill">`` element in ``data``.

    ``data`` is parsed as HTML with ``lxml.html.fromstring``. ``oyster_doc``
    is accepted but is not used by this function.

    Raises IndexError when the parsed document has no ``div`` whose id is
    "bill".
    """
    root = lxml.html.fromstring(data)
    bill_divs = root.xpath('//div[@id="bill"]')
    # Only the first matching div is used.
    return bill_divs[0].text_content()
+
# Document settings for California, including the text extractor to use.
document_class = {
    'AWS_PREFIX': 'documents/ca/',
    'update_mins': 7*24*60,  # 7 * 24 * 60 = 10080, i.e. one week in minutes
    'extract_text': extract_text,
    'onchanged': [],
}

0 comments on commit ff3dc52

Please sign in to comment.
Something went wrong with that request. Please try again.