From c3d285213c62974445f546b68a03bcca805ebf71 Mon Sep 17 00:00:00 2001
From: James Turk
Date: Tue, 24 Apr 2012 17:51:17 -0400
Subject: [PATCH] RI: extract_text

---
 openstates/ri/__init__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/openstates/ri/__init__.py b/openstates/ri/__init__.py
index fcdf8ce2ad..c0ec736044 100644
--- a/openstates/ri/__init__.py
+++ b/openstates/ri/__init__.py
@@ -1,4 +1,6 @@
 import datetime
+from billy.fulltext import (pdfdata_to_text, oyster_text,
+                            text_after_line_numbers)
 
 metadata = dict(
     _partial_vote_bill_id=True,
@@ -30,8 +32,13 @@ def session_list():
     return url_xpath( 'http://status.rilin.state.ri.us/bill_history.aspx?mode=previous',
         "//select[@name='ctl00$rilinContent$cbYear']/option/text()" )
 
+@oyster_text
+def extract_text(oyster_doc, data):
+    return text_after_line_numbers(pdfdata_to_text(data))
+
 document_class = dict(
     AWS_PREFIX = 'documents/ri/',
     update_mins = 24*7*60,
+    extract_text = extract_text,
     onchanged = []
-)
\ No newline at end of file
+)