Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

MD: 2012s2

  • Loading branch information...
commit 9f8b3a2e96f5faeb8c58d7300c57a4c99bda3f0c 1 parent 49ef5cc
@jamesturk jamesturk authored
Showing with 53 additions and 102 deletions.
  1. +28 −13 openstates/md/__init__.py
  2. +25 −89 openstates/md/bills.py
View
41 openstates/md/__init__.py
@@ -15,7 +15,7 @@
'2009', '2010'],
'start_year': 2007, 'end_year': 2010},
{'name': '2011-2014', 'sessions': ['2011', '2011s1', '2012',
- '2012s1'],
+ '2012s1', '2012s2'],
'start_year': 2011, 'end_year': 2014},
],
session_details={
@@ -24,65 +24,80 @@
'number': 423,
'display_name': '2007 Regular Session',
'type': 'primary',
- '_scraped_name': '2007',
+ '_scraped_name': '2007 Regular Session',
},
'2007s1': {'start_date': datetime.date(2007,10,29),
'end_date': datetime.date(2007,11,19),
'display_name': '2007, 1st Special Session',
'number': 424,
'type': 'special',
- '_scraped_name': '2007S1',
+ '_scraped_name': '2007 Special Session 1',
},
'2008': {'start_date': datetime.date(2008,1,9),
'end_date': datetime.date(2008,4,7),
'display_name': '2008 Regular Session',
'number': 425,
'type': 'primary',
- '_scraped_name': '2008',
+ '_scraped_name': '2008 Regular Session',
},
'2009': {'start_date': datetime.date(2009,1,14),
'end_date': datetime.date(2009,4,13),
'display_name': '2009 Regular Session',
'number': 426,
'type': 'primary',
- '_scraped_name': '2009',
+ '_scraped_name': '2009 Regular Session',
},
'2010': {'start_date': datetime.date(2010,1,13),
'end_date': datetime.date(2010,4,12),
'number': 427,
'display_name': '2010 Regular Session',
'type': 'primary',
- '_scraped_name': '2010',
+ '_scraped_name': '2010 Regular Session',
},
'2011': {'start_date': datetime.date(2011,1,12),
'end_date': datetime.date(2011,4,12),
'number': 428,
'display_name': '2011 Regular Session',
'type': 'primary',
- '_scraped_name': '2011',
+ '_scraped_name': '2011 Regular Session',
},
'2011s1': {'number': 429,
'display_name': '2011, 1st Special Session',
'type': 'special',
- '_scraped_name': '2011S1',
+ '_scraped_name': '2011 Special Session 1',
},
'2012': {'start_date': datetime.date(2012,1,11),
'end_date': datetime.date(2012,4,9),
'number': 430,
'display_name': '2012 Regular Session',
'type': 'primary',
- '_scraped_name': '2012',
+ '_scraped_name': '2012 Regular Session',
},
'2012s1': {'number': 431,
'display_name': '2012, 1st Special Session',
'type': 'special',
- '_scraped_name': '2012S1',
+ '_scraped_name': '2012 Special Session 1',
+ },
+ '2012s2': {'display_name': '2012, 2nd Special Session',
+ 'type': 'special',
+ '_scraped_name': '2012 Special Session 2',
},
},
feature_flags=['subjects', 'events', 'influenceexplorer'],
- _ignored_scraped_sessions=['1996', '1997', '1998', '1999', '2000', '2001',
- '2002', '2003', '2004', '2004S1', '2005',
- '2006', '2006S1']
+ _ignored_scraped_sessions=['1996 Regular Session',
+ '1997 Regular Session',
+ '1998 Regular Session',
+ '1999 Regular Session',
+ '2000 Regular Session',
+ '2013 Regular Session',
+ '2001 Regular Session',
+ '2002 Regular Session',
+ '2003 Regular Session',
+ '2004 Regular Session',
+ '2004 Special Session 1',
+ '2005 Regular Session',
+ '2006 Regular Session',
+ '2006 Special Session 1']
)
View
114 openstates/md/bills.py
@@ -60,8 +60,6 @@ def _clean_sponsor(name):
name = name.rsplit(',', 1)[0]
return name.strip().strip('*')
-BASE_URL = "http://mlis.state.md.us"
-BILL_URL = BASE_URL + "/%s/billfile/%s%04d.htm" # year, session, bill_type, number
class MDBillScraper(BillScraper):
jurisdiction = 'md'
@@ -132,15 +130,13 @@ def parse_bill_documents(self, doc, bill):
bill_text_b = doc.xpath('//b[contains(text(), "Bill Text")]')[0]
for sib in bill_text_b.itersiblings():
if sib.tag == 'a':
- bill.add_version(sib.text.strip(','),
- BASE_URL + sib.get('href'),
+ bill.add_version(sib.text.strip(','), sib.get('href'),
mimetype='application/pdf')
note_b = doc.xpath('//b[contains(text(), "Fiscal and Policy")]')[0]
for sib in note_b.itersiblings():
if sib.tag == 'a' and sib.text == 'Available':
- bill.add_document('Fiscal and Policy Note',
- BASE_URL + sib.get('href'))
+ bill.add_document('Fiscal and Policy Note', sib.get('href'))
def parse_bill_votes(self, doc, bill):
params = {
@@ -162,7 +158,7 @@ def parse_bill_votes(self, doc, bill):
if (href and "votes" in href and href.endswith('htm') and
href not in seen_votes):
seen_votes.add(href)
- vote_url = BASE_URL + href
+ vote_url = href
if bill['session'] in ('2007', '2007s1', '2008', '2009',
'2010', '2011'):
@@ -172,64 +168,6 @@ def parse_bill_votes(self, doc, bill):
vote.add_source(vote_url)
bill.add_vote(vote)
- def parse_old_vote_page(self, vote_url):
- params = {
- 'chamber': None,
- 'date': None,
- 'motion': None,
- 'passed': None,
- 'yes_count': None,
- 'no_count': None,
- 'other_count': None,
- }
-
- with self.urlopen(vote_url) as vote_html:
- vote_doc = lxml.html.fromstring(vote_html)
-
- # motion
- box = vote_doc.xpath('//td[@colspan=3]/font[@size=-1]/text()')
- params['motion'] = box[-1]
- params['type'] = 'other'
- if 'senate' in vote_url:
- params['chamber'] = 'upper'
- else:
- params['chamber'] = 'lower'
- for regex, vtype in vote_classifiers.iteritems():
- if re.findall(regex, params['motion'], re.IGNORECASE):
- params['type'] = vtype
-
- # counts
- bs = vote_doc.xpath('//td[@width="20%"]/font/b/text()')
- yeas = int(bs[0].split()[0])
- nays = int(bs[1].split()[0])
- excused = int(bs[2].split()[0])
- not_voting = int(bs[3].split()[0])
- absent = int(bs[4].split()[0])
- params['yes_count'] = yeas
- params['no_count'] = nays
- params['other_count'] = excused + not_voting + absent
- params['passed'] = yeas > nays
-
- # date
- # parse the following format: March 23, 2009
- date_elem = vote_doc.xpath('//font[starts-with(text(), "Legislative Date")]')[0]
- params['date'] = datetime.datetime.strptime(date_elem.text[18:], '%B %d, %Y')
-
- vote = Vote(**params)
-
- status = None
- for row in vote_doc.xpath('//table')[3].xpath('tr'):
- text = row.text_content()
- if text.startswith('Voting Yea'):
- status = 'yes'
- elif text.startswith('Voting Nay'):
- status = 'no'
- elif text.startswith('Not Voting') or text.startswith('Excused'):
- status = 'other'
- else:
- for cell in row.xpath('a'):
- getattr(vote, status)(cell.text.strip())
- return vote
def parse_vote_page(self, vote_url):
vote_html = self.urlopen(vote_url)
@@ -284,30 +222,28 @@ def parse_vote_page(self, vote_url):
return vote
- def scrape_bill(self, chamber, session, bill_type, number):
+ def scrape_bill_old(self, chamber, session, bill_id, url):
""" Creates a bill object """
if len(session) == 4:
session_url = session+'rs'
else:
session_url = session
- url = BILL_URL % (session_url, bill_type, number)
html = self.urlopen(url)
doc = lxml.html.fromstring(html)
+ doc.make_links_absolute(url)
# find <a name="Title">, get parent dt, get parent dl, then dd n dl
title = doc.xpath('//a[@name="Title"][1]/../../dd[1]/text()')[0].strip()
summary = doc.xpath('//font[@size="3"]/p/text()')[0].strip()
- #print "%s %d %s" % (bill_type, number, title)
-
- if 'B' in bill_type:
+ if 'B' in bill_id:
_type = ['bill']
- elif 'J' in bill_type:
+ elif 'J' in bill_id:
_type = ['joint resolution']
- bill = Bill(session, chamber, "%s %d" % (bill_type, number), title,
- type=_type, summary=summary)
+ bill = Bill(session, chamber, bill_id, title, type=_type,
+ summary=summary)
bill.add_source(url)
self.parse_bill_sponsors(doc, bill) # sponsors
@@ -327,19 +263,19 @@ def scrape_bill(self, chamber, session, bill_type, number):
def scrape(self, chamber, session):
- self.validate_session(session)
-
- start_index = 1
- if session == '2012s1' and chamber == 'lower':
- start_index = 1801
- elif session == '2012s1' and chamber == 'upper':
- start_index = 1301
-
- for bill_type in CHAMBERS[chamber]:
- for i in itertools.count(start_index):
- try:
- self.scrape_bill(chamber, session, bill_type, i)
- except HTTPError as he:
- if he.response.status_code != 404:
- raise he
- break
+ main_page = 'http://mgaleg.maryland.gov/webmga/frmLegislation.aspx?pid=legisnpage&tab=subject3&ys=' + session
+ chamber_prefix = 'S' if chamber == 'upper' else 'H'
+ html = self.urlopen(main_page)
+ doc = lxml.html.fromstring(html)
+
+ ranges = doc.xpath('//table[@class="box1leg"]//td/text()')
+ for range_text in ranges:
+ match = re.match('(\w{2})0*(\d+) - \wB0*(\d+)', range_text.strip())
+ if match:
+ prefix, begin, end = match.groups()
+ if prefix[0] == chamber_prefix:
+ self.info('scraping %ss %s-%s', prefix, begin, end)
+ for number in range(int(begin), int(end)+1):
+ bill_id = prefix + str(number)
+ url = 'http://mgaleg.maryland.gov/webmga/frmMain.aspx?id=%s&stab=01&pid=billpage&tab=subject3&ys=%s' % (bill_id, session)
+ self.scrape_bill_old(chamber, session, bill_id, url)
Please sign in to comment.
Something went wrong with that request. Please try again.