Permalink
Browse files

Merge branch 'master' of github.com:sunlightlabs/openstates

  • Loading branch information...
2 parents 92bb2c5 + 2a6f1ed commit fad803b7243ac4e31e10f9360b36f1adba14e081 @twneale twneale committed Jan 3, 2013
@@ -17,12 +17,16 @@
start_year=2009, end_year=2010),
dict(name='27', sessions=['27'],
start_year=2011, end_year=2012),
+ dict(name='28', sessions=['28'],
+ start_year=2013, end_year=2014),
],
session_details={
'26': {'display_name': '26th Legislature',
'_scraped_name': 'The 26th Legislature (2009-2010)'},
'27': {'display_name': '27th Legislature',
'_scraped_name': 'The 27th Legislature (2011-2012)'},
+ '28': {'display_name': '28th Legislature',
+ '_scraped_name': 'The 28th Legislature (2013-2014)'},
},
_ignored_scraped_sessions=['The 25th Legislature (2007-2008)',
'The 24th Legislature (2005-2006)',
View
@@ -133,6 +133,12 @@
],
'start_year': 2011, 'end_year': 2012
},
+ {'name': '51',
+ 'sessions': [
+ '51st-1st-regular',
+ ],
+ 'start_year': 2013, 'end_year': 2014
+ },
],
session_details={
#'42nd-1st-regular':
@@ -474,6 +480,11 @@
'_scraped_name': 'Fiftieth Legislature - Second Regular Session',
#'start_date': , 'end_date':
},
+ '51st-1st-regular':
+ {'type': 'primary', 'session_id': 108,
+ 'display_name': '51st Legislature - First Regular Session',
+ '_scraped_name': 'Fifty-first Legislature - First Regular Session'
+ }
},
_ignored_scraped_sessions=[
'Forty-second Legislature - First Regular Session',
@@ -15,6 +15,9 @@
{'name': '2011-2012',
'sessions': ['2011', '2012'],
'start_year': 2011, 'end_year': 2012},
+ {'name': '2013-2014',
+ 'sessions': ['2013'],
+ 'start_year': 2013, 'end_year': 2014},
],
session_details={
'2011': {
@@ -24,6 +27,10 @@
'2012': {
'display_name': '2012 Regular Session',
'_scraped_name': '2012',
+ },
+ '2013': {
+ 'display_name': '2013 Regular Session',
+ '_scraped_name': '2013',
}
},
feature_flags=['subjects', 'events', 'influenceexplorer'],
@@ -19,7 +19,7 @@ def scrape(self, term, chambers):
doc = lxml.html.fromstring(data)
doc.make_links_absolute(council_url)
# page should have 13 unique council URLs
- urls = set(doc.xpath('//a[contains(@href, "dccouncil.us/council/")]/@href'))
+ urls = set(doc.xpath('//a[contains(@href, "/council/")]/@href'))
print '\n'.join(urls)
assert len(urls) <= 13, "should have 13 unique councilmember URLs"
@@ -53,7 +53,11 @@ def scrape(self, term, chambers):
office_address = get_field(doc, "Office:")
phone = get_field(doc, "Tel:")
- phone, fax = phone.split(' | Fax: ')
+ if phone.endswith('| Fax:'):
+ fax = None
+ phone = phone.strip('| Fax:') or None
+ else:
+ phone, fax = phone.split(' | Fax: ')
email = doc.xpath('//a[starts-with(text(), "Send an email")]/@href')[0].split(':')[1]
@@ -15,12 +15,18 @@
'terms': [
{'name': '2011-2012', 'sessions': ['2011-2012'],
'start_year': 2011, 'end_year': 2012},
+ {'name': '2013-2014', 'sessions': ['2013-2014'],
+ 'start_year': 2013, 'end_year': 2014},
],
'session_details': {
'2011-2012': {'type':'primary',
'display_name': '2011-2012 Regular Session',
'_scraped_name': '2011-2012',
},
+ '2013-2014': {'type':'primary',
+ 'display_name': '2013-2014 Regular Session',
+ '_scraped_name': '2013-2014',
+ },
},
'feature_flags': ['subjects', 'events', 'influenceexplorer'],
'_ignored_scraped_sessions': ['2009-2010', '2007-2008', '2005-2006',
View
@@ -195,6 +195,8 @@ def parse_roll_call(self, vote, url, rc_num):
vtype = vote.no
elif 'Excused' in p or 'Not Voting' in p:
vtype = vote.other
+ elif 'Roll Call No' in p:
+ continue
elif p.startswith('In The Chair:'):
break
else:
View
@@ -54,16 +54,23 @@
{'name': '2011-2012',
'sessions': ['2011'],
'start_year': 2011, 'end_year': 2012},
+ {'name': '2013-2014',
+ 'sessions': ['2013'],
+ 'start_year': 2013, 'end_year': 2014},
],
session_details={
'2009': {'start_date': datetime.date(2009,1,28), 'type': 'primary',
- 'display_name': '2009 Regular Session',
+ 'display_name': '2009-2010 Session',
'_scraped_name': '2009-2010 Session',
},
'2011': {'start_date': datetime.date(2011,1,26), 'type': 'primary',
- 'display_name': '2011 Regular Session',
+ 'display_name': '2011-2012 Session',
'_scraped_name': '2011-2012 Session',
},
+ '2013': {'start_date': datetime.date(2013,1,30), 'type': 'primary',
+ 'display_name': '2013-2014 Session',
+ '_scraped_name': '2013-2014 Session',
+ },
},
_ignored_scraped_sessions=['2008 Extra Session', '2007-2008 Session',
'2007 Extra Session', '2005-2006 Session',
@@ -10,8 +10,11 @@ def get_table_item(doc, name):
span = doc.xpath('//span[text()="{0}"]'.format(name))[0]
# get neighboring td's span
dataspan = span.getparent().getnext().getchildren()[0]
- return (dataspan.text + '\n' +
- '\n'.join([x.tail for x in dataspan.getchildren()])).strip()
+ if dataspan.text:
+ return (dataspan.text + '\n' +
+ '\n'.join([x.tail for x in dataspan.getchildren()])).strip()
+ else:
+ return None
class NCLegislatorScraper(LegislatorScraper):
jurisdiction = 'nc'
@@ -60,7 +63,7 @@ def scrape_chamber(self, chamber, term):
ldoc.make_links_absolute('http://www.ncga.state.nc.us')
photo_url = ldoc.xpath('//a[contains(@href, "pictures")]/@href')[0]
phone = get_table_item(ldoc, 'Phone:')
- address = get_table_item(ldoc, 'Legislative Mailing Address:')
+ address = get_table_item(ldoc, 'Legislative Mailing Address:') or None
email = ldoc.xpath('//a[starts-with(@href, "mailto:")]')[0].text or ''
# save legislator

0 comments on commit fad803b

Please sign in to comment.