/
legislators.py
70 lines (52 loc) · 2.24 KB
/
legislators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from billy.scrape import NoDataForPeriod
from billy.scrape.legislators import LegislatorScraper, Legislator
from openstates.sd import metadata
import lxml.html
class SDLegislatorScraper(LegislatorScraper):
    """Scrape South Dakota legislators from legis.state.sd.us.

    Only terms starting after 2009 are handled (earlier sessions use a
    different site layout and are silently skipped by ``scrape``).
    """

    state = 'sd'

    def _make_headers(self, url):
        """Return request headers with compression disabled.

        South Dakota's gzipped responses seem to be broken, so the
        Accept-Encoding header is blanked to force an uncompressed reply.
        """
        headers = super(SDLegislatorScraper, self)._make_headers(url)
        headers['Accept-Encoding'] = ''
        return headers

    def scrape(self, chamber, term):
        """Entry point: validate *term* against metadata, then scrape.

        Raises NoDataForPeriod when *term* is not listed in the
        scraper's metadata.
        """
        for term_info in self.metadata['terms']:
            if term_info['name'] == term:
                start_year = term_info['start_year']
                break
        else:
            # No metadata entry matched the requested term.
            raise NoDataForPeriod(term)

        # Pre-2010 sessions are not supported by this scraper.
        if int(start_year) > 2009:
            self.scrape_legislators(chamber, term)

    def scrape_legislators(self, chamber, term):
        """Scrape every member of *chamber* from the session member menu."""
        # Term names look like '2011-2012'; the site is keyed by start year.
        year = term[0:4]
        url = "http://legis.state.sd.us/sessions/%s/MemberMenu.aspx" % (
            year)

        search = 'Senate Members' if chamber == 'upper' else 'House Members'

        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)
            doc.make_links_absolute(url)

            # Member links sit in a <div> that is a sibling of the
            # chamber heading.
            for link in doc.xpath("//h4[text()='%s']/../div/a" % search):
                member_name = link.text.strip()
                self.scrape_legislator(member_name, chamber, term,
                                       link.attrib['href'])

    def scrape_legislator(self, name, chamber, term, url):
        """Scrape one legislator detail page and save the record."""
        with self.urlopen(url) as page:
            doc = lxml.html.fromstring(page)

            party = doc.xpath(
                "string(//span[contains(@id, 'Party')])").strip()
            # Normalize the site's party label to billy's convention.
            if party == 'Democrat':
                party = 'Democratic'

            # Drop zero-padding, e.g. '05' -> '5'.
            district = doc.xpath(
                "string(//span[contains(@id, 'District')])").strip()
            district = district.lstrip('0')

            occupation = doc.xpath(
                "string(//span[contains(@id, 'Occupation')])").strip()

            legislator = Legislator(term, chamber, district, name,
                                    party=party, occupation=occupation)
            legislator.add_source(url)
            self.save_legislator(legislator)