Browse files

Revise parliament scraper; I think it is currently the BM version

  • Loading branch information...
1 parent f8484c4 commit 0e385af194cc91b7e209f9f806cb89dc6fd0fffe @sweemeng committed Feb 15, 2013
Showing with 32 additions and 1 deletion.
  1. +4 −1 billwatcher/loaders/{base_loader.py → base.py}
  2. +28 −0 billwatcher/loaders/parliament.py
View
5 billwatcher/loaders/base_loader.py → billwatcher/loaders/base.py
@@ -38,19 +38,22 @@ def write_bill(self, name, long_name):
)
self.session.add(bill)
self.session.flush()
+ return bill
# write_revision: build a models.BillRevision from the given fields, add it to
# self.session, flush the session, and (as of this commit) return the new
# revision object.
# This commit inserts `bill_id` as the first positional parameter, so any
# caller that passed url/status/year positionally must shift its arguments.
- def write_revision(self, url, status, year, read_by=None, supported_by=None, date_presented=None):
+ def write_revision(self, bill_id, url, status, year, read_by=None, supported_by=None, date_presented=None):
revision = models.BillRevision(
url=url,
status=status,
year=year,
read_by=read_by,
+ bill_id=bill_id,
supported_by=supported_by,
date_presented=date_presented,
# update_date records when this row was written; now() without a tz
# argument yields a naive local-time datetime.
update_date=datetime.datetime.now()
)
self.session.add(revision)
# NOTE(review): presumably flushed so the revision receives its id before
# it is returned (ParliamentLoader.run reads revision.id) - confirm the
# session semantics.
self.session.flush()
+ return revision
def index_entry(self, revision_id):
    """Pass a single revision id to this loader's indexer.

    Nothing is returned; the call is made purely for its effect on
    ``self.indexer``.
    """
    # NOTE(review): `inder_single` looks like a typo for `index_single`;
    # confirm against the indexer class before renaming.
    indexer = self.indexer
    indexer.inder_single(revision_id)
View
28 billwatcher/loaders/parliament.py
@@ -0,0 +1,28 @@
+from loaders.base import BaseLoader
+from scrapers.parliament import MyTable
+import requests
+
+
+
+class ParliamentLoader(BaseLoader):
+ def __init__(self):
+ self.request_url = 'http://www.parlimen.gov.my/bills-dewan-rakyat.html'
+
+ def process_site(self, page):
+ parser = MyTable(page)
+ result = parser.extract()
+ return result
+
+ def run(self):
+ processor = self.fetch_site()
+ for line in processor:
+ bill = self.write_bill(line['kod']['kod_name'], line['tajuk'])
+ revision = self.write_revision(
+ bill.id,
+ line['kod']['kod_path'],
+ line['status'],
+ line['tahun'],
+ line['Dibentang Oleh'],
+ line['Diluluskan Pada']
+ )
+ self.index_entry(revision.id)

0 comments on commit 0e385af

Please sign in to comment.