/
base.py
75 lines (64 loc) · 2.21 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# internal api
import scraper
import models
import indexer
# external library
import requests
import datetime
# The new loader design is driven by these requirements:
# 1) We now have multiple data sources.
# 2) Each data source deals with a different state:
#    Parliament deals with reading approval,
#    AG Chambers deals with the gazette.
# 3) So yes, we need 2 different scrapers.
# 4) For now we are using the existing primary key.
# 5) AGC needs us to implement an enforce date.
class BaseLoader(object):
    """Base class for loaders that fetch a site and store bills/revisions.

    Subclasses are expected to set ``request_URL`` / ``request_parameter``
    and to implement :meth:`process_site` and :meth:`run`; both raise
    ``NotImplementedError`` here.
    """

    def __init__(self):
        """Reset the request state and create the indexer and DB session."""
        self.current_rev = None
        # NOTE: the attribute is spelled ``request_URL`` (upper-case "URL");
        # fetch_site() reads exactly this attribute.
        self.request_URL = ""
        self.request_parameter = {}
        self.pages = []
        self.indexer = indexer.Search()
        self.session = models.DBSession()
        # A transition table is used to check that a status moves to a valid
        # next state. It is only a validation tool and is empty in the base
        # class.
        self.transition = {
        }

    # Code needs to be clear, so every parameter needed is listed explicitly.
    def write_bill(self, name, long_name):
        """Create a ``models.Bill``, add it to the session and flush.

        The session is flushed but not committed here.

        :param name: value stored as the bill's ``name``.
        :param long_name: value stored as the bill's ``long_name``.
        :returns: the newly created ``models.Bill`` instance.
        """
        bill = models.Bill(
            name=name,
            long_name=long_name
        )
        self.session.add(bill)
        self.session.flush()
        return bill

    def write_revision(self, bill_id, url, status, year, read_by=None,
                       supported_by=None, date_presented=None):
        """Create a ``models.BillRevision``, add it to the session and flush.

        ``update_date`` is set to ``datetime.datetime.now()`` at call time.
        The session is flushed but not committed here.

        :param bill_id: identifier of the bill this revision belongs to.
        :param url: value stored as the revision's ``url``.
        :param status: value stored as the revision's ``status``.
        :param year: value stored as the revision's ``year``.
        :param read_by: optional, defaults to ``None``.
        :param supported_by: optional, defaults to ``None``.
        :param date_presented: optional, defaults to ``None``.
        :returns: the newly created ``models.BillRevision`` instance.
        """
        revision = models.BillRevision(
            url=url,
            status=status,
            year=year,
            read_by=read_by,
            bill_id=bill_id,
            supported_by=supported_by,
            date_presented=date_presented,
            update_date=datetime.datetime.now()
        )
        self.session.add(revision)
        self.session.flush()
        return revision

    def index_entry(self, revision_id):
        """Hand a single revision id to the search indexer.

        :param revision_id: identifier passed through to the indexer.
        """
        # NOTE(review): ``inder_single`` looks like a typo for
        # ``index_single``; the indexer API is not visible from this file,
        # so the call is left unchanged -- confirm against indexer.Search.
        self.indexer.inder_single(revision_id)

    # TODO: we will need to handle multiple pages.
    def fetch_site(self):
        """Fetch ``request_URL`` and yield each result of ``process_site``.

        This is a generator: the HTTP request is only issued once iteration
        starts.

        :returns: an iterator over whatever ``process_site`` produces for
            the response text.
        :raises Exception: if the response status code is not 200.
        """
        # Use the attributes defined in __init__; the previous code read a
        # non-existent ``self.request_url`` and an undefined name ``params``.
        response = requests.get(self.request_URL,
                                params=self.request_parameter)
        # requests exposes the HTTP status as ``status_code`` (lower-case).
        if response.status_code != 200:
            raise Exception("Site raise a non 200 code")
        results = self.process_site(response.text)
        for result in results:
            yield result

    def process_site(self, page):
        """Turn the fetched page text into an iterable of results.

        Must be implemented by subclasses.

        :param page: the response text passed in by :meth:`fetch_site`.
        :raises NotImplementedError: always, in the base class.
        """
        raise NotImplementedError("This need to be implemented")

    def run(self):
        """Entry point of the loader; must be implemented by subclasses.

        :raises NotImplementedError: always, in the base class.
        """
        raise NotImplementedError("This need to be implemented")