forked from openspending/dpkg-uk25k
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleanup_gov.py
38 lines (30 loc) · 909 Bytes
/
cleanup_gov.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import sqlaload as sl
from common import *
# Memo of earlier lookups: maps (dataset, raw value) to the canonical name
# that was stored for it, or None when the lookup yielded no usable match.
CACHE = {}

# (source column, dataset name) pairs. For each pair, apply() reads the
# source column and writes the result to "<source column>Canonical".
GOV_FIELDS = [
    ('EntityName', 'uk25k-entities'),
    ('DepartmentFamilyName', 'uk25k-departments'),
]

# NOTE(review): `logging` is not imported explicitly here; presumably it is
# provided by `from common import *` -- confirm.
log = logging.getLogger('cleanup_gov')
def apply(row):
    """Add a canonical-name column for every field listed in GOV_FIELDS.

    For each ``(field, dataset)`` pair the raw value ``row[field]`` is looked
    up in ``dataset`` and the resulting name is written to
    ``row[field + 'Canonical']``. Results (including "no match") are memoised
    in the module-level ``CACHE`` so each distinct value is looked up once.

    :param row: mutable mapping holding the record; it is modified in place.
    :returns: the same ``row`` object.

    The function never raises for a failed lookup: unexpected errors are
    logged via ``log.exception`` and processing moves on to the next field,
    leaving that field's canonical column as it was.
    """
    for field, dataset in GOV_FIELDS:
        out = field + 'Canonical'
        val = row.get(field)
        try:
            # Missing or empty values cannot be reconciled; record None and
            # skip the lookup entirely instead of querying with a blank value.
            if val is None or not len(val):
                row[out] = None
                continue

            key = (dataset, val)
            if key in CACHE:
                row[out] = CACHE[key]
                continue

            # Connect before entering the inner try so that `ds` is always
            # bound when the except clause below evaluates ds.Invalid and
            # ds.NoMatch; a connection failure is then reported as itself by
            # the outer handler rather than as a NameError.
            # NOTE(review): nk_connect is presumably supplied by
            # `from common import *` -- confirm.
            ds = nk_connect(dataset)
            try:
                row[out] = ds.lookup(val).name
            except (ds.Invalid, ds.NoMatch):
                # Value is known to be invalid or has no match in the dataset.
                row[out] = None
            CACHE[key] = row[out]
        except Exception as e:
            # Best effort: one bad field must not abort the whole row.
            log.exception(e)
    return row