Skip to content

Commit

Permalink
Merge pull request #4 from openrural/featnames-paflag
Browse files Browse the repository at this point in the history
Use primary feature name during block import; log alternate names. Refs #278. Thanks to Colin and Kim at Caktus!
  • Loading branch information
slinkp committed Apr 23, 2012
2 parents 303df0f + 89baab5 commit 7c0ada7
Showing 1 changed file with 42 additions and 23 deletions.
65 changes: 42 additions & 23 deletions ebpub/ebpub/streets/blockimport/tiger/import_blocks.py
Expand Up @@ -18,7 +18,9 @@
#

import sys
import pprint
import optparse
from collections import defaultdict
from django.contrib.gis.gdal import DataSource
from django.contrib.gis.gdal.error import OGRIndexError
from ebdata.parsing import dbf
Expand Down Expand Up @@ -121,24 +123,7 @@ def __init__(self, edges_shp, featnames_dbf, faces_dbf, place_shp,
BlockImporter.__init__(self, shapefile=edges_shp, layer_id=0,
verbose=verbose, encoding=encoding)
self.fix_cities = fix_cities
self.featnames_db = featnames_db = {}
for tlid, row in self._load_rel_db(featnames_dbf, 'TLID').iteritems():
# TLID is Tiger/Line ID, unique per edge.
# We use TLID instead of LINEARID as the key because
# LINEARID is only unique per 'linear feature', which is
# an implicit union of some edges. So if we used LINEARID,
# we'd clobber a lot of keys in the call to
# _load_rel_db().
# Fixes #14 ("missing blocks").
if row['MTFCC'] not in VALID_MTFCC:
continue
if not row.get('FULLNAME'):
self.log("skipping tlid %r, no fullname" % tlid)
continue

featnames_db.setdefault(tlid, [])
featnames_db[tlid].append(row)

self.featnames_db = self._clean_featnames(featnames_dbf)
self.faces_db = self._load_rel_db(faces_dbf, 'TFID')
# Load places keyed by FIPS code
places_layer = DataSource(place_shp)[0]
Expand All @@ -157,18 +142,17 @@ def __init__(self, edges_shp, featnames_dbf, faces_dbf, place_shp,
self.filter_bounds = filter_bounds
self.tlids_with_blocks = set()


def _load_rel_db(self, dbf_file, rel_key):
"""
Reads rows as dicts from a .dbf file.
Returns a mapping of rel_key -> list of row dicts (one list entry per row that shares that key).
"""
f = open(dbf_file, 'rb')
db = {}
db = defaultdict(list)
rowcount = 0
try:
for row in dbf.dict_reader(f, strip_values=True):
db[row[rel_key]] = row
db[row[rel_key]].append(row)
rowcount += 1
self.log(
" GOT DBF ROW %s for %s" % (row[rel_key], row.get('FULLNAME', 'unknown')))
Expand All @@ -178,6 +162,41 @@ def _load_rel_db(self, dbf_file, rel_key):
self.log("Unique keys for %r: %d" % (rel_key, len(db)))
return db

def _clean_featnames(self, featnames_dbf):
    """
    Loads the featnames .dbf file and keeps only the primary
    feature name rows for each edge.

    Returns a mapping of TLID -> list of row dicts whose PAFLAG is 'P'.
    Rows whose MTFCC is not in VALID_MTFCC, or that have no FULLNAME,
    are dropped. Alternate (non-primary) names are only logged.
    """
    # TLID is Tiger/Line ID, unique per edge.
    # We use TLID instead of LINEARID as the key because
    # LINEARID is only unique per 'linear feature', which is
    # an implicit union of some edges. So if we used LINEARID,
    # we'd clobber a lot of keys in the call to
    # _load_rel_db().
    # Fixes #14 ("missing blocks").
    rel_db = self._load_rel_db(featnames_dbf, 'TLID')
    featnames_db = defaultdict(list)
    for tlid, rows in rel_db.iteritems():
        primary = None
        alternates = []
        for row in rows:
            if row['MTFCC'] not in VALID_MTFCC:
                continue
            if not row.get('FULLNAME'):
                self.log("skipping tlid %r, no fullname" % tlid)
                continue
            if row['PAFLAG'] == 'P':
                primary = row
                featnames_db[tlid].append(row)
            else:
                alternates.append(row)

        if primary is None:
            # Some edges only carry alternate names. Without this guard
            # the primary['NAME'] lookup below would raise TypeError and
            # abort the whole import, so just report them and move on.
            for alternate in alternates:
                msg = 'Found alternate name with no primary name for tlid {0}: {1}\n{2}'
                logger.debug(msg.format(tlid, alternate['NAME'].upper(),
                                        pprint.pformat(alternate)))
            continue

        # For now we just log alternates that were found. Ideally we could save these
        # as aliases somehow, but at the moment we don't have a good way to do that.
        correct = primary['NAME'].upper()
        for alternate in alternates:
            incorrect = alternate['NAME'].upper()
            msg = 'Found alternate name for {0} ({1}): {2}\n{3}\n{4}'
            logger.debug(msg.format(correct, primary['TLID'], incorrect,
                                    pprint.pformat(primary),
                                    pprint.pformat(alternate)))
    return featnames_db

def _get_city(self, feature, side):
city = ''
if self.fix_cities:
Expand All @@ -189,7 +208,7 @@ def _get_city(self, feature, side):
else:
fid = feature.get('TFID' + side)
if fid in self.faces_db:
face = self.faces_db[fid]
face = self.faces_db[fid][0]
# Handle both 2010 and older census files.
# If none of these work, we simply get no city.
pid = face.get('PLACEFP10') or face.get('PLACEFP00') or face.get('PLACEFP')
Expand All @@ -203,7 +222,7 @@ def _get_city(self, feature, side):
def _get_state(self, feature, side):
fid = feature.get('TFID' + side)
if fid in self.faces_db:
face = self.faces_db[fid]
face = self.faces_db[fid][0]
# Handle both 2010 and older census files.
state_fip = STATE_FIPS[face.get('STATEFP10') or face['STATEFP']]
return state_fip[0]
Expand Down

0 comments on commit 7c0ada7

Please sign in to comment.