In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict 
import re 
import pprint
import sample_file as sf #sample_file.py in ./osm_src
import data as d
# Matches the trailing run of non-whitespace characters of a street name
# (starting at a word boundary, optionally ending in a period).
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)


# NOTE(review): presumably writes a sample extract of the full file; if
# get_element is a generator, this bare call does no work until it is
# iterated -- confirm against sample_file.py.
sf.get_element('sacramento.osm')

# Street-type endings that need no correction.
expected = [
    "Street",
    "Avenue",
    "Boulevard",
    "Drive",
    "Court",
    "Place",
    "Square",
    "Lane",
    "Road",
    "Trail",
    "Parkway",
    "Commons",
]

# Abbreviated or misspelled street-type endings and the spelling that should
# replace each of them.
mapping = {
    "St": "Street",
    "St.": "Street",
    "Ave": "Avenue",
    "Ave.": "Avenue",
    "Rd.": "Road",
    "Rd": "Road",
    "Ct": "Court",
    "Ct.": "Court",
    "Blvd": "Boulevard",
    "Blvd.": "Boulevard",
    "Dr": "Drive",
    "Dr.": "Drive",
    "PlaceZ": "Place",
}


In [2]:
# Ask data.get_element for the 'node' and 'way' elements of the sample file.
# NOTE(review): `element` is not used by any visible cell, and if get_element
# is a generator nothing is read until it is iterated -- confirm in data.py.
element = d.get_element('sample.osm', tags=('node','way'))

In [18]:
# Audit the sample extract. The printed result below maps each unexpected
# street-type ending to the set of street names that use it.
audit_sample = d.audit('sample.osm')
# Convert to a plain dict before pretty-printing.
pprint.pprint(dict(audit_sample))

{'Blvd': set(['Northgate Blvd']),
 'Blvd.': set(['Auburn Blvd.']),
 'Broadway': set(['Broadway']),
 'Circle': set(['Evergreen Circle',
                'Gooseberry Circle',
                'Half Moon Bay Circle',
                'Kathy Circle',
                'Meadowlark Circle',
                'Park Circle',
                'Pitzer Circle']),
 'Ingoglia': set(['Via Ingoglia']),
 'PlaceZ': set(['Town Center PlaceZ']),
 'Terrace': set(['Simon Terrace']),
 'Way': set(['Arden Way',
             'Chaparral Way',
             'Cummins Way',
             'Franklin Way',
             'Hickory Way',
             'Manzanita Way',
             'Merced Way',
             'Oakmont Way',
             'Paradise Way',
             'Rubicon Way',
             'Shasta Way',
             'Spruce Way',
             'Trinity Way'])}


In [22]:
def test():
    st_types = d.audit('sample.osm')
    pprint.pprint(dict(st_types))

    for st_type, ways in st_types.iteritems():
        for name in ways:
            better_name = d.update_name(name, mapping)
            print name, "=>", better_name
            if name == "West Lexington St.":
                assert better_name == "West Lexington Street"
            if name == "Baldwin Rd.":
                assert better_name == "Baldwin Road"


if __name__ == '__main__':
    test()

{'Blvd': set(['Northgate Blvd']),
 'Blvd.': set(['Auburn Blvd.']),
 'Broadway': set(['Broadway']),
 'Circle': set(['Evergreen Circle',
                'Gooseberry Circle',
                'Half Moon Bay Circle',
                'Kathy Circle',
                'Meadowlark Circle',
                'Park Circle',
                'Pitzer Circle']),
 'Ingoglia': set(['Via Ingoglia']),
 'PlaceZ': set(['Town Center PlaceZ']),
 'Terrace': set(['Simon Terrace']),
 'Way': set(['Arden Way',
             'Chaparral Way',
             'Cummins Way',
             'Franklin Way',
             'Hickory Way',
             'Manzanita Way',
             'Merced Way',
             'Oakmont Way',
             'Paradise Way',
             'Rubicon Way',
             'Shasta Way',
             'Spruce Way',
             'Trinity Way'])}


KeyError: 'PlaceZ'

<_sre.SRE_Match object at 0x1039157e8>


In [139]:
# Matches the trailing run of non-whitespace characters of a street name
# (starting at a word boundary, optionally ending in a period).
# NOTE(review): this pattern and `expected` repeat the definitions in the
# first cell of the notebook.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

# Street-type endings that audit_street_type does not report.
expected = [
    "Street",
    "Avenue",
    "Boulevard",
    "Drive",
    "Court",
    "Place",
    "Square",
    "Lane",
    "Road",
    "Trail",
    "Parkway",
    "Commons",
]



def is_street_name(elem):
    """Tell whether an element is a street-name tag.

    Args:
        elem: An element exposing an ``attrib`` mapping (e.g. an OSM ``<tag>``).

    Returns:
        bool: True when the element's ``k`` attribute equals ``"addr:street"``;
        False otherwise, including when the element has no ``k`` attribute.
    """
    # .get() keeps this a pure predicate: an element without a 'k' attribute
    # is simply not a street tag instead of raising KeyError.
    return elem.attrib.get('k') == "addr:street"

def is_post_code(elem):
    """Tell whether an element is a postcode tag.

    Args:
        elem: An element exposing an ``attrib`` mapping (e.g. an OSM ``<tag>``).

    Returns:
        bool: True when the element's ``k`` attribute equals ``'addr:postcode'``;
        False otherwise, including when the element has no ``k`` attribute.
    """
    # .get() keeps this a pure predicate: an element without a 'k' attribute
    # is simply not a postcode tag instead of raising KeyError.
    return elem.attrib.get('k') == 'addr:postcode'

def audit_inconsistent_zip(osmfile):
    """Collect postcode values that are not exactly five characters long.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        list: Every ``addr:postcode`` value found on a ``node`` or ``way``
        element whose length differs from 5, in the order encountered
        (duplicates are kept).
    """
    inconsistent_zip = []
    # Binary mode leaves decoding to the XML parser; the ``with`` block closes
    # the handle even when parsing raises.
    with open(osmfile, 'rb') as osm_file:
        # Use 'end' events: at 'start' only the opening tag has been seen, so
        # the child <tag> elements are not guaranteed to be attached yet.
        for event, elem in ET.iterparse(osm_file, events=('end',)):
            if elem.tag == 'node' or elem.tag == 'way':
                for tag in elem.iter('tag'):
                    if is_post_code(tag) and len(tag.attrib['v']) != 5:
                        inconsistent_zip.append(tag.attrib['v'])
    return inconsistent_zip

def check_zip(osmfile):
    """List every postcode value found in an OSM file.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        list: The ``addr:postcode`` values of all ``node`` and ``way``
        elements, in the order encountered (duplicates are kept).
    """
    zip_sac = []
    # Binary mode leaves decoding to the XML parser; the ``with`` block closes
    # the handle even when parsing raises.
    with open(osmfile, 'rb') as osm_file:
        # Use 'end' events: at 'start' only the opening tag has been seen, so
        # the child <tag> elements are not guaranteed to be attached yet.
        for event, elem in ET.iterparse(osm_file, events=('end',)):
            if elem.tag == 'node' or elem.tag == 'way':
                for tag in elem.iter('tag'):
                    if is_post_code(tag):
                        zip_sac.append(tag.attrib['v'])
    return zip_sac

def audit_incorrect_zip(osmfile):
    """Return the distinct postcodes whose first digits are not "95".

    Args:
        osmfile: Path of the OSM XML file; passed unchanged to ``check_zip``.

    Returns:
        set: The unique postcode strings from ``check_zip(osmfile)`` that do
        not have "95" as their first digits. A non-digit prefix such as
        "CA " is skipped before the digits are examined.
    """
    # re.match anchors at the start of the string and \D* skips any non-digit
    # prefix, so the first digits themselves must be "95". An unanchored
    # search for "95" would also accept values such as "89501" that merely
    # contain those two digits somewhere.
    return set(
        zip_code
        for zip_code in check_zip(osmfile)
        if not re.match(r'\D*95', zip_code)
    )
    
def audit_street_type(street_types, street_name):
    """Record a street name under its ending when that ending is unexpected.

    Args:
        street_types: Mapping from street-type ending to a set of street
            names; it is updated in place and must create a set for a key
            that is not present yet (e.g. ``defaultdict(set)``).
        street_name: The street name to inspect.

    Nothing is recorded when ``street_type_re`` finds no match or when the
    matched ending is listed in ``expected``.
    """
    match = street_type_re.search(street_name)
    if match is None:
        return

    ending = match.group()
    if ending in expected:
        return

    street_types[ending].add(street_name)
            
def audit_check_street(osmfile):
    """Group the street names of an OSM file by their unexpected endings.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        defaultdict(set): Filled by ``audit_street_type`` with every
        ``addr:street`` value found on a ``node`` or ``way`` element.
    """
    street_types = defaultdict(set)
    # Binary mode leaves decoding to the XML parser; the ``with`` block closes
    # the handle even when parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Use 'end' events: at 'start' only the opening tag has been seen, so
        # the child <tag> elements are not guaranteed to be attached yet.
        for event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib['v'])
    return street_types
# Recorded output of audit_inconsistent_zip (postcodes whose length is not 5):
# ['95832-1447','CA 95826','CA 95819','CA 95834','CA 95834','2557','95819-6055','95819-6055','95819-6055','CA 95832',
# 'CA 95822','95832-1447','CA 95626','95826-2625','95819-6024','95819-6138']


# Recorded output of audit_incorrect_zip:
#{'2557', '85834', '96816', '98584'}