## 1.  Auditing Street names

The code below audits the OSMFILE and changes the street names to the required format based on the 'mapping'. 

    * audit_street_type() searches the input string with the regex. If there is a match and the matched street type is not in the "expected" list, it adds the street type as a key and adds the street name to that key's set.
    * is_street_name() checks whether the tag's key is "addr:street", i.e. a street-name record.
    * audit() returns a dictionary mapping each unexpected street type to the set of street names that use it. Using this we can understand and correct our street names.
    * mapping {} contains all the mappings required to convert the street names into the required format.
    * update_name() takes a street name string and the mapping as arguments and returns the fixed name.
   

In [55]:

import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# OSM XML file audited by this cell (alternative input: "test_file.osm").
OSMFILE = "St. John's_NL_Canada.osm"

# Matches the last run of non-whitespace characters in a street name
# (optionally ending in a period), i.e. the street-type word.
street_type_re = re.compile(r'\b\S+\.?$', flags=re.IGNORECASE)

# Street types that are already in the desired form; audit_street_type()
# does not report names ending in one of these.
expected = [
    "Street",
    "Avenue",
    "Boulevard",
    "Drive",
    "Court",
    "Place",
    "Square",
    "Lane",
    "Road",
    "Trail",
    "Parkway",
    "Commons",
]


# Street-name corrections used by update_name(): each key is the trailing
# word of a street name as matched by street_type_re, each value is the text
# that replaces it. Lookups are exact (case-sensitive) dictionary lookups.
# The 'Monkstown' entry used to be listed twice; a duplicate key in a dict
# literal is silently overwritten, so it is now listed once.
mapping = {
    "st": "Street",
    "Rd": "Road",
    "Rd.": "Road",
    "Ave": "Avenue",
    'Extention': "Extension",
    'Monkstown': 'Monkstown Road',
    'Harvey': 'Harvey Road',
    'Hayward': 'Hayward Avenue',
    'Larkhall': 'Larkhall Street',
    'Maxse': 'Maxse Street',
    'Williams': 'Williams Heights',
    'catherine': 'catherine Street',
}



def audit_street_type(street_types, street_name):
    """Record *street_name* under its street type if that type is unexpected.

    Args:
        street_types: Mapping from street type to a set of street names;
            it is updated in place (a ``defaultdict(set)`` in this file).
        street_name: Full street name to inspect.

    The street type is whatever ``street_type_re`` matches in the name.
    Names with no match, or whose type is listed in ``expected``, are ignored.
    """
    match = street_type_re.search(street_name)
    if match is None:
        return
    kind = match.group()
    if kind in expected:
        return
    street_types[kind].add(street_name)


def is_street_name(elem):
    """Return True when *elem* carries the key ``addr:street``.

    Args:
        elem: Object exposing an ``attrib`` mapping, such as an OSM
            ``<tag>`` element.

    Returns:
        bool: True only if the ``k`` attribute equals ``"addr:street"``.
        An element without a ``k`` attribute yields False instead of
        raising ``KeyError``.
    """
    return elem.attrib.get('k') == "addr:street"


def audit(osmfile):
    """Collect the unexpected street types found in an OSM XML file.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        A ``defaultdict(set)`` mapping each street type reported by
        audit_street_type() to the set of street names that end with it.
    """
    street_types = defaultdict(set)
    # The context manager closes the file even if parsing raises.
    with open(osmfile, "r", encoding="utf8") as osm_file:
        # Listen for "end" events: on a "start" event the children of an
        # element are not guaranteed to have been parsed yet, so some <tag>
        # children could be missed.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    audit_street_type(street_types, tag.attrib['v'])
            # The element is fully processed; drop its children and
            # attributes so they are not retained for the rest of the parse.
            elem.clear()
    return street_types


def update_name(name, mapping):
    """Return *name* with its street type replaced according to *mapping*.

    Args:
        name: Street name to correct.
        mapping: Dict from a matched street type to its replacement text.

    Returns:
        The corrected name, or *name* unchanged when ``street_type_re`` finds
        no match or the matched text is not a key of *mapping*.

    When a replacement is made, the old and new names are printed.
    """
    match = street_type_re.search(name)
    if match is None:
        return name

    suffix = match.group()
    if suffix not in mapping:
        return name

    print (name)
    fixed = street_type_re.sub(mapping[suffix], name)
    print ("=>", fixed)
    return fixed


def test():
    """Audit OSMFILE, print the unexpected street types, then print each fix."""
    audited = audit(OSMFILE)
    print ("\nThe different types of street types identified in the osm file are listed below : \n")
    pprint.pprint(dict(audited))

    print ("\n The corrected street names are listed below : \n")

    # update_name() prints every name it changes; its return value is not
    # needed here.
    for names in audited.values():
        for street in names:
            update_name(street, mapping)


if __name__ == '__main__':
    test()


The different types of street types identified in the osm file are listed below : 

{'Ave': {"St. David's Ave"},
 'Close': {'Pembury Close'},
 'Crescent': {'Bellevue Crescent',
              'Brad Gushue Crescent',
              'Burling Crescent',
              'Cedar Brae Crescent',
              'Cherrybark Crescent',
              'Clinch Crescent',
              'Cornwall Crescent',
              'Duntara Crescent',
              'Gander Crescent',
              'Hallett Crescent',
              'Harlequin Crescent',
              'Joshua Crescent',
              'Mansfield Crescent',
              'Pasadena Crescent',
              'Rigolet Crescent',
              "St. Anne's Crescent",
              'Stonegate Crescent',
              'Torngat Crescent'},
 'East': {'Ferryland Street East'},
 'Extension': {'Bauline Line Extension'},
 'Extention': {'Motion Drive Extention'},
 'Harvey': {'Harvey'},
 'Hayward': {'Hayward'},
 'Highway': {'Conception Bay Highway',
             'Pouc

## 2. Auditing Postal Codes

The code in the cell below classifies the postal codes into different groups based on their lengths. 

        * The audit() function takes in the OSM file and parses it iteratively. It checks 
          for postal codes by calling is_post_code() and passes them on to audit_post_code().
        * The audit_post_code() function groups the postal codes by length.
        * The update_post_code() function converts six-character postal codes into the required format and prints them.


In [51]:

import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# OSM XML file audited by this cell; test() passes it to audit().
OSMFILE = "St. John's_NL_Canada.osm"   #"test_file.osm"



def audit_post_code(post_code_types, post_code):
    """File *post_code* under its character count.

    Args:
        post_code_types: Mapping from length to a set of postal codes; it is
            updated in place (a ``defaultdict(set)`` in this file).
        post_code: Postal code string to classify.
    """
    post_code_types[len(post_code)].add(post_code)
    
    
def is_post_code(elem):
    """Return True when *elem* carries the key ``addr:postcode``.

    Args:
        elem: Object exposing an ``attrib`` mapping, such as an OSM
            ``<tag>`` element.

    Returns:
        bool: True only if the ``k`` attribute equals ``"addr:postcode"``.
        An element without a ``k`` attribute yields False instead of
        raising ``KeyError``.
    """
    return elem.attrib.get('k') == "addr:postcode"


def audit(osmfile):
    """Group the postal codes found in an OSM XML file by their length.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        A ``defaultdict(set)`` mapping each postal-code length to the set of
        postal codes of that length, as filled in by audit_post_code().
    """
    post_code_types = defaultdict(set)
    # The context manager closes the file even if parsing raises.
    with open(osmfile, "r", encoding="utf8") as osm_file:
        # Listen for "end" events: on a "start" event the children of an
        # element are not guaranteed to have been parsed yet, so some <tag>
        # children could be missed.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if is_post_code(tag):
                    audit_post_code(post_code_types, tag.attrib['v'])
            # The element is fully processed; drop its children and
            # attributes so they are not retained for the rest of the parse.
            elem.clear()
    return post_code_types


def update_post_code(post_code_type):
    """Print the spaced form of every six-character postal code.

    Args:
        post_code_type: Mapping from postal-code length to the set of codes
            with that length.

    Only the group stored under length 6 is processed: a space is inserted
    after the third character and ``<old> => <new>`` is printed. Nothing is
    returned and the mapping is not modified.
    """
    print ("\n The corrected postal codes are listed below : \n")

    six_char_codes = post_code_type.get(6, ())
    for raw in six_char_codes:
        spaced = " ".join((raw[:3], raw[3:]))
        print (raw, "=>", spaced)
                
    
def test():
    """Audit OSMFILE, print the postal codes by length, then print the fixes."""
    groups = audit(OSMFILE)
    print ("\nThe different types of postal codes identified in the osm file are listed below : \n")
    pprint.pprint(dict(groups))

    # Print the reformatted six-character postal codes.
    update_post_code(groups)


if __name__ == '__main__':
    test()


The different types of postal codes identified in the osm file are listed below : 

{3: {'A1N'},
 5: {'A1L 1'},
 6: {'A0A1W0',
     'A1A0H5',
     'A1A0L2',
     'A1A3S2',
     'A1A4E1',
     'A1A5C9',
     'A1B3V6',
     'A1B3X9',
     'A1C2E9',
     'A1C5R3',
     'A1E4L8',
     'A1K0C8',
     'A1K1A9',
     'A1K1K8',
     'A1L1E7',
     'A1L1H1',
     'A1L1J9',
     'A1X6N9',
     'A1Y1A7'},
 7: {'A0A 2M0',
     'A0A 2R0',
     'A0A 3K0',
     'A1A 2K1',
     'A1A 2K9',
     'A1B 1C3',
     'A1B 1R9',
     'A1B 1S3',
     'A1B 1T9',
     'A1B 1W3',
     'A1B 2A5',
     'A1B 2A6',
     'A1B 2A7',
     'A1B 2A8',
     'A1B 2A9',
     'A1B 2B1',
     'A1B 2B2',
     'A1B 2B3',
     'A1B 2B4',
     'A1B 2B5',
     'A1B 2B6',
     'A1B 2B7',
     'A1B 2B8',
     'A1B 2B9',
     'A1B 2C1',
     'A1B 2C2',
     'A1B 2C3',
     'A1B 2C4',
     'A1B 2C5',
     'A1B 2C6',
     'A1B 2L3',
     'A1B 2N7',
     'A1B 3E3',
     'A1B 3H2',
     'A1B 3H3',
     'A1B 3H4',
     'A1B 3J3',
     'A1B

## 3.  Auditing City names

The code below audits the OSMFILE and changes the city names to the required format based on the 'mapping'. 

    * audit_city() checks whether a city name is in the "expected" list; if it is not, it adds the name as a key and adds the string to that key's set.
    * is_city_name() checks whether the tag's key is "addr:city", i.e. a city record.
    * audit() returns a dictionary of the city names that are not in the "expected" list. Using this we can understand and correct our city names.
    * mapping {} contains all the mappings required to convert the city names into the correct city name.
    * update_city_names() takes the audited city names and the mapping as arguments and prints the fixed names.

In [82]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# OSM XML file audited by this cell; test() passes it to audit().
OSMFILE = "St. John's_NL_Canada.osm"   #"test_file.osm"

# City names accepted as-is; audit_city() records every name not listed here.
expected = ["St. John's"]

# City-name corrections used by update_city_names(): each key is a spelling
# found in the data, each value is the canonical city name. Lookups are exact
# (case-sensitive) dictionary lookups.
# "St. Phillips" used to be listed twice (a duplicate key in a dict literal is
# silently overwritten), and "St John's" had no entry although the audit
# reports that spelling.
mapping = {
    "Saint John's": "St. John's",
    "St John's": "St. John's",
    "St john's": "St. John's",
    'St. John': "St. John's",
    "St. John's": "St. John's",
    'St. John´s': "St. John's",
    "st. John's": "St. John's",
    "st. john's": "St. John's",
    "Town of Portugal Cove - St. Philip's": "Portugal Cove-St. Philip's",
    "St. Phillips": "Portugal Cove-St. Philip's",
    "Portugal Cove - St. Philips": "Portugal Cove-St. Philip's",
    "Portugal Cove-St. Philip’s": "Portugal Cove-St. Philip's",
    'PORTUGAL COVE-ST PHILIPS': "Portugal Cove-St. Philip's",
}



def audit_city(city_names, city_name):
    """Record *city_name* when it is not one of the expected city names.

    Args:
        city_names: Mapping from city name to a set of names; it is updated
            in place (a ``defaultdict(set)`` in this file).
        city_name: City name to check against ``expected``.

    An unexpected name is stored under itself as the key.
    """
    if city_name in expected:
        return
    city_names[city_name].add(city_name)


def is_city_name(elem):
    """Return True when *elem* carries the key ``addr:city``.

    Args:
        elem: Object exposing an ``attrib`` mapping, such as an OSM
            ``<tag>`` element.

    Returns:
        bool: True only if the ``k`` attribute equals ``"addr:city"``.
        An element without a ``k`` attribute yields False instead of
        raising ``KeyError``.
    """
    return elem.attrib.get('k') == "addr:city"


def audit(osmfile):
    """Collect the unexpected city names found in an OSM XML file.

    Args:
        osmfile: Path of the OSM XML file to scan.

    Returns:
        A ``defaultdict(set)`` holding every city name recorded by
        audit_city(), i.e. each name that is not in ``expected``.
    """
    city_names = defaultdict(set)
    # The context manager closes the file even if parsing raises.
    with open(osmfile, "r", encoding="utf8") as osm_file:
        # Listen for "end" events: on a "start" event the children of an
        # element are not guaranteed to have been parsed yet, so some <tag>
        # children could be missed.
        for _, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if is_city_name(tag):
                    audit_city(city_names, tag.attrib['v'])
            # The element is fully processed; drop its children and
            # attributes so they are not retained for the rest of the parse.
            elem.clear()
    return city_names


def update_city_names(city_names, mapping):
    """Print the corrected form of every audited city name found in *mapping*.

    Args:
        city_names: Dict whose keys are the audited city names.
        mapping: Dict from a known spelling to the corrected city name.

    For each key of *city_names* that is also a key of *mapping*, the
    original name and then ``=> <corrected name>`` are printed. Nothing is
    returned and neither argument is modified.
    """
    for original in city_names:
        if original not in mapping:
            continue
        print (original)
        print ("=>", mapping[original])
                
    
def test():
    """Audit OSMFILE, print the unexpected city names, then print the fixes."""
    audited = audit(OSMFILE)
    print ("\nThe different city names identified in the osm file are listed below : \n")
    pprint.pprint(dict(audited))

    print ("\n The corrected city names are listed below : \n")

    # Print the corrected city names.
    update_city_names(audited, mapping)


if __name__ == '__main__':
    test()


The different city names identified in the osm file are listed below : 

{'Bareneed': {'Bareneed'},
 'Bell Island': {'Bell Island'},
 'Carbonear': {'Carbonear'},
 'Conception Bay South': {'Conception Bay South'},
 'Goulds': {'Goulds'},
 'Harbour Grace': {'Harbour Grace'},
 'Holyrood': {'Holyrood'},
 'Logy Bay-Middle Cove-Outer Cove': {'Logy Bay-Middle Cove-Outer Cove'},
 'Mount Pearl': {'Mount Pearl'},
 'PORTUGAL COVE-ST PHILIPS': {'PORTUGAL COVE-ST PHILIPS'},
 'Paradise': {'Paradise'},
 'Petty Harbour-Maddox Cove': {'Petty Harbour-Maddox Cove'},
 'Portugal Cove - St. Philips': {'Portugal Cove - St. Philips'},
 'Portugal Cove-St. Philip’s': {'Portugal Cove-St. Philip’s'},
 "Saint John's": {"Saint John's"},
 "St John's": {"St John's"},
 "St john's": {"St john's"},
 'St. John': {'St. John'},
 'St. John´s': {'St. John´s'},
 'St. Phillips': {'St. Phillips'},
 'Torbay': {'Torbay'},
 "Town of Portugal Cove - St. Philip's": {'Town of Portugal Cove - St. '
                                    