## Audit

In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

In [3]:
# Path of the OpenStreetMap XML extract that gets audited.
OSMFILE = "G:\\UdacityDataAnalyst\\shanghai_china.osm"

# Captures the trailing run of non-whitespace characters of a street name,
# starting at a word boundary; that trailing token is treated as the
# street type.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)


# Street types that are considered well-formed and are not reported.
expected = ["Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square", "Lane", "Road",
            "Trail", "Parkway", "Commons"]

# Substitutions from abbreviated / misspelled street types (and stray line
# breaks) to their cleaned form; this is the shape `update_name` takes as
# its `mapping` argument.
mapping = {
            "St.": "Street",
            "Ave": "Avenue",
            "Rd.": "Road",
            'Hwy.': "Highway",
            "Lu": "Road",
            "lu": "Road",
            "Rd)": "Road",
            "Rd.）": "Road",
            "Rode": "Road",
            "rd": "Road",
            "road": "Road",
            # The comma after this entry was missing, which made the whole
            # literal a syntax error.
            "Rd,": "Road,",
            "\n": "",
            "\r\n": "",
            }


def audit_street_type(street_types, street_name):
    """Record ``street_name`` under its street type when that type is unexpected.

    The street type is whatever ``street_type_re`` matches in
    ``street_name``. Names with no match, or whose type is listed in
    ``expected``, are ignored.

    Args:
        street_types: Mapping of street type -> set of street names; it is
            updated in place (indexing must create missing entries, as a
            ``defaultdict(set)`` does).
        street_name: The street name to inspect.
    """
    found = street_type_re.search(street_name)
    if found is None:
        return

    suffix = found.group()
    if suffix in expected:
        return

    street_types[suffix].add(street_name)


def is_street_name(elem):
    """Tell whether ``elem`` is a tag whose ``k`` attribute is ``addr:street``.

    Raises:
        KeyError: If ``elem`` has no ``k`` attribute.
    """
    key = elem.attrib['k']
    return key == "addr:street"


def audit(osmfile):
    """Collect street names whose street type is not in ``expected``.

    Streams through the OSM XML file and, for every ``node`` and ``way``
    element, passes the value of each ``addr:street`` tag to
    ``audit_street_type``.

    Args:
        osmfile: Path of the OSM XML file to read.

    Returns:
        A ``defaultdict(set)`` mapping each unexpected street type to the
        set of street names in which it was found.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    street_types = defaultdict(set)
    # Binary mode lets the XML parser pick the encoding from the document
    # itself ('unicode' is not a codec name and made open() raise
    # LookupError). The context manager closes the file even when parsing
    # fails part-way through.
    with open(osmfile, "rb") as osm_file:
        # Children are only guaranteed to be attached once the element's
        # "end" event fires; on "start" they may still be missing.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag == "node" or elem.tag == "way":
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib['v'])
                # The element is fully processed; drop its children so a
                # large extract does not accumulate in memory.
                elem.clear()
    return street_types


def update_name(name, mapping):
    """Return ``name`` with every key of ``mapping`` replaced by its value.

    Replacement happens in a single pass over the original text, so the
    output of one substitution is never rewritten by another key. When
    several keys could match at the same position, the longest one wins
    (e.g. ``"Rd.）"`` is preferred over its prefix ``"Rd."``).

    A key edge that is a letter, digit or underscore must sit on a word
    boundary, so ``"rd"`` rewrites ``"fuyou rd 378"`` but leaves
    ``"Garden"`` alone. Keys that begin or end with punctuation or
    whitespace (``"Rd."``, ``"\\n"``) are matched wherever they occur on
    that side.

    Args:
        name: The street name to clean.
        mapping: Dict of text to find -> replacement text. Empty keys are
            ignored.

    Returns:
        The cleaned name; ``name`` unchanged when nothing matches.
    """
    # Longest keys first; sorted() is stable, so ties keep mapping order.
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if not keys:
        return name

    def as_pattern(key):
        # Guard only the edges that are word characters, so punctuation and
        # newline keys still match when glued to neighbouring text.
        lead = r"(?<!\w)" if re.match(r"\w", key[0]) else ""
        trail = r"(?!\w)" if re.match(r"\w", key[-1]) else ""
        return lead + re.escape(key) + trail

    pattern = re.compile("|".join(as_pattern(key) for key in keys))
    return pattern.sub(lambda found: mapping[found.group(0)], name)


def test():
    """Audit ``OSMFILE`` and pretty-print the unexpected street types found."""
    unexpected = dict(audit(OSMFILE))
    pprint.pprint(unexpected)


# Run the audit only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test()

{'1': {'لندینگ هوم 1', '松花一村 Songhua Community #1'},
 '1094弄': {'1094弄'},
 '1440弄19号': {'1440弄19号'},
 '1501': {'南苏州路 1501'},
 '2': {'长白二村 Changbai Community #2'},
 '203': {'Xiang yang south road 203'},
 '3': {'3'},
 '3101': {'Huyi Highway 3101'},
 '3386': {'Bao An Gong Lu 3386'},
 '378': {'fuyou rd 378'},
 '99': {'CaoXi Bei lu 99'},
 'Ave.': {'Wuchang Ave.', 'Haigang Ave.'},
 'Block': {'Xintiandi North Block'},
 'County': {'Kangshan Industrial Zone, Anji County'},
 'District': {'West of Xihu Lake, Xihu District',
              'Xiangyin Road, Yangpu District'},
 'District,': {'Yuanbao Street, Shangcheng District,'},
 'Dong': {'Suzhou Dadao Dong'},
 'Garden': {'Jinan Garden', 'Tomson Golf Garden'},
 'Gonglu': {'Hangnan Gonglu'},
 'Highway': {'Huyi Highway'},
 'Hwy.': {'Husong Hwy.'},
 'Jie': {'Shiquan Jie'},
 'Lu': {'1388 Huamu Lu',
        '3 Fenyang Lu',
        'Baotun Lu, near Zhongshan Nan Lu',
        'Chuanchang Lu',
        'Fengxian Lu',
        'Jianguo Zhong Lu',
        'Jin

 '浙江中路': {'浙江中路'},
 '浙江北路': {'浙江北路'},
 '浣纱路': {'浣纱路'},
 '浦东南路': {'浦东南路'},
 '浦东大道': {'浦东大道'},
 '浦建路207弄': {'浦建路207弄'},
 '浦明路': {'浦明路'},
 '浦江镇': {'浦江镇'},
 '海乐路': {'海乐路'},
 '海印路': {'海印路'},
 '海天五路': {'海天五路'},
 '海宁路': {'海宁路'},
 '海思路': {'海思路'},
 '海洲路': {'海洲路'},
 '海阔路': {'海阔路'},
 '海防路': {'海防路'},
 '涞亭南路': {'涞亭南路'},
 '涞寅路': {'涞寅路'},
 '润兴路': {'润兴路'},
 '润州路': {'润州路'},
 '淞南路': {'淞南路'},
 '淞沪路': {'淞沪路'},
 '淞虹路': {'淞虹路'},
 '淡水路': {'淡水路'},
 '淮海东路': {'淮海东路'},
 '淮海中路': {'淮海中路'},
 '淮海中路1270弄': {'淮海中路1270弄'},
 '淮海中路1329号': {'淮海中路1329号'},
 '淮海西路': {'淮海西路'},
 '淮阴路': {'淮阴路'},
 '清吟街': {'清吟街'},
 '清峪路': {'清峪路'},
 '清泰街': {'清泰街'},
 '清源环路': {'清源环路'},
 '清溪路': {'清溪路'},
 '港城大道': {'港城大道'},
 '港城路': {'港城路'},
 '湖州市金山路': {'湖州市金山路'},
 '湖州街': {'湖州街'},
 '湖滨路': {'湖滨路'},
 '源深路': {'源深路'},
 '溧水区永阳镇金蛙路金蛙路': {'溧水区永阳镇金蛙路金蛙路'},
 '滇池路': {'滇池路'},
 '滨康路': {'滨康路'},
 '滨湖区青祁路和梁溪路交叉西南角': {'滨湖区青祁路和梁溪路交叉西南角'},
 '滨湖路': {'滨湖路'},
 '滨盛路': {'滨盛路'},
 '漕东支路': {'漕东支路'},
 '漕宝路': {'漕宝路'},
 '漕溪北路': {'漕溪北路'},
 '漕溪路': {'漕溪路'},
 '漠河路': {'漠河路'},
 '潍坊西路': {