In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# Path of the OSM XML extract that the audit cells read.
osmfile_sample = "sample3.osm"

# Matches the last whitespace-delimited token of a street name (e.g. "St."),
# which the audit treats as the street type.
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)

# Trailing tokens that are already in canonical form and need no review.
expected = [
    "Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square",
    "Lane", "Road", "Trail", "Parkway", "Commons",
]

# Abbreviation / misspelling -> canonical street type.
# Keys and values must not carry surrounding whitespace: update_name() splits
# names on single spaces, so a key containing a space can never match a token,
# and a value containing a space would inject a double space into the result.
mapping = {
    "St": "Street",
    "St.": "Street",
    "street": "Street",
    "Rd": "Road",
    "Rd.": "Road",
    "raod": "Road",
    "Ln": "Lane",
    "Ave": "Avenue",
    "ave": "Avenue",
    "Hwg": "Highway",
    "Hwy": "Highway",
    "Sq": "Square",
}

In [2]:
def audit_street_type(street_types, street_name):
    """File ``street_name`` under its trailing token if that token is unexpected.

    The trailing token is whatever ``street_type_re`` matches at the end of
    ``street_name``. When that token is not listed in ``expected``, the full
    street name is added to ``street_types[token]``.

    Args:
        street_types: mapping from token to a set-like collection; it is
            mutated in place (``audit`` passes a ``defaultdict(set)``).
        street_name: the street name string to inspect.

    Returns:
        None.
    """
    match = street_type_re.search(street_name)
    if not match:
        # Nothing that looks like a trailing token; leave the mapping alone.
        return

    suffix = match.group()
    if suffix in expected:
        return

    street_types[suffix].add(street_name)

def is_street_name(elem):
    """Return True when ``elem`` carries the key ``addr:street``.

    Args:
        elem: an object exposing an ``attrib`` dict (``audit`` passes the
            ``tag`` children of ``node``/``way`` elements).

    Returns:
        bool: True only if ``elem.attrib`` has a ``k`` entry equal to
        ``"addr:street"``. An element without a ``k`` attribute yields False
        instead of raising ``KeyError``.
    """
    return elem.attrib.get('k') == "addr:street"

def audit(osmfile_sample):
    """Collect street names whose trailing token is not an expected street type.

    Streams the XML file at ``osmfile_sample`` and, for every ``node`` and
    ``way`` element, hands the ``v`` value of each ``addr:street`` tag to
    ``audit_street_type``.

    Args:
        osmfile_sample: path of the OSM XML file to read.

    Returns:
        defaultdict(set): unexpected trailing token -> set of full street
        names that end with it.
    """
    street_types = defaultdict(set)

    # Binary mode leaves decoding to the XML parser; ``with`` guarantees the
    # handle is closed even if parsing raises part-way through.
    with open(osmfile_sample, "rb") as osm_file:
        # Listen for "end" events: on "start" the element's children may not
        # have been parsed yet, so its <tag> children could be missed.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street_type(street_types, tag.attrib["v"])

    return street_types

In [3]:
# Run the audit on the sample file and pretty-print the result as a plain dict
# (unexpected trailing token -> set of street names ending with it).
pprint.pprint(dict(audit(osmfile_sample)))

{'1': set(['Road No 1']),
 '13': set(['Road No. 13']),
 '16': set(['Road Number 16']),
 '2': set(['Cross Road No 2', 'Road Number 2']),
 '20': set(['Sector 20']),
 '21': set(['Sector 21']),
 '22': set(['RSC Road No 22']),
 '23': set(['Sector 23']),
 '3': set(['Road Number 3']),
 '4': set(['RCF Colony Type 4']),
 '5': set(['Tejpal Scheme Road 5']),
 '50': set(['Sector 50']),
 '9': set(['Road No. 9']),
 'Bhavan': set(['Vidhan Bhavan']),
 'Bunglow': set(['Creak Crest, Model Town, Four Bunglow']),
 'Chowk': set(['Ashok Kedare Chowk']),
 'Circle': set(['Harinivas Circle']),
 'Colony': set(['Mysore Colony']),
 'Eas': set(['A Wing, Abhishek Co Operative Housing Society, Santacruz Eas']),
 'East': set(['Kher Nagar, Bandra East',
              'Near Sai Baba Mandir, Pathanwadi Bus Stand, Malad East']),
 'East,': set(['186, New Andheri Link Road, Bhagat Singh Colony,, Andheri East,']),
 'Estate': set(['Hiranandani Estate', 'Rahul Estate']),
 'Expressway': set(['Eastern Expressway']),
 'Gali': se

In [4]:
def string_case(s):
    """Title-case ``s`` unless it is already entirely upper case.

    Strings for which ``str.isupper()`` is true (e.g. acronyms such as
    ``"LBS"``) are returned unchanged; everything else is passed through
    ``str.title()``.

    Args:
        s: the string to normalise.

    Returns:
        str: ``s`` itself or its title-cased form.
    """
    return s if s.isupper() else s.title()

# return the updated names
def update_name(name, mapping):
    """Return ``name`` with mapped tokens replaced and every token re-cased.

    ``name`` is split on single spaces. Each token that is a key of
    ``mapping`` is swapped for its mapped value; every token (replaced or
    not) is then normalised with ``string_case``. The tokens are re-joined
    with single spaces.

    Args:
        name: the street name to clean.
        mapping: dict of token -> replacement.

    Returns:
        str: the cleaned street name.
    """
    return ' '.join(
        string_case(mapping.get(token, token))
        for token in name.split(' ')
    )

# Audit the sample file, then show each flagged street name next to the
# cleaned form produced by update_name().
update_street = audit(osmfile_sample)

for street_type, ways in update_street.items():
    for name in ways:
        better_name = update_name(name, mapping)
        print("%s => %s" % (name, better_name))

M. G. Road,Bazar Plot => M. G. Road,Bazar Plot
Sakharam Patil Marg => Sakharam Patil Marg
Govindji Shroff Marg => Govindji Shroff Marg
Pirojsha Godrej Marg => Pirojsha Godrej Marg
Sulochana Shetty Marg => Sulochana Shetty Marg
Gopal Krushna Gokhale Marg => Gopal Krushna Gokhale Marg
M. V. Pandloskar Marg => M. V. Pandloskar Marg
V B Phadke Marg => V B Phadke Marg
Maharana Pratap Marg => Maharana Pratap Marg
Ramakrishna Chemburkar Marg => Ramakrishna Chemburkar Marg
Ramabai Chemburkar Marg => Ramabai Chemburkar Marg
Nirmaladevi Arunkumar Ahuja Marg => Nirmaladevi Arunkumar Ahuja Marg
Ahinsa Marg => Ahinsa Marg
Lokhandwala Marg => Lokhandwala Marg
Adi Sankaracharya Marg => Adi Sankaracharya Marg
Lal Bahadur Shastri Marg => Lal Bahadur Shastri Marg
P.L. Lokhande Marg => P.L. Lokhande Marg
M G Ramchandran Marg => M G Ramchandran Marg
Dr Babasaheb Ambedkar Marg => Dr Babasaheb Ambedkar Marg
LBS Marg => LBS Marg
Vihar Lake Marg => Vihar Lake Marg
Lions Sol Marg => Lions Sol Marg
Madhusudhan 