In [1]:
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint

# OpenStreetMap extract that every audit in this notebook reads.
OSMFILE_sample = "ahmedabad_india.osm"

# With .search() this yields the first run of non-whitespace characters that
# starts at a word boundary (e.g. "100" for "100 Ft. Road").
# NOTE(review): `expected` below holds mostly suffix-like words ("Road",
# "Society"); if the audit was meant to key on the last token, the pattern
# would need a trailing `$` -- confirm the intent before changing it.
regex = re.compile(r'\b\S+\.?', re.IGNORECASE)

# Tokens that are already well-formed and therefore not reported by the audit.
expected = [
    "Ahmedabad",
    "Road",
    "NR",
    "Avenue",
    "SBK",
    "Gandhi",
    "Bridge",
    "Society",
]

# Exact-match token replacements, grouped by the canonical form they produce.
mapping = {
    # City name: casing and spelling variants.
    "ahmedabad": "Ahmedabad",
    "Ahmadabad": "Ahmedabad",
    "Ahamadabad": "Ahmedabad",
    # Road.
    "road": "Road",
    "Rd": "Road",
    "Rd.": "Road",
    "rasta": "Road",
    "Roads": "Road",
    # Society.
    "society": "Society",
    "soc.": "Society",
    "Socity": "Society",
    # Feet.
    "Ft.": "Feet",
    "ft": "Feet",
    # Remaining abbreviations and casing fixes.
    "Nr.": "NR",
    "Ave.": "Avenue",
    "sbk": "SBK",
    "gandhi": "Gandhi",
    "bridge": "Bridge",
    # Punctuation fix: a trailing full stop becomes a comma.
    "Gujarat.": "Gujarat,",
}


In [2]:

# Search the street name with the regex. If a token is matched and it is not in the expected list, add the street name to the set stored under that token.
def audit_street(street_types, street_name):
    """Record ``street_name`` under its matched token unless the token is expected.

    Args:
        street_types: Mapping from token to a set of street names. It is mutated
            in place and must create missing keys on access (for example a
            ``defaultdict(set)``).
        street_name: The street name to examine.
    """
    match = regex.search(street_name)
    if match is None:
        # Nothing in the name matched the pattern; there is nothing to record.
        return

    token = match.group()
    if token in expected:
        return

    street_types[token].add(street_name)


In [3]:

def is_street_name(elem):
    """Return True when ``elem`` is tagged with the ``addr:street`` key.

    Args:
        elem: An object exposing a dict-like ``attrib`` attribute, such as an
            ElementTree element.

    Returns:
        True if the ``k`` attribute equals ``"addr:street"``; False otherwise,
        including when the element has no ``k`` attribute at all.
    """
    # .get() instead of ['k'] so an element without a key is simply "not a
    # street name" rather than a KeyError that aborts the whole audit.
    return elem.attrib.get('k') == "addr:street"


In [4]:

def audit(osmfile):
    """Collect the street names found in an OSM XML file, grouped by audit key.

    Args:
        osmfile: Path of the OSM XML file to parse.

    Returns:
        A ``defaultdict(set)`` populated by ``audit_street`` from the ``v``
        attribute of every ``addr:street`` tag found under a ``node`` or
        ``way`` element.
    """
    street_types = defaultdict(set)

    # Binary mode hands the raw bytes to the XML parser; the ``with`` block
    # closes the handle even when parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: on "start" the child <tag> elements are not
        # guaranteed to have been parsed yet, so street names could be missed.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street(street_types, tag.attrib['v'])
                # This element is fully processed; drop its children so memory
                # use stays bounded on large extracts.
                elem.clear()

    return street_types

# Audit the sample extract and pretty-print the result as a plain dict, so the
# output shows each key with its set of street names.
pprint.pprint(dict(audit(OSMFILE_sample))) # print the existing street names grouped by key



{'100': set(['100 Ft. Road', '100 ft Road']),
 '101,': set(['101, Paritosh Building, Usmanpura, Ashram Road']),
 '120': set(['120 Feet Ring Road']),
 '132': set(['132 Ft. Ring Road']),
 '2nd': set(['2nd Floor, Aditya Complex, Opp kasturi Dining Hall, Paldi Cross Road, Paldi',
             '2nd Floor, Dev Arc Mall, Satellite Rd, Off S.G Highway, Near Iskcon Bridge']),
 '8': set(['8 aasiyana society jivrajpark nr 132 ring road ahmedabad']),
 'ACHER': set(['ACHER ROAD']),
 'ASarwa': set(['ASarwa']),
 'Adalaj': set(['Adalaj -sarkhej road']),
 'Ahmedabad-Kalol': set(['Ahmedabad-Kalol Highway']),
 'Airport': set(['Airport Road']),
 'Akhabarnagar': set(['Akhabarnagar Circle']),
 'Akshar': set(['Akshar Avenue']),
 'Anand': set(['Anand nagar road']),
 'Ashram': set(['Ashram Road']),
 'Asmita': set(['Asmita Society']),
 'B/H': set(['B/H AryaVilla']),
 'BRTS': set(['BRTS Route', 'BRTS Station']),
 'Balol': set(['Balol Nagar Road']),
 'BalolNagar': set(['BalolNagar Cross Road']),
 'Balvantrai': se

In [5]:
def string_case(s):
    """Return ``s`` in title case, or unchanged when it is already all upper case.

    Args:
        s: The string to normalise.

    Returns:
        ``s`` itself if ``s.isupper()`` is true, otherwise ``s.title()``.
    """
    return s if s.isupper() else s.title()


In [6]:

# return the updated names
def update_name(name, mapping):
    """Return ``name`` with each space-separated token replaced and re-cased.

    Args:
        name: The street name to clean.
        mapping: Lookup from a token to its replacement; tokens are matched
            exactly, before any case change.

    Returns:
        The tokens of ``name`` (split on single spaces), each swapped for its
        ``mapping`` entry when one exists and then passed through
        ``string_case``, joined back together with single spaces.
    """
    cleaned_tokens = (
        string_case(mapping[token] if token in mapping else token)
        for token in name.split(' ')
    )
    return ' '.join(cleaned_tokens)


In [7]:

# Re-run the audit to collect every street name it reports.
update_street = audit(OSMFILE_sample)

# Print each reported street name next to its cleaned-up form.
# items() and a single-argument print(...) give the same output on Python 2
# and Python 3; iteritems() and the print statement exist only on Python 2.
for street_type, ways in update_street.items():
    for name in ways:
        better_name = update_name(name, mapping)
        print("%s => %s" % (name, better_name))



GVMM => GVMM
Service road - Sarkhej Gandhinagar Highway => Service Road - Sarkhej Gandhinagar Highway
Danapith Road => Danapith Road
132 Ft. Ring Road => 132 Feet Ring Road
Netaji Subhash Chandra Road => Netaji Subhash Chandra Road
Kh-0 => Kh-0
Nehrunagar => Nehrunagar
Khokhra Road => Khokhra Road
Odhav Road => Odhav Road
SH-41 => SH-41
GST Crossing Road, Ranip => GST Crossing Road, Ranip
GST Crossing => GST Crossing
GST Road => GST Road
GST Crossing Road, New Ranip => GST Crossing Road, New Ranip
GST Crossing, New Ranip => GST Crossing, New Ranip
Swastik Cross Road => Swastik Cross Road
Swastik Society Road => Swastik Society Road
Nav-sarjan School Road => Nav-Sarjan School Road
Ellisbridge => Ellisbridge
Vrajraiji Colony => Vrajraiji Colony
Manek Baug Road => Manek Baug Road
Gandhinagar-Ahmedabad Highway => Gandhinagar-Ahmedabad Highway
8 aasiyana society jivrajpark nr 132 ring road ahmedabad => 8 Aasiyana Society Jivrajpark Nr 132 Ring Road Ahmedabad
kankariya gate no.3 => Kankariya