# Convert OSM data from XML to JSON

In [9]:
# Path of the OSM XML file that the cells below read.
# Alternative input, presumably the full Raleigh extract (uncomment to use it instead):
#datafile = "../raleigh_north-carolina.osm"
datafile = "../Submission/04-sample.osm"

In [10]:
import json
import xml.etree.cElementTree as ET
import re

In [11]:
def count_tags(filename):
    """Tally how often each element tag occurs in an XML document.

    Args:
        filename: path (or file object) handed straight to ``ET.iterparse``.

    Returns:
        dict mapping tag name to the number of elements carrying that tag,
        keyed in the order in which each tag's first element finished parsing.
    """
    counts = {}
    # iterparse reports only 'end' events unless told otherwise, so every
    # element is visited exactly once.
    for _, node in ET.iterparse(filename):
        counts[node.tag] = counts.get(node.tag, 0) + 1
    return counts

In [12]:
# Count every element tag in the selected file and display the totals.
tags = count_tags(datafile)
print(tags)

{'node': 252427, 'osm': 1, 'relation': 74, 'nd': 281437, 'member': 723, 'tag': 81286, 'way': 21146}


### Define specific formatting functions for each type of data

In [13]:
def process_node(element):
    """Format a node element as a plain dict suitable for JSON output.

    Attributes are copied into a scratch dict and removed from it as they are
    consumed, so whatever is left at the end is copied through as-is.

    Args:
        element: object exposing ``attrib`` (a mapping of attribute names to
            strings) and ``findall`` -- an ElementTree element for a node.

    Returns:
        dict with:
          * ``'type'``: always ``'node'``
          * ``'id'``: the element's ``id`` attribute
          * ``'pos'``: ``[lat, lon]`` as floats, only when both are present
          * ``'created'``: the version/changeset/timestamp/user/uid
            attributes that are present, only when at least one is
          * every remaining attribute whose name contains no problem character

    Raises:
        KeyError: if the element has no ``id`` attribute, or a child ``tag``
            lacks ``k`` or ``v``.
        ValueError: if ``lat`` or ``lon`` cannot be parsed as a float.
    """
    CREATED = ("version", "changeset", "timestamp", "user", "uid")

    node = {'type': 'node'}
    # Scratch copy: entries are popped as they are consumed.
    attributes = dict(element.attrib)
    # TODO: the child tags are collected but not merged into the node yet.
    tags = {t.attrib['k']: t.attrib['v'] for t in element.findall('tag')}

    # Add ID
    node['id'] = attributes.pop('id')

    # Add position
    if 'lat' in attributes and 'lon' in attributes:
        node['pos'] = [float(attributes.pop('lat')), float(attributes.pop('lon'))]

    # Created: keep the order in which the attributes appear on the element.
    created_names = [name for name in attributes if name in CREATED]
    if created_names:
        node['created'] = {name: attributes.pop(name) for name in created_names}

    # Remaining attributes. search() is required here: match() would only
    # test the first character of the name and let e.g. "a.b" through.
    for name, value in attributes.items():
        if problemchars.search(name):
            continue
        node[name] = value

    return node
    
def process_way(element):
    """Build the output record for a way element.

    Placeholder implementation: ``element`` is not read yet and only the
    type marker is returned.
    """
    # Still to be filled in:
    #   - changeset, id, timestamp, uid, user, version
    #   - list of nd children, each with its ref
    #   - list of tags
    return {'type': 'way'}
    
    
def process_relation(element):
    """Build the output record for a relation element.

    Placeholder implementation: ``element`` is not read yet and only the
    type marker is returned.
    """
    # Still to be filled in:
    #   - changeset, id, timestamp, uid, user, version
    #   - list of members, each with ref, role and type
    #   - list of tags
    return {'type': 'relation'}

In [14]:
# Patterns for vetting key names.
# lower: the whole string consists of lowercase ASCII letters and underscores
# (the empty string matches too).
lower = re.compile(r'^([a-z]|_)*$')
# lower_colon: two such runs separated by exactly one colon, e.g. "addr:street".
lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$')
# problemchars: one character out of = + / & < > ; ' " ? % # $ @ , . space,
# tab, CR or LF. The pattern is unanchored, so .search() finds such a
# character anywhere in a string, whereas .match() only tests the first one.
problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

### Iterate through XML file.  Process elements and write to JSON one at a time.

In [15]:
def shape_element(element):
    """Route an element to the formatter that matches its tag.

    Returns the result of ``process_node``, ``process_way`` or
    ``process_relation`` for elements tagged 'node', 'way' or 'relation',
    and ``None`` for every other tag.
    """
    kind = element.tag
    if kind == "node":
        return process_node(element)
    if kind == 'way':
        return process_way(element)
    if kind == 'relation':
        return process_relation(element)
    # Anything else (e.g. child elements) produces no record of its own.
    return None
        

def process_map(file_in):
    """Convert an XML file into a file of JSON records.

    Streams ``file_in`` with ``ET.iterparse``, shapes every element with
    ``shape_element`` and writes each non-empty result to ``<file_in>.json``
    as an indent-2 JSON object followed by a newline. The records are simply
    concatenated; they are not wrapped in a JSON array.

    Args:
        file_in: path of the XML file to read; also the stem of the output
            file name.

    Returns:
        None. The output file is the only product.
    """
    file_out = "{0}.json".format(file_in)
    with open(file_out, "w") as fo:
        for _, element in ET.iterparse(file_in):
            el = shape_element(element)
            if el:
                fo.write(json.dumps(el, indent=2) + "\n")
                # iterparse keeps every parsed element attached to the tree;
                # dropping the contents of elements that are already written
                # keeps memory bounded on large files.
                # NOTE(review): only elements that produced a record are
                # cleared, so children are still intact when their parent is
                # shaped. This assumes shaped elements are never nested
                # inside other shaped elements -- confirm for the input data.
                element.clear()

In [16]:
# Run the conversion on the selected file; the output is written to "<datafile>.json".
process_map(datafile)

TypeError: an integer is required (got type str)

      or element.tag == "way" :
        # YOUR CODE HERE
        node['type'] = element.tag
        attributes = {a:element.attrib[a] for a in element.attrib}
        tags = [(t.attrib['k'],t.attrib['v']) for t in element.findall('tag')]
        noderefs = [n.attrib['ref'] for n in element.findall('nd')]
        
        #Add Position
        if 'lat' in attributes and 'lon' in attributes:
            node['pos'] = [float(element.attrib["lat"]), float(element.attrib["lon"])]
            del attributes['lat']
            del attributes['lon']
        
        #Created
        created_attrib = [a for a in attributes if a in CREATED]
        if len(created_attrib) > 0:
            node['created'] = {}
            for a in created_attrib:
                node['created'][a] = element.attrib[a]
                del attributes[a]
                
        #Remaining
        for a in attributes:
            if problemchars.match(a):
                continue
            else:
                node[a] = element.attrib[a]
        
        #Tags
        for t in tags:
            if problemchars.match(t[0]):
                continue
            if t[0][0:5]=='addr:':
                if t[0].count(':') > 1:
                    continue
                if 'address' not in node.keys():
                    node['address'] = {}
                addrkey = t[0].split(':')[1]
                node['address'][addrkey] = t[1]
            else:
                node[t[0]] = t[1]
        
        #Node Refs
        if len(noderefs) > 0:
            node['node_refs'] = noderefs
        
        return node
    else:
        return None