## Exploring data

In [1]:
# xml.etree.cElementTree was removed in Python 3.9; keep using it where it
# still exists and fall back to the plain module everywhere else.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import audit_address

# OSM file audited by every cell of this notebook.
sample = "central_London_sample.osm"

# Result sets, filled by the single pass over `sample` below.
type_keys = set()        # tag keys ('k') that contain "type"
amenity_values = set()   # tag values ('v') whose key contains "amenity"
cuisine_values = set()   # ... whose key contains "cuisine"
shop_values = set()      # ... whose key contains "shop"
religion_values = set()  # ... whose key contains "religion"

# (key fragment, target set) pairs. Matching is a substring test on the key,
# so any key that merely contains the fragment also feeds the same set.
value_sets_by_fragment = (
    ('amenity', amenity_values),
    ('cuisine', cuisine_values),
    ('shop', shop_values),
    ('religion', religion_values),
)

with open(sample, "rb") as f:
    # With its default events, iterparse yields an element once it has been
    # fully read, so all of its direct <tag> children are available here.
    for event, elem in ET.iterparse(f):
        for tag in elem.findall('tag'):
            key = tag.attrib['k']
            if "type" in key:
                type_keys.add(key)
            for fragment, values in value_sets_by_fragment:
                if fragment in key:
                    values.add(tag.attrib['v'])
        # Free finished elements so the whole document is not kept in memory.
        # <tag> elements are left intact: their parent reads their attributes
        # only when the parent itself is complete.
        if elem.tag != 'tag':
            elem.clear()

**Auditing type keys**

In [2]:
# Show the tag keys containing the substring "type" found while scanning `sample`.
type_keys

{'addr:type',
 'board_type',
 'camera:type',
 'fence_type',
 'fire_hydrant:type',
 'leaf_type',
 'map_type',
 'post_box:type',
 'recycling_type',
 'surveillance:type',
 'tracktype',
 'type'}

**Auditing amenity values**

In [3]:
# Show the distinct values of tags whose key contains "amenity".
amenity_values

{'arts_centre',
 'atm',
 'bank',
 'bar',
 'bench',
 'bicycle_parking',
 'bicycle_rental',
 'biergarten',
 'cafe',
 'car_sharing',
 'charging_station',
 'church_hall',
 'cinema',
 'clinic',
 'clock',
 'college',
 'community_centre',
 'courthouse',
 'dentist',
 'doctors',
 'drinking_water',
 'embassy',
 'fast_food',
 'fuel',
 'grit_bin',
 'kindergarten',
 'library',
 'marketplace',
 'motorcycle_parking',
 'nursing_home',
 'parking',
 'parking_space',
 'pharmacy',
 'place_of_worship',
 'police',
 'post_box',
 'post_office',
 'pub',
 'public_building',
 'recycling',
 'restaurant',
 'school',
 'social_club',
 'social_facility',
 'telephone',
 'theatre',
 'toilets',
 'vending_machine',
 'waste_basket'}

**Auditing cuisine values**

In [4]:
# Show the distinct values of tags whose key contains "cuisine".
cuisine_values

{'american',
 'barbecue',
 'brazilian',
 'chicken',
 'coffee',
 'french',
 'indian',
 'italian',
 'japanese',
 'kebab',
 'lebanese',
 'local',
 'malaysian',
 'moroccan',
 'pizza',
 'russian',
 'sandwich',
 'spanish',
 'thai',
 'turkish',
 'vietnamese'}

**Auditing shop type values**

In [5]:
# Show the distinct values of tags whose key contains "shop".
shop_values

{'alcohol',
 'antiques',
 'art',
 'bakery',
 'beauty',
 'bicycle',
 'bookmaker',
 'books',
 'boutique',
 'bullion',
 'butcher',
 'car_repair',
 'carpet',
 'cash',
 'catalogue',
 'charity',
 'chemist',
 'clothes',
 'communication',
 'confectionery',
 'convenience',
 'deli',
 'dry_cleaning',
 'electronics',
 'estate_agent',
 'fabric',
 'florist',
 'gift',
 'greengrocer',
 'hairdresser',
 'hardware',
 'health',
 'health_food',
 'houseware',
 'jewelry',
 'laundry',
 'maps',
 'market',
 'mobile_phone',
 'music',
 'musical_instrument',
 'newsagent',
 'outdoor',
 'perfume',
 'pet',
 'plumbing',
 'shoe_repair',
 'shoes',
 'stationery',
 'supermarket',
 'taxi',
 'toys',
 'trade',
 'travel_agency',
 'vacant',
 'wholesale',
 'wine',
 'yes'}

**Auditing religion values**

In [6]:
# Show the distinct values of tags whose key contains "religion".
religion_values

{'christian', 'muslim'}

**Auditing address values**

In [7]:
# audit_address is a project module whose code is not part of this notebook.
# Its audit() result for `sample` is unpacked into two collections, which the
# following cells display.
street_types, post_codes = audit_address.audit(sample)

In [8]:
# Show the street names returned by the audit; in the recorded output each
# name is grouped under its last word (e.g. 'Causeway' -> 'Stepney Causeway').
street_types

defaultdict(set,
            {'Causeway': {'Stepney Causeway'},
             'Centre': {'Southside Shopping Centre'},
             'Cheapside': {'Cheapside'},
             'Eastcheap': {'Eastcheap'},
             'Embankment': {'Victoria Embankment'},
             'Ground': {'Upper Ground'},
             'Kingsway': {'Kingsway'},
             'Limeharbour': {'Limeharbour'},
             'Mead': {'Friars Mead'},
             'Millbank': {'Millbank'},
             'Pleasant': {'Point Pleasant'},
             'Polygon': {'The Polygon'},
             'Rye': {'Peckham Rye'},
             'Strand': {'Strand'},
             'Wall': {'Marsh Wall'},
             'West': {'Jupp Road West'},
             'Whitehall': {'Whitehall'}})

In [9]:
# Show the postcode values returned by the audit; the recorded output contains
# short or partial codes such as 'N1' and 'SW3 4'.
post_codes

{'N1', 'N7', 'NW3', 'SE22', 'SE5', 'SW15', 'SW3 4', 'SW6 5', 'W1', 'W12'}