# Analyzing OpenStreetMap data

In this document I show the MongoDB queries I used to analyze OpenStreetMap data.

## Importing pymongo and accessing the database

In [1]:
import pymongo, pprint

# Connection string of the MongoDB server queried by this notebook.
MONGO_URI = 'mongodb://localhost:27017/'

client = pymongo.MongoClient(MONGO_URI)

# Handle to the "openstreetmap" database; every query below goes through it.
db = client['openstreetmap']

## Data overview

### Number of valid items

In [2]:
# Collection.count() was removed in PyMongo 4; count_documents() with an
# empty filter counts every document in the collection.
print('Number of valid items: {}'.format(db.elements.count_documents({})))

Number of valid items: 9432206


In [3]:
def number_of_elements(node_type):
    """Count the documents in ``db.elements`` whose ``type`` equals ``node_type``.

    Args:
        node_type: Value compared against each document's ``type`` field
            (this notebook passes 'node', 'way' and 'relation').

    Returns:
        int: Number of matching documents, or 0 when nothing matches.
    """
    pipeline = [
        {'$match': {'type': node_type}},
        # A constant _id folds every matching document into a single group.
        {'$group': {'_id': None, 'count': {'$sum': 1}}},
    ]

    # $group emits no document at all when $match lets nothing through, so
    # indexing the result list with [0] would raise IndexError; use 0 instead.
    summary = next(iter(db.elements.aggregate(pipeline)), None)
    return summary['count'] if summary is not None else 0

### Number of valid nodes

In [4]:
# Elements whose type is 'node'.
print('Number of valid nodes:', number_of_elements('node'))

Number of valid nodes: 8451459


### Number of valid ways

In [5]:
# Elements whose type is 'way'.
print('Number of valid ways:', number_of_elements('way'))

Number of valid ways: 965023


### Number of valid relations

In [6]:
# Elements whose type is 'relation'.
print('Number of valid relations:', number_of_elements('relation'))

Number of valid relations: 15724


In [7]:
def number_of_unique_items(by_field):
    """Count the distinct values of ``by_field`` across ``db.elements``.

    Args:
        by_field: Name of the document field to group by, without the
            leading '$' (this notebook passes 'uid' and 'changeset').

    Returns:
        int: Number of distinct values, or 0 when the collection is empty.
    """
    pipeline = [
        # First pass: one group per distinct value of the field. No
        # accumulator is needed because only the number of groups matters.
        {'$group': {'_id': '${}'.format(by_field)}},
        # Second pass: count how many groups the first pass produced.
        {'$group': {'_id': None, 'count': {'$sum': 1}}},
    ]

    # An empty collection yields no result document, so indexing the result
    # list with [0] would raise IndexError; report 0 instead.
    summary = next(iter(db.elements.aggregate(pipeline)), None)
    return summary['count'] if summary is not None else 0

### Number of unique authors

In [8]:
# Distinct values of the 'uid' field.
print('Number of unique authors:', number_of_unique_items('uid'))

Number of unique authors: 2913


### The author with the biggest contribution

In [9]:
# Group the elements per author id, order the groups by size (largest first)
# and keep only the top one.
top_author_pipeline = [
    {'$group': {'_id': '$uid', 'name': {'$first': '$user'}, 'count': {'$sum': 1}}},
    {'$sort': {'count': -1}},
    {'$limit': 1},
]
author_with_biggest_number_of_elements = db.elements.aggregate(top_author_pipeline)
stats = list(author_with_biggest_number_of_elements)[0]

message = "The biggest contribution has the user '{}' with id '{}' - he created '{}' elements"
print(message.format(stats['name'], stats['_id'], stats['count']))

The biggest contribution has the user 'JandaM' with id '2169558' - he created '2068505' elements


### Number of unique changesets

In [10]:
# Distinct values of the 'changeset' field.
print('Number of unique changesets:', number_of_unique_items('changeset'))

Number of unique changesets: 67633


In [11]:
def get_number_of_elements_having_a_tag(tag_name, tag_value):
    """Count the tag entries in ``db.elements`` with the given key and value.

    Each document's ``tags`` is unwound first so that ``key`` and ``value``
    are tested on the same tag entry rather than on two different entries
    of one element.

    Args:
        tag_name: Expected ``key`` of the tag, e.g. 'amenity'.
        tag_value: Expected ``value`` of the tag, e.g. 'restaurant'.

    Returns:
        int: Number of matching tag entries, or 0 when there are none.
            NOTE(review): this equals the number of elements only if an
            element never repeats the same key/value pair in ``tags`` --
            confirm against the code that imported the data.
    """
    pipeline = [
        {'$unwind': '$tags'},
        # Several fields in one $match document are AND-ed implicitly.
        {'$match': {'tags.key': tag_name, 'tags.value': tag_value}},
        # A constant _id folds every matching entry into a single group.
        {'$group': {'_id': None, 'count': {'$sum': 1}}},
    ]

    # No matching entry means $group emits nothing, so indexing the result
    # list with [0] would raise IndexError; report 0 instead.
    summary = next(iter(db.elements.aggregate(pipeline)), None)
    return summary['count'] if summary is not None else 0

### Number of items that have a source “survey”

In [12]:
# Tag entries with key 'source' and value 'survey'.
print('Number of items that have a source "survey":', get_number_of_elements_having_a_tag('source', 'survey'))

Number of items that have a source "survey": 8551


### Number of restaurants

In [13]:
# Tag entries with key 'amenity' and value 'restaurant'.
print('Number of restaurants:', get_number_of_elements_having_a_tag('amenity', 'restaurant'))

Number of restaurants: 2592


### Number of bakeries

In [14]:
# Tag entries with key 'shop' and value 'bakery'.
print('Number of bakeries:', get_number_of_elements_having_a_tag('shop', 'bakery'))

Number of bakeries: 175
