In [1]:
import os
import pymongo
# Open a MongoDB client with no explicit arguments (pymongo's defaults) and
# select the "uda" database that the queries in this notebook run against.
client = pymongo.MongoClient()
db = client["uda"]

In [2]:
# Size of the raw OSM extract on disk, in bytes.
os.stat("beijing_china.osm").st_size

201434364

In [3]:
# Same file size, rendered in MB (bytes / 1024 / 1024) with two decimals.
osm_size_bytes = os.path.getsize("beijing_china.osm")
"{:.2f}MB".format(osm_size_bytes / 1024.0 / 1024)

'192.10MB'

In [4]:
# Total number of documents in the collection.
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# Collection.count_documents() is the supported way to get an exact count.
db.openstreetmap.count_documents({})

1042475

In [5]:
# Number of documents whose "type" is "node".
# count_documents() replaces Cursor.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
db.openstreetmap.count_documents({"type": "node"})

907927

In [6]:
# Number of documents whose "type" is "way".
# count_documents() replaces Cursor.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
db.openstreetmap.count_documents({"type": "way"})

134534

In [7]:
# Top 10 contributors by number of documents, with each contributor's share
# of the whole collection as a percentage.
# count_documents() replaces Cursor.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
all_count = db.openstreetmap.count_documents({})
for x in db.openstreetmap.aggregate([
    {"$group": {"_id": "$created.user", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 10}
]):
    # The row label reads "uid", but the grouped value is the "created.user"
    # field; the label is kept as-is so the printed output is unchanged.
    print("uid:{_id:20}   count:{count:10}    {perc:.2f}%".format(perc=x["count"]*100.0/all_count, **x))

uid:Chen Jia               count:    250334    24.01%
uid:R438                   count:    140477    13.48%
uid:hanchao                count:     70806    6.79%
uid:Алекс Мок              count:     65893    6.32%
uid:ij_                    count:     51860    4.97%
uid:katpatuka              count:     23407    2.25%
uid:m17design              count:     22329    2.14%
uid:Esperanza36            count:     18075    1.73%
uid:nuklearerWintersturm   count:     15725    1.51%
uid:RationalTangle         count:     13635    1.31%


In [8]:
# Top 30 (year, contributor) pairs by number of documents, with each pair's
# share of the whole collection as a percentage.
# count_documents() replaces Cursor.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
all_count = db.openstreetmap.count_documents({})
for x in db.openstreetmap.aggregate([
    # $year is applied to created.timestamp — assumes that field was stored
    # as a date when the data was loaded (TODO confirm against the loader).
    {"$project": {"year": {"$year": "$created.timestamp"}, "created.user": "$created.user"}},
    {"$group": {"_id": {"year": "$year", "user": "$created.user"}, "count": {"$sum": 1}}},
    # After $group the year lives under "_id", so the tie-break must target
    # "_id.year"; a top-level "year" key does not exist at this stage.
    # Key order matters for $sort: this relies on dicts preserving insertion
    # order (count first, then year).
    {"$sort": {"count": -1, "_id.year": 1}},
    {"$limit": 30}
]):
    print("year:{year:20} uid:{user:20}   count:{count:10}    {perc:.2f}%".format(
        perc=x["count"]*100.0/all_count, count=x["count"], **x["_id"]))

year:                2012 uid:R438                   count:     84875    8.14%
year:                2016 uid:Chen Jia               count:     80059    7.68%
year:                2017 uid:Алекс Мок              count:     65893    6.32%
year:                2017 uid:Chen Jia               count:     59110    5.67%
year:                2015 uid:Chen Jia               count:     52936    5.08%
year:                2012 uid:ij_                    count:     44429    4.26%
year:                2013 uid:R438                   count:     32772    3.14%
year:                2014 uid:hanchao                count:     32115    3.08%
year:                2013 uid:Chen Jia               count:     31055    2.98%
year:                2014 uid:Chen Jia               count:     27169    2.61%
year:                2011 uid:R438                   count:     21211    2.03%
year:                2013 uid:m17design              count:     20403    1.96%
year:                2016 uid:hanchao               

In [9]:
# Ten most common "amenity" values among documents that have that field.
amenity_pipeline = [
    {"$match": {"amenity": {"$exists": 1}}},
    {"$group": {"_id": "$amenity", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 10},
]
amenity_row = "{:30} {}"
for item in db.openstreetmap.aggregate(amenity_pipeline):
    print(amenity_row.format(item["_id"], item["count"]))

restaurant                     1507
parking                        865
school                         549
bank                           476
toilets                        439
fast_food                      351
cafe                           308
fuel                           305
hospital                       197
bar                            168


In [10]:
# Distribution of named places of worship by religion, most common first.
# The header row is printed with the same column layout as the data rows.
print("{:30} {}".format("所属宗教","建筑数量"))
# Iterate the cursor directly: the index from enumerate() was never used.
for item in db.openstreetmap.aggregate([
    {"$match": {"amenity": "place_of_worship", "name": {"$exists": 1}, "religion": {"$exists": 1}}},
    {"$group": {"_id": "$religion", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}}
]):
    print("{:30} {}".format(item["_id"], item["count"]))

所属宗教                           建筑数量
buddhist                       43
christian                      12
muslim                         6
taoist                         6
confucian                      1
hindu                          1


In [11]:
# Named restaurants split by whether a "cuisine" field is present.
# The cell displays the tuple (without cuisine, with cuisine).
# count_documents() replaces Cursor.count(), which was deprecated in
# PyMongo 3.7 and removed in 4.0.
named_restaurant = {"amenity": "restaurant", "name": {"$exists": 1}}
no_cuisine_cnt = db.openstreetmap.count_documents(dict(named_restaurant, cuisine={"$exists": 0}))
has_cuisine_cnt = db.openstreetmap.count_documents(dict(named_restaurant, cuisine={"$exists": 1}))
no_cuisine_cnt, has_cuisine_cnt

(826, 317)

In [12]:
# Ten most common cuisine values among named restaurants that have a
# "cuisine" field. $unwind emits one document per element when "cuisine"
# is an array, so each listed cuisine is counted separately.
cuisine_pipeline = [
    {"$match": {"amenity": "restaurant", "name": {"$exists": 1}, "cuisine": {"$exists": 1}}},
    {"$unwind": "$cuisine"},
    {"$group": {"_id": "$cuisine", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 10},
]
cuisine_row = "{:50} {}"
for item in db.openstreetmap.aggregate(cuisine_pipeline):
    print(cuisine_row.format(item["_id"], item["count"]))

chinese                                            162
italian                                            17
japanese                                           17
international                                      12
pizza                                              12
american                                           9
asian                                              9
regional                                           7
german                                             6
korean                                             5


In [13]:
# Number of documents per creation year, in ascending year order.
group_by_year = {
    "$group": {
        "_id": {"year": {"$year": "$created.timestamp"}},
        "count": {"$sum": 1},
    }
}
order_by_year = {"$sort": {"_id.year": 1}}
for obj in db.openstreetmap.aggregate([group_by_year, order_by_year]):
    year_key = obj["_id"]
    print(year_key["year"], obj["count"])

2007 81
2008 7856
2009 24758
2010 37414
2011 38223
2012 168198
2013 142529
2014 101093
2015 138463
2016 165670
2017 218190


In [14]:
# Query for documents that have an "address.street" field. The cell ends on
# the cursor itself, so only the cursor's repr is displayed.
street_filter = {"address.street": {"$exists": 1}}
db.openstreetmap.find(street_filter)

<pymongo.cursor.Cursor at 0x1073ebcc0>

In [15]:
# Most frequently used "address.street" values, most common first.
# The pipeline keeps up to 100 rows.
street_pipeline = [
    {"$match": {"address.street": {"$exists": 1}}},
    {"$group": {"_id": "$address.street", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 100},
]
street_row = "{:30} {}"
for item in db.openstreetmap.aggregate(street_pipeline):
    print(street_row.format(item["_id"], item["count"]))

成府路                            21
太平路                            12
光华路                            12
广渠门内大街                         12
闵航路                            11
中关村东路                          11
知春路                            11
蒲方路                            10
左安门内大街                         10
建国路                            10
新中街                            9
龙域中路 1号院                       9
北四环西路                          9
中滩村大街 8号院                      8
蒲芳路                            7
苏州街                            7
学院路                            7
广顺南大街                          7
鼓楼东大街                          6
牛街                             6
将台路                            6
北苑路                            6
工人体育场北路                        6
新街口外大街                         6
展览馆路                           6
西直门外大街                         5
石景山路                           5
北三环西路                          5
手帕口南街                          5
漷马路                            5
