# Производительность документной модели на примере работы с MongoDB

<p> https://en.wikipedia.org/wiki/MongoDB
<p> https://docs.mongodb.com/manual/
<p> http://api.mongodb.com/python/current/index.html

```json
document = {
    "region": {
        "id": <string>,
        "name": <string>,
    },
    "year": <number>,
    "group": {
        "id": <number>,
        "name": <string>,
    },
    "category": <string>,
    "gender": <string>,
    "value": <number>,
}
```

In [1]:
import sys
import time

from pymongo import MongoClient

sys.path.append('../config/')
from config import config

In [2]:
# Connection settings come from the 'mongodb' section of the project config.
params = config(section='mongodb')

# The port is converted to int before being handed to MongoClient.
client = MongoClient(host=params["host"], port=int(params["port"]))

# Handles used by the later cells: the configured database and its
# collection named "table".
db = client[params["database"]]
table = db["table"]

In [3]:
# Query specifications used by the cells below, addressed by index.
commands = (
    # [0] find() filter: None, i.e. every document
    None,
    # [1] find() filter: documents whose year is 2016
    {'year': 2016},
    # [2] aggregate() pipeline: documents of 2016 counted per group id
    [
        {'$match': {'year': 2016}},
        {'$group': {'_id': '$group.id', 'count': {'$sum': 1}}},
        {'$sort': {'_id': 1}},
    ],
    # [3] aggregate() pipeline: documents counted per year
    [
        {'$group': {'_id': '$year', 'count': {'$sum': 1}}},
        {'$sort': {'_id': 1}},
    ],
    # [4] distinct() key: the field whose unique values are counted
    "group",
)

## 1. Собираются все документы

In [4]:
# Fetch the whole collection: commands[0] is None, so no filter is passed.
docs = list(table.find(commands[0]))
print(len(docs))
print(docs[0])

27384
{'_id': ObjectId('5afc3e58672caf0c3490a6e3'), 'region': {'id': '1100', 'name': 'Российская Федерация'}, 'year': 2016, 'group': {'id': 14002, 'name': 'по возрасту'}, 'category': '14-15', 'gender': 'Мужчины', 'value': 13573}


In [5]:
%timeit table.find(commands[0])

50.7 µs ± 2.15 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## 2. Собираются документы за 2016 год

In [6]:
# Fetch the documents matching the commands[1] filter ({'year': 2016}).
docs = list(table.find(commands[1]))
print(len(docs))
print(docs[0])

3948
{'_id': ObjectId('5afc3e58672caf0c3490a6e3'), 'region': {'id': '1100', 'name': 'Российская Федерация'}, 'year': 2016, 'group': {'id': 14002, 'name': 'по возрасту'}, 'category': '14-15', 'gender': 'Мужчины', 'value': 13573}


In [7]:
%timeit table.find(commands[1])

51.8 µs ± 2.88 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## 3. Подсчет документов за 2016 год по группам

In [8]:
# Run pipeline commands[2] (year 2016, counted per group id) and show the result list.
list(table.aggregate(commands[2]))

[{'_id': 14002, 'count': 1128},
 {'_id': 14005, 'count': 752},
 {'_id': 14006, 'count': 1692},
 {'_id': 14007, 'count': 376}]

In [9]:
# Times the aggregate() call for pipeline commands[2]; the returned cursor
# is not iterated inside the timed statement.
%timeit table.aggregate(commands[2])

72.4 ms ± 3.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 4. Подсчет документов по годам

In [10]:
# Run pipeline commands[3] (documents counted per year) and show the result list.
list(table.aggregate(commands[3]))

[{'_id': 2010, 'count': 3864},
 {'_id': 2011, 'count': 3864},
 {'_id': 2012, 'count': 3864},
 {'_id': 2013, 'count': 3864},
 {'_id': 2014, 'count': 3990},
 {'_id': 2015, 'count': 3990},
 {'_id': 2016, 'count': 3948}]

In [11]:
# Times the aggregate() call for pipeline commands[3]; the returned cursor
# is not iterated inside the timed statement.
%timeit table.aggregate(commands[3])

80.9 ms ± 3.22 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 5. Подсчет уникальных групп

In [12]:
# Number of distinct values of the field named by commands[4] ("group").
len(table.distinct(commands[4]))

4

In [13]:
# Times the distinct() call on commands[4] together with taking the length
# of its result.
%timeit len(table.distinct(commands[4]))

103 ms ± 18.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [14]:
# Close the MongoClient created in the connection cell.
client.close()