# Производительность графовой модели на примере работы с ArangoDB

<p> https://en.wikipedia.org/wiki/ArangoDB
<p> http://python-driver-for-arangodb.readthedocs.io/en/4.1.0

<img src="img/arango_ex.png" width="50%" align="right"/>
<img src="img/arango_model.png" width="50%" align="left"/>

In [1]:
import sys
import time

from arango import ArangoClient

sys.path.append('../config/')
from config import config

In [2]:
# Connection settings are read from the "arangodb" section of the config helper.
params = config(section="arangodb")
client = ArangoClient()

# The settings are passed positionally, so the order of the values in
# `params` has to match the parameter order of `client.db()`.
db = client.db(*params.values())

# Handles to the graph and to its 'group' vertex collection, used by the cells below.
graph = db.graph('crimestatsocial')
group = graph.vertex_collection('group')

In [3]:
# AQL: every vertex exactly one INBOUND hop away from group/14006.
INBOUND_VERTICES = """
    FOR v IN 1..1 INBOUND 'group/14006' GRAPH 'crimestatsocial'
        RETURN v"""

# AQL: the same traversal, but only the number of vertices is returned.
INBOUND_VERTEX_COUNT = """
    FOR v IN 1..1 INBOUND 'group/14006' GRAPH 'crimestatsocial'
        COLLECT WITH COUNT INTO length
        RETURN length"""

# AQL: for each group, the number of inbound vertices whose year is 2016.
COUNT_2016_BY_GROUP = """
    FOR doc IN group
        FOR v IN 1..1 INBOUND doc._id GRAPH 'crimestatsocial'
            FILTER v.year == 2016
            COLLECT group_id = doc._key WITH COUNT INTO length
    RETURN {group_id, length}"""

# AQL: inbound vertices of all groups, counted per year.
COUNT_BY_YEAR = """
    FOR doc IN group
        FOR v IN 1..1 INBOUND doc._id GRAPH 'crimestatsocial'
            COLLECT year = v.year WITH COUNT INTO length
    RETURN {year, length}"""

# The cells below address the queries by position, so the order matters.
commands = (
    INBOUND_VERTICES,
    INBOUND_VERTEX_COUNT,
    COUNT_2016_BY_GROUP,
    COUNT_BY_YEAR,
)

## 1. Поиск узлов, входящих в узел типа group с id=14006

In [4]:
# Traverse the graph from the group/14006 vertex along inbound edges.
start_id = group.get('14006')["_id"]
tr = graph.traverse(start_id, 'inbound')
vertices = tr.get('vertices')

print(tr.keys())
# First three visited vertices, framed by blank lines.
print('', vertices[:3], '', sep='\n')
# The vertex list also contains the start vertex itself, hence the -1.
print(len(vertices) - 1, end='\n\n')
print(tr.get('paths')[:2])

dict_keys(['vertices', 'paths'])

[{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}, {'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}, {'_key': '13299', '_id': 'css/13299', '_rev': '_W0zs0kq--_', 'year': 2013, 'category': 'работников сельского хозяйства', 'gender': 'Мужчины', 'value': 81}]

11736

[{'edges': [], 'vertices': [{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}]}, {'edges': [{'_key': '10831-14006', '_id': 'css_group/10831-14006', '_from': 'css/10831', '_to': 'group/14006', '_rev': '_W0zpRgu---'}], 'vertices': [{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}, {'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}]}]


In [5]:
%timeit graph.traverse(group.get('14006')["_id"], 'inbound')

12.8 s ± 863 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Run the plain traversal query and ask the server to report the result count.
cursor = db.aql.execute(commands[0], count=True)
total = cursor.count()
# Only the first two documents of the current batch are shown.
first_two = list(cursor.batch())[:2]
print(total, first_two, sep='\n')
# Kept as the last expression so the notebook displays its return value.
cursor.close()

11736
[{'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}, {'_key': '13299', '_id': 'css/13299', '_rev': '_W0zs0kq--_', 'year': 2013, 'category': 'работников сельского хозяйства', 'gender': 'Мужчины', 'value': 81}]


True

In [7]:
%timeit db.aql.execute(commands[0], count=True)

139 ms ± 1.62 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# commands[1] is the COUNT variant of the traversal: it returns a single
# number, which pop() takes off the cursor.
db.aql.execute(commands[1]).pop()

11736

In [9]:
%timeit db.aql.execute(commands[1])

94.6 ms ± 1.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 2. Подсчет исходных кортежей по годам

In [10]:
# commands[3] counts the inbound vertices of all groups per year.
db.aql.execute(commands[3]).batch()

deque([{'year': 2010, 'length': 3864},
       {'year': 2011, 'length': 3864},
       {'year': 2012, 'length': 3864},
       {'year': 2013, 'length': 3864},
       {'year': 2014, 'length': 3990},
       {'year': 2015, 'length': 3990},
       {'year': 2016, 'length': 3948}])

In [11]:
%timeit db.aql.execute(commands[3]).batch()

227 ms ± 5.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 3. Подсчет исходных кортежей за 2016 год по группам

In [12]:
# commands[2] counts, for each group, the inbound vertices with year == 2016.
db.aql.execute(commands[2]).batch()

deque([{'group_id': '14002', 'length': 1128},
       {'group_id': '14005', 'length': 752},
       {'group_id': '14006', 'length': 1692},
       {'group_id': '14007', 'length': 376}])

In [13]:
%timeit db.aql.execute(commands[2])

237 ms ± 7.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 4. Подсчет уникальных групп

In [14]:
group.count()

4

In [15]:
%timeit group.count()

12.1 ms ± 856 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
