# Производительность графовой модели на примере работы с ArangoDB

<p> https://en.wikipedia.org/wiki/ArangoDB
<p> http://python-driver-for-arangodb.readthedocs.io/en/4.1.0

<img src="img/arango_ex.png" width="50%" align="right"/>
<img src="img/arango_model.png" width="50%" align="left"/>

In [1]:
! arangod --version | head -n1

3.3.8


In [2]:
import sys
import time

from arango import ArangoClient

sys.path.append('../config/')
from config import config

In [3]:
# Connection settings come from the "arangodb" section of the project config.
params = config(section="arangodb")
client = ArangoClient()
# The config values are passed positionally, in the order the config yields them.
db = client.db(*params.values())
graph = db.graph('crimestatsocial')

# Vertex collections of the graph.
region, group, css = (
    graph.vertex_collection(name) for name in ('region', 'group', 'css')
)
# Edge collections of the graph.
css_region, css_group = (
    graph.edge_collection(name) for name in ('css_region', 'css_group')
)

In [4]:
# AQL: return every vertex exactly one inbound hop away from 'group/14006'.
_INBOUND_VERTICES = """
    FOR v IN 1..1 INBOUND 'group/14006' GRAPH 'crimestatsocial'
        RETURN v"""

# AQL: same traversal as above, but only the number of vertices is returned.
_INBOUND_VERTEX_COUNT = """
    FOR v IN 1..1 INBOUND 'group/14006' GRAPH 'crimestatsocial'
        COLLECT WITH COUNT INTO length
        RETURN length"""

# AQL: for each document in `group`, count inbound neighbours whose
# `year` equals 2016; one {group_id, length} row per group key.
_COUNT_2016_PER_GROUP = """
    FOR doc IN group
        FOR v IN 1..1 INBOUND doc._id GRAPH 'crimestatsocial'
            FILTER v.year == 2016
            COLLECT group_id = doc._key WITH COUNT INTO length
    RETURN {group_id, length}"""

# AQL: count inbound neighbours of all `group` documents, bucketed by the
# neighbour's `year`; one {year, length} row per year.
_COUNT_PER_YEAR = """
    FOR doc IN group
        FOR v IN 1..1 INBOUND doc._id GRAPH 'crimestatsocial'
            COLLECT year = v.year WITH COUNT INTO length
    RETURN {year, length}"""

# The benchmark cells address the queries by position, so the order is fixed.
commands = (
    _INBOUND_VERTICES,
    _INBOUND_VERTEX_COUNT,
    _COUNT_2016_PER_GROUP,
    _COUNT_PER_YEAR,
)

## 1. Поиск узлов, входящих в узел типа group с id=14006

In [5]:
# Traverse the graph from group '14006', following edges in the inbound direction.
start_id = group.get('14006')["_id"]
tr = graph.traverse(start_id, 'inbound')
vertices = tr.get('vertices')
paths = tr.get('paths')

print(tr.keys())
print('\n{}\n'.format(vertices[:3]))
# The visited list begins with the start vertex itself (see the printed
# sample), so it is excluded from the neighbour count.
print('{}\n'.format(len(vertices) - 1))
print(paths[:2])

dict_keys(['vertices', 'paths'])

[{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}, {'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}, {'_key': '13299', '_id': 'css/13299', '_rev': '_W0zs0kq--_', 'year': 2013, 'category': 'работников сельского хозяйства', 'gender': 'Мужчины', 'value': 81}]

11736

[{'edges': [], 'vertices': [{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}]}, {'edges': [{'_key': '10831-14006', '_id': 'css_group/10831-14006', '_from': 'css/10831', '_to': 'group/14006', '_rev': '_W0zpRgu---'}], 'vertices': [{'_key': '14006', '_id': 'group/14006', '_rev': '_W0zk8ny--_', 'name': 'по социальному составу'}, {'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}]}]


In [6]:
%timeit graph.traverse(group.get('14006')["_id"], 'inbound')

11.7 s ± 625 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Run the "inbound vertices" query and ask the server to report the result count.
cursor = db.aql.execute(commands[0], count=True)
total = cursor.count()
print(total)
# Show only the first two documents of the current batch.
head = list(cursor.batch())[:2]
print(head)
# Kept as the last expression so the notebook displays the result of close().
cursor.close()

11736
[{'_key': '10831', '_id': 'css/10831', '_rev': '_W0zpRci--_', 'year': 2010, 'category': 'наемных работников', 'gender': 'Мужчины', 'value': 3670}, {'_key': '13299', '_id': 'css/13299', '_rev': '_W0zs0kq--_', 'year': 2013, 'category': 'работников сельского хозяйства', 'gender': 'Мужчины', 'value': 81}]


True

In [8]:
%timeit db.aql.execute(commands[0], count=True)

143 ms ± 1.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
db.aql.execute(commands[1]).pop()

11736

In [10]:
%timeit db.aql.execute(commands[1])

99.1 ms ± 927 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## 2. Подсчет исходных кортежей по годам

In [11]:
db.aql.execute(commands[3]).batch()

deque([{'year': 2010, 'length': 3864},
       {'year': 2011, 'length': 3864},
       {'year': 2012, 'length': 3864},
       {'year': 2013, 'length': 3864},
       {'year': 2014, 'length': 3990},
       {'year': 2015, 'length': 3990},
       {'year': 2016, 'length': 3948}])

In [12]:
%timeit db.aql.execute(commands[3]).batch()

285 ms ± 38.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 3. Подсчет исходных кортежей за 2016 год по группам

In [13]:
db.aql.execute(commands[2]).batch()

deque([{'group_id': '14002', 'length': 1128},
       {'group_id': '14005', 'length': 752},
       {'group_id': '14006', 'length': 1692},
       {'group_id': '14007', 'length': 376}])

In [14]:
%timeit db.aql.execute(commands[2])

263 ms ± 21.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 4. Подсчет уникальных групп

In [15]:
group.count()

4

In [16]:
%timeit group.count()

11.8 ms ± 94.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 5. Создание нового узла

In [17]:
# Key counter for benchmark vertices; starts one below 10**7.
i = int(1e7) - 1


def fun():
    """Insert one `css` vertex and its two edges, then advance the key counter.

    The vertex key is the current value of the global ``i``. One edge is
    written to ``css_region`` pointing at 'region/1100' and one to
    ``css_group`` pointing at 'group/14002'. ``i`` is incremented only after
    all three inserts have been issued.
    """
    global i
    vertex_id = 'css/{}'.format(i)

    vertex = {
        '_key': str(i),
        'year': 2018,
        'category': "по возрасту",
        'gender': '14-15',
        'value': 1234567890,
    }
    region_edge = {
        '_key': '{}-1100'.format(i),
        '_from': vertex_id,
        '_to': 'region/1100'
    }
    group_edge = {
        '_key': '{}-14002'.format(i),
        '_from': vertex_id,
        '_to': 'group/14002'
    }

    css.insert(vertex)
    css_region.insert(region_edge)
    css_group.insert(group_edge)
    i += 1


# Single call outside the timing loop: creates the vertex with key 9999999.
fun()

In [18]:
%timeit -n 100 -r 3 fun()

71.7 ms ± 3.49 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)


## 6. Изменение свойства узла

In [19]:
# Update `year` and `value` on the css vertex whose key is 10**7 - 1.
css.update(dict(_key=str(int(1e7) - 1), year=2017, value=1))

{'_id': 'css/9999999',
 '_key': '9999999',
 '_rev': '_W7AWePW--_',
 '_old_rev': '_W7AWJDS--_'}

In [20]:
# Key counter for the update benchmark; starts at 10**7.
i = int(1e7)


def fun():
    """Update `year` and `value` of the css vertex keyed by ``i``, then advance ``i``."""
    global i
    changes = {'_key': str(i), 'year': 2017, 'value': 1}
    css.update(changes)
    i += 1

In [21]:
%timeit -n 100 -r 3 fun()

24 ms ± 821 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


## 7. Удаление узла

In [22]:
css.delete(str(int(1e7) - 1))

True

In [23]:
# Key counter for the delete benchmark; starts at 10**7.
i = int(1e7)


def fun():
    """Delete the css vertex keyed by the current ``i``, then advance ``i``."""
    global i
    key = str(i)
    css.delete(key)
    i += 1

In [24]:
%timeit -n 100 -r 3 fun()

26.3 ms ± 1.05 ms per loop (mean ± std. dev. of 3 runs, 100 loops each)
