# Syncing data to JanusGraph benchmarks

In [12]:
from thoth.storages.graph import GraphDatabase
from thoth.storages import SolverResultsStore
from thoth.storages import AnalysisResultsStore

from thoth.lab import obtain_location
from thoth.lab import GraphQueryResult as gqr

# Alternative kept for reference: resolve the JanusGraph endpoint by its
# name via obtain_location() instead of using the fixed address below.
#graph_db = GraphDatabase.create(obtain_location('paas-thoth-test-core-janusgraph',
#                                                verify=False, 
#                                                only_netloc=True),
#                                port=80)
graph_db = GraphDatabase.create('10.8.253.255', port=8182)

# Both result stores are given the very same Ceph location, so resolve it
# once and reuse it instead of repeating the identical lookup.
# NOTE(review): verify=False presumably turns off certificate verification
# for the lookup - confirm this is acceptable outside of a test setup.
ceph_host = obtain_location('thoth-upshift-ceph', verify=False)

solver_results = SolverResultsStore('thoth-test', host=ceph_host)
analysis_results = AnalysisResultsStore('thoth-test', host=ceph_host)

# Open all three adapters; the cells below use them.
graph_db.connect()
solver_results.connect()
analysis_results.connect()

## Place for experiments:

These benchmarks were measured with JanusGraph running on a local machine. **Note:** the debug mode of JanusGraph was turned **off**.

In [22]:
from thoth.common import init_logging

# Log thoth.storages.graph.utils at INFO level so that the information about
# enabling/disabling caches is kept in the output.
init_logging({
    'thoth.storages.graph.utils': 'INFO',
})

2018-10-02 12:08:30,784 [31439] INFO     root: Logging to a Sentry instance is turned off
2018-10-02 12:08:30,785 [31439] INFO     root: Logging to rsyslog endpoint is turned off


## Deleting content of Graph Database

In [16]:
%%time

from thoth.lab import GraphQueryResult as gqr

# Start from an empty graph: drop every vertex ...
drop_all_vertices = graph_db.g.V().drop().next()
gqr(drop_all_vertices).result

# ... and make sure that no vertex is left afterwards.
remaining_vertices = gqr(graph_db.g.V().count().next()).result
assert remaining_vertices == 0

CPU times: user 3.83 ms, sys: 1.84 ms, total: 5.67 ms
Wall time: 392 ms


## Syncing image analysis results - with adapter cache

Now we are retrieving all the analyser documents stored on Ceph. It should take about 40 seconds.

In [9]:
%%time
%env THOTH_STORAGES_DISABLE_CACHE=0

# Get the listing of analysis documents and retrieve every listed document,
# keeping each document together with the name it was listed under.
all_analysis_results = analysis_results.get_document_listing()
all_analysis_documents = [
    {
        "name": f"{document_id}",
        "document": analysis_results.retrieve_document(document_id),
    }
    for document_id in all_analysis_results
]

print(f"retrieved {len(all_analysis_documents)} Analysis documents from Ceph")

env: THOTH_STORAGES_DISABLE_CACHE=0
retrieved 29 Analysis documents from Ceph
CPU times: user 549 ms, sys: 149 ms, total: 698 ms
Wall time: 30.9 s


In [17]:
%%time

import time

# Sync every retrieved analysis document into the graph database, one by one.
# For each document the elapsed time is stored on the document entry under
# 'time_elapsed_to_sync' and the current graph size is reported.
docs_done = 0  # stays 0 when there is nothing to sync
total_documents = len(all_analysis_documents)

for docs_done, doc in enumerate(all_analysis_documents, start=1):
    # perf_counter() is a monotonic clock intended for measuring elapsed
    # time; the difference is wall-clock time, not CPU time.
    started_at = time.perf_counter()
    graph_db.sync_analysis_result(doc['document'])
    doc['time_elapsed_to_sync'] = time.perf_counter() - started_at

    # Query the graph size after each document to see how the graph grows.
    vertex_count = gqr(graph_db.g.V().count().next()).result
    edge_count = gqr(graph_db.g.E().count().next()).result

    print(f"\nDone with {docs_done}/{total_documents} Analyser documents")
    print(f"Number of vertexes, edges in graph: {vertex_count:d}, {edge_count:d}")
    print(f"Wall time elapsed to sync document to graph: {doc['time_elapsed_to_sync']}sec")


Done with 1/29 Analyser documents
Number of vertexes, edges in graph: 903, 3719
CPU time elapsed to sync document to graph: 1079.0483193397522sec

Done with 2/29 Analyser documents
Number of vertexes, edges in graph: 2501, 12150
CPU time elapsed to sync document to graph: 2348.4578483104706sec


2018-10-02 00:14:58,197 [31439] ERROR    thoth.storages.graph.janusgraph: Failed to sync Python package, error is not fatal: {'digests': {'manifest': '724f958273918715b4d72f5bd643ab00b05dd644'}, 'ecosystem': 'Python', 'path': '/opt/app-root/lib/python2.7/site-packages/pbr/tests/testpackage/setup.py', 'result': {'ext_modules': [], 'pbr': True, 'setup_requires': ['pbr']}}
Traceback (most recent call last):
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/thoth/storages/graph/janusgraph.py", line 582, in sync_analysis_result
    package_name=python_package_info['result']['name'].lower(),
KeyError: 'name'



Done with 3/29 Analyser documents
Number of vertexes, edges in graph: 3184, 19670
CPU time elapsed to sync document to graph: 2190.462992668152sec

Done with 4/29 Analyser documents
Number of vertexes, edges in graph: 3184, 27721
CPU time elapsed to sync document to graph: 2125.468581199646sec


2018-10-02 01:12:03,203 [31439] INFO     thoth.storages.graph.janusgraph: Skipping error entry - {'digests': {'manifest': 'd160dbe73ba777affa7a4e86a37199a91e31427c'}, 'ecosystem': 'Python', 'path': '/usr/lib64/python2.7/site-packages/twisted/internet/iocpreactor/setup.py', 'result': {'error': 'b\'Traceback (most recent call last):\\n  File "<stdin>", line 12, in <module>\\nImportError: No module named Cython.Distutils\\n\''}}



Done with 5/29 Analyser documents
Number of vertexes, edges in graph: 3445, 32043
CPU time elapsed to sync document to graph: 1262.4487073421478sec


2018-10-02 01:39:30,223 [31439] INFO     thoth.storages.graph.janusgraph: Skipping error entry - {'digests': {'manifest': 'd160dbe73ba777affa7a4e86a37199a91e31427c'}, 'ecosystem': 'Python', 'path': '/usr/lib64/python2.7/site-packages/twisted/internet/iocpreactor/setup.py', 'result': {'error': 'b\'Traceback (most recent call last):\\n  File "<stdin>", line 12, in <module>\\nImportError: No module named Cython.Distutils\\n\''}}



Done with 6/29 Analyser documents
Number of vertexes, edges in graph: 3517, 37505
CPU time elapsed to sync document to graph: 1645.9162828922272sec

Done with 7/29 Analyser documents
Number of vertexes, edges in graph: 3833, 41282
CPU time elapsed to sync document to graph: 1087.1923644542694sec


2018-10-02 02:07:23,621 [31439] ERROR    asyncio: Task exception was never retrieved
future: <Task finished coro=<Connection._receive() done, defined at /home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/aiogremlin/driver/connection.py:159> exception=EofStream()>
Traceback (most recent call last):
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/aiogremlin/driver/connection.py", line 162, in _receive
    await self._protocol.data_received(data, self._result_sets)
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/aiogremlin/driver/protocol.py", line 48, in data_received
    raise data.data
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/aiohttp/client_ws.py", line 183, in receive
    msg = yield from self._reader.read()
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/aiohttp/streams.p


Done with 8/29 Analyser documents
Number of vertexes, edges in graph: 3856, 44851
CPU time elapsed to sync document to graph: 1034.6089341640472sec

Done with 9/29 Analyser documents
Number of vertexes, edges in graph: 3856, 48451
CPU time elapsed to sync document to graph: 1037.8934881687164sec

Done with 10/29 Analyser documents
Number of vertexes, edges in graph: 3908, 52177
CPU time elapsed to sync document to graph: 1115.457729101181sec

Done with 11/29 Analyser documents
Number of vertexes, edges in graph: 3908, 55607
CPU time elapsed to sync document to graph: 1040.4953770637512sec

Done with 12/29 Analyser documents
Number of vertexes, edges in graph: 3980, 62542
CPU time elapsed to sync document to graph: 2037.6315722465515sec


2018-10-02 04:17:01,153 [31439] ERROR    thoth.storages.graph.janusgraph: Failed to sync Python package, error is not fatal: {'digests': {'manifest': '724f958273918715b4d72f5bd643ab00b05dd644'}, 'ecosystem': 'Python', 'path': '/opt/app-root/lib/python2.7/site-packages/pbr/tests/testpackage/setup.py', 'result': {'ext_modules': [], 'pbr': True, 'setup_requires': ['pbr']}}
Traceback (most recent call last):
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/thoth/storages/graph/janusgraph.py", line 582, in sync_analysis_result
    package_name=python_package_info['result']['name'].lower(),
KeyError: 'name'



Done with 13/29 Analyser documents
Number of vertexes, edges in graph: 3980, 69781
CPU time elapsed to sync document to graph: 2122.2617712020874sec


2018-10-02 04:38:44,994 [31439] INFO     thoth.storages.graph.janusgraph: Skipping error entry - {'digests': {'manifest': 'd160dbe73ba777affa7a4e86a37199a91e31427c'}, 'ecosystem': 'Python', 'path': '/usr/lib64/python2.7/site-packages/twisted/internet/iocpreactor/setup.py', 'result': {'error': 'b\'Traceback (most recent call last):\\n  File "<stdin>", line 12, in <module>\\nImportError: No module named Cython.Distutils\\n\''}}



Done with 14/29 Analyser documents
Number of vertexes, edges in graph: 3980, 74002
CPU time elapsed to sync document to graph: 1269.0383327007294sec

Done with 15/29 Analyser documents
Number of vertexes, edges in graph: 3980, 80907
CPU time elapsed to sync document to graph: 2019.1456174850464sec

Done with 16/29 Analyser documents
Number of vertexes, edges in graph: 4002, 87356
CPU time elapsed to sync document to graph: 1888.7892897129059sec

Done with 17/29 Analyser documents
Number of vertexes, edges in graph: 4003, 93793
CPU time elapsed to sync document to graph: 1867.0556786060333sec

Done with 18/29 Analyser documents
Number of vertexes, edges in graph: 4003, 100230
CPU time elapsed to sync document to graph: 1859.4741747379303sec

Done with 19/29 Analyser documents
Number of vertexes, edges in graph: 4003, 106667
CPU time elapsed to sync document to graph: 1835.3830652236938sec

Done with 20/29 Analyser documents
Number of vertexes, edges in graph: 4003, 113572
CPU time elap

2018-10-02 09:24:37,336 [31439] ERROR    thoth.storages.graph.janusgraph: Failed to sync Python package, error is not fatal: {'digests': {'manifest': '724f958273918715b4d72f5bd643ab00b05dd644'}, 'ecosystem': 'Python', 'path': '/opt/app-root/lib/python2.7/site-packages/pbr/tests/testpackage/setup.py', 'result': {'ext_modules': [], 'pbr': True, 'setup_requires': ['pbr']}}
Traceback (most recent call last):
  File "/home/goern/.local/share/virtualenvs/notebooks-OF6w48C5/lib/python3.6/site-packages/thoth/storages/graph/janusgraph.py", line 582, in sync_analysis_result
    package_name=python_package_info['result']['name'].lower(),
KeyError: 'name'



Done with 23/29 Analyser documents
Number of vertexes, edges in graph: 4012, 133694
CPU time elapsed to sync document to graph: 2010.2911307811737sec

Done with 24/29 Analyser documents
Number of vertexes, edges in graph: 4012, 140132
CPU time elapsed to sync document to graph: 1729.1528384685516sec

Done with 25/29 Analyser documents
Number of vertexes, edges in graph: 4012, 148183
CPU time elapsed to sync document to graph: 1925.901031255722sec


2018-10-02 10:44:56,592 [31439] INFO     thoth.storages.graph.janusgraph: Skipping error entry - {'digests': {'manifest': 'd160dbe73ba777affa7a4e86a37199a91e31427c'}, 'ecosystem': 'Python', 'path': '/usr/lib64/python2.7/site-packages/twisted/internet/iocpreactor/setup.py', 'result': {'error': 'b\'Traceback (most recent call last):\\n  File "<stdin>", line 12, in <module>\\nImportError: No module named Cython.Distutils\\n\''}}



Done with 26/29 Analyser documents
Number of vertexes, edges in graph: 4012, 153628
CPU time elapsed to sync document to graph: 1129.053864479065sec

Done with 27/29 Analyser documents
Number of vertexes, edges in graph: 4012, 160066
CPU time elapsed to sync document to graph: 1350.2611811161041sec


2018-10-02 11:27:12,018 [31439] INFO     thoth.storages.graph.janusgraph: Skipping error entry - {'digests': {'manifest': 'd160dbe73ba777affa7a4e86a37199a91e31427c'}, 'ecosystem': 'Python', 'path': '/usr/lib64/python2.7/site-packages/twisted/internet/iocpreactor/setup.py', 'result': {'error': 'b\'Traceback (most recent call last):\\n  File "<stdin>", line 12, in <module>\\nImportError: No module named Cython.Distutils\\n\''}}



Done with 28/29 Analyser documents
Number of vertexes, edges in graph: 4012, 164287
CPU time elapsed to sync document to graph: 1181.451013326645sec

Done with 29/29 Analyser documents
Number of vertexes, edges in graph: 4012, 167717
CPU time elapsed to sync document to graph: 872.3602607250214sec
CPU times: user 12min 15s, sys: 32.6 s, total: 12min 48s
Wall time: 12h 59min 54s


## Syncing solver results - with adapter cache

In [26]:
%%time
%env THOTH_STORAGES_DISABLE_CACHE=0

# Get the listing of solver documents and retrieve every listed document,
# keeping each document together with the name it was listed under.
all_solver_results = solver_results.get_document_listing()
all_solver_documents = [
    {
        "name": f"{document_id}",
        "document": solver_results.retrieve_document(document_id),
    }
    for document_id in all_solver_results
]

print(f"retrieved {len(all_solver_documents)} Solver documents from Ceph")

env: THOTH_STORAGES_DISABLE_CACHE=0
retrieved 24 Solver documents from Ceph
CPU times: user 240 ms, sys: 3.06 ms, total: 243 ms
Wall time: 9.1 s


In [27]:
%%time

import time


# Sync retrieved solver documents into the graph database, one by one.
# For each document the elapsed time is stored on the document entry under
# 'time_elapsed_to_sync' and the current graph size is reported.

# Stop after this many documents to keep the experiment short;
# set to None to sync every retrieved solver document.
MAX_SOLVER_DOCUMENTS = 2

docs_done = 0  # stays 0 when there is nothing to sync
total_documents = len(all_solver_documents)

for docs_done, doc in enumerate(all_solver_documents, start=1):
    # perf_counter() is a monotonic clock intended for measuring elapsed
    # time; the difference is wall-clock time, not CPU time.
    started_at = time.perf_counter()
    graph_db.sync_solver_result(doc['document'])
    doc['time_elapsed_to_sync'] = time.perf_counter() - started_at

    # Query the graph size after each document to see how the graph grows.
    vertex_count = gqr(graph_db.g.V().count().next()).result
    edge_count = gqr(graph_db.g.E().count().next()).result

    print(f"\nDone with {doc['name']}, it is {docs_done}/{total_documents} Solver documents")
    print(f"Number of vertexes, edges in graph: {vertex_count:d}, {edge_count:d}")
    print(f"Wall time elapsed to sync document to graph: {doc['time_elapsed_to_sync']}sec")

    if MAX_SOLVER_DOCUMENTS is not None and docs_done >= MAX_SOLVER_DOCUMENTS:
        break


Done with solver-f26-5nz7q, it is 1/24 Solver documents
Number of vertexes, edges in graph: 4013, 167717
CPU time elapsed to sync document to graph: 0.18911242485046387sec

Done with solver-f26-675mf, it is 2/24 Solver documents
Number of vertexes, edges in graph: 4013, 167717
CPU time elapsed to sync document to graph: 0.19583511352539062sec
CPU times: user 19.7 ms, sys: 5.72 ms, total: 25.5 ms
Wall time: 4.29 s
