# Syncing data to JanusGraph benchmarks - debug mode

## Used configuration:
* debug mode
* without schema
* without indexes

In [1]:
from thoth.storages.graph import GraphDatabase
from thoth.storages import SolverResultsStore
from thoth.storages import AnalysisResultsStore

from thoth.lab import obtain_location
from thoth.lab import GraphQueryResult as gqr

# Graph database adapter. The active configuration targets a JanusGraph instance on localhost:8182;
# the commented-out line below is the alternative remote deployment, kept for reference.
#graph_db = GraphDatabase.create(obtain_location('thoth-sbu-janusgraph-test1', verify=False, only_netloc=True), port=80)
graph_db = GraphDatabase.create('localhost', port=8182)
# Result stores from which the solver and analysis documents are retrieved in later cells.
# NOTE(review): verify=False presumably disables TLS certificate verification while resolving
# the Ceph location -- confirm this is acceptable outside of a test cluster.
solver_results = SolverResultsStore('thoth-sbu', host=obtain_location('thoth-upshift-ceph', verify=False))
analysis_results = AnalysisResultsStore('thoth-sbu', host=obtain_location('thoth-upshift-ceph', verify=False))

# Connect all three adapters up front; every later cell relies on these connections.
graph_db.connect()
solver_results.connect()
analysis_results.connect()

In [2]:
import logging

# Lower the threshold of the 'thoth' logger (and of child loggers that inherit from it) to DEBUG.
# NOTE(review): only the logger level is changed here; this notebook does not configure a handler,
# so DEBUG records become visible only if a handler is installed elsewhere -- confirm.
logging.getLogger('thoth').setLevel(logging.DEBUG)

## Place for experiments:

These benchmarks were taken with JanusGraph running on a local machine. **Note:** the debug mode of JanusGraph was turned **off**.

In [3]:
# Erase the content of the graph database so the benchmark starts from an empty graph.
# `gqr` (GraphQueryResult) is already imported in the first cell, which this cell depends on
# anyway for `graph_db`, so it is not re-imported here.
gqr(graph_db.g.V().drop().next()).result
# Verify the wipe: the vertex count is expected to be zero now.
count = gqr(graph_db.g.V().count().next()).result

count

0

In [4]:
# Fetch one analysis (package-extract) document by its id.
# To browse the available document ids, run: list(analysis_results.get_document_listing())
# NOTE(review): `analysis_document` is not used by any cell visible in this notebook; presumably
# it was synced into the graph by a cell that has since been removed -- confirm.
analysis_document = analysis_results.retrieve_document('fridex-thoth-package-extract-dash-4f47l')

In [6]:
# Number of vertices currently stored in the graph.
# NOTE(review): the cell that populated the graph between the erase and this count (execution
# count 5) is not present in the notebook, so this number cannot be reproduced with Run All.
gqr(graph_db.g.V().count().next()).result

3164

In [4]:
# Set THOTH_STORAGES_DISABLE_CACHE=1 in the kernel's environment. Judging by the name, it is meant
# to switch off the thoth-storages vertex cache (cf. `VertexBase.cache`) -- TODO confirm.
# NOTE(review): thoth.storages is imported in the first cell, before this variable is set; if the
# library reads the variable at import time, setting it here has no effect -- verify.
%env THOTH_STORAGES_DISABLE_CACHE=1

import os

# Read the variable back to confirm it is visible to this Python process.
os.getenv('THOTH_STORAGES_DISABLE_CACHE')

env: THOTH_STORAGES_DISABLE_CACHE=1


'1'

In [7]:
# Number of edges currently stored in the graph.
gqr(graph_db.g.E().count().next()).result

9542

In [23]:
import inspect
from thoth.storages.graph.utils import get_or_create_vertex

# Show the source of the installed get_or_create_vertex so the benchmark can be related to the
# exact query/cache logic in use. inspect.getsource() returns the source as one string, which
# replaces the manual join over getsourcelines() and avoids leaving a throwaway global behind.
print(inspect.getsource(get_or_create_vertex))

async def get_or_create_vertex(g: AsyncGraphTraversalSource, vertex: VertexBase) -> tuple:
    """Create a vertex if not existed before, if the given vertex already exists, get tis id."""
    if VertexBase.cache:
        try:
            cached_id = VertexBase.cache.get(vertex.to_dict())
            vertex.id = cached_id
            return cached_id, True
        except CacheMiss:
            pass

    query = g.V()
    creation = addV(vertex.__label__)

    for key, value in vertex.to_dict().items():
        if value is not None:
            query = query.has(key, value)
            creation = creation.property(key, value)
        else:
            query = query.hasNot(key)

    result = await query.fold().coalesce(
        unfold().id().as_('id').constant(True).as_('existed').select('id', 'existed'),
        creation.id().as_('id').constant(False).as_('existed').select('id', 'existed')
    ).next()

    if VertexBase.cache:
        VertexBase.cache.put(vertex.to_dict(), result['id'])

In [10]:
import os

# Interpret the flag as a boolean: unset defaults to '0' (False), '1' becomes True.
# NOTE(review): presumably this mirrors how thoth-storages itself parses the variable -- verify.
bool(int(os.getenv('THOTH_STORAGES_DISABLE_CACHE', '0')))

True

In [21]:
# Re-check the raw value of the flag in the current kernel (None would mean it is not set).
os.getenv('THOTH_STORAGES_DISABLE_CACHE')

'1'

In [11]:
# Erase again so we start from a clean slate before the solver-result benchmark.
gqr(graph_db.g.V().drop().next())
# Confirm the graph is empty: the vertex count is expected to be zero.
count = gqr(graph_db.g.V().count().next()).result

count

0

In [12]:
# Fetch one solver result document by its id; it is the input of the timed sync below.
solver_document = solver_results.retrieve_document('fridex-thoth-solver-fc27-thoth-solver-fc27-92qtv')

In [13]:
%%time

# Benchmark: sync the whole solver document into the (just emptied) graph database.
# %%time must remain the first line of the cell, hence this comment sits below it.
graph_db.sync_solver_result(solver_document)

CPU times: user 51.1 s, sys: 1.9 s, total: 53 s
Wall time: 32min 17s


In [18]:
# Number of vertices in the graph after the solver result was synced.
gqr(graph_db.g.V().count().next()).result

1253

In [20]:
# Number of edges in the graph after the solver result was synced.
gqr(graph_db.g.E().count().next()).result

9558