# Initial data ingestion to JanusGraph

In [1]:
from thoth.storages.graph import GraphDatabase
from thoth.storages import SolverResultsStore
from thoth.storages import AnalysisResultsStore

from thoth.lab import obtain_location
from thoth.lab import GraphQueryResult as gqr

# Instantiate the graph database adapter and the two result stores used below.
graph_db = GraphDatabase()
solver_results = SolverResultsStore()
analysis_results = AnalysisResultsStore()

# Connect each of them, in the same order in which they were created.
for adapter in (graph_db, solver_results, analysis_results):
    adapter.connect()

## Place for experiments:

In [2]:
# WARNING: destructive cell. It issues a drop() over all vertices (V()) of the
# connected graph, so only run it against an instance you are allowed to wipe.
# NOTE(review): reading `.result` presumably makes the wrapped query run to
# completion - confirm against thoth.lab.GraphQueryResult.
gqr(graph_db.g.V().drop().next()).result
# Sanity check: the vertex count must read back as 0 after the drop.
assert gqr(graph_db.g.V().count().next()).result == 0

## Syncing image analysis results - with adapter cache

In [3]:
# Vertex count before any document is synced; shown as the cell's output.
vertex_count_before_sync = gqr(graph_db.g.V().count().next()).result
vertex_count_before_sync

0

In [4]:
%%time

# Walk the analysis results store listing and sync each document into the graph,
# printing the document name first so progress is visible while the cell runs.
analysis_document_names = analysis_results.get_document_listing()
for name in analysis_document_names:
    print("Syncing %s" % name)
    graph_db.sync_analysis_result(analysis_results.retrieve_document(name))

Syncing fridex-thoth-package-extract-centos-6-hh7ws
Syncing fridex-thoth-package-extract-centos-7-glrdg
Syncing fridex-thoth-package-extract-fedora-27-ntwsp
Syncing fridex-thoth-package-extract-fedora-28-cfmtc
CPU times: user 14.2 s, sys: 1 s, total: 15.2 s
Wall time: 1min 42s


The errors reported above are actually fine - the given artifacts do not carry package name information (they are `requirements.txt` files). We will filter them out at the source-code level, since we do not want to sync such data anyway.

In [5]:
%%time

# Sync every document listed by the solver results store into the graph.
# The listing is materialised with list() before the loop starts, so the full
# set of document names is collected up front.
for document_name in list(solver_results.get_document_listing()):
    print("Syncing %s" % document_name)
    # Named for what it holds: a solver result document, not an analysis one.
    solver_document = solver_results.retrieve_document(document_name)
    graph_db.sync_solver_result(solver_document)

Syncing fridex-thoth-solver-fc26-thoth-solver-fc26-98rx4
Syncing fridex-thoth-solver-fc26-thoth-solver-fc26-tsb4s
Syncing fridex-thoth-solver-fc27-thoth-solver-fc27-w7vzf
CPU times: user 6.26 s, sys: 474 ms, total: 6.73 s
Wall time: 43.3 s


In [6]:
# Summary of the graph contents after both sync cells have run.
vertex_count = gqr(graph_db.g.V().count().next()).result
print(f"Number of vertexes present in graph: {vertex_count:d}")

edge_count = gqr(graph_db.g.E().count().next()).result
print(f"Number of edges present in graph: {edge_count:d}")

# The "ingested" figures are the sizes of the store listings at this point,
# not counters kept by the sync loops.
analysis_document_count = len(list(analysis_results.get_document_listing()))
print(f"Number of analysis files ingested: {analysis_document_count}")

solver_document_count = len(list(solver_results.get_document_listing()))
print(f"Number of solver resultes ingested: {solver_document_count}")

Number of vertexes present in graph: 2373
Number of edges present in graph: 16913
Number of analysis files ingested: 4
Number of solver resultes ingested: 3


**Note:** JanusGraph was run in verbose mode on localhost with indexes and schema configured.