# Materialize the probabilistic model of LUBM1 and query it

In [13]:
import sys
# Put the local glog-python build directory first on the module search path so
# that `import tgs` below can find the module (presumably the compiled Python
# bindings live there — TODO confirm). The entry is a relative path, so this
# cell has to run before the notebook changes its working directory.
sys.path.insert(0, 'glog-python/build/')
import tgs
import json
from IPython.display import JSON

In [14]:
# Logging verbosity; the value 2 stands for the INFO level.
# NOTE(review): no visible cell reads this variable — confirm it is still needed.
loglevel = 2

# Dataset directory (relative path) that the notebook changes into next.
main_dir = "TGs/lubm/lubm_1/"

In [15]:
import os
# Make main_dir the working directory; relative paths used afterwards
# (e.g. 'edb.conf') are presumably meant to resolve inside it.
# NOTE(review): main_dir is a relative path, so running this cell a second time
# without restarting the kernel tries to descend into it again.
os.chdir(main_dir)

Load the EDB layer

In [16]:
# Name of the EDB configuration file (relative to the current working directory).
edb_file = 'edb.conf'
# Build the EDB layer from that configuration; the program and the reasoner
# created later both take this object as an argument.
layer = tgs.EDBLayer(edb_file)

Load the program

In [17]:
# Create the program on top of the EDB layer, then load its rules from a file.
program = tgs.Program(layer)
# NOTE(review): an earlier cell changes the working directory to main_dir
# ('TGs/lubm/lubm_1/'), and this path is relative as well — confirm that it
# resolves correctly on a fresh "Restart & Run All".
rules = 'TGs/lubm/rules/LUBM_L_csv.dlog'
program.load_from_file(rules)
# Report how many rules the program holds after loading.
print("Number of rules", program.get_n_rules())

Number of rules 127


In [19]:
# Chase variant to run. "probtgchase" is selected here; use "tgchase" instead
# if you want the standard chase.
chaseProcedure = "probtgchase"
typeProv = "FULLPROV"

# Assemble the reasoner from the EDB layer and the loaded program.
reasoner = tgs.Reasoner(
    chaseProcedure,
    layer,
    program,
    typeProv=typeProv,
    edbCheck=False,
    queryCont=False,
)

# Build the model and print the statistics the call returns.
statistics = reasoner.create_model(0)
print(statistics)

{"max_mem_mb" : "10623.367188","n_derivations" : "7957292","n_edges" : "870","n_nodes" : "411","n_triggers" : "16942486","runtime_ms" : "7462.180587","steps" : "10"}


We can retrieve the trigger graph (TG) that we just computed as follows

In [20]:
# Keep a handle to the reasoner's trigger graph; the Querier created further
# down is constructed from it.
tg = reasoner.get_TG()

Now we add a rule that specifies the query that we want to execute. Adding the rule returns the ID of the newly added rule, which we need in order to execute it.

In [21]:
# Query rules Q1 through Q14, written as rule strings.
# The list is 0-indexed, so queries[i] holds the rule whose head is Q(i+1).
queries = [
    "Q1(X) :- GraduateStudent(X), takesCourse(X,C), mgc_q01_fb(C)",
    "Q2(X,Y,Z) :- Department(Z), subOrganizationOf(Z,Y), University(Y), undergraduateDegreeFrom(X,Y), GraduateStudent(X), memberOf(X,Z)",
    "Q3(X) :- Publication(X), publicationAuthor(X,C), mgc_q03_fb(C)",
    "Q4(X,Y1,Y2,Y3) :- worksFor(X,C), mgc_q04_ffffb(C), Professor(X), name(X,Y1), emailAddress(X,Y2), telephone(X,Y3)",
    "Q5(X) :- memberOf(X,C), mgc_q05_fb(C), Person(X)",
    "Q6(X) :-  Student(X)",
    "Q7(X,Y) :- teacherOf(C,Y), mgc_q07_ffb(C), Course(Y), takesCourse(X,Y), Student(X)",
    "Q8(X,Y,Z) :- subOrganizationOf(Y,C), mgc_q08_fffb(C), Department(Y), memberOf(X,Y), Student(X), emailAddress(X,Z)",
    "Q9(X,Y,Z) :- Course(Z), takesCourse(X,Z), teacherOf(Y,Z), advisor(X,Y), Student(X), Faculty(Y)",
    "Q10(X) :- takesCourse(X,C), mgc_q10_fb(C), Student(X)",
    "Q11(X) :- subOrganizationOf(X,C), mgc_q11_fb(C), ResearchGroup(X)",
    "Q12(X,Y) :- subOrganizationOf(Y,C), mgc_q12_ffb(C), Department(Y), worksFor(X,Y), Chair(X)",
    "Q13(X) :-hasAlumnus(C,X), mgc_q13_fb(C), Person(X)",
    "Q14(X) :- UndergraduateStudent(X)",
]

In [22]:
# Add the first query (queries[0], i.e. the Q1 rule) to the program and keep
# the returned rule ID so that this rule can be executed on its own.
newRuleId = program.add_rule(queries[0])

In [23]:
# Execute the rule that was just added. The call returns two values: `ts`,
# which later cells treat as the tuple set holding the answers, and
# `stats_query`, the statistics collected for this execution.
ts, stats_query = reasoner.execute_rule(newRuleId)

In [24]:
# The number of facts in the tuple set is the number of answers to the query.
n_answers = ts.get_n_facts()
# NOTE(review): "returnd" is a typo for "returned"; the message is left
# unchanged here so the printed text stays identical.
print(f"The rule that we executed returnd {n_answers:n} answers")

The rule that we executed returnd 4 answers


Additional statistics collected during the execution of the rule:

In [25]:
# Show the statistics that execute_rule() returned for the query rule.
print(stats_query)

{'n_answers': 4, 'runtime_ms': 4.591233}


In order to query the TG or the answers of a query, we create a dedicated `Querier` object. We can also use the same object to compute the derivation tree of each fact.

In [26]:
# Build a Querier from the trigger graph. Later cells use it to read facts out
# of a tuple set, to fetch derivation trees, and to look up node details.
querier = tgs.Querier(tg)

Print the answers of the new rule that we just executed

In [27]:
# Print every fact stored in the tuple set returned by the query rule.
# The loop bound is read from the tuple set itself rather than from the
# notebook-global n_answers, because a later cell rebinds that name to a
# different count; this keeps the cell safe to re-run in any order.
for i in range(ts.get_n_facts()):
    sFact = querier.get_fact_in_TupleSet(ts, i)
    # The earlier format string ended in a stray ')' that showed up as an
    # unbalanced parenthesis at the end of every printed line.
    print("Answers {:n}: {}".format(i, str(sFact)))

Answers 0: ['Department0-University0-GraduateStudent44'])
Answers 1: ['Department0-University0-GraduateStudent101'])
Answers 2: ['Department0-University0-GraduateStudent124'])
Answers 3: ['Department0-University0-GraduateStudent142'])


We can check the derivation tree of a certain answer

In [28]:
# Index (0-based) of the answer whose derivation tree should be shown.
answerNumber = 3
# The querier hands the tree back as JSON text; decode it right away so that
# `dertree` only ever holds the parsed structure.
dertree = json.loads(querier.get_derivation_tree_in_TupleSet(ts, answerNumber))
# Render the parsed tree with IPython's JSON display, fully expanded.
display(JSON(dertree, expanded=True))

<IPython.core.display.JSON object>

We can also retrieve all the nodes of the TG that store facts of a certain predicate and, for example, count how many facts they contain.

In [30]:
# Fetch the details of all nodes for the 'GraduateStudent' predicate; they are
# returned as JSON text, so decode them into Python objects.
nodes = json.loads(querier.get_node_details_predicate('GraduateStudent'))
# Add up the per-node fact counts (each 'n_facts' entry is converted with int()).
# NOTE(review): this rebinds n_answers, which an earlier cell uses for the
# number of answers of the executed query rule.
n_answers = sum(int(node['n_facts']) for node in nodes)
print("The answers are", n_answers, " found in", len(nodes), "nodes.")

The answers are 1874  found in 1 nodes.
