In [None]:
from das.distributed_atom_space import DistributedAtomSpace, QueryOutputFormat
from das.pattern_matcher.pattern_matcher import PatternMatchingAnswer, OrderedAssignment, UnorderedAssignment, CompositeAssignment, Node, Link, Variable, Not, And, Or
from das.database.db_interface import WILDCARD
import warnings
import time
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Notebook-wide handles: the helper functions and query cells below read
# `das` and `db` as globals.
das = DistributedAtomSpace()
db = das.db
# Last expression of the cell, so its value is shown as the cell output.
das.count_atoms()

In [None]:
class WallClock:
    """Minimal stopwatch reporting total wall time and average time per epoch.

    Intended sequence: ``start()``, ``epoch()`` once per unit of work,
    ``stop()``, then ``print()``.
    """

    def __init__(self):
        self.start_time = None  # time.perf_counter() reading taken by start()
        self.wall_time = None   # seconds elapsed between start() and stop()
        self.epochs = 0         # units of work counted through epoch()

    def start(self):
        """Record the current instant as the beginning of the measurement."""
        self.start_time = time.perf_counter()

    def stop(self):
        """Store the seconds elapsed since ``start()`` in ``wall_time``.

        Raises:
            RuntimeError: if ``start()`` has not been called yet.
        """
        if self.start_time is None:
            raise RuntimeError("WallClock.stop() called before start()")
        self.wall_time = time.perf_counter() - self.start_time

    def epoch(self, n=1):
        """Add ``n`` (default 1) to the number of counted epochs."""
        self.epochs += n

    def print(self, text="query"):
        """Print the measured time, plus the per-epoch average when available.

        Totals of one second or more are shown in seconds, shorter ones in
        milliseconds. The per-epoch average is always in milliseconds and is
        labelled with ``text``; it is omitted when no epoch was counted.

        Raises:
            RuntimeError: if ``stop()`` has not been called yet.
        """
        if self.wall_time is None:
            raise RuntimeError("WallClock.print() called before stop()")
        if self.wall_time >= 1:
            total_time = f"{self.wall_time:.3f} seconds"
        else:
            total_time = f"{(self.wall_time * 1000):.0f} milliseconds"
        if self.epochs == 0:
            # No average to report: print the total alone rather than an
            # empty pair of parentheses.
            print(total_time)
            return
        time_per_epoch = f"{((self.wall_time * 1000) / self.epochs):.3f} milliseconds per {text}"
        print(f"{total_time} ({time_per_epoch})")
            
        
def print_ordered_assignment(assignment):
    """Print one ``<key>: <node name>`` line per entry of ``assignment.mapping``.

    Each mapped value is resolved to a node name through the global ``db``.
    Nothing is printed when ``assignment`` is None.
    """
    if assignment is None:
        return
    for variable, handle in assignment.mapping.items():
        node_name = db.get_node_name(handle)
        print(f"{variable}: {node_name}")

def print_unordered_assignment(assignment):
    """Print an unordered assignment as ``[symbols] = [node names]``.

    ``assignment.symbols`` and ``assignment.values`` are read as mappings
    from an item to its repetition count. Each item is repeated that many
    times, the two expanded sequences are paired by position (the longer one
    is truncated), and every paired value is resolved to a node name through
    the global ``db``. Nothing is printed when ``assignment`` is None.
    """
    if assignment is None:
        return
    expanded_symbols = [
        symbol
        for symbol in assignment.symbols
        for _ in range(assignment.symbols[symbol])
    ]
    expanded_handles = [
        handle
        for handle in assignment.values
        for _ in range(assignment.values[handle])
    ]
    pairs = list(zip(expanded_symbols, expanded_handles))
    mapping_keys = [symbol for symbol, _ in pairs]
    mapping_values = [db.get_node_name(handle) for _, handle in pairs]
    print(f"{mapping_keys} = {mapping_values}")

def print_elapsed_time(start):
    """Print the time elapsed since ``start``, a ``time.perf_counter()`` reading.

    Durations of one second or more are printed in seconds with three
    decimals; shorter ones are printed as whole milliseconds.
    """
    elapsed = time.perf_counter() - start
    message = (
        f"{elapsed:.3f} seconds"
        if elapsed >= 1
        else f"{(elapsed * 1000):.0f} milliseconds"
    )
    print(message)
        
def query(query_obj, log = False, detailed_log = False):
    """Run ``query_obj`` against the global ``db`` and return its assignments.

    Args:
        query_obj: object exposing ``matched(db, answer)``.
        log: when true, print the elapsed time, the match result and, if the
            query matched, the number of answers.
        detailed_log: when true (only allowed together with ``log``), also
            print every assignment followed by an empty line.

    Returns:
        The ``assignments`` attribute of the ``PatternMatchingAnswer`` that
        was passed to ``query_obj.matched``.
    """
    assert log or (not detailed_log)
    answer = PatternMatchingAnswer()
    started_at = time.perf_counter()
    matched = query_obj.matched(db, answer)
    if not log:
        return answer.assignments

    print_elapsed_time(started_at)
    print(matched)
    if matched:
        print(f"{len(answer.assignments)} answers")
        if detailed_log:
            for item in answer.assignments:
                # Exact type checks: each assignment kind has its own printer.
                kind = type(item)
                if kind is OrderedAssignment:
                    print_ordered_assignment(item)
                elif kind is UnorderedAssignment:
                    print_unordered_assignment(item)
                elif kind is CompositeAssignment:
                    print_ordered_assignment(item.ordered_mapping)
                    for unordered_part in item.unordered_mappings:
                        print_unordered_assignment(unordered_part)
                print("")
    return answer.assignments

def get_mappings(q, variable):
    """
    Run the query `q` (without logging) and return, for every resulting
    assignment, the node name of the value bound to `variable`.
    """
    names = []
    for assignment in query(q):
        handle = assignment.mapping[variable]
        names.append(das.get_node_name(handle))
    return names
    

def get_gene_node_handle(name):
    """
    Get the handle of the corresponding Gene node given a gene name.

    Looks for "Execution" links whose targets are
    (Schema:gene_name, <anything>, Verbatim <name>) and returns the second
    target of the first link found.

    Raises:
        IndexError: if no such link exists for `name`.
    """
    verbatim_node = das.get_node("Verbatim", name)
    schema_node = das.get_node("Schema", "Schema:gene_name")
    links = das.get_links("Execution", None, [schema_node, WILDCARD, verbatim_node])
    if not links:
        # Same exception type as the bare `links[0]` would raise, but with a
        # message that tells the user which gene name was not found.
        raise IndexError(f"no Schema:gene_name Execution link found for gene name {name!r}")
    link = das.get_atom(links[0], output_format=QueryOutputFormat.ATOM_INFO)
    # targets = [schema, gene, verbatim name]; index 1 is the gene node handle
    return link["targets"][1]

def build_gene_node(name):
    """
    Build a pattern-matcher Node object for the gene called `name`, to be
    used when composing queries.
    Note that this object is not a DAS node, despite sharing the name "Node".
    """
    handle = get_gene_node_handle(name)
    atom_info = das.get_atom(handle, output_format=QueryOutputFormat.ATOM_INFO)
    return Node("gene", atom_info["name"])

def get_gene_fb_id(name):
    """
    Return the FB id of the gene called `name`.

    Queries the "Execution" link (Schema:gene_uniquename, <gene>, v1) and
    expects exactly one answer (enforced with an assert).
    """
    gene = build_gene_node(name)
    fb_id_var = Variable("v1")
    schema = Node("Schema", "Schema:gene_uniquename")
    pattern = Link("Execution", ordered=True, targets=[schema, gene, fb_id_var])
    matches = query(pattern)
    assert len(matches) == 1
    only_match = matches.pop()
    # 'v1' is bound to the handle of a "Verbatim" node
    return db.get_node_name(only_match.mapping['v1'])

In [None]:
# Get uniquename of all genes in the knowledge base
# Pattern: Execution(Schema:gene_uniquename, v1, v2) with both v1 and v2 free.
v1 = Variable("v1")
v2 = Variable("v2")
s = Node("Schema", "Schema:gene_uniquename")
q1 = Link("Execution", ordered=True, targets=[s, v1, v2])
# log=True: query() prints the elapsed time, the match result and the answer count.
# `assignments` is read by the next cell.
assignments = query(q1, True)

In [None]:
# Print the mapping FB id (uniquename) -> gene name for genes returned by the above query.
# Only the first 101 genes are processed: the loop stops once more than 100 epochs were counted.
clock = WallClock()
clock.start()
for assignment in assignments:
    if clock.epochs > 100:
        break
    pkey_handle = assignment.mapping["v1"] # handle of a "gene" node
    unique_name_handle = assignment.mapping["v2"] # handle of a "Verbatim" node
    pkey = db.get_node_name(pkey_handle) # sequential integer used as PK in the DB table
    unique_name = db.get_node_name(unique_name_handle) # FB id of the gene
    # One extra query per gene: Execution(Schema:gene_name, <gene>, v1)
    v1 = Variable("v1")
    s = Node("Schema", "Schema:gene_name")
    q = Link("Execution", ordered=True, targets=[s, Node("gene", pkey), v1])
    assignment2 = query(q)
    assert len(assignment2) == 1 # There's only one link between the gene and its name
    name_handle = assignment2.pop().mapping['v1'] # handle of "Verbatim" node
    name = db.get_node_name(name_handle) # gene's name
    print(f"{unique_name} -> {name}")
    clock.epoch() # one epoch per gene, so the report is "milliseconds per query"
clock.stop()
clock.print()
    

In [None]:
# Get FB id and recombination_loc of gene "mud"
fb_id = get_gene_fb_id("mud")
print(fb_id)
n1 = Node("Verbatim", fb_id)
v1 = Variable("v1")
s = Node("Schema", "Schema:gene_map_table_recombination_loc")
# Pattern: Execution(Schema:gene_map_table_recombination_loc, <FB id of mud>, v1)
q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
# Only the first value bound to v1 is printed.
print(get_mappings(q1, "v1")[0])

In [None]:
# Search for all genes with the same recombination_loc of gene "mud"
fb_id = get_gene_fb_id("mud")
n1 = Node("Verbatim", fb_id)
v1 = Variable("v1") # recombination_loc
v2 = Variable("v2") # target
v3 = Variable("v3")
s1 = Node("Schema", "Schema:gene_map_table_recombination_loc")
s2 = Node("Schema", "Schema:gene_uniquename")
# Conjunction of three patterns sharing variables:
#   1. v1 is the recombination_loc recorded for mud's FB id
#   2. v2 is any node with that same recombination_loc v1
#   3. v2 is also the uniquename of some gene v3
q1 = And([
    Link("Execution", ordered=True, targets=[s1, n1, v1]),
    Link("Execution", ordered=True, targets=[s1, v2, v1]),
    Link("Execution", ordered=True, targets=[s2, v3, v2]),
])
answer = get_mappings(q1, "v2")
# Number of answers followed by the sorted node names bound to v2
print(f"{len(answer)}: {sorted(answer)}")

In [None]:
# Search for all genes with the same cytogenetic_loc of gene "mud"
fb_id = get_gene_fb_id("mud")
n1 = Node("Verbatim", fb_id)
v1 = Variable("v1") # cytogenetic_loc
v2 = Variable("v2") # target
v3 = Variable("v3")
s1 = Node("Schema", "Schema:gene_map_table_cytogenetic_loc")
s2 = Node("Schema", "Schema:gene_uniquename")
# Conjunction of three patterns sharing variables:
#   1. v1 is the cytogenetic_loc recorded for mud's FB id
#   2. v2 is any node with that same cytogenetic_loc v1
#   3. v2 is also the uniquename of some gene v3
q1 = And([
    Link("Execution", ordered=True, targets=[s1, n1, v1]),
    Link("Execution", ordered=True, targets=[s1, v2, v1]),
    Link("Execution", ordered=True, targets=[s2, v3, v2]),
])
answer = get_mappings(q1, "v2")
# Number of answers followed by the sorted node names bound to v2
print(f"{len(answer)}: {sorted(answer)}")

In [None]:
# Search for all genes with the same recombination_loc but different cytogenetic_loc of gene "mud"
fb_id = get_gene_fb_id("mud")
n1 = Node("Verbatim", fb_id)
v1 = Variable("v1") # recombination_loc
v2 = Variable("v2") # target
v3 = Variable("v3") # cytogenetic_loc
v4 = Variable("v4")
s1 = Node("Schema", "Schema:gene_map_table_recombination_loc")
s2 = Node("Schema", "Schema:gene_map_table_cytogenetic_loc")
s3 = Node("Schema", "Schema:gene_uniquename")
# Conjunction of five patterns sharing variables:
#   1. v1 is the recombination_loc recorded for mud's FB id
#   2. v2 is any node with that same recombination_loc v1
#   3. v3 is the cytogenetic_loc recorded for mud's FB id
#   4. negated: v2 having that same cytogenetic_loc v3
#      (presumably Not() discards answers matching the wrapped pattern -- confirm against the pattern matcher)
#   5. v2 is also the uniquename of some gene v4
q1 = And([
    Link("Execution", ordered=True, targets=[s1, n1, v1]),
    Link("Execution", ordered=True, targets=[s1, v2, v1]),
    Link("Execution", ordered=True, targets=[s2, n1, v3]),
    Not(Link("Execution", ordered=True, targets=[s2, v2, v3])),
    Link("Execution", ordered=True, targets=[s3, v4, v2]),
])
answer = get_mappings(q1, "v2")
# Number of answers followed by the sorted node names bound to v2
print(f"{len(answer)}: {sorted(answer)}")

In [None]:
# Search for the FB id of all genes whose name matches a given regexp
# The pattern is a raw string: "\d" is not a valid escape sequence in a plain
# Python string literal and triggers a Deprecation/SyntaxWarning there.
for handle in db.get_matched_node_name("Verbatim", r"^mus\d\d\d$"):
    name = das.get_node_name(handle)
    fbid = get_gene_fb_id(name)
    print(f"{name} -> {fbid}")
    

In [None]:
# Search for the FB id of all genes whose DO_term disease_model_annotations matches the one of a given gene
fb_id = get_gene_fb_id("mei-9")
n1 = Node("Verbatim", fb_id)
s1 = Node("Schema", "Schema:disease_model_annotations_DO_term")
s2 = Node("Schema", "Schema:gene_uniquename")
v1 = Variable("v1") # DO_term
v2 = Variable("v2") # target

# Step 1: DO_terms annotated on the reference gene
diseases = get_mappings(Link("Execution", ordered=True, targets=[s1, n1, v1]), "v1")
disease_nodes = [Node("Verbatim", d) for d in diseases]
# Step 2: one pattern per DO_term; here v1 is reused for whatever is annotated with that term
links = [Link("Execution", ordered=True, targets=[s1, v1, dn]) for dn in disease_nodes]

query_answer = get_mappings(Or(links), "v1")

# Step 3: keep only the answers that are gene uniquenames (FB ids)
all_fbids = get_mappings(Link("Execution", ordered=True, targets=[s2, v1, v2]), "v2")
# Set intersection instead of testing membership in the `all_fbids` list for
# every answer, which was quadratic in the number of names.
final_answer = set(query_answer).intersection(all_fbids)

In [None]:
# Bare expression as the last line of the cell: shows the `final_answer` set built in the previous cell.
final_answer