In [2]:
from das.distributed_atom_space import DistributedAtomSpace, QueryOutputFormat
from das.pattern_matcher.pattern_matcher import PatternMatchingAnswer, OrderedAssignment, UnorderedAssignment, CompositeAssignment, Node, Link, Variable, Not, And, Or
from das.database.db_interface import WILDCARD
import warnings
import time
# Silence every Python warning for the rest of the notebook session.
warnings.filterwarnings('ignore')
# Shared notebook state: the helpers defined in later cells read `das` and `db` as globals.
das = DistributedAtomSpace()
db = das.db
# Trailing expression of the cell, so its value is rendered as the cell output.
das.count_atoms()

ModuleNotFoundError: No module named 'das'

In [None]:
class WallClock:
    """
    Minimal stopwatch that also counts completed units of work ("epochs").

    Intended call order: ``start()``, ``epoch()`` once per unit of work,
    ``stop()``, then ``print()`` to report the total wall time and, when at
    least one epoch was recorded, the average time per epoch.
    """

    def __init__(self):
        self.start_time = None  # time.perf_counter() reading taken by start()
        self.wall_time = None   # seconds elapsed between start() and stop()
        self.epochs = 0         # number of work units recorded via epoch()

    def start(self):
        """Record the current time as the beginning of the measured interval."""
        self.start_time = time.perf_counter()

    def stop(self):
        """
        Store the time elapsed since ``start()`` in ``wall_time`` (seconds).

        Raises RuntimeError if ``start()`` has not been called.
        """
        if self.start_time is None:
            raise RuntimeError("WallClock.stop() called before start()")
        self.wall_time = time.perf_counter() - self.start_time

    def epoch(self, n=1):
        """Record ``n`` more completed units of work."""
        self.epochs += n

    def print(self, text="query"):
        """
        Print the measured wall time and, if any epoch was recorded, the mean
        time per epoch labelled with ``text``.

        Raises RuntimeError if no measurement is available yet.
        """
        if self.wall_time is None:
            raise RuntimeError("WallClock.print() called before stop()")
        if self.wall_time >= 1:
            total_time = f"{self.wall_time:.3f} seconds"
        else:
            total_time = f"{(self.wall_time * 1000):.0f} milliseconds"
        if self.epochs == 0:
            # No per-epoch figure to report: print the total only, without
            # the empty trailing parentheses.
            print(total_time)
            return
        time_per_epoch = f"{((self.wall_time * 1000) / self.epochs):.3f} milliseconds per {text}"
        print(f"{total_time} ({time_per_epoch})")
            
        
def print_ordered_assignment(assignment):
    """
    Print one "key: node name" line per entry of ``assignment.mapping``.

    Each mapped value is resolved to a node name through the global ``db``.
    Does nothing when ``assignment`` is None.
    """
    if assignment is None:
        return
    for variable_name, node_handle in assignment.mapping.items():
        node_name = db.get_node_name(node_handle)
        print(f"{variable_name}: {node_name}")

def print_unordered_assignment(assignment):
    """
    Print an unordered assignment as "[symbols] = [node names]".

    ``assignment.symbols`` and ``assignment.values`` are read as
    key -> multiplicity containers: every key is repeated as many times as its
    count. The two expanded sequences are paired positionally (stopping at the
    shorter one) and each paired value is resolved to a node name through the
    global ``db``. Does nothing when ``assignment`` is None.
    """
    if assignment is None:
        return
    symbol_counts = assignment.symbols
    expanded_symbols = [
        symbol for symbol in symbol_counts for _ in range(symbol_counts[symbol])
    ]
    value_counts = assignment.values
    expanded_values = [
        handle for handle in value_counts for _ in range(value_counts[handle])
    ]
    pairs = list(zip(expanded_symbols, expanded_values))
    mapping_keys = [symbol for symbol, _ in pairs]
    mapping_values = [db.get_node_name(handle) for _, handle in pairs]
    print(f"{mapping_keys} = {mapping_values}")

def print_elapsed_time(start):
    """
    Print the wall time elapsed since ``start``.

    ``start`` is a ``time.perf_counter()`` reading. Durations of one second or
    more are shown in seconds with three decimals; shorter ones are shown as
    whole milliseconds.
    """
    elapsed = time.perf_counter() - start
    message = (
        f"{elapsed:.3f} seconds"
        if elapsed >= 1
        else f"{(elapsed * 1000):.0f} milliseconds"
    )
    print(message)
        
def query(query_obj, log = False, detailed_log = False):
    """
    Run ``query_obj`` against the global ``db`` and return the assignments found.

    query_obj: pattern object exposing ``matched(db, answer)``.
    log: when true, print the elapsed time, the value returned by ``matched``
        and, if that value is truthy, the number of assignments.
    detailed_log: when true (requires ``log``), additionally print every
        assignment, followed by an empty line.

    Returns the ``assignments`` attribute of the PatternMatchingAnswer that was
    filled in by the match.
    """
    assert log or (not detailed_log)
    answer = PatternMatchingAnswer()
    started_at = time.perf_counter()
    matched = query_obj.matched(db, answer)
    if not log:
        return answer.assignments

    print_elapsed_time(started_at)
    print(matched)
    if not matched:
        return answer.assignments

    print(f"{len(answer.assignments)} answers")
    if not detailed_log:
        return answer.assignments

    for assignment in answer.assignments:
        # Exact type checks on purpose: subclasses are not dispatched.
        if type(assignment) is OrderedAssignment:
            print_ordered_assignment(assignment)
        elif type(assignment) is UnorderedAssignment:
            print_unordered_assignment(assignment)
        elif type(assignment) is CompositeAssignment:
            print_ordered_assignment(assignment.ordered_mapping)
            for unordered_part in assignment.unordered_mappings:
                print_unordered_assignment(unordered_part)
        print("")
    return answer.assignments

def get_mappings(q, variable):
    """
    Execute the query ``q`` and return, for every resulting assignment, the
    name of the node bound to ``variable``.

    Each assignment is expected to expose a ``mapping`` indexable by
    ``variable``; the bound handle is resolved with ``das.get_node_name``.
    """
    node_names = []
    for assignment in query(q):
        bound_handle = assignment.mapping[variable]
        node_names.append(das.get_node_name(bound_handle))
    return node_names
    

def get_feature_node_handle(name):
    """
    Get the handle of the feature node associated with the given feature name.

    Looks for "Execution" links whose targets are, in order, the
    "Schema:feature_name" Schema node, any atom (wildcard) and the Verbatim
    node holding ``name``. The second target of the first link found is
    returned; any further matching links are ignored.

    Returns None when no such link exists.
    """
    verbatim_node = das.get_node("Verbatim", name)
    schema_node = das.get_node("Schema", "Schema:feature_name")
    links = das.get_links("Execution", None, [schema_node, WILDCARD, verbatim_node])
    # To be replaced by:
    # assert len(links) > 0
    if len(links) == 0:
        return None
    link = das.get_atom(links[0], output_format=QueryOutputFormat.ATOM_INFO)
    # targets = [schema, feature, verbatim]; index 1 is the wildcard position.
    return link["targets"][1]

def build_feature_node(name):
    """
    Build a Node object to be used to compose queries.

    Resolves ``name`` to a feature node handle, fetches that atom's info and
    wraps its "name" field in a ``Node`` of type "feature".

    Returns None when no feature is found for ``name``.
    """
    feature_node_handle = get_feature_node_handle(name)
    if feature_node_handle is None:
        return None
    feature_node = das.get_atom(feature_node_handle, output_format=QueryOutputFormat.ATOM_INFO)
    return Node("feature", feature_node["name"])

def get_feature_fb_id(name):
    """
    Get the FB id of a given feature by its name.

    Builds the "feature" node for ``name`` and queries for the ordered
    Execution link [Schema:feature_uniquename, feature node, v1]. The name of
    the node bound to ``v1`` is returned.

    Returns None when no feature node can be built for ``name``.
    Raises AssertionError unless the query yields exactly one assignment.
    """
    feature_node = build_feature_node(name)
    # Identity test: `==` would be routed through whatever __eq__ the Node
    # class defines, which is not what a "was anything found" check needs.
    if feature_node is None:
        return None
    unique_name_var = Variable("v1")
    schema_node = Node("Schema", "Schema:feature_uniquename")
    # Matches an Execution link to the schema "feature_uniquename" whose second
    # target is the feature node for `name`; the third target is captured by
    # the variable "v1".
    pattern = Link("Execution", ordered=True, targets=[schema_node, feature_node, unique_name_var])
    assignments = query(pattern)
    assert len(assignments) == 1
    id_handle = assignments.pop().mapping['v1']  # handle of the "Verbatim" node
    return db.get_node_name(id_handle)

In [None]:
# Get the uniquename of all features in the knowledge base.
# Pattern: Execution(Schema:feature_uniquename, v1, v2) with both v1 and v2 free.
# `query(q1, True)` runs with logging on, so the elapsed time, the match flag and
# the number of answers are printed. The result is kept in the global `assignments`,
# which the following cell iterates over.
v1 = Variable("v1")
v2 = Variable("v2")
s = Node("Schema", "Schema:feature_uniquename")
q1 = Link("Execution", ordered=True, targets=[s, v1, v2])
assignments = query(q1, True)

In [None]:
# Print the mapping uniquename (FB id) -> name for the features returned by the above query.
# Relies on the global `assignments` produced by the previous cell.
clock = WallClock()
clock.start()
for assignment in assignments:
    # Cap the demo: the check happens before processing, so at most 101 features are printed.
    if clock.epochs > 100:
        break
    pkey_handle = assignment.mapping["v1"] # handle of a "feature" node
    unique_name_handle = assignment.mapping["v2"] # handle of a "Verbatim" node
    pkey = db.get_node_name(pkey_handle) # sequential integer used as PK in the DB table
    unique_name = db.get_node_name(unique_name_handle) # FB id of the feature
    # One extra query per feature: Execution(Schema:feature_name, feature(pkey), v1)
    v1 = Variable("v1")
    s = Node("Schema", "Schema:feature_name")
    q = Link("Execution", ordered=True, targets=[s, Node("feature", pkey), v1])
    assignment2 = query(q)
    assert len(assignment2) == 1 # There's only one link between the feature and its name
    name_handle = assignment2.pop().mapping['v1'] # handle of "Verbatim" node
    name = db.get_node_name(name_handle) # feature's name
    print(f"{unique_name} -> {name}")
    clock.epoch()
clock.stop()
# Reports the total wall time and the average time per processed feature.
clock.print()  

In [None]:
# Get the FB id and group name(s) of the gene designated by gene_symbol.
# NOTE: DOESN'T GET CORRECT RESULTS IN "essential pairing" mode because
# there is no link from FB_group_name to Group_member_FB_gene_id
def get_gene_group_names(gene_symbol):
    """
    Return ``(fb_id, group_names)`` for the gene designated by ``gene_symbol``.

    Queries the ordered Execution link
    [Schema:gene_group_data_FB_group_name, Verbatim(fb_id), v1] and collects
    the names of the nodes bound to ``v1``.

    Returns ``(None, [])`` when no FB id is found for ``gene_symbol``.
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        # Unknown gene symbol: there is no id to build the Verbatim node from.
        return None, []
    gene_id_node = Node("Verbatim", fb_id)
    group_name_var = Variable("v1")
    schema_node = Node("Schema", "Schema:gene_group_data_FB_group_name")
    pattern = Link("Execution", ordered=True, targets=[schema_node, gene_id_node, group_name_var])
    return fb_id, get_mappings(pattern, "v1")
    
# Show the (FB id, group names) pair returned for four sample gene symbols.
print(f"Mef2: {get_gene_group_names('Mef2')}")
print(f"Clk: {get_gene_group_names('Clk')}")
print(f"Myc: {get_gene_group_names('Myc')}")
print(f"Abd-B: {get_gene_group_names('Abd-B')}")

In [None]:
"""
This is an expanded version of the above, tailored to perform queries using only the "essential pairing" of columns.

In fact, the three functions share the same code structure, so they could be merged into a single function by adding a "schema" parameter
(and renaming identifiers, of course).
"""
# Get the FB id and group id(s) of the gene designated by gene_symbol.
def get_gene_group_ids(gene_symbol):
    """
    Return ``(fb_id, group_ids)`` for the gene designated by ``gene_symbol``.

    Queries the ordered Execution link
    [Schema:gene_group_data_FB_group_id, Verbatim(fb_id), v1] and collects the
    names of the nodes bound to ``v1``.

    Returns ``(None, [])`` when no FB id is found for ``gene_symbol``.
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        # Unknown gene symbol: there is no id to build the Verbatim node from.
        return None, []
    gene_id_node = Node("Verbatim", fb_id)
    group_id_var = Variable("v1")
    schema_node = Node("Schema", "Schema:gene_group_data_FB_group_id")
    pattern = Link("Execution", ordered=True, targets=[schema_node, gene_id_node, group_id_var])
    return fb_id, get_mappings(pattern, "v1")


def get_gene_group_symbols(gene_symbol):
    """
    Return ``(fb_id, group_symbols)`` for the gene designated by ``gene_symbol``.

    First resolves the gene's group ids, then, for every group id, queries the
    ordered Execution link
    [Schema:gene_group_data_FB_group_symbol, Verbatim(group id), v1] and
    accumulates the names bound to ``v1``, in group-id order.
    """
    gene_fb_id, group_ids = get_gene_group_ids(gene_symbol)

    group_symbols = []
    for group_id in group_ids:
        group_node = Node("Verbatim", group_id)
        symbol_var = Variable("v1")
        schema_node = Node("Schema", "Schema:gene_group_data_FB_group_symbol")
        pattern = Link("Execution", ordered=True, targets=[schema_node, group_node, symbol_var])
        group_symbols.extend(get_mappings(pattern, "v1"))
    return gene_fb_id, group_symbols


# Get the FB id and group name(s) of the gene designated by gene_symbol,
# going through the gene's group ids.
def get_gene_group_names(gene_symbol):
    """
    Return ``(fb_id, group_names)`` for the gene designated by ``gene_symbol``.

    Two-step lookup: the gene's group ids are resolved first; then, for each
    group id, the ordered Execution link
    [Schema:gene_group_data_FB_group_name, Verbatim(group id), v1] is queried
    and the names bound to ``v1`` are appended to the result.
    """
    gene_fb_id, group_ids = get_gene_group_ids(gene_symbol)

    names = []
    for group_id in group_ids:
        group_node = Node("Verbatim", group_id)
        name_var = Variable("v1")
        schema_node = Node("Schema", "Schema:gene_group_data_FB_group_name")
        pattern = Link("Execution", ordered=True, targets=[schema_node, group_node, name_var])
        names += get_mappings(pattern, "v1")
    return gene_fb_id, names


# For each sample gene symbol, show the result of the three lookups in turn:
# group ids, group symbols and group names (each paired with the gene's FB id).
symbols_list = ["Mef2", "Clk", "Myc", "Abd-B"]
for gene_symbol in symbols_list:
    print(f"{gene_symbol}: {get_gene_group_ids(gene_symbol)}")
    print(f"{gene_symbol}: {get_gene_group_symbols(gene_symbol)}")
    print(f"{gene_symbol}: {get_gene_group_names(gene_symbol)}")



In [None]:

# Get the FB id and group id(s) of the gene designated by gene_symbol.
def get_gene_group_ids(gene_symbol):
    """
    Return ``(fb_id, group_ids)`` for the gene designated by ``gene_symbol``.

    Queries the ordered Execution link
    [Schema:gene_group_data_FB_group_id, Verbatim(fb_id), v1] and collects the
    names of the nodes bound to ``v1``.

    Returns ``(None, [])`` when no FB id is found for ``gene_symbol``.
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        # Unknown gene symbol: there is no id to build the Verbatim node from.
        return None, []
    gene_id_node = Node("Verbatim", fb_id)
    group_id_var = Variable("v1")
    schema_node = Node("Schema", "Schema:gene_group_data_FB_group_id")
    pattern = Link("Execution", ordered=True, targets=[schema_node, gene_id_node, group_id_var])
    return fb_id, get_mappings(pattern, "v1")


def get_gene_group_entity(gene_symbol, schema):
    """
    Return ``(fb_id, values)`` where ``values`` are the entries reachable from
    the gene's group ids through the given schema.

    gene_symbol: symbol of the gene to look up.
    schema: schema name without the "Schema:" prefix, e.g.
        "gene_group_data_FB_group_symbol".

    For every group id of the gene, the ordered Execution link
    [Schema:<schema>, Verbatim(group id), v1] is queried and the names bound to
    ``v1`` are accumulated, in group-id order.
    """
    gene_fb_id, group_ids = get_gene_group_ids(gene_symbol)

    entity_values = []
    for group_id in group_ids:
        group_node = Node("Verbatim", group_id)
        value_var = Variable("v1")
        schema_node = Node("Schema", f"Schema:{schema}")
        pattern = Link("Execution", ordered=True, targets=[schema_node, group_node, value_var])
        entity_values.extend(get_mappings(pattern, "v1"))
    return gene_fb_id, entity_values
    
# Run the generic lookup for every (gene symbol, schema) combination.
# The three schemas correspond to the group id, group symbol and group name columns.
symbols_list = ["Mef2", "Clk", "Myc", "Abd-B"]
schema_list = ["gene_group_data_FB_group_id", "gene_group_data_FB_group_symbol", "gene_group_data_FB_group_name"]
for gene_symbol in symbols_list:
    for schema in schema_list:
        print(f"{gene_symbol}: {get_gene_group_entity(gene_symbol, schema)}")

In [None]:
# Get the FB id and group name(s) of the gene designated by gene_symbol.
# NOTE: DOESN'T GET CORRECT RESULTS IN "essential pairing" mode because
# there is no link from FB_group_name to Group_member_FB_gene_id
def get_gene_group_names(gene_symbol):
    """
    Return ``(fb_id, group_names)`` for the gene designated by ``gene_symbol``.

    Direct lookup: queries the ordered Execution link
    [Schema:gene_group_data_FB_group_name, Verbatim(fb_id), v1] and collects
    the names of the nodes bound to ``v1``.

    Running this cell rebinds ``get_gene_group_names``, replacing the two-step
    version (via group ids) defined in an earlier cell.

    Returns ``(None, [])`` when no FB id is found for ``gene_symbol``.
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        # Unknown gene symbol: there is no id to build the Verbatim node from.
        return None, []
    gene_id_node = Node("Verbatim", fb_id)
    group_name_var = Variable("v1")
    schema_node = Node("Schema", "Schema:gene_group_data_FB_group_name")
    pattern = Link("Execution", ordered=True, targets=[schema_node, gene_id_node, group_name_var])
    return fb_id, get_mappings(pattern, "v1")
    
# Show the (FB id, group names) pair returned for four sample gene symbols.
print(f"Mef2: {get_gene_group_names('Mef2')}")
print(f"Clk: {get_gene_group_names('Clk')}")
print(f"Myc: {get_gene_group_names('Myc')}")
print(f"Abd-B: {get_gene_group_names('Abd-B')}")

In [None]:
# Get the uniquename of all GROUPS in the knowledge base.
# Pattern: Execution(Schema:grp_uniquename, v1, v2) with both v1 and v2 free.
# `query(q1, True)` runs with logging on, so the elapsed time, the match flag and
# the number of answers are printed. The result overwrites the global `assignments`,
# which the following cell iterates over.
v1 = Variable("v1")
v2 = Variable("v2")
s = Node("Schema", "Schema:grp_uniquename")
q1 = Link("Execution", ordered=True, targets=[s, v1, v2])
assignments = query(q1, True)

In [None]:
# Print the mapping uniquename -> name for the GROUPS returned by the above query.
# Relies on the global `assignments` produced by the previous cell.
clock = WallClock()
clock.start()
for assignment in assignments:
    # Cap the demo: the check happens before processing, so at most 101 groups are printed.
    if clock.epochs > 100:
        break
    pkey_handle = assignment.mapping["v1"] # handle of a "grp" node
    unique_name_handle = assignment.mapping["v2"] # handle of a "Verbatim" node
    pkey = db.get_node_name(pkey_handle) # sequential integer used as PK in the DB table
    unique_name = db.get_node_name(unique_name_handle) # uniquename of the group
    # One extra query per group: Execution(Schema:grp_name, grp(pkey), v1)
    v1 = Variable("v1")
    s = Node("Schema", "Schema:grp_name")
    q = Link("Execution", ordered=True, targets=[s, Node("grp", pkey), v1])
    assignment2 = query(q)
    assert len(assignment2) == 1 # There's only one link between the group and its name
    name_handle = assignment2.pop().mapping['v1'] # handle of "Verbatim" node
    name = db.get_node_name(name_handle) # group's name
    print(f"{unique_name} -> {name}")
    clock.epoch()
clock.stop()
# Reports the total wall time and the average time per processed group.
clock.print()  