In [2]:
from das.distributed_atom_space import DistributedAtomSpace, QueryOutputFormat
from das.pattern_matcher.pattern_matcher import PatternMatchingAnswer, OrderedAssignment, UnorderedAssignment, CompositeAssignment, Node, Link, Variable, Not, And, Or
from das.database.db_interface import WILDCARD
import warnings
import time
# Silence every Python warning for the rest of the session (blanket 'ignore' filter).
warnings.filterwarnings('ignore')
# `das` is the atom space handle; `db` is its database interface, which the helper
# functions defined in this notebook use to resolve node handles into names.
das = DistributedAtomSpace()
db = das.db
# Last expression of the cell, so its value is displayed as the cell output.
das.count_atoms()

Log initialized. Log file: /tmp/das.log


(2584425, 26915329)

In [3]:
class WallClock:
    """Stopwatch that reports the total wall time and the average time per counted epoch."""

    def __init__(self):
        self.start_time = None  # perf_counter() reading taken by start()
        self.wall_time = None   # seconds between start() and stop()
        self.epochs = 0         # units of work counted through epoch()

    def start(self):
        """Remember the current perf_counter() reading as the start of the measurement."""
        self.start_time = time.perf_counter()

    def stop(self):
        """Store in wall_time the seconds elapsed since start()."""
        now = time.perf_counter()
        self.wall_time = now - self.start_time

    def epoch(self, n=1):
        """Add n to the epoch counter."""
        self.epochs += n

    def print(self, text="query"):
        """Print '<total> (<average per epoch>)'.

        The total is shown in seconds when it is at least one second and in whole
        milliseconds otherwise. The average is always in milliseconds and is labelled
        with `text`; the parentheses stay empty when no epoch was counted.
        """
        millis = self.wall_time * 1000
        if self.wall_time < 1:
            total_time = f"{millis:.0f} milliseconds"
        else:
            total_time = f"{self.wall_time:.3f} seconds"
        time_per_epoch = "" if self.epochs == 0 else f"{(millis / self.epochs):.3f} milliseconds per {text}"
        print(f"{total_time} ({time_per_epoch})")
            
        
def print_ordered_assignment(assignment):
    """Print one '<key>: <node name>' line per entry of assignment.mapping; None prints nothing."""
    if assignment is None:
        return
    for variable, handle in assignment.mapping.items():
        node_name = db.get_node_name(handle)
        print(f"{variable}: {node_name}")

def print_unordered_assignment(assignment):
    """Print an unordered assignment as '<list of symbols> = <list of node names>'.

    `assignment.symbols` and `assignment.values` are read as key -> count containers:
    each key is repeated `count` times, the two expanded sequences are paired
    positionally (stopping at the shorter one) and every value handle is resolved
    to its node name through `db`. None prints nothing.
    """
    if assignment is None:
        return
    expanded_symbols = [
        symbol
        for symbol in assignment.symbols
        for _ in range(assignment.symbols[symbol])
    ]
    expanded_values = [
        handle
        for handle in assignment.values
        for _ in range(assignment.values[handle])
    ]
    pairs = list(zip(expanded_symbols, expanded_values))
    mapping_keys = [symbol for symbol, _ in pairs]
    mapping_values = [db.get_node_name(handle) for _, handle in pairs]
    print(f"{mapping_keys} = {mapping_values}")

def print_elapsed_time(start):
    """Print the wall time elapsed since `start`, a time.perf_counter() reading.

    Durations of at least one second are printed in seconds with three decimals;
    shorter ones are printed in whole milliseconds.
    """
    elapsed = time.perf_counter() - start
    if elapsed < 1:
        print(f"{(elapsed * 1000):.0f} milliseconds")
        return
    print(f"{elapsed:.3f} seconds")
        
def query(query_obj, log = False, detailed_log = False):
    """Run `query_obj` against `db` and return the assignments it produced.

    Args:
        query_obj: object exposing matched(db, answer).
        log: when True, print the elapsed time, the match flag and, if matched,
            the number of answers.
        detailed_log: when True (requires log=True), also print every assignment,
            each followed by a blank line.

    Returns:
        The `assignments` attribute of the PatternMatchingAnswer filled by the query.
    """
    assert log or (not detailed_log)
    answer = PatternMatchingAnswer()
    started_at = time.perf_counter()
    matched = query_obj.matched(db, answer)
    if not log:
        return answer.assignments

    print_elapsed_time(started_at)
    print(matched)
    if not matched:
        return answer.assignments

    print(f"{len(answer.assignments)} answers")
    if not detailed_log:
        return answer.assignments

    for found in answer.assignments:
        kind = type(found)
        if kind is OrderedAssignment:
            print_ordered_assignment(found)
        elif kind is UnorderedAssignment:
            print_unordered_assignment(found)
        elif kind is CompositeAssignment:
            # A composite carries one ordered mapping plus any number of unordered ones.
            print_ordered_assignment(found.ordered_mapping)
            for unordered in found.unordered_mappings:
                print_unordered_assignment(unordered)
        print("")
    return answer.assignments

def get_mappings(q, variable):
    """
    Run query `q` and return, for every assignment found, the name of the node
    bound to `variable` (the handle is resolved through das.get_node_name).
    """
    names = []
    for assignment in query(q):
        handle = assignment.mapping[variable]
        names.append(das.get_node_name(handle))
    return names
    

def get_gene_node_handle(name):
    """
    Get the handle of the corresponding Gene node given a gene name.

    Looks up the "Verbatim" node called `name` and the "Schema:gene_name" schema node,
    fetches the "Execution" links [schema, *, verbatim] and returns the second target
    of the first link found.

    Raises:
        IndexError: if there is no such Execution link for `name`.
    """
    verbatim_node = das.get_node("Verbatim", name)
    schema_node = das.get_node("Schema", "Schema:gene_name")
    links = das.get_links("Execution", None, [schema_node, WILDCARD, verbatim_node])
    if len(links) == 0:
        # Same exception type that links[0] would raise, but with a message naming the gene.
        raise IndexError(f"no Execution link found for gene name {name!r}")
    link = das.get_atom(links[0], output_format=QueryOutputFormat.ATOM_INFO)
    return link["targets"][1]

def build_gene_node(name):
    """
    Build the pattern-matcher Node object for a gene, to be used to compose queries.

    The returned object is a query-building Node, not a DAS node (the name is shared).
    """
    handle = get_gene_node_handle(name)
    atom_info = das.get_atom(handle, output_format=QueryOutputFormat.ATOM_INFO)
    node_name = atom_info["name"]
    return Node("gene", node_name)

def get_gene_fb_id(name):
    """
    Get the FB id of a given gene by its name.

    Queries the Execution link [Schema:gene_uniquename, <gene node>, v1], asserts that
    exactly one assignment comes back and returns the name of the node bound to v1.
    """
    gene_node = build_gene_node(name)
    fb_id_var = Variable("v1")
    uniquename_schema = Node("Schema", "Schema:gene_uniquename")
    pattern = Link("Execution", ordered=True, targets=[uniquename_schema, gene_node, fb_id_var])
    matches = query(pattern)
    assert len(matches) == 1
    only_match = matches.pop()
    # The bound value is the handle of a "Verbatim" node.
    return db.get_node_name(only_match.mapping['v1'])

In [28]:

# Column identifiers scanned by the exploration loop in the next cell: each entry is appended
# to "Schema:" to form the name of the Schema node that is queried.
table_strings = [
    "featureprop_value",
    "featureprop_rank",
    "pubprop_value",
    "pubprop_rank",
    "allele_symbol",
    "allele_is_alleleof",
    "allele_is_construct",
    "allele_propagate_transgenic_uses",
    "allele_gene_is_regulatory_region",
    "allele_stocks_count",
    "allele_pub_count",
    "allele_known_lesion",
    "allele_has_image",
    "feature_name",
    "feature_uniquename",
    "feature_residues",
    "feature_seqlen",
    "feature_md5checksum",
    "feature_is_analysis",
    "feature_is_obsolete",
    "featureloc_fmin",
    "featureloc_is_fmin_partial",
    "featureloc_fmax",
    "featureloc_is_fmax_partial",
    "featureloc_strand",
    "featureloc_phase",
    "featureloc_residue_info",
    "featureloc_locgroup",
    "featureloc_rank",
    "gene_name",
    "gene_uniquename",
    "gene_residues",
    "gene_seqlen",
    "gene_md5checksum",
    "gene_is_analysis",
    "gene_is_obsolete",
    "cvterm_definition",
    "cvterm_is_obsolete",
    "cvterm_is_relationshiptype",
    "dbxref_accession",
    "dbxref_version",
    "dbxref_description",
    "dbxref_url",
    "organism_abbreviation",
    "organism_genus",
    "organism_species",
    "organism_common_name",
    "organism_comment",
    "pub_title",
    "pub_volumetitle",
    "pub_volume",
    "pub_series_name",
    "pub_issue",
    "pub_pyear",
    "pub_pages",
    "pub_miniref",
    "pub_is_obsolete",
    "pub_publisher",
    "pub_pubplace",
    "synonym_name",
    "synonym_synonym_sgml"]
'''
table_strings = [
    "featureprop_value",
    "pubprop_value",
    "allele_fbal_id",
    "feature_name",
    "feature_uniquename",
    "featureloc_residue_info",
    "gene_uniquename",
    "cvterm_definition",
    "cvterm_name",
    "dbxref_accession",
    "dbxref_version",
    "dbxref_description",
    "dbxref_url",
    "pub_title",
    "pub_volumetitle",
    "synonym_name"]

table_strings = [
    "organism_abbreviation",
    "organism_genus",
    "organism_species",
    "organism_common_name",
    "organism_comment",
    "feature_uniquename",
    "featureloc_residue_info",
    "gene_uniquename",
    "cvterm_definition",
    "cvterm_name",
    "dbxref_accession",
    "dbxref_version",
    "dbxref_description",
    "dbxref_url",
    "pub_title",
    "pub_volumetitle",
    "pub_uniquename",
    "synonym_name"]
'''

def verificar_prefixo(string):
    """Return `string` unchanged when it contains one of the FB id prefixes below, else 'NONE'.

    The check is a substring test anywhere in `string`, not only at its start.
    The prefix list leaves out 'FBal', 'FBgn', 'FBpp', 'FBrf' and 'FBtr'.
    """
    prefixos = ('FBab', 'FBba', 'FBcl', 'FBgg', 'FBhh', 'FBim', 'FBig', 'FBlc', 'FBmc',
                'FBms', 'FBsf', 'FBsn', 'FBst', 'FBtc', 'FBte', 'FBti', 'FBto', 'FBtp')
    contains_prefix = any(prefixo in string for prefixo in prefixos)
    return string if contains_prefix else 'NONE'


In [30]:
# For every column identifier, query all Execution links [Schema:<column>, v1, v2] (with
# logging on) and print the names of the nodes bound to v2, stopping after 101 values.
# `assignments` keeps the result of the last column queried once the loop ends.
for column in table_strings:
    v1 = Variable("v1")
    v2 = Variable("v2")
    s = Node("Schema", "Schema:" + column)
    #s1 = Node("Schema", "Schema:gene_residues")
    q1 = Link("Execution", ordered=True, targets=[s, v1, v2])# links schema s to v1 and v2. v1 = pk and v2 = column
    #q2 = Link("Execution", ordered=True, targets=[s1, v1, v2])
    assignments = query(q1, True)
    #ass2 = query(q2, True)

    # Number of values printed so far for this column.
    cont = 0
    print("\nColumn: " + column)
    for assignment in assignments:
        #pkey_handle = assignment.mapping["v1"]
        #pkey = db.get_node_name(pkey_handle)
        un_handle = assignment.mapping["v2"]
        unique_name = db.get_node_name(un_handle)
        print(str(cont) + ": " + unique_name)
        cont += 1
        # The counter is checked after the increment, so indices 0..100 are printed.
        if cont > 100:
            break
        #if unique_name == verificar_prefixo(unique_name):
        #    print("v2: " + unique_name)
    print("FINISHED for column: " + column + "\n")
    

1 milliseconds
False

Column: featureprop_value
FINISHED for column: featureprop_value

1 milliseconds
False

Column: featureprop_rank
FINISHED for column: featureprop_rank

1 milliseconds
False

Column: pubprop_value
FINISHED for column: pubprop_value

1 milliseconds
False

Column: pubprop_rank
FINISHED for column: pubprop_rank

1 milliseconds
False

Column: allele_symbol
FINISHED for column: allele_symbol

1 milliseconds
False

Column: allele_is_alleleof
FINISHED for column: allele_is_alleleof

1 milliseconds
False

Column: allele_is_construct
FINISHED for column: allele_is_construct

1 milliseconds
False

Column: allele_propagate_transgenic_uses
FINISHED for column: allele_propagate_transgenic_uses

1 milliseconds
False

Column: allele_gene_is_regulatory_region
FINISHED for column: allele_gene_is_regulatory_region

1 milliseconds
False

Column: allele_stocks_count
FINISHED for column: allele_stocks_count

1 milliseconds
False

Column: allele_pub_count
FINISHED for column: allele_pub

In [8]:
# For up to 101 entries of `assignments`, print "<name bound to v2> -> <gene name>" and time
# the whole loop, counting one epoch per gene-name query.
# `assignments` is not defined in this cell: it is whatever an earlier cell left in the kernel.
# NOTE(review): the per-line comments assume it came from a Schema:gene_uniquename query,
# while the column-scan loop ends on a different column - confirm before relying on this output.
clock = WallClock()
clock.start()
for assignment in assignments:
    if clock.epochs > 100:
        break
    pkey_handle = assignment.mapping["v1"] # handle of a "gene" node
    unique_name_handle = assignment.mapping["v2"] # handle of a "Verbatim" node
    pkey = db.get_node_name(pkey_handle) # sequential integer used as PK in the DB table
    unique_name = db.get_node_name(unique_name_handle) # FB id of the gene
    v1 = Variable("v1")
    s = Node("Schema", "Schema:gene_name")
    q = Link("Execution", ordered=True, targets=[s, Node("gene", pkey), v1])
    assignment2 = query(q)
    assert len(assignment2) == 1 # There's only one link between the gene and its name
    name_handle = assignment2.pop().mapping['v1'] # handle of "Verbatim" node
    name = db.get_node_name(name_handle) # gene's name
    print(f"{unique_name} -> {name}")
    clock.epoch()
clock.stop()
clock.print()
    

0 milliseconds ()


In [29]:
# Clk, Mef2, Myc, Abd-B  <<-->> The 4 TFs

tfs4 = ["Clk", "Mef2", "Myc", "Abd-B"]
for tf in tfs4:
    # get the FB id of the TF and its recombination_loc (gene_map_table)
    fb_id = get_gene_fb_id(tf)
    #print(fb_id)
    n1 = Node("Verbatim", fb_id)
    v1 = Variable("v1")
    s = Node("Schema", "Schema:gene_map_table_recombination_loc")
    q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
    print("\n\n" + tf + ": " + fb_id)
    # Only the first mapping is shown; [0] raises IndexError when the query finds nothing.
    print(get_mappings(q1, "v1")[0])



Clk: FBgn0023076
3-21


Mef2: FBgn0011656
2-61


Myc: FBgn0262656
1-3


Abd-B: FBgn0000015
3-59


In [30]:
# For each TF in tfs4, search for all genes with the same recombination_loc as that TF

for tf in tfs4:
    fb_id = get_gene_fb_id(tf)
    n1 = Node("Verbatim", fb_id)
    v1 = Variable("v1") # recombination_loc
    v2 = Variable("v2") # target
    v3 = Variable("v3") # whatever is linked to v2 through Schema:gene_uniquename
    s1 = Node("Schema", "Schema:gene_map_table_recombination_loc")
    s2 = Node("Schema", "Schema:gene_uniquename")
    q1 = And([
        # v1 is bound to the recombination_loc of the TF itself
        Link("Execution", ordered=True, targets=[s1, n1, v1]),
        # v2 ranges over everything that shares that recombination_loc
        Link("Execution", ordered=True, targets=[s1, v2, v1]),
        # keep only the v2 values that also appear as a gene_uniquename value
        Link("Execution", ordered=True, targets=[s2, v3, v2]),
    ])
    answer = get_mappings(q1, "v2")
    print("\n\n" + tf + ": " + fb_id)
    print(f"{len(answer)}: {sorted(answer)}")



Clk: FBgn0023076
66: ['FBgn0001208', 'FBgn0002422', 'FBgn0003041', 'FBgn0010406', 'FBgn0014011', 'FBgn0015033', 'FBgn0016694', 'FBgn0023076', 'FBgn0024187', 'FBgn0027554', 'FBgn0028582', 'FBgn0035782', 'FBgn0035785', 'FBgn0035786', 'FBgn0035787', 'FBgn0035788', 'FBgn0035789', 'FBgn0035790', 'FBgn0035791', 'FBgn0035792', 'FBgn0035793', 'FBgn0035795', 'FBgn0035797', 'FBgn0035798', 'FBgn0035799', 'FBgn0035800', 'FBgn0035802', 'FBgn0035805', 'FBgn0035806', 'FBgn0035807', 'FBgn0035811', 'FBgn0035812', 'FBgn0035813', 'FBgn0035815', 'FBgn0035816', 'FBgn0035817', 'FBgn0035823', 'FBgn0035824', 'FBgn0035825', 'FBgn0035827', 'FBgn0035829', 'FBgn0035830', 'FBgn0035831', 'FBgn0041156', 'FBgn0052365', 'FBgn0052368', 'FBgn0052369', 'FBgn0052371', 'FBgn0052373', 'FBgn0052374', 'FBgn0052376', 'FBgn0052382', 'FBgn0052383', 'FBgn0053276', 'FBgn0053278', 'FBgn0086074', 'FBgn0259935', 'FBgn0261536', 'FBgn0261537', 'FBgn0261788', 'FBgn0262984', 'FBgn0264472', 'FBgn0265084', 'FBgn0267526', 'FBgn0267556', '

In [31]:
# For each TF in tfs4, search for all genes with the same cytogenetic_loc as that TF

for tf in tfs4:
    fb_id = get_gene_fb_id(tf)
    n1 = Node("Verbatim", fb_id)
    v1 = Variable("v1") # cytogenetic_loc
    v2 = Variable("v2") # target
    v3 = Variable("v3") # whatever is linked to v2 through Schema:gene_uniquename
    s1 = Node("Schema", "Schema:gene_map_table_cytogenetic_loc")
    s2 = Node("Schema", "Schema:gene_uniquename")
    q1 = And([
        # v1 is bound to the cytogenetic_loc of the TF itself
        Link("Execution", ordered=True, targets=[s1, n1, v1]),
        # v2 ranges over everything that shares that cytogenetic_loc
        Link("Execution", ordered=True, targets=[s1, v2, v1]),
        # keep only the v2 values that also appear as a gene_uniquename value
        Link("Execution", ordered=True, targets=[s2, v3, v2]),
    ])
    answer = get_mappings(q1, "v2")
    print("\n\n" + tf + ": " + fb_id)
    print(f"{len(answer)}: {sorted(answer)}")



Clk: FBgn0023076
4: ['FBgn0001208', 'FBgn0023076', 'FBgn0035817', 'FBgn0052371']


Mef2: FBgn0011656
1: ['FBgn0011656']


Myc: FBgn0262656
3: ['FBgn0029657', 'FBgn0262656', 'FBgn0267963']


Abd-B: FBgn0000015
1: ['FBgn0000015']


In [20]:
# Search for all genes with the same recombination_loc but a different cytogenetic_loc than gene "bcd"
fb_id = get_gene_fb_id("bcd")
n1 = Node("Verbatim", fb_id)
v1 = Variable("v1") # recombination_loc
v2 = Variable("v2") # target
v3 = Variable("v3") # cytogenetic_loc
v4 = Variable("v4") # whatever is linked to v2 through Schema:gene_uniquename
s1 = Node("Schema", "Schema:gene_map_table_recombination_loc")
s2 = Node("Schema", "Schema:gene_map_table_cytogenetic_loc")
s3 = Node("Schema", "Schema:gene_uniquename")
q1 = And([
    # v1: recombination_loc of the reference gene; v2: everything sharing it
    Link("Execution", ordered=True, targets=[s1, n1, v1]),
    Link("Execution", ordered=True, targets=[s1, v2, v1]),
    # v3: cytogenetic_loc of the reference gene; the Not() drops every v2 that has the same one
    Link("Execution", ordered=True, targets=[s2, n1, v3]),
    Not(Link("Execution", ordered=True, targets=[s2, v2, v3])),
    # keep only the v2 values that also appear as a gene_uniquename value
    Link("Execution", ordered=True, targets=[s3, v4, v2]),
])
answer = get_mappings(q1, "v2")
print(f"{len(answer)}: {sorted(answer)}")

460: ['FBgn0000244', 'FBgn0000412', 'FBgn0000447', 'FBgn0000504', 'FBgn0000552', 'FBgn0000723', 'FBgn0001077', 'FBgn0001112', 'FBgn0001138', 'FBgn0001180', 'FBgn0001255', 'FBgn0002306', 'FBgn0002522', 'FBgn0002542', 'FBgn0002932', 'FBgn0003015', 'FBgn0003129', 'FBgn0003165', 'FBgn0003205', 'FBgn0003884', 'FBgn0003885', 'FBgn0003889', 'FBgn0004172', 'FBgn0004173', 'FBgn0004174', 'FBgn0004175', 'FBgn0004777', 'FBgn0004778', 'FBgn0004779', 'FBgn0004780', 'FBgn0004781', 'FBgn0004782', 'FBgn0004783', 'FBgn0004901', 'FBgn0004908', 'FBgn0004980', 'FBgn0010173', 'FBgn0010222', 'FBgn0010355', 'FBgn0010401', 'FBgn0010433', 'FBgn0010774', 'FBgn0010803', 'FBgn0011020', 'FBgn0011282', 'FBgn0011740', 'FBgn0011959', 'FBgn0011960', 'FBgn0011961', 'FBgn0011962', 'FBgn0011963', 'FBgn0011966', 'FBgn0011998', 'FBgn0011999', 'FBgn0012000', 'FBgn0012001', 'FBgn0012024', 'FBgn0012025', 'FBgn0012026', 'FBgn0012027', 'FBgn0012028', 'FBgn0014018', 'FBgn0014022', 'FBgn0014023', 'FBgn0014380', 'FBgn0014861', 'FBg

In [42]:
# Search for the FB id of all genes whose name matches a given regexp
# The pattern is a raw string: "\d" inside a plain literal is an invalid escape sequence that
# Python only tolerates (with a warning) by leaving the backslash in place.
for handle in db.get_matched_node_name("Verbatim", r"^FBlc\d\d\d$"):
    name = das.get_node_name(handle)
    fbid = get_gene_fb_id(name)
    print(f"{name} -> {fbid}")
    

In [32]:
# Search for the FB id of all genes whose DO_term disease_model_annotations matches the one of a given gene
tfs4 = ["Clk", "Mef2", "Myc", "Abd-B"]

# The list of every gene FB id does not depend on the TF, so it is queried once, before the
# loop, and mirrored into a set so the filtering below is a hash lookup instead of a list scan.
s2 = Node("Schema", "Schema:gene_uniquename")
v1 = Variable("v1")
v2 = Variable("v2") # target
all_fbids = get_mappings(Link("Execution", ordered=True, targets=[s2, v1, v2]), "v2")
all_fbids_set = set(all_fbids)

for tf in tfs4:
    fb_id = get_gene_fb_id(tf)
    n1 = Node("Verbatim", fb_id)
    s1 = Node("Schema", "Schema:disease_model_annotations_DO_term")
    v1 = Variable("v1") # DO_term

    # DO terms annotated on the TF itself
    diseases = get_mappings(Link("Execution", ordered=True, targets=[s1, n1, v1]), "v1")
    disease_nodes = [Node("Verbatim", d) for d in diseases]
    # One pattern per disease: anything (v1) annotated with that same DO term
    links = [Link("Execution", ordered=True, targets=[s1, v1, dn]) for dn in disease_nodes]

    query_answer = get_mappings(Or(links), "v1")

    # Keep only the answers that are gene FB ids
    final_answer = {fb for fb in query_answer if fb in all_fbids_set}
    print("\n\n" + tf + ": " + fb_id)
    print(final_answer)



Clk: FBgn0023076
set()


Mef2: FBgn0011656
{'FBgn0010352', 'FBgn0041184', 'FBgn0001308', 'FBgn0014018', 'FBgn0262451', 'FBgn0285942', 'FBgn0000490', 'FBgn0037116', 'FBgn0024961', 'FBgn0011739', 'FBgn0024887', 'FBgn0263601', 'FBgn0041585', 'FBgn0011726', 'FBgn0266186', 'FBgn0000404', 'FBgn0036239', 'FBgn0264610', 'FBgn0040528', 'FBgn0025679', 'FBgn0261854', 'FBgn0086677', 'FBgn0003044', 'FBgn0283468', 'FBgn0053517', 'FBgn0264975', 'FBgn0262029', 'FBgn0038927', 'FBgn0026404', 'FBgn0067413', 'FBgn0041723', 'FBgn0283499', 'FBgn0001078', 'FBgn0019957', 'FBgn0026721', 'FBgn0003067', 'FBgn0010909', 'FBgn0264538', 'FBgn0039636', 'FBgn0033902', 'FBgn0000352', 'FBgn0263144', 'FBgn0024734', 'FBgn0032150', 'FBgn0041100', 'FBgn0263995', 'FBgn0021967', 'FBgn0002781', 'FBgn0011288', 'FBgn0001247', 'FBgn0010397', 'FBgn0283914', 'FBgn0284084', 'FBgn0262872', 'FBgn0019976', 'FBgn0004619', 'FBgn0085428', 'FBgn0031972', 'FBgn0010385', 'FBgn0031728', 'FBgn0017579', 'FBgn0001491', 'FBgn0034240', 'FBgn0262

In [23]:
# Display the value currently held by `final_answer` (assigned inside the disease-annotation loop).
final_answer

set()

In [None]:

from das.distributed_atom_space import DistributedAtomSpace, QueryOutputFormat
from das.pattern_matcher.pattern_matcher import PatternMatchingAnswer, OrderedAssignment, UnorderedAssignment, CompositeAssignment, Node, Link, Variable, Not, And, Or
from das.database.db_interface import WILDCARD
import warnings
import time
# Silence every Python warning for the rest of the session (blanket 'ignore' filter).
warnings.filterwarnings('ignore')
# `das` is the atom space handle; `db` is its database interface, which the helper
# functions defined in this notebook use to resolve node handles into names.
das = DistributedAtomSpace()
db = das.db
print("Atoms (nodes, links): " + str(das.count_atoms()))
# Counts recorded for two data releases:
# 2023_02 :-->  ( 2584425,  26915329)
# 2023_04 :-->  (11004962, 176057040)

In [None]:
class WallClock:
    """Stopwatch that reports the total wall time and the average time per counted epoch."""

    def __init__(self):
        self.start_time = None  # perf_counter() reading taken by start()
        self.wall_time = None   # seconds between start() and stop()
        self.epochs = 0         # units of work counted through epoch()

    def start(self):
        """Remember the current perf_counter() reading as the start of the measurement."""
        self.start_time = time.perf_counter()

    def stop(self):
        """Store in wall_time the seconds elapsed since start()."""
        now = time.perf_counter()
        self.wall_time = now - self.start_time

    def epoch(self, n=1):
        """Add n to the epoch counter."""
        self.epochs += n

    def print(self, text="query"):
        """Print '<total> (<average per epoch>)'.

        The total is shown in seconds when it is at least one second and in whole
        milliseconds otherwise. The average is always in milliseconds and is labelled
        with `text`; the parentheses stay empty when no epoch was counted.
        """
        millis = self.wall_time * 1000
        if self.wall_time < 1:
            total_time = f"{millis:.0f} milliseconds"
        else:
            total_time = f"{self.wall_time:.3f} seconds"
        time_per_epoch = "" if self.epochs == 0 else f"{(millis / self.epochs):.3f} milliseconds per {text}"
        print(f"{total_time} ({time_per_epoch})")
            
        
def print_ordered_assignment(assignment):
    """Print one '<key>: <node name>' line per entry of assignment.mapping; None prints nothing."""
    if assignment is None:
        return
    for variable, handle in assignment.mapping.items():
        node_name = db.get_node_name(handle)
        print(f"{variable}: {node_name}")

def print_unordered_assignment(assignment):
    """Print an unordered assignment as '<list of symbols> = <list of node names>'.

    `assignment.symbols` and `assignment.values` are read as key -> count containers:
    each key is repeated `count` times, the two expanded sequences are paired
    positionally (stopping at the shorter one) and every value handle is resolved
    to its node name through `db`. None prints nothing.
    """
    if assignment is None:
        return
    expanded_symbols = [
        symbol
        for symbol in assignment.symbols
        for _ in range(assignment.symbols[symbol])
    ]
    expanded_values = [
        handle
        for handle in assignment.values
        for _ in range(assignment.values[handle])
    ]
    pairs = list(zip(expanded_symbols, expanded_values))
    mapping_keys = [symbol for symbol, _ in pairs]
    mapping_values = [db.get_node_name(handle) for _, handle in pairs]
    print(f"{mapping_keys} = {mapping_values}")

def print_elapsed_time(start):
    """Print the wall time elapsed since `start`, a time.perf_counter() reading.

    Durations of at least one second are printed in seconds with three decimals;
    shorter ones are printed in whole milliseconds.
    """
    elapsed = time.perf_counter() - start
    if elapsed < 1:
        print(f"{(elapsed * 1000):.0f} milliseconds")
        return
    print(f"{elapsed:.3f} seconds")
        
def query(query_obj, log = False, detailed_log = False):
    """Run `query_obj` against `db` and return the assignments it produced.

    Args:
        query_obj: object exposing matched(db, answer).
        log: when True, print the elapsed time, the match flag and, if matched,
            the number of answers.
        detailed_log: when True (requires log=True), also print every assignment,
            each followed by a blank line.

    Returns:
        The `assignments` attribute of the PatternMatchingAnswer filled by the query.
    """
    assert log or (not detailed_log)
    answer = PatternMatchingAnswer()
    started_at = time.perf_counter()
    matched = query_obj.matched(db, answer)
    if not log:
        return answer.assignments

    print_elapsed_time(started_at)
    print(matched)
    if not matched:
        return answer.assignments

    print(f"{len(answer.assignments)} answers")
    if not detailed_log:
        return answer.assignments

    for found in answer.assignments:
        kind = type(found)
        if kind is OrderedAssignment:
            print_ordered_assignment(found)
        elif kind is UnorderedAssignment:
            print_unordered_assignment(found)
        elif kind is CompositeAssignment:
            # A composite carries one ordered mapping plus any number of unordered ones.
            print_ordered_assignment(found.ordered_mapping)
            for unordered in found.unordered_mappings:
                print_unordered_assignment(unordered)
        print("")
    return answer.assignments

def get_mappings(q, variable_name):
    """
    Run query `q` and return, for every assignment found, the name of the node
    bound to `variable_name` (the handle is resolved through das.get_node_name).
    """
    names = []
    for assignment in query(q):
        handle = assignment.mapping[variable_name]
        names.append(das.get_node_name(handle))
    return names
    

def get_feature_node_handle(name):
    """
    Get the handle of the corresponding feature node given a feature name.

    Returns the second target of the first "Execution" link
    [Schema:feature_name, *, Verbatim(name)], or None when no such link exists.
    """
    verbatim_node = das.get_node("Verbatim", name)
    schema_node = das.get_node("Schema", "Schema:feature_name")
    links = das.get_links("Execution", None, [schema_node, WILDCARD, verbatim_node])
    # To be replaced by:
    # assert len(links) > 0
    if len(links) == 0:
        return None
    link = das.get_atom(links[0], output_format=QueryOutputFormat.ATOM_INFO)
    return link["targets"][1]


def build_feature_node(name):
    """
    Build a Node object to be used to compose queries.

    Returns None when no feature node handle is found for `name`; otherwise a
    Node of type "feature" carrying the name of the atom behind that handle.
    """
    feature_node_handle = get_feature_node_handle(name)
    if feature_node_handle is None:
        return None
    feature_node = das.get_atom(feature_node_handle, output_format=QueryOutputFormat.ATOM_INFO)
    return Node("feature", feature_node["name"])


def get_feature_fb_id(symbol):
    """
    Get the FB id of a given feature by its symbol.

    Returns None when no feature node can be built for `symbol`. Otherwise asserts
    that exactly one uniquename is linked to the feature and returns it.
    """
    n = build_feature_node(symbol)
    # Identity test: `== None` would go through whatever __eq__ the Node class defines.
    if n is None:
        return None
    v = Variable("v1")
    s = Node("Schema", "Schema:feature_uniquename")
    # Matches the Execution link [Schema:feature_uniquename, <feature node>, v1];
    # v1 gets bound to the third target of that link.
    q = Link("Execution", ordered=True, targets=[s, n, v])
    assignment = query(q)
    assert len(assignment) == 1
    id_handle = assignment.pop().mapping['v1'] # handle of "Verbatim" node
    return db.get_node_name(id_handle)


"""
    Get all handles gene node given a gene name (symbol).
    
    Some genes have more than  one uniquename. Eg "AGO2" in the next cell
    Unfortunately, for some DAS setups, the uniquename retrieved by get_feature_fb_id doesn't work. 
    To ensure correct retrieval, the way is to get all FBgn# and test each one... :/
"""    
def get_all_feature_node_handles(name):
    """
    Get the handles of all feature nodes linked to the given feature name.

    Unlike get_feature_node_handle, every "Execution" link
    [Schema:feature_name, *, Verbatim(name)] contributes its second target.
    Returns None when there is no such link.
    """
    verbatim_node = das.get_node("Verbatim", name)
    schema_node = das.get_node("Schema", "Schema:feature_name")
    links = das.get_links("Execution", None, [schema_node, WILDCARD, verbatim_node])
    # To be replaced by:
    # assert len(links) > 0
    if len(links) == 0:
        return None
    handles = []
    for link in links:
        atom_link = das.get_atom(link, output_format=QueryOutputFormat.ATOM_INFO)
        handles.append(atom_link["targets"][1])
    return handles



"""
    Build a Node object to be used to compose queries. 
    
    Some genes have more than  one uniquename. Eg "AGO2" in the next cell
    Unfortunately, for some DAS setups, the uniquename retrieved by get_feature_fb_id doesn't work. 
    To ensure correct retrieval, the way is to get all FBgn# and test each one... :/
"""
def build_all_feature_nodes(name):
    """
    Build one query Node of type "feature" per feature node handle found for `name`.

    Returns None when get_all_feature_node_handles finds nothing.
    """
    feature_node_handles = get_all_feature_node_handles(name)
    if feature_node_handles is None:
        return None
    nodes = []
    for feature_node_handle in feature_node_handles:
        feature_node = das.get_atom(feature_node_handle, output_format=QueryOutputFormat.ATOM_INFO)
        nodes.append(Node("feature", feature_node["name"]))
    return nodes
    

"""
    Some genes have more than  one uniquename. Eg "AGO2" in the next cell
    Unfortunately, for some DAS setups, the uniquename retrieved by get_feature_fb_id doesn't work. 
    To ensure correct retrieval, the way is to get all FBgn# and test each one... :/
"""
def get_all_feature_fb_id(symbol):
    """
    Get the FB ids (uniquenames) of every feature node found for `symbol`.

    Returns None when no feature node can be built. For each node, asserts that
    exactly one uniquename is linked to it and collects that name.
    """
    nodes = build_all_feature_nodes(symbol)
    if nodes is None:
        return None
    fb_ids = []
    for node in nodes:
        uniq_var = Variable("v1")
        schema = Node("Schema", "Schema:feature_uniquename")
        q = Link("Execution", ordered=True, targets=[schema, node, uniq_var])
        assignment = query(q)
        assert len(assignment) == 1
        id_handle = assignment.pop().mapping['v1'] # handle of "Verbatim" node
        fb_ids.append(db.get_node_name(id_handle))
    return fb_ids


In [None]:
"""
About HGNC groups. 
The "HGNC_family_ID" (eg 705) is a group id; not a gene id (which is like HGNC:705)
Flybase assigned 705 to the PTP (PROTEIN TYROSINE PHOSPHATASES) group that corresponds to this: 
https://www.genenames.org/data/genegroup/#!/group/705 (Class I classical Cys-based phosphatases)

So, the get_groups_HGNC_ids method returns a list of pairs ("id", id_URL), because Flybase states that more than one
HGNC id may exist for a given Flybase group, and the table "gene_groups_HGNC_fb*.tsv" reflects that statement.

get_groups_HGNC_ids method output:

For the current example (Flybase PTP group) it is:

[(705, https://www.genenames.org/data/genegroup/#!/group/705)]


For multiple HGNC ids with input of "FBgg0000112":

FB_group_id	FB_group_symbol	FB_group_name	HGNC_family_ID
FBgg0000112	INX	            INNEXINS	    314
FBgg0000112	INX	            INNEXINS	    288

it outputs:

[ 
    (314, https://www.genenames.org/data/genegroup/#!/group/314), 
    (288, https://www.genenames.org/data/genegroup/#!/group/288)
]
"""
# 1
def get_group_symbol(group_id, table_name):
    """Return the first group symbol linked to `group_id` (FBgg#) through the
    "Schema:<table_name>_FB_group_symbol" schema node.

    Raises IndexError when the query yields no mapping.
    """
    group_node = Node("Verbatim", group_id)
    symbol_var = Variable("v1")
    symbol_schema = Node("Schema", f"Schema:{table_name}_FB_group_symbol")
    pattern = Link("Execution", ordered=True, targets=[symbol_schema, group_node, symbol_var])
    symbols = get_mappings(pattern, "v1")
    return symbols[0]

# 2
def get_group_name(group_id, table_name):
    """Return the first group name linked to `group_id` (FBgg#) through the
    "Schema:<table_name>_FB_group_name" schema node.

    Raises IndexError when the query yields no mapping.
    """
    group_node = Node("Verbatim", group_id)
    name_var = Variable("v1")
    name_schema = Node("Schema", f"Schema:{table_name}_FB_group_name")
    pattern = Link("Execution", ordered=True, targets=[name_schema, group_node, name_var])
    names = get_mappings(pattern, "v1")
    return names[0]

# 3
def get_groups_HGNC_ids(group_id):
    """Get the list of HGNC ids for the group designated by `group_id`.

    Returns:
        A list of (hgnc_id, url) pairs, one per value linked to `group_id` through
        "Schema:gene_groups_HGNC_HGNC_family_ID"; the url is the genenames.org
        gene-group page for that id. The list is empty when nothing is linked.
    """
    n1 = Node("Verbatim", group_id)
    v1 = Variable("v1")
    s = Node("Schema", "Schema:gene_groups_HGNC_HGNC_family_ID")
    q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
    return [
        (hgnc_id, f"https://www.genenames.org/data/genegroup/#!/group/{hgnc_id}")
        for hgnc_id in get_mappings(q1, "v1")
    ]

# 4
def get_groups_ids(gene_symbol, table_name):
    """Get the list of group ids (FBgg#) linked to the gene designated by `gene_symbol`.

    The gene symbol is first resolved to its FB id; the ids are then read through
    the "Schema:<table_name>_FB_group_id" schema node.

    Returns:
        A list of group ids, or None when the gene symbol cannot be resolved to an FB id.
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        return None
    n1 = Node("Verbatim", fb_id)
    v1 = Variable("v1")
    s = Node("Schema", f"Schema:{table_name}_FB_group_id")
    q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
    return get_mappings(q1, "v1")

# 5
def get_gene_groups_ids(gene_symbol):
    """Get the group id(s) (FBgg#) of the gene designated by `gene_symbol`, read from "gene_group_data"."""
    return get_groups_ids(gene_symbol=gene_symbol, table_name="gene_group_data")

# 6
def get_pathway_groups_ids(gene_symbol):
    """Get the PATHWAY group id(s) (FBgg#) of the gene designated by `gene_symbol`, read from "pathway_group_data"."""
    return get_groups_ids(gene_symbol=gene_symbol, table_name="pathway_group_data")
    

# 7
def get_groups_symbols(gene_symbol, table_name):
    """Get the list of group symbols of every group the gene `gene_symbol` belongs to.

    Each group id returned by get_groups_ids is looked up through the
    "Schema:<table_name>_FB_group_symbol" schema node.

    Returns:
        A flat list of symbols, or None when get_groups_ids returns None.
    """
    gene_groups_ids = get_groups_ids(gene_symbol, table_name)
    if gene_groups_ids is None:
        return None
    gg_symbols = []
    for gg_id in gene_groups_ids:
        n1 = Node("Verbatim", gg_id)
        v1 = Variable("v1")
        # groups
        s = Node("Schema", f"Schema:{table_name}_FB_group_symbol")
        q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
        gg_symbols.extend(get_mappings(q1, "v1"))
    return gg_symbols

# 8
def get_gene_groups_symbols(gene_symbol):
    """Get the group symbols of the gene `gene_symbol`, read from "gene_group_data"."""
    return get_groups_symbols(gene_symbol=gene_symbol, table_name="gene_group_data")

# 9
def get_pathway_groups_symbols(gene_symbol):
    """Get the group symbols of the gene `gene_symbol`, read from "pathway_group_data"."""
    return get_groups_symbols(gene_symbol=gene_symbol, table_name="pathway_group_data")


# 10
def get_groups_names(gene_symbol, table_name):
    """Get the group name(s) of every group the gene `gene_symbol` belongs to.

    Each group id returned by get_groups_ids is looked up through the
    "Schema:<table_name>_FB_group_name" schema node.

    Returns:
        A flat list of names, or None when get_groups_ids returns None.
    """
    gene_groups_ids = get_groups_ids(gene_symbol, table_name)
    if gene_groups_ids is None:
        return None
    gg_names = []
    for gg_id in gene_groups_ids:
        n1 = Node("Verbatim", gg_id)
        v1 = Variable("v1")
        # groups
        s = Node("Schema", f"Schema:{table_name}_FB_group_name")
        q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
        gg_names.extend(get_mappings(q1, "v1"))
    return gg_names

# 11
def get_gene_groups_names(gene_symbol):
    """Get the group names of the gene `gene_symbol`, read from "gene_group_data"."""
    return get_groups_names(gene_symbol=gene_symbol, table_name="gene_group_data")

# 12
def get_pathway_groups_names(gene_symbol):
    """Get the group names of the gene `gene_symbol`, read from "pathway_group_data"."""
    return get_groups_names(gene_symbol=gene_symbol, table_name="pathway_group_data")

# 13
def get_parent_groups_ids(group_id):
    """Return the names of all nodes that `group_id` points to through an "Inheritance" link.

    A group can have more than one parent (e.g. FBgg0000275), hence the list.
    """
    child = Node("Verbatim", group_id)
    parent = Variable("v1")
    inheritance = Link("Inheritance", ordered=True, targets=[child, parent])
    return get_mappings(inheritance, "v1")


# 14
def get_parent_groups_symbols(group_id, table_name):
    """Collect, for every parent of `group_id`, the values linked to that parent id through
    the "Schema:<table_name>_Parent_FB_group_symbol" schema node.

    NOTE(review): the lookup is keyed by the parent id itself - confirm this is how the
    Parent_FB_group_symbol column is indexed in the atom space.
    """
    symbols = []
    for parent_id in get_parent_groups_ids(group_id):
        parent_node = Node("Verbatim", parent_id)
        symbol_var = Variable("v1")
        symbol_schema = Node("Schema", f"Schema:{table_name}_Parent_FB_group_symbol")
        pattern = Link("Execution", ordered=True, targets=[symbol_schema, parent_node, symbol_var])
        symbols += get_mappings(pattern, "v1")
    return symbols

# 15
def get_parent_gene_groups_symbols(group_id):
    """Parent group symbols of `group_id`, read from "gene_group_data" (a group may have several parents, e.g. FBgg0000275)."""
    return get_parent_groups_symbols(group_id=group_id, table_name="gene_group_data")

# 16
# In fact, a group could have more than one parent! E.g: FBgg0000275
def get_parent_pathway_group_symbols(group_id):
    """Pathway-group variant of get_parent_groups_symbols(): uses the "pathway_group_data" table."""
    return get_parent_groups_symbols(group_id, table_name="pathway_group_data")



# 17
# In fact, a group could have more than one parent. E.g: FBgg0000275
# In the precomputed tables there are only group PARENT id and symbol...
# So, the parent group names should be retrieved as the regular groups names.
def get_parent_groups_names(group_id, table_name):
    """
    Collect the names of every parent of group_id.

    Parent names are looked up through the regular group-name schema node
    ("Schema:{table_name}_FB_group_name"), one "Execution" query per id returned
    by get_parent_groups_ids(group_id). The "v1" mappings of all queries are
    returned as one flat list.
    """
    names = []
    for pid in get_parent_groups_ids(group_id):
        parent_node = Node("Verbatim", pid)
        name_var = Variable("v1")
        name_schema = Node("Schema", f"Schema:{table_name}_FB_group_name")
        name_query = Link("Execution", ordered=True, targets=[name_schema, parent_node, name_var])
        found = get_mappings(name_query, "v1")
        names.extend(found)
    return names

# 18
# In fact, a group could have more than one parent! E.g: FBgg0000275
def get_parent_gene_groups_names(group_id):
    """Gene-group variant of get_parent_groups_names(): uses the "gene_group_data" table."""
    return get_parent_groups_names(group_id, table_name="gene_group_data")

# 19
# In fact, a group could have more than one parent! E.g: FBgg0000275
def get_parent_pathway_groups_names(group_id):
    """Pathway-group variant of get_parent_groups_names(): uses the "pathway_group_data" table."""
    return get_parent_groups_names(group_id, table_name="pathway_group_data")



# 20
def get_groups_members(gene_symbol, table_name):
    """
    List the member genes of every group that the gene designated by gene_symbol belongs to.

    Args:
        gene_symbol: gene symbol, forwarded to get_groups_ids().
        table_name: table prefix used to build the schema node names
            "Schema:{table_name}_Group_member_FB_gene_id" and
            "Schema:{table_name}_Group_member_FB_gene_symbol".

    Returns:
        None when get_groups_ids() returns None. Otherwise a list with one tuple
        per group id:
        (group id, get_group_symbol(...), get_group_name(...),
         number of member symbols, sorted list of member symbols).
        "[" and "]" in member symbols are rewritten to "(" and ")".
    """
    gene_groups_ids = get_groups_ids(gene_symbol, table_name)
    if gene_groups_ids is None:
        return None
    gene_group_symbols = []
    fb_id_schema_name = f"Schema:{table_name}_Group_member_FB_gene_id"
    gene_symbol_schema_name = f"Schema:{table_name}_Group_member_FB_gene_symbol"
    for gg_id in gene_groups_ids:
        member_symbols = []
        n1 = Node("Verbatim", gg_id)
        v1 = Variable("v1")
        # gets all gene ids (FBgn#) of group given by gg_id
        sp = Node("Schema", fb_id_schema_name)
        ids_query = Link("Execution", ordered=True, targets=[sp, n1, v1])
        # de-duplicate the ids and visit them in a deterministic order
        gene_ids = sorted(set(get_mappings(ids_query, "v1")))
        for gene_id in gene_ids:
            n2 = Node("Verbatim", gene_id)
            v2 = Variable("v2")
            ss = Node("Schema", gene_symbol_schema_name)
            symbol_query = Link("Execution", ordered=True, targets=[ss, n2, v2])
            symbols = get_mappings(symbol_query, "v2")
            if not symbols:
                # No symbol stored for this gene id: skip it instead of failing
                # with IndexError on symbols[0].
                continue
            # only the first mapping is used, with brackets shown as parentheses
            member_symbols.append(symbols[0].replace("[", "(").replace("]", ")"))
        gene_group_symbols.append(
            (
                gg_id,
                get_group_symbol(gg_id, table_name),
                get_group_name(gg_id, table_name),
                len(member_symbols),
                sorted(member_symbols),
            )
        )
    return gene_group_symbols

# 21    
# Returns list(s) of gene symbols that are members of the same group(s) as
# the gene designated by gene_symbol
def get_gene_groups_members(gene_symbol):
    """Gene-group variant of get_groups_members(): uses the "gene_group_data" table."""
    return get_groups_members(gene_symbol, table_name="gene_group_data")

# 22
# Same as get_gene_groups_members(), but for pathway groups
def get_pathway_groups_members(gene_symbol):
    """Pathway-group variant of get_groups_members(): uses the "pathway_group_data" table."""
    return get_groups_members(gene_symbol, table_name="pathway_group_data")



# 23
def get_group_hierarchy(gene_symbol, table_name):
    """
    Walk upwards from the groups of gene_symbol through their parent groups.

    Args:
        gene_symbol: gene symbol, forwarded to get_groups_ids().
        table_name: table prefix, forwarded to get_groups_ids().

    Returns:
        None when get_groups_ids() returns None. Otherwise a list of
        (level, ids) tuples:
        - the first tuple is (0, ids of the groups the gene belongs to);
        - for every group id visited at level n, one tuple
          (n + 1, parent ids of that group) is appended.
        An empty ids list signals that the hierarchy finished in the previous
        level (at level zero it means there is no hierarchy at all).
    """
    current_ids = get_groups_ids(gene_symbol, table_name)
    if current_ids is None:
        return None
    level = 0
    group_hierarchy = [(level, current_ids)]
    while current_ids:
        next_ids = []
        for gg_id in current_ids:
            parents = get_parent_groups_ids(gg_id)
            # Recorded even when empty: an empty list marks the end of this branch.
            group_hierarchy.append((level + 1, parents))
            next_ids.extend(parents)
        level += 1
        current_ids = next_ids
    return group_hierarchy

# 24
def get_gene_group_hierarchy(gene_symbol):
    """
    Gene-group variant of get_group_hierarchy(): uses the "gene_group_data" table.

    Level zero holds the groups ids that the gene belongs to.
    Upper levels hold the hierarchy itself.
    An empty list signals that the hierarchy finished in the previous level
    (except at level zero, where it means there is no hierarchy at all).
    """
    return get_group_hierarchy(gene_symbol, "gene_group_data")

# 25
def get_pathway_group_hierarchy(gene_symbol):
    """
    Pathway-group variant of get_group_hierarchy(): uses the "pathway_group_data" table.

    Level zero holds the groups ids that the gene belongs to.
    Upper levels hold the hierarchy itself.
    An empty list signals that the hierarchy finished in the previous level
    (except at level zero, where it means there is no hierarchy at all).
    """
    return get_group_hierarchy(gene_symbol, "pathway_group_data")


# 26
# remember that "gene_group_GO_id(s)" could be a list. So, every GO id should be retrieved (together with its name)
    #  get GENE ontology  ::--->  from Dmel_enzyme_data
    # get ontology id
    # get ontology name (has_name)
    #  get definition?



# 27    
# get enzyme data  ::--->  from Dmel_enzyme_data
# remember that "gene_EC_number(s)" could be a list. So, every EC number should be retrieved (together with its name)

In [None]:
# Time one get_gene_groups_members() call for gene 'AGO2' with the WallClock helper.
clock = WallClock()
clock.start()
# Both the query and the printing of its result fall inside the timed window.
print(get_gene_groups_members('AGO2'))
clock.stop()
# No epoch() was recorded on this clock, so only the total wall time is reported
# (the per-query part inside the parentheses stays empty).
clock.print()

In [None]:
# General building block
#
# Gets a ***list of entities*** from table_column "another_entity_table_column_name" (as stored in the "Schema Node" whose name is
# "another_entity_table_column_name") for a given gene_symbol.
#
# Most times the returned list will contain only one element.
#
# See the cells below for examples of using this query
def get_feature_entities_list(feature_identifier, another_entity_table_column_name, feature_identifier_type="symbol"):
    """
    Get the entities stored in a table column for a given feature.

    Args:
        feature_identifier: a gene symbol when feature_identifier_type is
            "symbol"; otherwise the identifier used as-is as the name of the
            "Verbatim" node in the query.
        another_entity_table_column_name: column name; "(" and ")" are rewritten
            to "[" and "]" and the result is prefixed with "Schema:" to form the
            schema node name.
        feature_identifier_type: "symbol" (default) delegates to
            _get_feature_entities_list(); "FBid" queries with feature_identifier
            directly. "FBxx" is accepted as an alias of "FBid" because a caller
            in this notebook still passes it.

    Returns:
        What get_mappings() yields for variable "v1". In "symbol" mode this is
        whatever _get_feature_entities_list() returns, which can be None.

    Raises:
        ValueError: for any other feature_identifier_type (previously this
            surfaced as an UnboundLocalError on the query variable).
    """
    if feature_identifier_type == "symbol":
        return _get_feature_entities_list(feature_identifier, another_entity_table_column_name)
    if feature_identifier_type not in ("FBid", "FBxx"):
        raise ValueError(
            f"Unsupported feature_identifier_type: '{feature_identifier_type}' "
            "(expected 'symbol' or 'FBid')"
        )
    n1 = Node("Verbatim", feature_identifier)
    v1 = Variable("v1")
    s = Node("Schema", f"Schema:{another_entity_table_column_name.replace('(', '[').replace(')', ']')}")

    q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
    return get_mappings(q1, "v1")

In [None]:
# General building block
#
# Gets a ***list of entities*** from table_column "another_entity_table_column_name" (as stored in the "Schema Node" whose name is
# "another_entity_table_column_name") for a given gene_symbol.
#
# Most of the time the returned list will contain only one element.
#
# See the next cell for examples of using this query
def _get_feature_entities_list(gene_symbol, another_entity_table_column_name):
    """
    Get the entities stored in a table column for the gene designated by gene_symbol.

    The symbol is first resolved with get_feature_fb_id(); the resulting id is
    then used as the "Verbatim" node of an "Execution" query against the schema
    node built from another_entity_table_column_name ("(" and ")" rewritten to
    "[" and "]", prefixed with "Schema:").

    Returns:
        None when get_feature_fb_id() returns None; otherwise what get_mappings()
        yields for variable "v1".
    """
    fb_id = get_feature_fb_id(gene_symbol)
    if fb_id is None:
        return None
    n1 = Node("Verbatim", fb_id)
    v1 = Variable("v1")
    s = Node("Schema", f"Schema:{another_entity_table_column_name.replace('(', '[').replace(')', ']')}")

    q1 = Link("Execution", ordered=True, targets=[s, n1, v1])
    return get_mappings(q1, "v1")

In [None]:
symbols_list = ["Top3beta", "Mef2", "Clk", "Myc", "Abd-B"]
symbols_list = ["Su[var]205", "Top3beta", "Mef2", "Clk", "Dref", "TfIIB", "Myc", "AGO2", "Nipped-B", 
                "Cp190", "TfIIA-L","Trl", "ash1", "Raf", "Abd-B", "Orc2", "Rbf", "mof", "msl-1", "Hmr"]
#symbols_list = ["Top3beta"]               

# (printed label, table column) pairs, reported in this order for every gene.
feature_report_columns = [
    ("(fbgn_fbtr_fbpp_expanded) organism", "fbgn_fbtr_fbpp_expanded_organism"),
    ("(Dmel_enzyme_data) GO terms", "Dmel_enzyme_data_gene_group_GO_id(s)"),
    ("(Dmel_enzyme_data) gene group id (FBgg#)", "Dmel_enzyme_data_gene_group_id"),
    ("(Dmel_enzyme_data) gene_group_EC_number(s)", "Dmel_enzyme_data_gene_group_EC_number(s)"),
    ("(Dmel_enzyme_data) gene_EC_number(s)", "Dmel_enzyme_data_gene_EC_number(s)"),
    ("(Dmel_enzyme_data) gene_name", "Dmel_enzyme_data_gene_name"),
    ("(gene_map_table) recombination_loc", "gene_map_table_recombination_loc"),
    ("(gene_map_table) cytogenetic_loc", "gene_map_table_cytogenetic_loc"),
    # THIS WOULD RETURN [] BECAUSE THERE IS NO PAIR FROM "feature_uniquename" TO "dmel_human_orthologs_disease_Human_gene_symbol"
    # BUT THERE IS FROM "dmel_human_orthologs_disease_Dmel_gene_ID" TO "dmel_human_orthologs_disease_Human_gene_symbol" THAT SHOULD BE USED
    # (related columns: Dmel_gene_ID, Dmel_gene_symbol)
    ("(dmel_human_orthologs_disease) Human_gene_symbol", "dmel_human_orthologs_disease_Human_gene_symbol"),
    ("(best_gene_summary) Summary source", "best_gene_summary_Summary_Source"),
    ("(best_gene_summary) Summary", "best_gene_summary_Summary"),
]

for gene_symbol in symbols_list:
    print(f'\n\nGene {gene_symbol}:')
    for report_label, report_column in feature_report_columns:
        print(f'{gene_symbol} {report_label}: {get_feature_entities_list(gene_symbol, report_column)}')

In [None]:
# Allele / diseases related gene data:
symbols_list = ["Top3beta", "Mef2", "Clk", "Myc", "Abd-B"]
symbols_list = ["Su[var]205", "Top3beta", "Mef2", "Clk", "Dref", "TfIIB", "Myc", "AGO2", "Nipped-B", 
                "Cp190", "TfIIA-L","Trl", "ash1", "Raf", "Abd-B", "Orc2", "Rbf", "mof", "msl-1", "Hmr"]
#symbols_list = ["Top3beta"]               
for gene_symbol in symbols_list:
    print(f'\n\nGene {gene_symbol}:')
    # get_feature_entities_list() returns None when the symbol cannot be resolved
    # to an FB id; treat that like "no alleles" instead of iterating over None.
    alleles_ids = get_feature_entities_list(gene_symbol, "fbal_to_fbgn_AlleleID") or []
    if alleles_ids:
        # The allele and phenotype lines are gene-level lookups, so they are printed
        # once per gene (only when the gene has alleles) rather than once per allele.
        # The allele list fetched above is reused; the former second lookup passed the
        # unsupported identifier type "FBxx", which made get_feature_entities_list() fail.
        print(f'{gene_symbol} (fbal_to_fbgn) Allele id(s): {alleles_ids}')
        print(f'{gene_symbol} (dmel_human_orthologs_disease) OMIM_Phenotype_IDs[name]: {get_feature_entities_list(gene_symbol, "dmel_human_orthologs_disease_OMIM_Phenotype_IDs[name]")}') 
    print(f'{gene_symbol} (dmel_human_orthologs_disease) Human_gene_HGNC_ID: {get_feature_entities_list(gene_symbol, "dmel_human_orthologs_disease_Human_gene_HGNC_ID")}') 
    print(f'{gene_symbol} (dmel_human_orthologs_disease) Human_gene_OMIM_ID: {get_feature_entities_list(gene_symbol, "dmel_human_orthologs_disease_Human_gene_OMIM_ID")}') 


In [3]:
# (Evaluation "Predicate has_name" (List "Enzyme EC 1.1.1.1" "Concept alcohol dehydrogenase"))

# (Execution "Schema Schema:grp_uniquename" "grp 162" "Verbatim FBgg0000201")
# (Execution "Schema Schema:public.cvterm" "grp 162" "Concept Concept:public.cvterm_505")
# (Execution "Schema Schema:grp_uniquename" "grp 163" "Verbatim FBgg0000202")
# (Execution "Schema Schema:public.cvterm" "grp 163" "Concept Concept:public.cvterm_505")
# (Execution "Schema Schema:grp_is_analysis" "grp 163" "Concept Concept:False")

# (Evaluation "Predicate has_name" (List "Enzyme EC 1.1.1.7" "Concept propanediol-phosphate dehydrogenase"))

def get_EC_name(ec_number):
    """
    Get the name attached to an enzyme number through the "has_name" predicate.

    Args:
        ec_number: enzyme identifier. Values starting with "EC" are queried as
            "Enzyme" nodes, anything else as "EnzymeOntology" nodes.

    Returns:
        The first value get_mappings() yields for "v1", or None when the query
        has no result (previously an IndexError).
    """
    if ec_number.startswith("EC"):
        enz_node = Node("Enzyme", ec_number)
    else:
        enz_node = Node("EnzymeOntology", ec_number)
    v1 = Variable("v1")
    has_name_pred_node = Node("Predicate", "has_name")

    q1 = Link("Evaluation", ordered=True, targets=[has_name_pred_node, enz_node, v1])
    names = get_mappings(q1, "v1")
    return names[0] if names else None


In [None]:
import json

# GO dictionary for knowing correct GO nodes types.
# The encoding is given explicitly so decoding does not depend on the platform's
# default locale encoding.
with open("dict_data/go-namespace.json", "r", encoding="utf-8") as ns:
    go_plus_dict = json.load(ns)

   
def get_GO_term_name(go_term):
    """
    Get the name attached to a Gene Ontology term through the "has_name" predicate.

    Args:
        go_term: ontology term id. Only ids starting with "GO" are supported;
            their node type is looked up in go_plus_dict.

    Returns:
        The first value get_mappings() yields for "v1". None is returned when
        go_term is not a GO id, when it is missing from go_plus_dict, or when the
        query has no result (these cases previously raised UnboundLocalError,
        KeyError and IndexError respectively).
    """
    if not go_term.startswith('GO'):        # not a Gene Ontology ID
        return None
    # finds go_term type (BP, MF, CC)
    go_namespace = go_plus_dict.get(go_term)
    if go_namespace is None:
        return None
    go_node = Node(go_namespace, go_term)
    v1 = Variable("v1")
    has_name_pred_node = Node("Predicate", "has_name")

    q1 = Link("Evaluation", ordered=True, targets=[has_name_pred_node, go_node, v1])
    names = get_mappings(q1, "v1")
    return names[0] if names else None

In [None]:
#  GO terms / predicates
# ExplorEnz number / names

symbols_list = ["Top3beta", "Mef2", "Clk", "Myc", "Abd-B"]
symbols_list = ["Su(var)205", "Top3beta", "Mef2", "Clk", "Dref", "TfIIB", "Myc", "AGO2", "Nipped-B", 
                "Cp190", "TfIIA-L","Trl", "ash1", "Raf", "Abd-B", "Orc2", "Rbf", "mof", "msl-1", "Hmr"]
#symbols_list = ["Top3beta"]               
for gene_symbol in symbols_list:
    print(f'\n\nGene {gene_symbol}:') 
 
    print(f'{gene_symbol} (Dmel_enzyme_data) gene_group_EC_number(s): {get_feature_entities_list(gene_symbol, "Dmel_enzyme_data_gene_group_EC_number(s)")}')
    # get_feature_entities_list() returns None when the symbol cannot be resolved
    # to an FB id; fall back to an empty list so the loops below are simply skipped.
    ec_numbers = get_feature_entities_list(gene_symbol, "Dmel_enzyme_data_gene_EC_number(s)") or []
    for ec_number in ec_numbers:
        print(f'{gene_symbol} (Dmel_enzyme_data) gene_EC_number(s)/name: {ec_number}: {get_EC_name(ec_number)}')
        
    go_terms = get_feature_entities_list(gene_symbol, "Dmel_enzyme_data_gene_group_GO_id(s)") or []
    for go_term in go_terms:
        print(f'{gene_symbol} (Dmel_enzyme_data) GO term: {get_GO_term_name(go_term)}')    

In [None]:
      return json.dumps(answer, sort_keys=False, indent=4)
        else:
            self._error(
                ValueError(f"Invalid output format: '{output_format}'")
            )

    def get_nodes(
        self,
        node_type: str,
        node_name: str = None,
        output_format: QueryOutputFormat = QueryOutputFormat.HANDLE,
    ) -> Union[List[str], List[Dict]]:
        """
        Retrieve information about Nodes based on their type and optional name.

        This method retrieves information about nodes from the database based
        on its type and name (if provided). The retrieved nodes information can be
        presented in different output formats as specified by the output_format parameter.


        Args:
            node_type (str): The type of nodes being queried.
            node_name (str, optional): The name of the specific node being queried. Defaults to None.
            output_format (QueryOutputFormat, optional): The desired output format.
                Defaults to QueryOutputFormat.HANDLE.

        Returns:
            Union[List[str], List[Dict]]: Depending on the output_format, returns either:
                - A list of strings representing handles of the nodes (output_format == QueryOutputFormat.HANDLE),
                - A list of dictionaries containing atom information of the nodes (output_format == QueryOutputFormat.ATOM_INFO),
                - A JSON-formatted string representing the deep representation of the nodes (output_format == QueryOutputFormat.JSON).

        Raises:
            ValueError: If an invalid output format is provided.

        Note:
            If node_name is provided and the specified node does not exist, an empty list is returned.

        Example:
            >>> result = obj.get_nodes(
                    node_type='Concept',
                    output_format=QueryOutputFormat.HANDLE
                )
            >>> print(result)
            [
                'af12f10f9ae2002a1607ba0b47ba8407',
                '1cdffc6b0b89ff41d68bec237481d1e1',
                '5b34c54bee150c04f9fa584b899dc030',
                'c1db9b517073e51eb7ef6fed608ec204',
                ...
            ]
        """