In [1]:
import requests

In [1]:
!curl -X POST "https://mygene.info/v3/query?q=MTOR%2CGADD45A&scopes=symbol&species=human&fields=entrezgene%2Csymbol&dotfield=false&size=10&from=0&fetch_all=false&facet_size=10&entrezonly=false&ensemblonly=false" -H  "accept: */*" -H  "Content-Type: application/json" -d "{\"q\":[\"1017\",\"1018\"],\"scopes\":[\"entrezgene\"]}"

[{"query":"MTOR","_id":"2475","_score":17.545715,"entrezgene":"2475","symbol":"MTOR"},{"query":"GADD45A","_id":"1647","_score":18.302315,"entrezgene":"1647","symbol":"GADD45A"}]

In [45]:
gene_id = ['3845']
fields = "refseq.protein"

url = "https://mygene.info/v3/query"
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json"
}
data = {
    "q": gene_id,
    "fields": fields
}
response = requests.post(url, json=data, headers=headers)
results = response.json()

field_list = fields.split(",") if "," in fields else [fields]
field_list = [field.split(".")[0] if "." in field else field for field in field_list]

gene_dict = {field: {} for field in field_list}

for entry in results:
    query_key = str(entry.get("query"))
    for field in field_list:
        if field.startswith("refseq"):
            refseq_info = entry.get("refseq", {})
            if field == "refseq":
                refseq_rna = refseq_info.get("rna", []) if isinstance(refseq_info.get("rna", []), list) else [refseq_info.get("rna", [])]
                refseq_protein = refseq_info.get("protein", []) if isinstance(refseq_info.get("protein", []), list) else [refseq_info.get("protein", [])]
                gene_dict["refseq"][query_key] = refseq_rna + refseq_protein
            else:
                refseq_field = field.split(".")[1]
                gene_dict["refseq"][query_key] = refseq_info.get(refseq_field, []) if isinstance(refseq_info.get(refseq_field, []), list) else [refseq_info.get(refseq_field, [])]

        else:
            gene_dict[field][query_key] = entry.get(field, None)

gene_dict

{'refseq': {'3845': ['NP_001356715.1',
   'NP_001356716.1',
   'NP_004976.2',
   'NP_203524.1',
   'XP_047284782.1',
   'XP_054227990.1']}}

In [36]:
def getCrossReference(gene_ids, fields = "uniprot"):
    """
    Query the mygene.info API to retrieve EntrezGene IDs and UniProt data 
    for given gene ids (ncbigene, or entrezgene), then process the results into a dictionary.

    Parameters:
    - gene_ids (list): List of gene ids (ncbigene, or entrezgene) to query.
    - fields (str): Fields to query (default: "uniprot"). Single field or comma-separated.

    Returns:
    - dict: A dictionary structured as {field: {gene_id: value}}
            If 'uniprot' is included, 'Swiss-Prot' is prioritized compared to 'TrEMBL'.
    """
    url = "https://mygene.info/v3/query"
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }
    data = {
        "q": gene_ids,
        "fields": fields
    }
    response = requests.post(url, json=data, headers=headers)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return {}
    results = response.json()

    field_list = fields.split(",") if "," in fields else [fields]
    field_list = [field.split(".")[0] if "." in field else field for field in field_list]
    gene_dict = {field: {} for field in field_list}

    for entry in results:
        query_key = str(entry.get("query"))
        for field in field_list:
            if field == "uniprot":
                uniprot_info = entry.get("uniprot", {})
                swiss_prot = uniprot_info.get("Swiss-Prot")
                trembl = uniprot_info.get("TrEMBL", [])
                if swiss_prot:
                    gene_dict["uniprot"][query_key] = [swiss_prot] if isinstance(swiss_prot, str) else swiss_prot
                elif trembl:
                    gene_dict["uniprot"][query_key] = [trembl] if isinstance(trembl, str) else trembl
                else:
                    gene_dict["uniprot"][query_key] = None
            elif field.startswith("ensembl"):
                ensembl_info = entry.get("ensembl", {})
                if field == "ensembl":
                    ensembl_gene = ensembl_info.get("gene", []) if isinstance(ensembl_info.get("gene", []), list) else [ensembl_info.get("gene", [])]
                    ensembl_protein = ensembl_info.get("protein", []) if isinstance(ensembl_info.get("protein", []), list) else [ensembl_info.get("protein", [])]
                    gene_dict["ensembl"][query_key] = ensembl_gene + ensembl_protein
                else:
                    ensembl_field = field.split(".")[1]
                    gene_dict["ensembl"][query_key] = ensembl_info.get(ensembl_field, []) if isinstance(ensembl_info.get(ensembl_field, []), list) else [ensembl_info.get(ensembl_field, [])]
            elif field.startswith("refseq"):
                refseq_info = entry.get("refseq", {})
                if field == "refseq":
                    refseq_rna = refseq_info.get("rna", []) if isinstance(refseq_info.get("rna", []), list) else [refseq_info.get("rna", [])]
                    refseq_protein = refseq_info.get("protein", []) if isinstance(refseq_info.get("protein", []), list) else [refseq_info.get("protein", [])]
                    gene_dict["refseq"][query_key] = refseq_rna + refseq_protein
                else:
                    refseq_field = field.split(".")[1]
                    gene_dict["refseq"][query_key] = refseq_info.get(refseq_field, []) if isinstance(refseq_info.get(refseq_field, []), list) else [refseq_info.get(refseq_field, [])]
            else:
                gene_dict[field][query_key] = entry.get(field, None)   
    return gene_dict

gene_ids = ['3845']
getCrossReference(gene_ids, fields = "uniprot,ensembl")


{'uniprot': {'3845': ['P01116']},
 'ensembl': {'3845': ['ENSG00000133703',
   'ENSP00000256078',
   'ENSP00000308495',
   'ENSP00000451856',
   'ENSP00000452512',
   'ENSP00000508568',
   'ENSP00000508921',
   'ENSP00000509223',
   'ENSP00000509238',
   'ENSP00000509798',
   'ENSP00000510254',
   'ENSP00000510431',
   'ENSP00000510479',
   'ENSP00000510511']}}

In [39]:
result = getCrossReference(gene_ids, fields = "uniprot")
result["uniprot"]['3845']

['P01116']

In [41]:
KNOWLEDGE_RESOURCE = {'species': 'chebi',
                      'reaction': 'rhea',
                      'genes': ['ncbigene', 'uniprot'],
                      'qual_species':'ncbigene'}
KNOWLEDGE_RESOURCE['qual_species']

'ncbigene'

In [2]:


def getAnnotationString(
                        candidates,
                        meta_id,
                        cross_reference = None):
    """
    Get a string of annotations,
    using a list of strings.
    (of candidates)
    Can replace a whole annotation. 

    Parameters
    ----------
    candidates: list-str
        e.g., ['CHEBI:12345', 'CHEBI:98765']

    meta_id: str
        Meta ID of the element to be included in the annotation. 

    cross_reference: str
        Cross reference to be used for adding additional annotations.
        Single field or comma-separated. E.g., 'uniprot' or 'uniprot,HGNC'

    Returns
    -------
    str
    """
    # get the cross reference first if provided
    if cross_reference:
        all_cross_dict = getCrossReference(candidates, fields = cross_reference)
        field_list = cross_reference.split(",") if "," in cross_reference else [cross_reference]

    # First, construct an empty container
    container_items = ['annotation', 
                        cn.RDF_TAG,
                        'rdf:Description rdf:about="#' + str(meta_id) + '"',
                        self.prefix,
                        'rdf:Bag']
    empty_container = self.createAnnotationContainer(container_items)
    # Next, create annotation lines
    items_from = []
    for one_cand in candidates:
        items_from.append(createAnnotationItem('ncbigene',
                                                    one_cand))
        if cross_reference:
            for field in field_list:
                one_cand_cross = all_cross_dict[field][one_cand]
                if isinstance(one_cand_cross, list):
                    for i in one_cand_cross:
                        items_from.append(createAnnotationItem(field, i))    
                else:
                    items_from.append(createAnnotationItem(field, one_cand_cross))     

    result = self.insertList(insert_to=empty_container,
                                insert_from=items_from)
    return ('\n').join(result)

def createAnnotationItem(knowledge_resource,
                        identifier):
    """
    Create a one-line annotation,
    e.g., <rdf:li rdf:resource="http://identifiers.org/chebi/CHEBI:15414"/>

    Parameters
    ----------
    knowledge_resource: str

    identifier: str

    Returns
    -------
    str
    """
    annotation_items = ['identifiers.org',
                        knowledge_resource,
                        identifier]
    res = '<rdf:li rdf:resource="http://' + \
            '/'.join(annotation_items)  +\
            '"/>'
    return res

In [54]:
candidates = ['3845', '5879']
meta_id = '12345'
cross_reference = 'uniprot'
all_cross_dict = getCrossReference(candidates, fields = cross_reference)
all_cross_dict

{'uniprot': {'3845': ['P01116'], '5879': ['P63000']}}

In [56]:
field_list = cross_reference.split(",") if "," in cross_reference else [cross_reference]
field_list

['uniprot']

In [57]:
items_from = []
for one_cand in candidates:
    items_from.append(createAnnotationItem('ncbigene',
                                                one_cand))
    if cross_reference:
        for field in field_list:
            one_cand_cross = all_cross_dict[field][one_cand]
            if isinstance(one_cand_cross, list):
                for i in one_cand_cross:
                    items_from.append(createAnnotationItem(field, i))    
            else:
                items_from.append(createAnnotationItem(field, one_cand_cross))    

items_from


['<rdf:li rdf:resource="http://identifiers.org/ncbigene/3845"/>',
 '<rdf:li rdf:resource="http://identifiers.org/uniprot/P01116"/>',
 '<rdf:li rdf:resource="http://identifiers.org/ncbigene/5879"/>',
 '<rdf:li rdf:resource="http://identifiers.org/uniprot/P63000"/>']

In [62]:
import re
def extract_ontology_from_items(items_list):
    """
    Extract ontology from items and return a flat list of tuples.
    Each tuple contains (ontology type, ontology id).

    Parameters
    ----------
    items_list : list
        A list of string items containing ontology annotations.

    Returns
    -------
    list of tuples
        A flat list of (ontology type, ontology id).
    """
    result_identifiers = []
    for item in items_list:
        # Extract identifiers from "urn:miriam" URIs
        miriam_identifiers = re.findall(r'urn:miriam:([^"]+)"', item)
        for r in miriam_identifiers:
            ontology_type, ontology_id = r.split(":", 1)
            result_identifiers.append((ontology_type, ontology_id))

        # Extract identifiers from "identifiers.org" URIs
        identifiers_list = re.findall(r'identifiers\.org/([^/]+)/([^/"]+)', item)
        for ontology_type, ontology_id in identifiers_list:
            # Clean ontology_id to remove any unwanted characters
            ontology_id = ontology_id.replace('"', '')
            result_identifiers.append((ontology_type, ontology_id))

    return result_identifiers
ontology_list = extract_ontology_from_items(items_from)
ontology_list

[('ncbigene', '3845'),
 ('uniprot', 'P01116'),
 ('ncbigene', '5879'),
 ('uniprot', 'P63000')]

In [63]:
ontology_type_list = [val for val in ontology_list if val[0].lower()=='ncbigene']
[val[1] for val in ontology_type_list]

['3845', '5879']

In [1]:
import libsbml
import sys
sys.path.insert(0, '/Users/luna/Desktop/CRBM/AMAS_proj/AMAS-v2')
from AMAS import tools

sbml = "/Users/luna/Desktop/Pancreatic cancer/Models/Werle2021.sbml"
reader = libsbml.SBMLReader()
document = reader.readSBML(sbml)
model = document.getModel()
qual_spec = model.getPlugin("qual").getQualitativeSpecies('ARP2_3')
inp_str = qual_spec.getAnnotationString()
print(inp_str)

<annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:vCard4="http://www.w3.org/2006/vcard/ns#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/">
    <rdf:Description rdf:about="#metaid_0000011">
      <bqbiol:is>
        <rdf:Bag>
          <rdf:li rdf:resource="http://identifiers.org/ncbigene/10096"/>
          <rdf:li rdf:resource="http://identifiers.org/ncbigene/10097"/>
        </rdf:Bag>
      </bqbiol:is>
    </rdf:Description>
  </rdf:RDF>
</annotation>


In [3]:

def addCrossReference(inp_str, fields):
    """
    Add cross reference of NCBI Gene ID to existing annotations.

    Parameters
    ----------
    inp_str: str
        Existing annotation string

    fields: str
        Fields to add to annotations. Single field or comma-separated. E.g., 'uniprot' or 'uniprot,HGNC'.

    Returns
    -------
    :str
        The updated annotation string with cross reference items given in fields.
    """
    # find all existing items, only <bqbiol:is> items
    annotation_dict = tools.divideExistingAnnotation(inp_str, qualifier = 'bqbiol:is')
    if annotation_dict is None:
      print("No existing NCBI Gene ID found, skipping cross reference.")
      return inp_str
    items = annotation_dict['items']
    # find all existing ncbi gene ids
    ontology_list = tools.extract_ontology_from_items(items)
    ontology_type_list = [val for val in ontology_list if val[0].lower()=='ncbigene']
    ncbi_ids = [val[1] for val in ontology_type_list]
    # get the cross reference items of the ncbi ids
    cross_dict = tools.getCrossReference(ncbi_ids, fields)
    print(cross_dict)
    # add the cross reference items to the annotation
    field_list = fields.split(",") if "," in fields else [fields]
    for field in field_list:
      for ncbi_id in ncbi_ids:
        print(field, ncbi_id)
        print(cross_dict[field][ncbi_id])
        for one_item in cross_dict[field][ncbi_id]:
            items.append(createAnnotationItem(field, one_item))
    # formatting items
    print(items)
    items = list(set(items)) # remove duplicates
    container = annotation_dict['container']
    res = tools.insertItemsBackToContainer(container, items, qualifier = 'bqbiol:is')
    return res

after_str = addCrossReference(inp_str, fields = 'uniprot,HGNC')
print(after_str)


{'uniprot': {'10096': ['P61158'], '10097': ['P61160']}, 'HGNC': {'10096': ['170'], '10097': ['169']}}
uniprot 10096
['P61158']
uniprot 10097
['P61160']
HGNC 10096
['170']
HGNC 10097
['169']
['<rdf:li rdf:resource="http://identifiers.org/ncbigene/10096"/>', '<rdf:li rdf:resource="http://identifiers.org/ncbigene/10097"/>', '<rdf:li rdf:resource="http://identifiers.org/uniprot/P61158"/>', '<rdf:li rdf:resource="http://identifiers.org/uniprot/P61160"/>', '<rdf:li rdf:resource="http://identifiers.org/HGNC/170"/>', '<rdf:li rdf:resource="http://identifiers.org/HGNC/169"/>']
<annotation>
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vcard4="http://www.w3.org/2006/vcard/ns#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:vCard4="http://www.w3.org/2006/vcard/ns#">
    <rdf:Description rdf:about="#metaid_000