# Configure KBase Jupyter Dev Environment
<sub><sup>(contact chenry@anl.gov with questions)</sup></sub>

In [None]:
import platform
print("python version " + platform.python_version())
import sys
import json
import os
import pandas as pd
from os.path import exists
from pathlib import Path
import logging
import re

# Put the chenry_utility_module "lib" directory at the front of the import
# search path so the import on the next line resolves. The code root comes
# from the CODE_BASE environment variable and falls back to
# /scratch/shared/code when that variable is unset.
sys.path = [os.environ.get("CODE_BASE","/scratch/shared/code")+"/chenry_utility_module/lib"] + sys.path
from chenry_utility_module.kbdevutils import KBDevUtils
# NOTE(review): "ModelSEED2" is presumably the name/namespace this notebook
# registers under with KBDevUtils - confirm against the KBDevUtils constructor.
kbdevutil = KBDevUtils("ModelSEED2")

from modelseedpy import ModelSEEDBiochem
from modelseedpy.core.mstemplate import MSTemplateBuilder
from modelseedpy.core.annotationontology import convert_to_search_role,split_role
from modelseedpy.helpers import get_template
import cobra
import cobrakbase

# Logger for this notebook, emitting INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Shared handles obtained from the dev utility object. The cells below read
# msrecon.templates, msrecon.get_template(...) and msrecon.kbase_api.
# NOTE(review): annoapi is presumably an annotation-ontology API client; it is
# not used in the cells visible here - confirm whether it is still needed.
msrecon = kbdevutil.msseedrecon()
annoapi = kbdevutil.anno_client(native_python_api=True)

# Identifying filter reactions

In [None]:
# Identify biochemistry reactions that should be filtered out and write them,
# together with the reason for each, to FilteredReactions.csv (tab-separated,
# columns "id" and "reason").
#
# Reason codes, in order of precedence:
#   MI       - reaction.status contains "MI" (presumably mass imbalance - confirm)
#   CI       - reaction.status contains "CI" (presumably charge imbalance -
#              confirm) and the reaction is not in the template
#   OBS      - reaction is flagged obsolete
#   Empty    - reaction has no metabolites
#   NOFORM   - reaction uses a compound whose formula stringifies to "nan"
#   Abstract - reaction uses a compound whose SMILES contains "*"
#
# MI, CI, OBS and Empty reactions are always filtered; NOFORM and Abstract
# reactions are filtered only when the reaction is not in the template.
kbase_api = msrecon.kbase_api
template = msrecon.get_template(msrecon.templates["gramneg"])
biochem = ModelSEEDBiochem.get()
filter_df = []

# Compounds with no formula, except the explicitly allowed ones. Keys carry
# the "_0" suffix so they can be compared directly with met.id below.
no_form = {}
allowed = ["cpd11416"]
for compound in biochem.compounds:
    if str(compound.formula) == "nan" and compound.id not in allowed:
        no_form[compound.id + "_0"] = 1
print(len(no_form))

# Reasons that filter a reaction even when it is part of the template.
# FIX: the original list contained "EMPTY", which never matched the "Empty"
# reason assigned below.
unconditional_reasons = {"MI", "CI", "OBS", "Empty"}
count = 0  # not used in this cell; kept in case other cells read it
for reaction in biochem.reactions:
    # Template reaction ids carry a compartment suffix, so membership has to
    # be tested with "_c" appended.
    # FIX: the final filter used the bare reaction.id, which made the
    # "keep it if the template uses it" exemption unreachable.
    in_template = reaction.id + "_c" in template.reactions
    status = reaction.status or ""  # tolerate a missing status
    reason = None
    if "MI" in status:
        reason = "MI"
    elif "CI" in status and not in_template:
        reason = "CI"
    elif reaction.is_obsolete == True:
        reason = "OBS"
    elif len(reaction.metabolites) == 0:
        reason = "Empty"
    else:
        for met in reaction.metabolites:
            if met.id in no_form:
                reason = "NOFORM"
                break
            if met.smiles and "*" in met.smiles:
                reason = "Abstract"
                break
    if reason is not None and (reason in unconditional_reasons or not in_template):
        filter_df.append({
            "id": reaction.id,
            "reason": reason
        })
df = pd.DataFrame.from_records(filter_df)
df.to_csv("/scratch/shared/code/cb_annotation_ontology_api/data/FilteredReactions.csv", sep='\t', index=False)

# Updating ModelSEED obsolete reaction aliases

In [None]:
# Map every obsolete ModelSEED reaction id to a list holding its "modelseed"
# alias entry (an empty list when the reaction has no such alias), then save
# the mapping as msrxn_hash.json.
biochem = ModelSEEDBiochem.get()
ModelSEED_reaction_hash = {}
for rxn in biochem.reactions:
    if not rxn.is_obsolete:
        continue
    alias_entries = []
    if "modelseed" in rxn.aliases:
        alias_entries.append(rxn.aliases["modelseed"])
    ModelSEED_reaction_hash[rxn.id] = alias_entries

with open('/scratch/shared/code/cb_annotation_ontology_api/data/msrxn_hash.json', 'w') as outfile:
    json.dump(ModelSEED_reaction_hash, outfile)

# Update SSO and reaction SSO mappings

In [5]:
# Switch: when True, a later step issues new SSO ids and writes new_sso.json.
create_new_sso = False
# Current template object
template = msrecon.get_template(msrecon.templates["gramneg"])
# SSO ontology object
sso = msrecon.kbase_api.get_object("seed_subsystem_ontology","KBaseOntology")

# Load every role set, then index the roles two ways:
#   unique_role_hash   : individual role name -> list of role sets it occurs in
#   consolidated_roles : search-normalised role name -> record collecting the
#                        original spellings, SSO id, reactions and source
with open('/scratch/shared/data/TemplateFunctions/core.2015-2020.json') as json_file:
    role_hash = json.load(json_file)
unique_role_hash = {}
consolidated_roles = {}
for role_set, set_roles in role_hash.items():
    for role_key in set_roles:
        # Drop everything from "=>" onwards before splitting into single roles
        set_roles[role_key] = re.sub("=>.+","",set_roles[role_key])
        for fr in split_role(set_roles[role_key]):
            unique_role_hash.setdefault(fr, []).append(role_set)
            searchname = convert_to_search_role(fr)
            entry = consolidated_roles.setdefault(
                searchname,
                {"roles":{},"sso":None,"rxn":{},"subsys":None,"source":"seed"},
            )
            # Roles seen in this particular role set are tagged "latest"
            if role_set == "core.2020-0417":
                entry["source"] = "latest"
            entry["roles"][fr] = 1

# Walk the SSO term table: attach each term id to its search-normalised role
# record (creating the record with source "sso" when the role is new), record
# term id -> name in sso_hash, and remember the greatest term id seen.
largest_term = None
sso_hash = dict()
for term, term_data in sso["term_hash"].items():
    term_name = term_data["name"]
    name = convert_to_search_role(term_name)
    entry = consolidated_roles.setdefault(
        name,
        {"roles":{},"sso":None,"rxn":{},"subsys":None,"source":"sso"},
    )
    entry["sso"] = term
    entry["roles"][term_name] = 1
    # Term ids are compared as strings here
    if largest_term is None or term > largest_term:
        largest_term = term
    sso_hash[term] = term_name

# Walk the template: for every role of every complex of every reaction, link
# the search-normalised role to the reaction id, and record
# reaction id -> {role: complex id} in rxn_hash.
rxn_hash = {}
for reaction in template.reactions:
    # Reaction id with the "_<letter>" part removed.
    # NOTE(review): the pattern is unanchored, so every underscore followed by
    # a lowercase letter is removed, not only a trailing suffix.
    rxnid = re.sub("_[a-z]","",reaction.id)
    for cpx in reaction.complexes:
        for role in cpx.roles:
            rolename = convert_to_search_role(role.name)
            entry = consolidated_roles.setdefault(
                rolename,
                {"roles":{},"sso":None,"rxn":{},"subsys":None,"source":"template"},
            )
            entry["roles"][role.name] = 1
            entry["rxn"][rxnid] = 1
            rxn_hash.setdefault(rxnid, {})[rolename] = cpx.id

# When enabled, give a fresh SSO id to every role that has none yet and is
# either linked to at least one reaction or tagged "latest", then write the
# complete term table to new_sso.json.
if create_new_sso:
    # Continue numbering from the numeric part of the greatest existing id
    largest_term = int(re.sub("SSO:","",largest_term))
    for name, entry in consolidated_roles.items():
        if entry["sso"] is not None:
            continue
        if len(entry["rxn"]) == 0 and entry["source"] != "latest":
            continue
        largest_term += 1
        # Numeric part is left-padded with zeros to nine digits
        newsso = "SSO:" + str(largest_term).zfill(9)
        entry["sso"] = newsso
        # The first recorded spelling of the role becomes the term name
        sso_hash[newsso] = list(entry["roles"])[0]

    new_sso = {
        "format_version" : "0.1",
        "default_namespace" : "seed_subsystem_ontology",
        "ontology" : "sso",
        "data_version" : "releases/2020-04-17",
        "term_hash" : {
            term: {"id" : term, "name" : term_name}
            for term, term_name in sso_hash.items()
        }
    }

    with open("/scratch/shared/code/cb_annotation_ontology_api/data/new_sso.json", 'w') as outfile:
        json.dump(new_sso, outfile, indent=2)

# SSO id -> list of reaction ids, for every role that has both an SSO id and
# at least one linked reaction; saved as new_SSO_reactions.json.
sso_rxns = {
    entry["sso"]: list(entry["rxn"])
    for entry in consolidated_roles.values()
    if entry["rxn"] and entry["sso"]
}
with open("/scratch/shared/code/cb_annotation_ontology_api/data/new_SSO_reactions.json", 'w') as outfile:
    json.dump(sso_rxns, outfile, indent=2)


# Saving obsolete EC numbers

In [24]:
import xml.etree.ElementTree as ET

# Build a map from each transferred or deleted EC number to the first
# EC-number-like token found in its history note, and save it as
# obsolete_ec.json. Entries whose note holds no such token are skipped.

# Parse the XML file
tree = ET.parse("/scratch/shared/data/ECOntology/enzyme-data.xml")
root = tree.getroot()
root = root[0]

# Locate the <table_data name="hist"> element. Children without a "name"
# attribute are skipped instead of raising KeyError.
hist_table = None
for child in root:
    if child.tag == "table_data" and child.attrib.get("name") == "hist":
        hist_table = child
        break
if hist_table is None:
    # Fail with a clear message rather than iterating the wrong element
    raise ValueError('no <table_data name="hist"> element found in enzyme-data.xml')
root = hist_table  # keep the original name bound to the hist table

# One record per row, keyed by EC number; each record maps field name -> text.
ec_hash = {}
for row in root:
    data = {}
    for child in row:
        data[child.attrib["name"]] = child.text
    ec_hash[data["ec_num"]] = data

ec_pattern = re.compile(r'(\d+\.[\d-]+\.[\d-]+\.[\d-]+)')
obsolete_ec = {}
for ec, record in ec_hash.items():
    if record.get("action") not in ("transferred", "deleted"):
        continue
    # Element.text is None for empty fields; searching None would raise
    # TypeError, so treat a missing note as an empty string.
    match = ec_pattern.search(record.get("note") or "")
    if match:
        obsolete_ec[ec] = match.group(0)
with open("/scratch/shared/code/cb_annotation_ontology_api/data/obsolete_ec.json", 'w') as outfile:
    json.dump(obsolete_ec, outfile, indent=2)