In [3]:
from Scripts.core import read_sbml_into_cobra_model
from Scripts.utils import ReconstructionTool, get_reactions, get_genes, get_metabolites, calculate_quality_metrics
import pandas as pd
import openpyxl

# Disabled hot-reload snippet for Scripts.utils, kept as a bare string literal
# rather than as comments. Because the string is the last expression in the
# cell, its value is echoed as the cell's output.
# NOTE(review): the snippet imports get_kegg_reactions / get_kegg_metabolites,
# whereas the live import at the top of this cell uses get_reactions /
# get_metabolites -- confirm the old names still exist before re-enabling it.
'''
import sys, importlib
importlib.reload(sys.modules['Scripts.utils'])
from Scripts.utils import ReconstructionTool, get_kegg_reactions, get_genes, get_kegg_metabolites
'''

"\nimport sys, importlib\nimportlib.reload(sys.modules['Scripts.utils'])\nfrom Scripts.utils import ReconstructionTool, get_kegg_reactions, get_genes, get_kegg_metabolites\n"

"\nimport sys, importlib\nimportlib.reload(sys.modules['Scripts.utils'])\nfrom Scripts.utils import ReconstructionTool, get_kegg_reactions, get_genes, get_kegg_metabolites\n"

In [4]:
def _load_model(path, database, tool):
    """Read one SBML file through read_sbml_into_cobra_model.

    Parameters
    ----------
    path : str
        Location of the SBML file, relative to the notebook's working directory.
    database : str
        Identifier namespace passed as ``database_version``.
    tool : ReconstructionTool
        Enum member; its ``.value`` is forwarded as ``reconstruction_tool``.

    Returns
    -------
    Whatever read_sbml_into_cobra_model returns (later cells read ``.model`` on it).
    """
    return read_sbml_into_cobra_model(
        file_path=path,
        database_version=database,
        reconstruction_tool=tool.value,
    )


# One draft reconstruction per tool; load order is kept as in the original cell.
merlin_blast_model = _load_model(
    "../Models/Merlin-BA/merlin_model.xml", "kegg", ReconstructionTool.MERLIN)

carveme_model = _load_model(
    "../Models/CarveMe/model_carveme.xml", "bigg", ReconstructionTool.CARVEME)

kbase_model = _load_model(
    "../Models/KBase/kbase_model.xml", "modelseed", ReconstructionTool.MODELSEED)

merlin_bit_model = _load_model(
    "../Models/Merlin-BIT/BIT_model.xml", "bigg", ReconstructionTool.MERLIN)

aureme_model = _load_model(
    "../Models/AuReMe/aureme_model3.xml", "bigg", ReconstructionTool.AUREME)

1204725 does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
1204725 does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
No objective in listOfObjectives
No objective coefficients in model. Unclear what should be optimized
1204725 does not conform to 'http(s)://identifiers.org/collection/id' or'http(s)://identifiers.org/COLLECTION:id
No objective in listOfObjectives
No objective coefficients in model. Unclear what should be optimized


In [6]:
from Scripts.utils import get_reactions, get_metabolites, get_cross_reference_reactions, get_cross_reference_metabolites

# Export, for every reconstruction tool, the gene / reaction / metabolite
# identifiers of its model in three namespaces: the tool's native ids, the
# first cross-reference list, and the second cross-reference list. Each
# namespace is written to its own TSV with one row per tool and
# comma-joined id lists.
models = {
    "merlin_blast": merlin_blast_model.model,
    "aureme": aureme_model.model,
    "carveme": carveme_model.model,
    "kbase": kbase_model.model,
    "merlin_bit": merlin_bit_model.model
}
func_ids = []
metanetx_ids = []
kegg_ids = []
reactions_conversion_df = pd.read_csv('Xrefs files/reactions-conversion.csv')
metabolites_conversion_df = pd.read_csv('Xrefs files/compounds-conversion.csv')
for name, xml in models.items():
    print(name)
    # The gene list does not depend on the namespace, so compute it once and
    # reuse it for all three records.
    genes = ','.join(get_genes(xml, tool=name))
    func_ids.append({
        'tool': name,
        'genes': genes,
        'reactions': ','.join(get_reactions(xml, tool=name)),
        'metabolites': ','.join(get_metabolites(xml, tool=name))
    })
    # Index 0 feeds the MetaNetX table and index 1 the KEGG table.
    cross_reference_reactions = get_cross_reference_reactions(xml, reactions_conversion_df, tool=name)
    # NOTE(review): assumes get_cross_reference_metabolites returns the same
    # two-element layout as the reactions helper -- confirm in Scripts.utils.
    cross_reference_metabolites = get_cross_reference_metabolites(xml, metabolites_conversion_df, tool=name)
    metanetx_ids.append({
        'tool': name,
        'genes': genes,
        'reactions': ','.join(cross_reference_reactions[0]),
        # Fixed: this field previously repeated the reaction ids.
        'metabolites': ','.join(cross_reference_metabolites[0])
    })
    kegg_ids.append({
        'tool': name,
        'genes': genes,
        'reactions': ','.join(cross_reference_reactions[1]),
        # Fixed: this field previously repeated the reaction ids.
        'metabolites': ','.join(cross_reference_metabolites[1])
    })
pd.DataFrame(func_ids).to_csv('../Results/functional_ids.tsv', sep='\t', index=False)
pd.DataFrame(metanetx_ids).to_csv('../Results/metanetx_functional_ids.tsv', sep='\t', index=False)
pd.DataFrame(kegg_ids).to_csv('../Results/kegg_functional_ids.tsv', sep='\t', index=False)

merlin_blast
Found 722 reactions.
Found 382 reactions.
aureme
Found 400 reactions.
Found 645 reactions.
carveme
Found 2679 reactions.
Found 1583 reactions.
kbase
Found 491 reactions.
Found 538 reactions.
merlin_bit
Found 5159 reactions.
Found 0 reactions.


In [None]:
# Gene-level comparison of each tool's UniProt entries against the
# formicicum reference table. For every tool it counts:
#   TPs - tool entries that are also in the reference,
#   FPs - tool entries that are not in the reference,
#   FNs - reference entries that the tool does not contain,
# then appends the values returned by calculate_quality_metrics and writes
# one row per tool to an Excel sheet.
formicicum = pd.read_csv('../Results/formicicum_uniprotinfo.tsv', sep='\t')
formicicum_genes = formicicum['Entry'].tolist()
# Set copy used only for membership tests. The lists are still what gets
# iterated, so repeated entries are counted exactly as before, but each
# lookup is O(1) instead of a linear scan.
# NOTE(review): assumes 'Entry' holds plain hashable ids with no missing values.
formicicum_gene_set = set(formicicum_genes)
metrics = []
for tool in ['merlin_blast', 'merlin_bit', 'carveme', 'kbase']:
    tool_uniprotinfo = pd.read_csv(f'../Results/{tool}_uniprotinfo.tsv', sep='\t')
    tool_genes = tool_uniprotinfo['Entry'].tolist()
    tool_gene_set = set(tool_genes)
    TPs = len([ide for ide in tool_genes if ide in formicicum_gene_set])
    FPs = len([ide for ide in tool_genes if ide not in formicicum_gene_set])
    FNs = len([ide for ide in formicicum_genes if ide not in tool_gene_set])
    # calculate_quality_metrics is expected to yield four values matching the
    # last four column names below -- TODO confirm against Scripts.utils.
    metrics.append([tool, TPs, FPs, FNs] + list(calculate_quality_metrics(TPs, FPs, FNs)))
pd.DataFrame(metrics, columns=['tool', 'TPs', 'FPs', 'FNs', 'Precision', 'Recall', 'F1 score', 'Jaccard distance']).to_excel('../Results/quality_metrics.xlsx', index=False)


In [None]:
# NOTE(review): this cell looks like an unfinished reaction-level comparison --
# confirm the intent before relying on its output.
# It loads the KEGG functional-id table and extracts the 'reactions' column,
# but `formicicum_reactions` is never read by the code below.
formicicum = pd.read_csv('../Results/kegg_functional_ids.tsv', sep='\t')
formicicum_reactions = formicicum['reactions'].tolist()
metrics = []
for tool in ['merlin_blast', 'merlin_bit', 'carveme', 'kbase']:
    tool_uniprotinfo = pd.read_csv(f'../Results/{tool}_uniprotinfo.tsv', sep='\t')
    tool_genes = tool_uniprotinfo['Entry'].tolist()
    # `formicicum_genes` is not assigned in this cell; it must already exist in
    # the kernel from another cell, otherwise these lines raise NameError.
    TPs = len([ide for ide in tool_genes if ide in formicicum_genes])
    FPs = len([ide for ide in tool_genes if ide not in formicicum_genes])
    FNs = len([ide for ide in formicicum_genes if ide not in tool_genes])
    metrics.append([tool, TPs, FPs, FNs] + list(calculate_quality_metrics(TPs, FPs, FNs)))
# Writes to the same path as the gene-level metrics cells in this notebook,
# so whichever cell runs last determines the file's contents.
pd.DataFrame(metrics, columns=['tool', 'TPs', 'FPs', 'FNs', 'Precision', 'Recall', 'F1 score', 'Jaccard distance']).to_excel('../Results/quality_metrics.xlsx', index=False)

In [None]:
# Gene-level quality metrics: compare each tool's UniProt entries with the
# formicicum reference entries and save one row per tool.
# NOTE(review): an earlier cell in this notebook performs the same gene-level
# comparison and writes the same file; consider keeping only one of them.
formicicum = pd.read_csv(
    '../Results/formicicum_uniprotinfo.tsv',
    sep='\t',
)
formicicum_genes = formicicum['Entry'].tolist()

metrics = []
for tool in ('merlin_blast', 'merlin_bit', 'carveme', 'kbase'):
    tool_uniprotinfo = pd.read_csv(f'../Results/{tool}_uniprotinfo.tsv', sep='\t')
    tool_genes = tool_uniprotinfo['Entry'].tolist()

    # Single pass over the tool's entries: each one is either shared with the
    # reference (true positive) or absent from it (false positive).
    TPs = 0
    FPs = 0
    for entry in tool_genes:
        if entry in formicicum_genes:
            TPs += 1
        else:
            FPs += 1

    # Reference entries the tool did not recover.
    FNs = sum(1 for entry in formicicum_genes if entry not in tool_genes)

    metrics.append([tool, TPs, FPs, FNs, *calculate_quality_metrics(TPs, FPs, FNs)])

metric_columns = [
    'tool', 'TPs', 'FPs', 'FNs',
    'Precision', 'Recall', 'F1 score', 'Jaccard distance',
]
metrics_table = pd.DataFrame(metrics, columns=metric_columns)
metrics_table.to_excel('../Results/quality_metrics.xlsx', index=False)