In [98]:
import libsbml
import pandas as pd
import numpy as np
from tools import *

In [215]:
# Base name shared by the SBML files this notebook reads and writes.
model_name = "Zerrouk2024_M2"
# CSV table holding the species annotations.
csv_file = "Sources/Zerrouk2024.csv"
# Input model, annotated output, and annotation-free output.
sbml_file = "Models/" + model_name + "_cleaned.sbml"
save_file = "Models/" + model_name + ".sbml"
strip_file = "Models/" + model_name + "_no_annotation.sbml"

In [216]:
# Parse the cleaned SBML file, then take the 'qual' plugin of its model;
# the following cells operate on that plugin object.
reader = libsbml.SBMLReader()
document = reader.readSBML(sbml_file)
sbml_model = document.getModel()
model = sbml_model.getPlugin('qual')

## Model statistics

In [217]:
# Size of the model: number of qualitative species and of transitions.
num_species, num_transitions = (
    model.getNumQualitativeSpecies(),
    model.getNumTransitions(),
)
print(
    f"Number of qualitative species: {num_species}",
    f"Number of transitions: {num_transitions}",
    sep="\n",
)

Number of qualitative species: 254
Number of transitions: 197


## Set metaids of the model

The default metaids given by GINsim are randomly generated and too long.
We need to change them to a shorter format.


In [218]:
def generateMetaId(index):
    """Return the short metaid for ``index``: 'metaid_' plus the integer zero-padded to 7 digits."""
    return "metaid_{:07d}".format(index)

# Give every qualitative species a short sequential metaid.
# Numbering starts at 2, so metaid_0000001 is never assigned by this loop.
# NOTE(review): confirm that skipping 1 is intentional.
for meta_index, species in enumerate(model.getListOfQualitativeSpecies(), start=2):
    species.setMetaId(generateMetaId(meta_index))

# libsbml.writeSBMLToFile(document, 'test.sbml')

## Species annotation 

In [219]:
# Columns of the species-annotation CSV used by this notebook; all are read as strings.
annotation_columns = [
    'id', 'mapName', 'elementId', 'type', 'name', 'fullName',
    'HGNC', 'ENTREZ', 'REFSEQ', 'UNIPROT', 'CHEBI', 'complexId',
]
reference_df_all = pd.read_csv(csv_file, usecols=annotation_columns, dtype=str)

# Keep only the rows of the RA_M2_macrophage map and blank out missing values
# (NaN as well as the literal string 'nan') so later cells can compare against ''.
is_m2_map = reference_df_all['mapName'] == 'RA_M2_macrophage'
reference_df = reference_df_all[is_m2_map].fillna('').replace('nan', '')
reference_df

Unnamed: 0,id,type,name,fullName,complexId,mapName,elementId,HGNC,ENTREZ,UNIPROT,REFSEQ,CHEBI
2780,281562,Compartment,M2 macrophage: cytoplasm,,,RA_M2_macrophage,ca18,,,,,
2781,281563,Protein,IL1B,interleukin 1 beta,,RA_M2_macrophage,sa2288,5992;5992;5992,3553;3553;3553,P01584;P01584,NM_000576,
2782,281564,Protein,GAB2,GRB2 associated binding protein 2,,RA_M2_macrophage,sa2078,14458,9846,Q9UQC2,NM_080491,
2783,281565,Protein,PIK3AP1,phosphoinositide-3-kinase adaptor protein 1,,RA_M2_macrophage,sa3304,30034;30034;30034,118788;118788;118788,Q6ZUJ8;Q6ZUJ8,NM_152309,
2784,281566,Protein,JNK1,,,RA_M2_macrophage,sa1992,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
3558,282340,Phenotype,Matrix degradation,,,RA_M2_macrophage,sa2120,,,,,
3559,282341,Phenotype,proliferation/survival,,,RA_M2_macrophage,sa2122,,,,,
3560,282342,Phenotype,angiogenesis,,,RA_M2_macrophage,sa2115,,,,,
3561,282343,Phenotype,inflammation,,,RA_M2_macrophage,sa2119,,,,,


In [220]:
# Number of rows per entity type in the map-specific table
type_column = reference_df['type']
type_column.value_counts()

type
Protein            496
Complex             91
Degraded            59
Gene                59
RNA                 59
Compartment          9
Phenotype            7
Simple molecule      3
Name: count, dtype: int64

In [221]:
# Show only the rows whose type is 'Phenotype'
is_phenotype = reference_df['type'] == 'Phenotype'
reference_df.loc[is_phenotype]

Unnamed: 0,id,type,name,fullName,complexId,mapName,elementId,HGNC,ENTREZ,UNIPROT,REFSEQ,CHEBI
3556,282338,Phenotype,T cells activation,,,RA_M2_macrophage,sa2121,,,,,
3557,282339,Phenotype,apoptosis,,,RA_M2_macrophage,sa2114,,,,,
3558,282340,Phenotype,Matrix degradation,,,RA_M2_macrophage,sa2120,,,,,
3559,282341,Phenotype,proliferation/survival,,,RA_M2_macrophage,sa2122,,,,,
3560,282342,Phenotype,angiogenesis,,,RA_M2_macrophage,sa2115,,,,,
3561,282343,Phenotype,inflammation,,,RA_M2_macrophage,sa2119,,,,,
3562,282344,Phenotype,Cell chemotaxis/migration,,,RA_M2_macrophage,sa2118,,,,,


### Add full name to notes

In [222]:
# Attach the full name from the reference table as the notes of the matching species.
for species_id, full_name in zip(reference_df['elementId'], reference_df['fullName']):
    oneSpecies = model.getQualitativeSpecies(species_id)
    # Skip rows without a species in the model, and rows without a full name.
    if oneSpecies is None or full_name == '':
        continue
    oneSpecies.setNotes(full_name, True)

# libsbml.writeSBMLToFile(document, 'test.sbml')

### Annotate biological identity

In [223]:
# Entity type -> (CSV column holding its identifiers, knowledge source name
# passed to createAnnotationString). These types are annotated with 'bqbiol:is'.
IDENTITY_SOURCES = {
    'Protein': ('UNIPROT', 'uniprot'),
    'Gene': ('ENTREZ', 'ncbigene'),
    'RNA': ('REFSEQ', 'refseq'),
    'Simple molecule': ('CHEBI', 'chebi'),
}

# Go over each row in the reference_df
for index, row in reference_df.iterrows():
    species_id = row['elementId']
    oneSpecies = model.getQualitativeSpecies(species_id)
    if oneSpecies is None:
        # This row has no counterpart in the model.
        continue

    entity_type = row['type']
    if entity_type in IDENTITY_SOURCES:
        id_column, knowledge_source = IDENTITY_SOURCES[entity_type]
        if row[id_column] == '':
            continue
        # The qualifier is chosen per row. It must not be shared across iterations,
        # otherwise rows handled after a Complex would inherit 'bqbiol:hasPart'.
        qualifier = 'bqbiol:is'
        # Deduplicate and sort so the written annotation is reproducible between runs.
        list_of_ids = sorted(set(str(row[id_column]).split(';')))
    elif entity_type == 'Complex':
        knowledge_source = 'uniprot'
        qualifier = 'bqbiol:hasPart'
        # Components are the rows whose complexId points at this complex's id.
        member_ids = reference_df.loc[reference_df['complexId'] == row['id'], 'UNIPROT']
        member_ids = member_ids.dropna().replace('', pd.NA).dropna()
        # NOTE(review): a complex without any UniProt component is still passed on
        # with an empty list, as before — confirm createAnnotationString handles that.
        list_of_ids = sorted(set(member_ids.str.split(';').explode()))
    else:
        # Other types (e.g. Phenotype, Compartment, Degraded) get no identity annotation.
        continue

    meta_id = oneSpecies.getMetaId()
    AnnotationString = createAnnotationString(qualifier, knowledge_source, RDF_TAG, list_of_ids, meta_id)
    oneSpecies.setAnnotation(AnnotationString)

### Check which species have no annotation

In [224]:
# Ids of all qualitative species that carry no annotation yet
unannotated_species = [
    species.getId()
    for species in model.getListOfQualitativeSpecies()
    if not species.isSetAnnotation()
]
print(len(unannotated_species))

27


In [225]:
# For each species still lacking an annotation, print its name followed by the
# type recorded for its id in reference_df (or a notice when the id is absent).
for unannot in unannotated_species:
    print(model.getQualitativeSpecies(unannot).getName())
    matching_types = reference_df.loc[reference_df['elementId'] == unannot, 'type']
    if not matching_types.empty:
        print(matching_types.iloc[0])
    else:
        print(f"No type found for {unannot}")

MEKK1
Protein
TPL2
Protein
JNK1_phosphorylated_M2 macrophage: cytoplasm
Protein
JNK1_phosphorylated_M2 macrophage:nucleus
Protein
apoptosis_M2_macrophage_phenotype
Phenotype
inflammation_signal_phenotype
Phenotype
Matrix degradation_signal_phenotype
Phenotype
T cells activation_signal_phenotype
Phenotype
proliferation/survival_M2_macrophage_phenotype
Phenotype
SARA
Protein
cFLIP_ubiquitinated
Protein
Shc_phosphorylated
Protein
VegfR1
Protein
ASC
Protein
IKK2_phosphorylated
Protein
IKK1_phosphorylated
Protein
JAK2
No type found for sa3788
PTK2
No type found for sa3795
TRAF3IP2_phosphorylated
No type found for sa3796
proliferation/survival_signal_phenotype
No type found for sa3800
apoptosis_signal_phenotype
No type found for sa3801
Cell chemotaxis/migration_signal_phenotype
No type found for sa3802
DUSP1
No type found for sa3806
SMAD7
No type found for sa3807
mcl1
No type found for sa3809
CYLD
No type found for sa3810
GSK3B
No type found for sa3812


In [226]:
# Second pass: look the remaining species up in the full CSV (all maps),
# first by id and then by name, and annotate proteins and complexes.
for unannot in unannotated_species:
    oneSpecies = model.getQualitativeSpecies(unannot)

    # First try the id.
    species_id = oneSpecies.getId()
    match = reference_df_all[reference_df_all['elementId'] == species_id].head(1)
    if match.empty:
        # Then try the name, keeping only the part before the first underscore.
        name = oneSpecies.getName().split('_')[0]
        match = reference_df_all[reference_df_all['name'] == name].head(1)
    if match.empty:
        print(f"No match found for {unannot}: {name}")
        continue

    # Work on the single matched row so that column lookups yield scalars.
    matched_row = match.iloc[0]
    entity_type = matched_row['type']
    meta_id = oneSpecies.getMetaId()

    if entity_type == 'Protein':
        uniprot_value = matched_row['UNIPROT']
        # reference_df_all keeps NaN for missing values, hence notna rather than != ''.
        if pd.notna(uniprot_value):
            # Deduplicate and sort so the written annotation is reproducible between runs.
            list_of_ids = sorted(set(str(uniprot_value).split(';')))
            print(list_of_ids)
            AnnotationString = createAnnotationString('bqbiol:is', 'uniprot', RDF_TAG, list_of_ids, meta_id)
            oneSpecies.setAnnotation(AnnotationString)
    elif entity_type == 'Complex':
        # Compare complexId against the scalar id of the matched row. Stringifying
        # the one-row Series would give its repr, which never equals a complexId.
        components = reference_df_all.loc[reference_df_all['complexId'] == matched_row['id'], 'UNIPROT']
        components = components.dropna().replace('', pd.NA).dropna()
        # NOTE(review): an empty component list is still passed on, as before —
        # confirm createAnnotationString handles that.
        components_list = sorted(set(components.str.split(';').explode()))
        AnnotationString = createAnnotationString('bqbiol:hasPart', 'uniprot', RDF_TAG, components_list, meta_id)
        oneSpecies.setAnnotation(AnnotationString)

# Debug dump, disabled like the equivalent lines in the earlier cells.
# libsbml.writeSBMLToFile(document, 'test.sbml')

['Q16665']
['Q16665']
['P42081']
['Q9Y4K3']
['P01374']
['P13501']


1

In [227]:
# Recompute which qualitative species still have no annotation and list their ids
unannotated_species = [
    species.getId()
    for species in model.getListOfQualitativeSpecies()
    if not species.isSetAnnotation()
]
print(len(unannotated_species))
print(unannotated_species)

21
['sa1976', 'sa1977', 'sa1993', 'sa1994', 'sa2114', 'sa2119', 'sa2122', 'sa2147', 'sa2180', 'sa2218', 'sa2254', 'sa2285', 'sa2572', 'sa2577', 'sa3796', 'sa3800', 'sa3801', 'sa3806', 'sa3807', 'sa3810', 'sa3812']


In [228]:
# Print the name of every remaining unannotated species and the type recorded
# for its id in reference_df, or a notice when the id is not in that table.
for unannot in unannotated_species:
    print(model.getQualitativeSpecies(unannot).getName())
    type_matches = reference_df.loc[reference_df['elementId'] == unannot, 'type']
    if not type_matches.empty:
        print(type_matches.iloc[0])
    else:
        print(f"No type found for {unannot}")

MEKK1
Protein
TPL2
Protein
JNK1_phosphorylated_M2 macrophage: cytoplasm
Protein
JNK1_phosphorylated_M2 macrophage:nucleus
Protein
apoptosis_M2_macrophage_phenotype
Phenotype
inflammation_signal_phenotype
Phenotype
proliferation/survival_M2_macrophage_phenotype
Phenotype
SARA
Protein
cFLIP_ubiquitinated
Protein
Shc_phosphorylated
Protein
VegfR1
Protein
ASC
Protein
IKK2_phosphorylated
Protein
IKK1_phosphorylated
Protein
TRAF3IP2_phosphorylated
No type found for sa3796
proliferation/survival_signal_phenotype
No type found for sa3800
apoptosis_signal_phenotype
No type found for sa3801
DUSP1
No type found for sa3806
SMAD7
No type found for sa3807
CYLD
No type found for sa3810
GSK3B
No type found for sa3812


In [251]:
# Remove annotation for phenotypes to avoid mismatch.
# Lookup: elementId -> type, taken from the first reference_df row with that id.
first_type_by_id = reference_df.drop_duplicates('elementId').set_index('elementId')['type']
for species in model.getListOfQualitativeSpecies():
    species_id = species.getId()
    # Ids absent from the table yield None here and are left untouched.
    if first_type_by_id.get(species_id) == 'Phenotype':
        print(species_id)
        species.unsetAnnotation()

sa2114
sa2115
sa2119
sa2120
sa2121
sa2122


In [252]:
# Write the annotated document to save_file; the call's return value is the cell output.
libsbml.writeSBMLToFile(document, save_file)

1

## Strip annotations and notes

In [258]:
# Re-read the annotated model written to save_file and produce a copy without any
# annotations or notes. save_file and strip_file come from the configuration cell,
# so this cell always processes the model this notebook just annotated; it no
# longer redefines model_name to a different model.
reader = libsbml.SBMLReader()
document = reader.readSBML(save_file)
model = document.getModel().getPlugin('qual')

# Process species
for species in model.getListOfQualitativeSpecies():
    species.unsetAnnotation()
    species.unsetNotes()

# Process transitions, including each of their inputs and outputs
for transition in model.getListOfTransitions():
    transition.unsetAnnotation()
    transition.unsetNotes()
    # Loop names avoid shadowing the builtin input().
    for transition_input in transition.getListOfInputs():
        transition_input.unsetAnnotation()
        transition_input.unsetNotes()
    for transition_output in transition.getListOfOutputs():
        transition_output.unsetAnnotation()
        transition_output.unsetNotes()

libsbml.writeSBMLToFile(document, strip_file)

1