In [2]:
import glob
import io
import json
import os
import subprocess

import cobra
import pandas as pd
from cobra import Model, Reaction, Metabolite
# Project root and data directory.
# NOTE(review): absolute paths on the original author's machine; edit `path`
# to run this notebook elsewhere.
path = "/Users/maureencarey/local_documents/work/comparative_parasite_models/paradigm"
data_path = os.path.join(path, "data")  # derived from `path` instead of repeating the literal

# Curated Plasmodium falciparum model: loading is currently disabled (per the
# original note, iPfal17 is not the curated version; iPfal18 is needed).
# The model lives in data/other_models; no chdir is done for it while the load
# is commented out, so a missing folder can no longer abort this cell.
# iPfal17 = cobra.io.read_sbml_model(os.path.join(data_path, "other_models", "iPfal17.xml"))

# load universal model (the working directory stays at data_path for later cells)
os.chdir(data_path)
universal_model = cobra.io.load_json_model('universal_model_may2018.json')
  
# Prepare the universal model: open exchanges, add missing ones, drop biomass.

# 1) every existing exchange reaction (id starting with 'EX_') becomes fully reversible
for exchange in universal_model.reactions:
    if not exchange.id.startswith('EX_'):
        continue
    exchange.lower_bound = -1000.
    exchange.upper_bound = 1000.

# 2) every metabolite whose id ends in '_e' gets an exchange reaction if it has none yet
for metabolite in universal_model.metabolites:
    if metabolite.id.endswith('_e') and 'EX_'+metabolite.id not in universal_model.reactions:
        universal_model.add_boundary(metabolite, type = 'exchange')

# 3) remove all reactions whose id starts with 'biomass' (case-insensitive);
#    the list is built first so the model is not modified while it is iterated
biomass_reactions = [r for r in universal_model.reactions if r.id.lower().startswith('biomass')]
for biomass_rxn in biomass_reactions:
    biomass_rxn.remove_from_model()

In [None]:
# generic things - use for all species

# get available compartments: the text after the last '_' of every metabolite id
# in the universal model, stored with a leading underscore (e.g. '_c').
# Ids that look like boundary reactions (EX_/DM_/SK_ prefix) are skipped.
compartment_options = {
    '_' + met.id.rsplit('_', 1)[-1]
    for met in universal_model.metabolites
    if not met.id.startswith(('EX_', 'DM_', 'SK_'))
}

# get metabolites involved in reactions in universal model in dictionary format

def _split_compartment(met_id):
    """Split a metabolite id at its last underscore.

    Returns a (base_id, compartment) tuple in which the compartment keeps its
    leading underscore, e.g. 'atp_c' -> ('atp', '_c') and 'h_im' -> ('h', '_im').
    This is the same "text after the last underscore" rule used to build
    compartment_options, so multi-letter compartments are handled (slicing the
    last two characters was only right for one-letter compartments).
    An id without any underscore is returned as (met_id, '').
    """
    base, sep, comp = met_id.rpartition('_')
    if not sep:
        return met_id, ''
    return base, sep + comp

# universal_dict maps every universal reaction id to a dictionary with
#   'reactants'   : reactant ids without compartment suffix
#   'products'    : product ids without compartment suffix
#   'compartment' : list of the distinct compartment suffixes of all its metabolites
universal_dict = dict()
for rxn in universal_model.reactions:
    universal_dict[rxn.id] = {
        'reactants': [_split_compartment(m.id)[0] for m in rxn.reactants],
        'products': [_split_compartment(m.id)[0] for m in rxn.products],
        'compartment': list({_split_compartment(m.id)[1] for m in rxn.metabolites}),
    }

# map reactions to duplicate reactions in different compartments
#
# Two reactions are alternatives of each other when their compartment-free
# reactant lists AND product lists are equal (same ids, same order).
# Reactions are grouped once by that signature instead of comparing every
# reaction against every other one, which gives the same mapping in linear time.
reactions_by_signature = dict()
for rxn_id, data in universal_dict.items():
    signature = (tuple(data['reactants']), tuple(data['products']))
    reactions_by_signature.setdefault(signature, []).append(rxn_id)

# universal_dict_with_alts = universal_dict plus, per reaction, the key
# 'alternative_reactions': {alternative reaction id: its compartment list}
universal_dict_with_alts = dict()
for reaction, data in universal_dict.items():
    signature = (tuple(data['reactants']), tuple(data['products']))
    data_with_options = data.copy()
    data_with_options['alternative_reactions'] = {
        potential_rxn: universal_dict[potential_rxn]['compartment']
        for potential_rxn in reactions_by_signature[signature]
        if potential_rxn != reaction
    }
    universal_dict_with_alts[reaction] = data_with_options
    
# database mapping # MUST UPDATE
# Each list below holds genome identifiers grouped under the name of one database.
# The lists are used further down (by membership tests on the species name) to
# decide which compartments a species' model may contain.
# NOTE(review): presumably these mirror the organism lists of the corresponding
# EuPathDB sites at the time of writing - confirm before reuse.
plasmodb = ["PadleriG01","PbergheiANKA","PbillcollinsiG01","PblacklockiG01","Pchabaudichabaudi","PcoatneyiHackeri","PcynomolgiB","PcynomolgiM","Pfalciparum3D7","PfalciparumIT","PfragileNilgiri","PgaboniG01","PgaboniSY75","Pgallinaceum8A","PinuiSanAntonio1","PknowlesiH","PknowlesiMalayanPk1A","PmalariaeUG01","PovalecurtisiGH01","PpraefalciparumG01","PreichenowiCDC","PreichenowiG01","PrelictumSGS1-like","PvinckeipetteriCR","Pvinckeivinckeivinckei","PvivaxP01","PvivaxSal1","Pyoeliiyoelii17X","Pyoeliiyoelii17XNL","PyoeliiyoeliiYM"]
# MUST ASK PERMISSION TO USE 3D7
cryptodb = ["Candersoni30847","Chominis30976","ChominisTU502","ChominisTU502_2012","ChominisUdeA01","CmeleagridisUKMEL1","CmurisRN66","CparvumIowaII","CtyzzeriUGA55", "Cubiquitum39726","CveliaCCMP2878", "GniphandrodesUnknown", "VbrassicaformisCCMP3155"]
giardiadb = ["GintestinalisAssemblageADH", "GintestinalisAssemblageAWB", "GintestinalisAssemblageBGS", "GintestinalisAssemblageBGS_B", "GintestinalisAssemblageEP15", "SsalmonicidaATCC50377"]
tritrypdb = ["BayalaiB08-376","CfasciculataCfCl","EmonterogeiiLV88","LaethiopicaL147", "LarabicaLEM1108", "LbraziliensisMHOMBR75M2903", "LbraziliensisMHOMBR75M2904", "LdonovaniBPK282A1", "LenriettiiLEM3045", "LgerbilliLEM452","LinfantumJPCM5", "LmajorFriedlin", "LmajorLV39c5", "LmajorSD75.1", "LmexicanaMHOMGT2001U1103", "LpanamensisMHOMCOL81L13","LpanamensisMHOMPA94PSC1", "LpyrrhocorisH10", "LseymouriATCC30220", "LspMARLEM2494", "LtarentolaeParrotTarII", "LtropicaL590", "LturanicaLEM423", "PconfusumCUL13","TbruceigambienseDAL972", "TbruceiLister427", "TbruceiTREU927", "TcongolenseIL3000", "TcruziCLBrener", "TcruziCLBrenerEsmeraldo-like", "TcruziCLBrenerNon-Esmeraldo-like", "TcruzicruziDm28c","TcruziDm28c", "TcruzimarinkelleiB7", "TcruziSylvioX10-1", "TcruziSylvioX10-1-2012","TevansiSTIB805", "TgrayiANR4", "TrangeliSC58", "TvivaxY486", "TtheileriEdinburgh"]
# MUST ASK PERMISSION TO USE MANY OF THE TRITRYP GENOMES - S.M. Beverley at Wash U
trichdb = ["TvaginalisG3"]
amoebadb = ["AcastellaniiNeff", "EdisparSAW760", "EhistolyticaHM1IMSS-A", "EhistolyticaHM1IMSS-B", "EhistolyticaHM1IMSS", "EhistolyticaHM3IMSS", "EhistolyticaKU27", "EinvadensIP1", "EmoshkovskiiLaredo", "EnuttalliP19", "NfowleriATCC30863"]
toxodb = ["CcayetanensisCHN_HEN01", "CsuisWienI","EacervulinaHoughton", "EbrunettiHoughton", "EfalciformisBayerHaberkorn1970", "EmaximaWeybridge", "EmitisHoughton", "EnecatrixHoughton", "EpraecoxHoughton", "EtenellaHoughton", "HhammondiHH34", "NcaninumLIV", "SneuronaSN3", "SneuronaSOSN1", "TgondiiARI", "TgondiiFOU", "TgondiiGAB2-2007-GAL-DOM2", "TgondiiGT1", "TgondiiMAS", "TgondiiME49", "Tgondiip89", "TgondiiRH", "TgondiiRUB", "TgondiiTgCatPRC2", "TgondiiVAND", "TgondiiVEG"]
microsporidiadb = ["AalgeraePRA109", "AalgeraePRA339", "EaedisUSNM41457", "EbieneusiH348", "EcanceriGB1","EcuniculiEC1", "EcuniculiEC2", "EcuniculiEC3", "EcuniculiGBM1", "EhellemATCC50504", "EhellemSwiss", "EhepatopenaeiTH1","EintestinalisATCC50506","EromaleaeSJ2008","Heriocheircanceri","HeriocheirGB1", "MdaphniaeUGP3", "NausubeliERTm2", "NausubeliERTm6", "NbombycisCQ1", "NceranaeBRL01", "NdisplodereJUm2807","NparisiiERTm1", "NparisiiERTm3", "OcolligataOC4", "PneurophiliaMK1", "Slophii42_110", "ThominisUnknown", "VcorneaeATCC50505", "Vculicisfloridensis"]
piroplasmadb = ["BbigeminaBOND", "BbovisT2Bo", "BmicrotiRI","BovataMiyake", "CfelisWinnie", "TannulataAnkara", "TequiWA", "TorientalisShintoku", "TparvaMuguga"]
# fungidb is written across two physical lines; it is a single list.
fungidb = ["AaculeatusATCC16872", "AbrasiliensisCBS101740", "AcampestrisIBT28561", "Acandida2VRR", "AcarbonariusITEM5010", "AclavatusNRRL1", "AfischeriNRRL181","AflavusNRRL3357","AfumigatusA1163","AfumigatusAf293", "AglaucusCBS516.65","AinvadansNJM9701", "AlaibachiiNc14", "AluchuensisCBS106.47", "AmacrogynusATCC38327", "AnidulansFGSCA4", "AnigerATCC1015", "AnigerCBS513-88", "AnovofumigatusIBT16806", "AochraceoroseusIBT24754","AoryzaeRIB40", "AsteyniiIBT23096", "AsydowiiCBS593.65", "AterreusNIH2624", "AtubingensisCBS134.48", "AversicolorCBS583.65", "AwentiiDTO134E9","AzonataCBS506.65", "BcinereaB05-10", "BdendrobatidisJEL423", "CalbicansSC5314", "CalbicansSC5314_B", "CalbicansWO1", "CaurisB8441", "Ccinereaokay7-130", "CdeuterogattiiR265", "CgattiiCA1873", "CgattiiEJB2", "CgattiiIND107", "CgattiiWM276", "CglabrataCBS138", "CimmitisH538-4", "CimmitisRS", "ClusitaniaeATCC42720", "CneoformansB-3501A", "CneoformansH99", "CneoformansJEC21", "CneoformansKN99", "CposadasiiC735deltSOWgp", "CposadasiiRMSCC3488", "CposadasiiRMSCC3700", "CposadasiiSilveira", "FfujikuroiIMI58289", "FgraminearumPH-1", "Foxysporum26406", "Foxysporum4287", "Foxysporum54006", "FoxysporumFo47", "Foxysporumrace1", "Foxysporumrace4", "Fverticillioides7600", "HarabidopsidisEmoy2", "HcapsulatumG186AR", "HcapsulatumG217B", "HcapsulatumH143", "HcapsulatumH88", "HcapsulatumNAm1", "McircinelloidesCBS277-49", "MglobosaCBS7966", "Mlarici-populina98AG31", "Moryzae70-15", "MoryzaeBR32", "NcrassaOR74A", "NdiscretaFGSC8579", "NtetraspermaFGSC2508", "PaphanidermatumDAOMBR444", "ParrhenomanesATCC12531", "PblakesleeanusNRRL1555", "PbrasiliensisPb03", "PbrasiliensisPb18", "PcapsiciLT1534", "PchrysosporiumRP-78", "PcinnamomiCBS144-22", "PgraminisCRL75-36-700-3", "PinfestansT30-4", "PirregulareDAOMBR486", "PiwayamaiDAOMBR242034", "PjiroveciiSE8", "PlutziiPb01", "PparasiticaINRA-310", "PramorumPr-102", "PrubensWisconsin54-1255", "PsojaeP6497", "PultimumBR650", "PultimumDAOMBR144", "PvexansDAOMBR484", "RdelemarRA99-880", 
"ScerevisiaeS288c", "SdiclinaVS20", "SjaponicusyFS275", "Smacrosporak-hell", "SoctosporusyFS286", "SparasiticaCBS223", "Spombe972h", "SpunctatusDAOMBR117", "SreilianumSRZ2", "Sschenckii1099-18", "Ssclerotiorum1980UF-70", "TmarneffeiATCC18224", "TmesentericaDSM1558", "TreeseiQM6a", "TstipitatusATCC10500", "Umaydis521", "Ureesii1704", "YlipolyticaCLIB122", "ZtriticiIPO323"]


In [None]:
# load annotation data
# One tab-separated file per genome is read from data/diamond_output_BiGG;
# the files have no header row, so the column names are supplied here.
columns = ['query_gene', 'BiGG_gene', 'pident', 'length', 'mismatch', 'gapopen','qstart', 'qend', 'sstart', 'send', 'evalue', 'score']
os.chdir(data_path) # CHECK PATH

# annotations_dict maps the annotation file name (without its directory) to its table.
# sorted() makes the processing order reproducible; glob itself returns files in arbitrary order.
annotations_dict = dict()
for filename in sorted(glob.glob(os.path.join(data_path, 'diamond_output_BiGG', '*_Jul2018_BiGG.tsv'))):
    annotations_dict[os.path.basename(filename)] = pd.read_table(filename, sep = '\t', names=columns)


In [None]:
# Map to BiGG functions for model generation
# CarveMe cannot be used without cplex, so the relevant CarveMe code is copied
# into this notebook; the BiGG GPR table itself comes from CarveMe.

os.chdir(data_path)
gprs = pd.read_csv('bigg_gprs.csv') # From CarveMe
# drop the first two characters of every reaction id
# (presumably an 'R_' prefix - TODO confirm against bigg_gprs.csv)
gprs['reaction'] = [rxn_id[2:] for rxn_id in gprs['reaction']]
# keep only rules for reactions present in the universal model (updated from CarveMe).
# .copy() makes the filtered table an independent frame, so adding columns to it
# later does not write into a slice of the unfiltered table.
universal_reaction_ids = {rxn.id for rxn in universal_model.reactions}
gprs = gprs[gprs['reaction'].isin(universal_reaction_ids)].copy()

def merge_subunits(genes): # From CarveMe
    """ Build the boolean rule for one protein complex from its subunit genes.
    Missing values are ignored; the remaining genes are sorted and joined with
    ' and '. The rule is wrapped in parentheses only when it has several genes.
    Args: genes (pandas.Series): subunit gene ids (may contain missing values)
    Returns: str: boolean rule, or None when no gene is left
    """
    present = genes.dropna()
    if len(present) == 0:
        return None

    rule = ' and '.join(sorted(present))
    return '(' + rule + ')' if len(present) > 1 else rule
        
def merge_subunit_scores(scores): # From CarveMe
    """ Score a protein complex from the scores of its subunit genes.
    A subunit without a score counts as 0, then the mean over all subunits is taken.
    Args: scores (pandas.Series): individual gene scores
    Returns: float: mean score of the complex
    """
    scores_without_gaps = scores.fillna(0)
    return scores_without_gaps.mean()

def merge_proteins(proteins): # From CarveMe
    """ Build the boolean rule for a reaction from its isozymes.
    Missing entries and duplicates are dropped; the remaining protein rules are
    sorted and joined with ' or '. Parentheses are added only for several isozymes.
    Args: proteins (pandas.Series): protein rules (may contain missing values)
    Returns: str: boolean rule, or None when no protein is left
    """
    isozymes = set(proteins.dropna())
    if len(isozymes) == 0:
        return None

    rule = ' or '.join(sorted(isozymes))
    if len(isozymes) == 1:
        return rule
    return '(' + rule + ')'

def merge_protein_scores(scores): # From CarveMe
    """ Score a reaction from the scores of its isozymes.
    The reaction gets the highest isozyme score; missing scores are skipped.
    Args: scores (pandas.Series): protein scores
    Returns: float: highest score among the isozymes
    """
    best_score = scores.max(skipna=True)
    return best_score

def reaction_scoring(annotation, gprs, spontaneous_score=0.0, debug_output=None): # From CarveMe
    """ Calculate reaction scores from gene annotation hits and BiGG GPR rules.

    Steps: keep the best-scoring hit per BiGG gene, join the hits onto the GPR
    table, merge gene scores into protein (complex) scores, then merge protein
    scores into one score and one GPR string per reaction.

    Args: annotation (pandas.DataFrame): gene annotation results; the columns
            'query_gene', 'BiGG_gene' and 'score' are used
        gprs (pandas.DataFrame): BiGG GPR rules; the columns 'model', 'gene',
            'protein' and 'reaction' are used. The frame is NOT modified.
        spontaneous_score (float): score to give to spontaneous reactions (default: 0.0)
        debug_output: accepted for signature compatibility; not used here
    Returns: pandas.DataFrame: one row per reaction with columns 'reaction',
        'GPR' and 'score'; rows with a missing value are dropped
    """

    # filter best match for each gene: after sorting by descending score the
    # first row of every BiGG gene is its best hit (rows without a BiGG gene
    # cannot match any rule and are dropped)
    gene2gene = annotation.dropna(subset=['BiGG_gene']) \
                          .sort_values(by='score', ascending=False) \
                          .drop_duplicates(subset='BiGG_gene', keep='first')
    # merge with gpr table
    # The '<model>.<gene without its two-character prefix>' key is added to a
    # new frame (assign) so the caller's table is left untouched.
    gprs = gprs.assign(BiGG_gene=gprs['model'].astype(str) + '.' + gprs['gene'].str[2:])
    gene_scores = pd.merge(gene2gene, gprs, how='right')
    # add default scores for spontaneous genes
    spontaneous = {'G_s0001', 'G_S0001', 'G_s_0001', 'G_S_0001', 'G_KPN_SPONT'}
    is_spontaneous = gene_scores.gene.isin(spontaneous)
    gene_scores.loc[is_spontaneous, 'score'] = spontaneous_score
    gene_scores.loc[is_spontaneous, 'query_gene'] = 'spontaneous'
    # from gene to protein scores
    protein_scores = gene_scores.groupby(['protein', 'reaction', 'model'], as_index=False) \
        .agg({'query_gene': merge_subunits, 'score': merge_subunit_scores})
    protein_scores = protein_scores.rename(columns={'query_gene': 'GPR'})
    # from protein to reaction scores
    reaction_scores = protein_scores.groupby(['reaction'], as_index=False) \
        .agg({'GPR': merge_proteins, 'score': merge_protein_scores}).dropna()
    return reaction_scores

# Differences from the CarveMe workflow from here on:
# - reaction scores are not normalized
# - no optimization over high-scoring reactions is run, because ALL potential
#   reactions are included here, whether or not that yields a functional model
#   (CarveMe would maximize positive and minimize negative scores while keeping
#   the network functional)

scores_dict2 = dict()
for species in annotations_dict:
    print(species)
    # NOTE(review): only this one annotation file is scored; every other file
    # is printed and skipped - confirm whether the restriction is intentional
    if species != 'CsuisWienI_Jul2018_BiGG.tsv':
        continue
    scores_dict2[species] = reaction_scoring(annotations_dict[species], gprs)
    

In [None]:
# sanity check: is there an annotation table for this file name?
'CsuisWienI_Jul2018_BiGG.tsv' in annotations_dict

In [None]:

# # save all scores as json
# import json
# class JSONEncoder(json.JSONEncoder):
#     def default(self, obj):
#         if hasattr(obj, 'to_json'):
#             return obj.to_json(orient='records')
#         return json.JSONEncoder.default(self, obj)
# with open('scores_dict2_july23.json', 'w') as fp:
#     json.dump(scores_dict2, fp, cls=JSONEncoder)

In [None]:
os.chdir(data_path)

# Load previously saved reaction scores (this replaces whatever scores_dict2
# held before). The file is opened and parsed once, and closed again, instead
# of once per species.
# NOTE(review): this reads 'scores_dict2_july11.json' - confirm it is the
# intended snapshot.
with open(os.path.join(data_path, 'scores_dict2_july11.json')) as scores_file:
    saved_scores = json.load(scores_file)

# Each entry is itself a JSON string holding one score table; it is wrapped in
# StringIO because newer pandas versions no longer accept a literal JSON string.
scores_dict2 = dict()
for species in annotations_dict.keys():
    scores_dict2[species] = pd.read_json(io.StringIO(saved_scores[species]))
# distribution of scores? use for confidence?????
# duplicate reactions just in different compartments are being added - dealing with this farther down


In [None]:
# Build one draft model per species: copy the universal model and keep only the
# reactions whose annotation score is above the threshold, attaching their GPRs.
new_model_dict = dict()

for x in scores_dict2.keys():

    # species name = annotation file name without its suffix ###### CHECK ALL THIS
    if '_Jul2018_BiGG.tsv' in x:
        species = x.split('_Jul2018_BiGG.tsv')[0]
    elif '.tsv' in x:
        print('_Jul2018_BiGG.tsv not in the annotations file string, this might cause problems')
        species = x.split('.tsv')[0]
    else:
        print('_Jul2018_BiGG.tsv not in the annotations file string, this might cause problems')
        species = x

    model = universal_model.copy()
    print('copied universal')
    model.name = species
    model.id = species
    new_model_dict[species] = model

    print(species)
    starting = len(model.reactions)

    # reactions supported by a score above 10
    keep_scores = scores_dict2[x].loc[scores_dict2[x].score>10]

    # Reset for every species: the final check below must never see the
    # previous species' reactions (or an undefined name) when the duplicate
    # branch is taken.
    rxns_to_add = dict()
    if len(keep_scores.reaction) == len(set(keep_scores.reaction)):

        # reaction id -> GPR string
        rxns_to_add = dict(zip(keep_scores['reaction'], keep_scores['GPR']))

        model.remove_reactions([rxn for rxn in model.reactions if rxn.id not in rxns_to_add])

        # only write GPRs when none of the remaining reactions has one already
        if not any(rxn.gene_reaction_rule != '' for rxn in model.reactions):
            for rxn in model.reactions:
                rxn.gene_reaction_rule = rxns_to_add[rxn.id]
        else:
            print('some reactions already have GPRs')

        print('made new model from universal')
        model.repair()

        if len(rxns_to_add) == len(model.reactions):
            if starting > len(rxns_to_add):
                print(' ')
            else:
                print('error with original model, reactions already removed')
        else:
            print('error with reaction removal, resultant len(model.reactions) != rxns_to_keep')
    else:
        # the model is left as an unpruned copy of the universal model
        print('duplicate keep_scores.reaction')

    if len(rxns_to_add) != len(model.reactions):
        print('error in universal reaction pruning')
    #cobra.io.save_json_model(new_model_dict[species], "DIY1_"+species+".json")
    

In [None]:
# remove duplicate reactions in multiple compartments
total_compartments = ["_c","_e","_m","_ap","_fv","_k","_glc","_pm"]
# cytosol, extracellular, mitochondria, apicoplast, food vacuole, kinetoplast, glycosome, pseudomitochondria

# Compartments allowed per database. The table is checked from top to bottom
# and the first database list containing the species wins; species found in
# none of the lists get cytosol + extracellular only.
# Notes carried over from earlier versions of this cell:
# - cryptodb also holds non-Cryptosporidium genomes (CveliaCCMP2878 is Chromera,
#   GniphandrodesUnknown is Gregarina)
# - tritrypdb also holds non-Leishmania genomes (LpyrrhocorisH10 is Leptomonas,
#   PconfusumCUL13 and CfasciculataCfCl are kinetoplastids)
# - toxodb also holds CsuisWienI (Cystoisospora) and CcayetanensisCHN_HEN01 (Cyclospora)
# NOTE(review): those genomes receive the compartments of the database they are
# listed in - confirm this is intended.
compartments_by_database = [
    # Plasmodium = cytosol, extracellular, mitochondria, apicoplast, food vacuole
    (plasmodb, ["_c","_e","_m","_ap","_fv"]),
    # Leishmania = cytosol, extracellular, mitochondria, kinetoplast, glycosome
    (tritrypdb, ["_c","_e","_m","_k","_glc"]),
    # Cryptosporidium = cytosol, extracellular, pseudomitochondria (USE MITO)
    (cryptodb, ["_c","_e","_m"]),
    # Toxoplasma = cytosol, extracellular, mitochondria, apicoplast
    (toxodb, ["_c","_e","_ap","_m"]),
    # Giardia, Entamoeba = cytosol, extracellular
    (giardiadb + amoebadb, ["_c","_e"]),
]

compartment_dictionary = dict()
for species in new_model_dict.keys():
    model_compartments = ["_c","_e"]
    for database_members, allowed in compartments_by_database:
        if species in database_members:
            # every species gets its own list object
            model_compartments = list(allowed)
            break
    compartment_dictionary[species] = model_compartments
    

In [None]:
# For every species model: find reactions that touch a compartment the species
# does not have, swap them for an equivalent universal reaction in an allowed
# compartment when one exists, and otherwise rewrite them into the
# cytosol / extracellular space.
#
# NOTE(review): `hf` (providing met_ids_without_comp and get_comp) is not
# imported in any cell visible in this notebook; it must be in scope before
# this cell runs.
columns = ['species','reactions_removed1','mets_removed1','reactions_removed2','mets_removed2','reactions_added', 'mets_added','gene_change']
modifications = pd.DataFrame(index = list(new_model_dict.keys()), columns=columns)
inappropriate_compartments_that_remain = dict()
transport_for_inappropariate_compartment_dict = dict()

for species, model in new_model_dict.items():

    print(species)
    print('finding good or bad reactions')
    compartment = compartment_dictionary[species]
    # compartment suffixes that exist in the universal model but not in this species
    not_compartments = tuple(compartment_options - set(compartment))

    # A reaction is "bad" when at least one of its metabolite ids ends in a
    # disallowed compartment suffix. (Checking the ids directly is equivalent
    # to searching the reaction string for the suffix, without rebuilding that
    # string for every suffix.)
    good_rxns = list()
    bad_rxns = list()
    for rxn_object in model.reactions:
        if any(met.id.endswith(not_compartments) for met in rxn_object.metabolites):
            bad_rxns.append(rxn_object.id)
        else:
            good_rxns.append(rxn_object.id)

    print('found good or bad reactions, now doing things')
    bad_rxns_keep_rewrite = list()  # no usable alternative - will be rewritten below
    add_reaction = list()           # alternatives that fit this species' compartments
    remove_rxn = list()             # bad reactions replaced by an alternative
    for rxn_id in bad_rxns:
        alt_rxns = universal_dict_with_alts[rxn_id]['alternative_reactions']
        # an alternative is usable when all of its compartments are allowed
        usable_alts = [alt_id for alt_id, locations in alt_rxns.items()
                       if all(loc in compartment for loc in locations)]
        if usable_alts:
            add_reaction.extend(usable_alts)
            remove_rxn.append(rxn_id)
        else:
            bad_rxns_keep_rewrite.append(rxn_id)
    # de-duplicate while keeping a reproducible order
    add_reaction = list(dict.fromkeys(add_reaction))

    if len(bad_rxns) == len(remove_rxn) + len(bad_rxns_keep_rewrite):
        print('bad reactions are split into remove reactions and bad reactions to rewrite - math is good')

    print('no. reactions removed')
    print(len(remove_rxn))
    print('no. reactions to add')
    print(len(add_reaction))

    # remove reactions
    n_rxns_before_removal = len(model.reactions)
    model.remove_reactions(remove_rxn)
    model.repair()
    reactions_removed = n_rxns_before_removal - len(model.reactions)
    if len(remove_rxn) != reactions_removed:
        print('reaction not removed properly')

    # save this number: percent of the remaining reactions that still sit in a
    # disallowed compartment (0 for an empty model, avoiding a division by zero)
    n_rxns_remaining = len(model.reactions)
    if n_rxns_remaining:
        percent_remaining = (len(bad_rxns_keep_rewrite)/n_rxns_remaining)*100
    else:
        percent_remaining = 0.0
    inappropriate_compartments_that_remain[species] = percent_remaining
    print(percent_remaining)

    # single .loc call per cell: chained `frame.column.loc[...] = value` may
    # assign into a temporary copy instead of the frame
    modifications.loc[species, 'species'] = species
    modifications.loc[species, 'reactions_removed1'] = reactions_removed

    # add the alternative reactions (with any metabolites the model lacks)
    n_rxns_before_adding = len(model.reactions)
    rxns_to_add_list = list()
    for rxn_id in add_reaction: # there are ids in add_reaction that are in the model already
        rxn = universal_model.reactions.get_by_id(rxn_id).copy()
        for met in rxn.metabolites:
            if met.id not in model.metabolites:
                model.add_metabolites(met.copy())
        # if reaction is already there, it is because the reaction was in multiple compartments
        if rxn_id not in model.reactions:
            rxns_to_add_list.append(rxn)
    model.add_reactions(rxns_to_add_list)
    # number of reactions actually added in this step
    modifications.loc[species, 'reactions_added'] = len(model.reactions) - n_rxns_before_adding

    for rxn in model.reactions:
        if rxn.lower_bound == 0 and rxn.upper_bound == 0:
            print(rxn.id + ' has bounds == 0 in '+species)
            rxn.lower_bound = -1000.
            rxn.upper_bound = 1000.
            # NOTHING SHOULD PRINT - this was a problem in CarveMe

    # cobra.io.save_json_model(model, "TEST_DIY5_"+species+"_TEST.json")

    # ids in bad_rxns_keep_rewrite are unique and none of them was removed above
    fix_these_reactions_list = [model.reactions.get_by_id(rxn_id) for rxn_id in bad_rxns_keep_rewrite]

    reactions_added = list()
    transport_for_inappropariate_compartment = list()

    og = len(model.reactions)

    print('starting to move reactions to the right compartment, if this function isnt yet in BiGG')
    for rxn in fix_these_reactions_list:

        reactant_bases = [hf.met_ids_without_comp(model,m.id) for m in rxn.reactants]
        product_bases = [hf.met_ids_without_comp(model,m.id) for m in rxn.products]

        met_dict = dict()
        if reactant_bases != product_bases:
            for met, coefficient in rxn.metabolites.items():
                # move periplasmic metabolites to extracellular, everything else to cytosol
                target_suffix = '_e' if hf.get_comp(model,met.id) == '_p' else '_c'
                target_id = hf.met_ids_without_comp(model,met.id) + target_suffix
                if target_id in model.metabolites:
                    met2 = model.metabolites.get_by_id(target_id)
                else:
                    # NOTE(review): the copy keeps the original metabolite's
                    # `compartment` attribute; only its id is changed
                    met2 = met.copy()
                    met2.id = target_id
                    model.add_metabolites(met2)
                # Several metabolites of one reaction can land on the same
                # relocated metabolite; their coefficients are summed so none
                # of them silently replaces another.
                met_dict[met2] = met_dict.get(met2, 0) + coefficient
            met_dict = {m: c for m, c in met_dict.items() if c != 0}

        if met_dict:
            # rewritten copy of the reaction, id = original id + 'c'
            new_rxn = Reaction()
            new_rxn.add_metabolites(met_dict)
            new_rxn.name = rxn.name
            new_rxn.id = rxn.id+'c'
            new_rxn.lower_bound = rxn.lower_bound
            new_rxn.upper_bound = rxn.upper_bound
            new_rxn.gene_reaction_rule = rxn.gene_reaction_rule
            model.add_reactions([new_rxn])
            reactions_added.append(new_rxn.id)
        else:
            # pure transport between compartments (e.g. x_p + y_p => x_e + y_e),
            # or everything cancels after relocation: nothing to rewrite
            transport_for_inappropariate_compartment.append(rxn.id)

        # the original reaction is removed in both cases
        model.remove_reactions([rxn])

    model.repair()
    print('finished moving reactions to the right compartment')

    print('reactions added overall')
    print(len(model.reactions) - og)
    transport_for_inappropariate_compartment_dict[species] = list(set(transport_for_inappropariate_compartment))

    new_model_dict[species] = model
    # cobra.io.save_json_model(model, "DIY6_"+species+".json")

    # last two characters of every metabolite id still used by a reaction
    print('compartments')
    print({m.id[-2:] for rxn in model.reactions for m in rxn.metabolites})

# modifications.to_csv('model_modifications_oct4.csv')
# pd.DataFrame.from_dict(inappropriate_compartments_that_remain, orient="index").to_csv("percent_reactions_in_wrong_compartment_oct4.csv")
      
   

In [None]:
# future thought:
# what to do for reactions like this:
#     model_dict['Vculicisfloridensis_May20.tsv'].reactions.get_by_id('O16A4COLIPAabctex')
# transport from extracellular to periplasm
# what to do with reactions like this:
#'atp_c + h2o_c + o16a4colipa_p --> adp_c + h_c + o16a4colipa_e + pi_c'

In [None]:
# Collect, over all species models, the last two characters of the id of every
# metabolite that takes part in a reaction; the resulting set is displayed.
compartment_suffixes = set()
for key in new_model_dict:
    for model_rxn in new_model_dict[key].reactions:
        compartment_suffixes.update(met.id[-2:] for met in model_rxn.metabolites)
compartment_suffixes

In [None]:
# Per species, print two sets of two-character metabolite id suffixes:
# first those of metabolites used by at least one reaction, then those of all
# metabolites stored in the model.
for species, species_model in new_model_dict.items():
    print(species)
    print({m.id[-2:] for rxn in species_model.reactions for m in rxn.metabolites})
    print({m.id[-2:] for m in species_model.metabolites})
    
## use my cobra.manipulation.delete.prune_unused_metabolites(model_dict[species])
# do above again