## Imports

In [48]:
import os
import random
from enum import Enum
from typing import NewType, Dict, List, Any, Counter

import cobra
import numpy as np
import pandas as pd
from cobra.exceptions import OptimizationError, Infeasible

## User parameters passed to the program

In [49]:
# Settings a user is expected to tune before running the notebook.
user_params = dict(
    # host organism: 'ecoli' or 'ropacus'
    host='ecoli',
    # model file handed to the model reader
    modelfile='iJO1366_MVA.json',
    # first and last time point and the number of points of the time grid
    timestart=0.0,
    timestop=8.0,
    numpoints=9,
    # tracked medium components and, in the same order, their initial
    # concentrations (in mM)
    reactants=['glc__D_e', 'nh4_e', 'pi_e', 'so4_e', 'mg2_e', 'k_e', 'na1_e', 'cl_e'],
    initial_substrates=[22.203, 18.695, 69.454, 2.0, 2.0, 21.883, 103.7, 27.25],
)

## Declare some variables and types

In [50]:
# Enumerations
class Omics(Enum):
    """Omics data types supported by the notebook.

    ``str(member)`` gives the member name in lower case, e.g. ``'proteomics'``.
    """
    PROTEOMICS = 0
    TRANSCRIPTOMICS = 1
    METABOLOMICS = 2

    def __str__(self):
        # Enum member names are already strings; only the case changes.
        return self.name.lower()
    
# Type annotations
# Distinct string type used for the path-like constants below.
Filename = NewType('Filename', str)

# Constants
# Web-service endpoints.
# NOTE(review): neither URL is referenced by the code visible in this notebook.
UNIPROT_URL = '''https://www.uniprot.org/uploadlists/'''
CTS_URL = '''https://cts.fiehnlab.ucdavis.edu/rest/convert/'''
# HOST NAME
# Host organism, taken from the user parameters ('ecoli' or 'ropacus').
HOST_NAME: str = user_params['host'] 
# TODO: Move some constants to variables by program arguments
# Folder holding the input data (e.g. the training data CSV).
DATA_FILE_PATH: Filename = Filename('data')
# Output file path
# Folder that receives every generated CSV file.
OUTPUT_FILE_PATH: Filename = Filename('data/output')
# INCHIKEY_TO_CID_MAP_FILE_PATH: mapping file path to map inchikey to cids
# (folder that contains 'inchikey_to_cid.txt')
INCHIKEY_TO_CID_MAP_FILE_PATH: Filename = Filename('mapping') 
# MODEL_FILENAME: Filename = Filename('iECIAI39_1322.xml')  # E. coli
# NOTE(review): the model actually loaded comes from user_params['modelfile'];
# this constant is not read by the code visible in this notebook.
MODEL_FILENAME: Filename = Filename('reannotated_base_v3.sbml')  # R. opacus

# NOTE: user input to the program
# Id of the biomass reaction whose flux is read as the growth rate.
REACTION_ID_ECOLI: str = 'BIOMASS_Ec_iJO1366_core_53p95M'  # E. coli
REACTION_ID: str = 'biomass_target'  # R. opacus
# REACTION_ID: str = 'SRC_C00185_e'  # R. opacus
GENE_IDS_DBS: List[str] = ['kegg.genes']  # R. opacus
# GENE_IDS_DBS: List[str] = ['uniprot', 'goa', 'ncbigi']  # E. coli
# Unit label per omics type.
# NOTE(review): write_omics_files defines its own unit table and does not read this one.
UNITS: Dict[Omics, str] = {
    Omics.PROTEOMICS: 'proteins/cell',
    Omics.TRANSCRIPTOMICS: "FPKM",
    Omics.METABOLOMICS: "mg/L"
}
# Fix the flux value to -15 as we have data for this constraint
# NOTE(review): Ecoli.__init__ sets its own LOWER_BOUND / UPPER_BOUND attributes
# (-15 and 1000); these module-level values are not read by the code visible here.
LOWER_BOUND: int = -15
UPPER_BOUND: int = -15

## Define the Ecoli class that has all the methods to generate the data

In [51]:
class Ecoli():

    def __init__(self):
        self.time_series_omics_data = {}
        self.LOWER_BOUND = -15
        self.UPPER_BOUND = 1000


    def generate_time_series_data(self, model, condition):
        """
        Simulate a batch culture over the configured time grid and write all
        synthetic data files.

        For every time point and every tracked substrate the exchange lower
        bound is limited to what the medium can still supply, fake omics data
        are generated from the optimised model, and biomass and substrate
        levels are advanced by one step.  Afterwards
        ``generate_isopentenol_concentrations`` solves the isoprenol-producing
        variant for the same time point.  The results are written through the
        ``write_*`` methods of this class.

        :param model: cobra model; bounds and constraints are only changed
            inside a ``with model`` block
        :param condition: condition number handed to ``generate_fake_data`` and
            ``write_experiment_description_file``
        :return: None
        """

        # (data, time point) tuples collected for each omics type
        proteomics_list: List = []
        transcriptomics_list: List = []
        fluxomics_list: List = []
        metabolomics_list: List = []

        # Batch simulation scheme followed at each time point t:
        # Step 1: limit the uptake of each substrate to what is still available:
        #         lower_bound = max(lower_bound, -subs[t]*volume/cell[t]/delt)
        # Step 2: solve the model ("get_optimized_solution") and generate the data
        # Step 3: mu = solution(biomass)
        # Step 4: cell[t+delt] = cell[t]*exp(mu*delt)
        # Step 5: subs[t+delt] = max(subs[t] - flux/mu*cell[t]*(1-exp(mu*delt)), 0.0)

        # time steps to calculate the biomass production for
        t0 = user_params['timestart']
        tf = user_params['timestop']
        points = user_params['numpoints']
        tspan, delt = np.linspace(t0, tf, points, dtype='float64', retstep=True)

        # Explicit dtypes: the default dtype of a Series/DataFrame created
        # without data differs between pandas versions.
        # biomass per time point, starting at cell0 (in gDW/L)
        cell = pd.Series(index=tspan, dtype='float64')
        cell0 = 0.01
        cell[t0] = cell0

        # tracked medium components and their concentrations per time point
        comp = user_params['reactants']
        subs = pd.DataFrame(index=tspan, columns=comp, dtype='float64')
        subs0 = user_params['initial_substrates']  # in mM
        subs.loc[t0] = subs0

        # isoprenol concentration per time point
        conc_iso = pd.Series(index=tspan, dtype='float64')
        conc_iso[tspan[0]] = 0.0

        # solution of the isoprenol-producing model per time point
        sol_time_wild = pd.Series(index=tspan, dtype=object)

        # exchange reaction id -> id of its first reactant, for tracked components only
        subs_ext = {r.id: r.reactants[0].id for r in model.exchanges if r.reactants[0].id in comp}

        # loop invariants
        volume = 1.0
        # NOTE: pass the training file as an argument from the cli
        training_data_file = f'{DATA_FILE_PATH}/training_data_8genes.csv'

        # NOTE: put the body of the for loop inside a function
        for t in tspan:
            # Not changing the model but adding constraints for each time point
            with model:
                # NOTE(review): the solve and the appends below run once per
                # substrate, so every time point contributes len(subs_ext)
                # entries to each omics list, each with one more bound applied.
                # The MOMA method of this class sets all bounds first and
                # solves once - confirm which behaviour is intended.
                for k, v in subs_ext.items():
                    exchange = model.reactions.get_by_id(k)
                    # Step 1: uptake cannot exceed what is left in the medium
                    exchange.lower_bound = max(exchange.lower_bound, -subs.loc[t,v]*volume/cell[t]/delt)
                    # get_optimized_solution applies this value to the biomass reaction
                    self.LOWER_BOUND = exchange.lower_bound

                    # Step 2: fake omics data for the current model state
                    proteomics, transcriptomics, fluxomics, metabolomics, solution = self.generate_fake_data(model, condition)

                    # Step 3: Calculate mu where mu = solution(biomass)
                    mu = solution[REACTION_ID_ECOLI]

                    # Step 4: biomass at the next time point
                    cell[t+delt] = cell[t]*np.exp(mu*delt)

                    # Step 5: substrate concentration at the next time point
                    # (both values are written again for t+delt by
                    # generate_isopentenol_concentrations below)
                    subs.loc[t+delt,v] = max(subs.loc[t,v]-solution[k]/mu*cell[t]*(1-np.exp(mu*delt)),0.0)

                    # keep the data together with the time point it belongs to
                    proteomics_list.append((proteomics, t))
                    transcriptomics_list.append((transcriptomics, t))
                    fluxomics_list.append((fluxomics, t))
                    metabolomics_list.append((metabolomics, t))

                # optimize the model after inducing isopentenol and formate
                # formation and get isopentenol concentrations
                sol_time_wild = self.generate_isopentenol_concentrations(model, sol_time_wild, training_data_file, t, tspan, delt, cell, subs, subs_ext, conc_iso)

        # Training data for reactions with isopentenol production (MOMA) is not
        # generated here: it needs the 'cplex' QP solver.
        # self.generate_isopentenol_and_solution_for_biomass_using_moma(model, sol_time_wild, training_data_file, tspan, delt, cell, subs, subs_ext)

        time_series_omics_data = {'proteomics': proteomics_list, 'transcriptomics': transcriptomics_list, 'fluxomics': fluxomics_list, 'metabolomics': metabolomics_list}

        # write all the data generated
        self.write_experiment_description_file(condition)
        self.write_omics_files(time_series_omics_data)
        self.write_OD_data(cell)
        # write external metabolites in subs: Ammonia and glucose and isoprenol concentrations
        self.write_external_metabolite(subs, conc_iso)


    # This uses the modified E. Coli model that has the added isopentenol pathway
    # QUESTION: DO we need to add the isopentenol pathway to it, if not provided?DO we need to check it?
    def generate_isopentenol_concentrations(self, model, sol_time_wild, training_data_file, timepoint, tspan, delt, cell, subs, subs_ext, conc_iso):
        iso = 'EX_isoprenol_e'
        df = pd.read_csv(training_data_file)

        # Calculating the number of reactions that should be modified (n_genes) and 
        # number of strains for which isoprenol concentration should be estimated 
        n_reactions = df.shape[1] - 1
        n_instances = df.shape[0] - 1
        # print(n_reactions,n_instances)

        # Inserting the isoprenol concentration as the last column in the dataframe
        df.insert(loc=n_reactions+1, column='Isoprenol Concentration (mM)', value=None)

        iso_cons = model.problem.Constraint(model.reactions.EX_isoprenol_e.flux_expression,
                                lb = 0.20)
        model.add_cons_vars(iso_cons)
        for_cons = model.problem.Constraint(model.reactions.EX_for_e.flux_expression,
                                lb = 0.10)
        model.add_cons_vars(for_cons)
        # display(model.summary())
        sol_t = model.optimize()
        # storing the solution for each timepoint which are going to be reference solutions for moma (see below)
        sol_time_wild[timepoint] = sol_t
        mu = sol_t[REACTION_ID_ECOLI]

        if sol_t.status == 'optimal' and mu > 1e-6:
            # Calculating next time point's OD
            cell[timepoint+delt] = cell[timepoint]*np.exp(mu*delt)
            for k, v in subs_ext.items():
                # Calculating substrate's concentration for next time point
                subs.loc[timepoint+delt,v] = max(subs.loc[timepoint,v]-sol_t[k]/mu*cell[timepoint]*(1-np.exp(mu*delt)),0.0)
            if sol_t[iso] > 0:
                # Calculating isoprenol concentration for next time point
                conc_iso.loc[timepoint+delt] = conc_iso.loc[timepoint]-sol_t[iso]/mu*cell[timepoint]*(1-np.exp(mu*delt))
            else:
                conc_iso.loc[0:t] = 0
                conc_iso.loc[timepoint+delt] = conc_iso.loc[timepoint]-sol_t[iso]/mu*cell[timepoint]*(1-np.exp(mu*delt))
        else:
            cell[timepoint+delt] = cell[timepoint]
            for k, v in subs_ext.items():
                subs.loc[timepoint+delt,v] = subs.loc[timepoint,v]
            conc_iso.loc[timepoint+delt] = conc_iso.loc[timepoint]

        return sol_time_wild

    def generate_isopentenol_and_solution_for_biomass_using_moma(self, model, sol_time_wild, training_data_file, tspan, delt, cell, subs, subs_ext):
        """
        Estimate the final isoprenol concentration of each strain in the
        training data with MOMA, using the per-time-point wild type solutions
        as reference.

        Switches the model solver to 'cplex' (this change is permanent).  The
        concentrations are stored in a local copy of the training data; the
        call that would write it out is commented out below.

        :param model: cobra model with the isoprenol pathway
        :param sol_time_wild: reference solution per time point
        :param training_data_file: CSV whose columns 1..8 hold the flux
            multipliers of the modified reactions (one row per strain)
        :param tspan: time points of the simulation
        :param delt: time step
        :param cell: biomass per time point (updated in place)
        :param subs: substrate concentrations per time point (updated in place)
        :param subs_ext: mapping exchange reaction id -> substrate column in ``subs``
        :return: None
        """
        model.solver = 'cplex'
        iso = 'EX_isoprenol_e'
        # Reactions whose flux is pinned for every strain; entry number j
        # (counting from 1) is scaled by column j of the training data.
        modified_reactions = ('ACCOAC', 'MDH', 'PTAr', 'CS', 'ACACT1r', 'PPC', 'PPCK', 'PFL')
        df = pd.read_csv(training_data_file)

        # The original e.coli iJO1366 model does not have the isoprenol pathway.
        # Thus, performing simple flux balance analysis on the model will not allocate
        # any flux for isoprenol production reaction. So, we modify the model so that
        # it produces a small amount of isoprenol. In addition, we force a small amount
        # of formate production which forces the model to activate the 'PFL' reaction.
        with model:
            iso_cons = model.problem.Constraint(model.reactions.EX_isoprenol_e.flux_expression,
                                        lb = 0.20)
            model.add_cons_vars(iso_cons)
            for_cons = model.problem.Constraint(model.reactions.EX_for_e.flux_expression,
                                        lb = 0.10)
            model.add_cons_vars(for_cons)
            WT_FBA_sol = cobra.flux_analysis.pfba(model)
            print(WT_FBA_sol.status, WT_FBA_sol[REACTION_ID_ECOLI], WT_FBA_sol[iso])

        # number of modified reactions and number of strains to evaluate
        n_reactions = df.shape[1] - 1
        # NOTE(review): "- 1" leaves out the last row of the training data - confirm intended.
        n_instances = df.shape[0] - 1

        # Inserting the isoprenol concentration as the last column in the dataframe
        df.insert(loc=n_reactions+1, column='Isoprenol Concentration (mM)', value=None)
        # position of that column for the reactions handled here (9 for eight reactions)
        iso_column = len(modified_reactions) + 1

        # isoprenol concentration per time point (explicit dtype: the default
        # for a Series without data differs between pandas versions)
        conc_iso = pd.Series(index=tspan, dtype='float64')
        conc_iso[tspan[0]] = 0.0

        volume = 1.0
        # NOTE(review): cell, subs and conc_iso are not reset between strains,
        # so each strain starts from the state the previous one left - confirm intended.
        # For each strain
        for i in range(0,n_instances):
            # At each time point
            for t in tspan:
                # constraints are added per time point and strain without globally changing the model
                with model:
                    for k, v in subs_ext.items():
                        model.reactions.get_by_id(k).lower_bound = max(model.reactions.get_by_id(k).lower_bound,
                                                                -subs.loc[t,v]*volume/cell[t]/delt)
                    # pin each chosen reaction to its wild type flux times the strain's multiplier
                    for column, reaction_id in enumerate(modified_reactions, start=1):
                        target_flux = WT_FBA_sol[reaction_id]*df.iloc[i,column]
                        flux_cons = model.problem.Constraint(model.reactions.get_by_id(reaction_id).flux_expression,
                                                             lb = target_flux,
                                                             ub = target_flux)
                        model.add_cons_vars(flux_cons)

                    # Reference solution calculated for each time point for wild type
                    sol1 = sol_time_wild[t]
                    print(sol_time_wild)
                    print(sol1)

                    # Moma solution for each time point
                    sol2 = cobra.flux_analysis.moma(model, solution=sol1, linear=False)
                    mu = sol2[REACTION_ID_ECOLI]
                    print(i,t, sol2.status, mu)
                    if sol2.status == 'optimal' and mu > 1e-6:
                        cell[t+delt] = cell[t]*np.exp(mu*delt)
                        for k, v in subs_ext.items():
                            subs.loc[t+delt,v] = max(subs.loc[t,v]-sol2[k]/mu*cell[t]*(1-np.exp(mu*delt)),0.0)
                        if not sol2[iso] > 0:
                            # no production: reset everything up to the current time point
                            conc_iso.loc[0:t] = 0
                        conc_iso.loc[t+delt] = conc_iso.loc[t]-sol2[iso]/mu*cell[t]*(1-np.exp(mu*delt))
                    else:
                        # no (meaningful) growth: carry every pool over unchanged
                        cell[t+delt] = cell[t]
                        for k, v in subs_ext.items():
                            subs.loc[t+delt,v] = subs.loc[t,v]
                        conc_iso.loc[t+delt] = conc_iso.loc[t]

            # Storing the final concentration for all strains
            df.iloc[i,iso_column] = conc_iso.iloc[-1]
            print(conc_iso)
            print(i,sol2[iso],conc_iso.iloc[-1])

            # write out the training dataset with isopentenol production concentrations
            # filename = 'training_data_8genes_withiso.csv'
            # self.write_training_data_with_isopentenol(df, filename)

    def generate_fake_data(self, model, condition):
        """

        :param model: cobra model object
        :param solution: solution for the model optimization using cobra
        :param data_type: defines the type of -omics data to generate (all by default)
        :return:
        """

        self.proteomics = {}
        self.transcriptomics = {}
        self.fluxomics = {}
        self.metabolomics = {}

        # reaction_id of choice passed to the function# hardcoded here for this particular file (Need to convert this to an interactive cli program)
        reaction_id = REACTION_ID_ECOLI

        # while condition:
            # print("Condition parameter: ", condition)
        condition-=1
        solution = self.get_optimized_solution(model, reaction_id)
        # solution: cobra.Solution = cobra.core.solution.get_solution(
        #     model, raise_error=False)

        proteomics, transcriptomics, fluxomics = self.get_proteomics_transcriptomics_fluxomics_data(model, solution, condition)
        
        metabolomics = self.get_metabolomics_data(model, condition)
        
        return (proteomics, transcriptomics, fluxomics, metabolomics, solution)

    # NOTE: 
    def read_pubchem_id_file(self):
        inchikey_to_cid = {}
        filename = f'{INCHIKEY_TO_CID_MAP_FILE_PATH}/inchikey_to_cid.txt'
        with open(filename, 'r') as fh:
                try:
                    line = fh.readline()
                    while line:
                        # checking to ignore inchikey records with no cid mappings
                        if (len(line.split()) > 1):
                            inchikey_to_cid[line.split()[0]] = 'CID:'+line.split()[1]
                        else:
                            inchikey_to_cid[line.strip()] = None

                        line = fh.readline()
                # NOTE: propagated exception, raise
                except Exception as ex:
                    print("Error in reading file!")
                    print(ex)
        # fh.close()

        return inchikey_to_cid

    def get_metabolomics_data(self, model, condition):
        """

        :param model:
        :param condition:
        :return:
        """
        metabolomics = {}
        # get metabolites
        # NOTE: Need to find a better algorithm. This is O(n^3)

        # read the inchikey to pubchem ids mapping file
        inchikey_to_cid = {}
        inchikey_to_cid = self.read_pubchem_id_file()

        for met in model.metabolites:
            # get associated reactions
            for reaction in list(met.reactions):
                # get dictionary of associated metabolites and their concentrations
                for metabolite, conc in reaction._metabolites.items():
                    if metabolite.id == met.id:
                        # map the BIGG ids to CIDs using the inchikeys in the metabolites and the ampping file
                        # that we have generated from Pubchem
                        # remember that not all Inchikeys dont have a mappping to a CIDs and there are
                        # multiple mappings for some Inchikeys
                        if 'inchi_key' in met.annotation:
                            if type(met.annotation['inchi_key']) is list:
                                inchi_key = met.annotation['inchi_key'][0]
                            else:
                                inchi_key = met.annotation['inchi_key']
                            
                            if inchi_key in inchikey_to_cid.keys():
                                if inchikey_to_cid[inchi_key] not in metabolomics.keys():
                                    if inchikey_to_cid[inchi_key] is not None:
                                        metabolomics[inchikey_to_cid[inchi_key]] = abs(conc)
                                else:
                                    if inchikey_to_cid[inchi_key] is not None:
                                        metabolomics[inchikey_to_cid[inchi_key]] += abs(conc)
            # getting number of associated reactions and averaging the metabolic concentration value
            num_reactions = len(list(met.reactions))

            # check if inchi_key attribite present else ignore metabolite
            if 'inchi_key' in met.annotation.keys() and inchi_key in inchikey_to_cid.keys():
                if inchikey_to_cid[inchi_key] is not None:
                    metabolomics[inchikey_to_cid[inchi_key]]/=num_reactions

        return metabolomics

    def get_proteomics_transcriptomics_fluxomics_data(self, model, solution, condition):
        """

        :param model:
        :param solution:
        :param condition:
        :return:
        """

        # pre-determined linear constant (NOTE: Allow user to set this via parameter)
        # DISCUSS!!
        k = 0.8
        q = 0.06

        proteomics = {}
        transcriptomics = {}
        fluxomics = {}

        # print(solution.fluxes['EX_cm_e'])
        rxnIDs = solution.fluxes.keys()
        for rxnId in rxnIDs:
            reaction = model.reactions.get_by_id(rxnId)
            for gene in list(reaction.genes):

                # this will ignore all the reactions that does not have the gene.annotation property
                # DISCUSS!!
                if gene.annotation:
                    if 'uniprot' not in gene.annotation:
                        if 'goa' in gene.annotation:
                            protein_id = gene.annotation['goa']
                            # print("HERE")
                        else:
                            break
                    else:
                        protein_id = gene.annotation['uniprot'][0]
                        # print("HERERERERERERERER")

                    # create proteomics dict
                    # Adding noise which is 5% of the signal data. signal + signal*0.05 = signal*1.05
                    # print(rxnId)
                    # print(solution.fluxes)
                    # print(type(solution.fluxes))
                    # print(solution.fluxes[rxnId])
                    # print(protein_id)

                    proteomics[protein_id] = (solution.fluxes[rxnId]/k)*1.05
                    fluxomics[rxnId] = solution.fluxes[rxnId]

                # create transcriptomics dict
                transcriptomics[gene.id] = (proteomics[protein_id]/q)*1.05

        return proteomics, transcriptomics, fluxomics

    def write_experiment_description_file(self, condition=1, line_name='WT'):
        # create the filename
        experiment_description_file_name = f'{OUTPUT_FILE_PATH}/experiment_description_file.csv'

        #write experiment description file
        try:
            with open(experiment_description_file_name, 'w') as fh:
                fh.write(f'Line Name, Line Description, Part ID, Media, Shaking Speed, Starting OD, Culture Volume, Flask Volume, Growth Temperature, Replicate Count\n')
                fh.write(f"{line_name}, KEIO wild type, ABF_001327, M9, 1, 0.1, 50, 200, 30, 1\n")
        except Exception as ex:
            print("Error in writing file!")
            print(ex)

        fh.close()

    def write_omics_files(self, time_series_omics_data, condition=1, line_name='WT'):
        """

        :param dataframe:
        :param data_type:
        :param condition:
        :return:
        """

        # create file number two: omics file
        # TODO: Need to change the units to actual relevant units
        unit_dict = { "fluxomics": 'g/L',\
                "proteomics": 'proteins/cell',\
                "transcriptomics": "FPKM",\
                "metabolomics": "mg/L"
                }

        # for each omics type data
        for omics_type, omics_list in time_series_omics_data.items():
            # create the filenames
            omics_file_name: str = f'{OUTPUT_FILE_PATH}/{omics_type}_fakedata_sample_{condition}.csv'
            
            # open a file to write omics data for each type and for all timepoints and constraints
            try:
                with open(omics_file_name, 'w') as fh:
                    fh.write(f'Line Name,Measurement Type,Time,Value,Units\n')
                    for omics_dict, timepoint in omics_list:
                        dataframe = pd.DataFrame.from_dict(omics_dict, orient='index', columns=[f'{omics_type}_value'])
                        for index, series in dataframe.iteritems():
                            for id, value in series.iteritems():
                                fh.write((f'{line_name},{id},{timepoint},{value},{unit_dict[omics_type]}\n'))

            except Exception as ex:
                print("Error in writing file!")
                print(ex)
        
            fh.close()

    def write_OD_data(self, cell, line_name='WT'):
        # create the filename
        OD_data_file: str = f'{OUTPUT_FILE_PATH}/OD_fakedata_sample.csv'

        # write experiment description file
        try:
            with open(OD_data_file, 'w') as fh:
                fh.write(f'Line Name,Measurement Type,Concentration,Units,Time,Value\n')
                for index, value in cell.items():
                    # print(index, value)
                    fh.write((f'{line_name},Optical Density,0.75,g/L,{index},{value}\n'))

        except Exception as ex:
            print("Error in writing OD file")
            print(ex)
        
    def write_training_data_with_isopentenol(self, df, filename):
        filename = f'{DATA_FILE_PATH}/{filename}'
        df.to_csv(filename, header=True, index=False)

    def write_external_metabolite(self, substrates, isopentenol_conc, filename='external_metabolites.csv', linename='WT'):
        # create the filename
        external_metabolites: str = f'{OUTPUT_FILE_PATH}/{filename}'
        # get ammonium and glucose from substrates
        glucose = substrates.loc[:, 'glc__D_e']
        ammonium = substrates.loc[:, 'nh4_e']

        try:
            with open(external_metabolites, 'w') as fh:
                # get ammonium and glucose from substrates
                fh.write(f'Line Name,Measurement Type,Time,Value,Units\n')
                for index, value in glucose.items():
                    fh.write((f'{linename},CID:5793,{index},{value},mg/L\n'))
                    
                for index, value in ammonium.items():
                    fh.write((f'{linename},CID:16741146,{index},{value},mg/L\n'))

                # write out isopentenol concentrations
                for index, value in isopentenol_conc.items():
                    fh.write((f'{linename},CID:15983957,{index},{value},mg/L\n'))
        
        except Exception as ex:
            print("Error in writing OD file")
            print(ex)

    def get_random_number(self):
        """

        :return:
        """
        random.seed(12312)
        return random.random()

    def add_random_noise(self):
        """

        :return:
        """
        pass


    def get_list_of_reactions(self, file_name):
        """

        :param file_name: Name of the model file (has to be xml for now)
        :return: None (prints the list of reactions that has mass in them)
        """

        # Load model¶depending on the kind of file (the file has to be xml)
        if file_name.endswith(".xml"):
            model = cobra.io.read_sbml_model(file_name)

        # Print out the reaction name and reaction id for all reactions related to BIOMASS production:
        print("List of reactions related to BIOMASS production:")
        for rxn in model.reactions:
            if rxn.name is not None and 'BIOMASS' in rxn.id:
                print("{}: {}".format(rxn.id, rxn.name))



    def get_optimized_solution(self, model, reaction_id):
        """

        :param model:
        :param reaction_id:
        :return solution:
        """

        # fix the flux value to -15 as we have data for this constraint
        model.reactions.get_by_id(reaction_id).lower_bound = self.LOWER_BOUND
        model.reactions.get_by_id(reaction_id).upper_bound = self.UPPER_BOUND
        # print(model.reactions.get_by_id(reaction_id))

        print("Displaying the reaction bounds after constraining them:")
        print(model.reactions.get_by_id(reaction_id).bounds)

        # optimizing the model for only the selected reaction   
        # model.slim_optimize()

        # optimizing model
        solution = model.optimize()

        return solution


    def read_model(self, file_name):
        """

        :param file_name:
        :return model:
        """

        # Load model¶depending on the kind of file
        if file_name.endswith(".xml"):
            model = cobra.io.read_sbml_model(file_name)
        elif file_name.endswith(".json"):
            model = cobra.io.load_json_model(file_name)

        return model

In [52]:

def generate_data_for_host(filename):
    """
        Generate omics data for the configured host from the given model file.

        Only HOST_NAME == 'ecoli' is handled; for any other host nothing is done.
    """
    if HOST_NAME != 'ecoli':
        return

    # the E. coli generator reads the model and produces the synthetic data
    generator = Ecoli()
    loaded_model = generator.read_model(filename)

    condition = 1
    print('here')
    generator.generate_time_series_data(loaded_model, condition)

In [53]:
# get time series omics data for specified host and model
filename = user_params['modelfile']

# Make sure the data and output folders exist.  makedirs also creates missing
# parent folders and does not fail when a folder is already there.
os.makedirs(DATA_FILE_PATH, exist_ok=True)
os.makedirs(OUTPUT_FILE_PATH, exist_ok=True)

# generate data for host
generate_data_for_host(filename)

iJO1366_MVA.json
here
Displaying the reaction bounds after constraining them:
(-10.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-200.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-200.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-10.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-839.5219917959024, 1000.0)
Displaying the reaction bounds after constraining them:
(-76.7338840163307, 1000.0)
Displaying the reaction bounds after constraining them:
(-1000.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-710.6633672620803



Displaying the reaction bounds after constraining them:
(-11.435539261593945, 1000.0)
Displaying the reaction bounds after constraining them:
(-1.0542687781548548, 1000.0)
Displaying the reaction bounds after constraining them:
(-55.111244447101335, 1000.0)
Displaying the reaction bounds after constraining them:
(-0.0, 1000.0)




Displaying the reaction bounds after constraining them:
(-14.476803483353759, 1000.0)
Displaying the reaction bounds after constraining them:
(-35.95174307885627, 1000.0)
Displaying the reaction bounds after constraining them:
(-0.8120280420812217, 1000.0)
Displaying the reaction bounds after constraining them:
(0.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-11.435539261593945, 1000.0)
Displaying the reaction bounds after constraining them:
(-1.0542687781548548, 1000.0)
Displaying the reaction bounds after constraining them:
(-55.111244447101335, 1000.0)
Displaying the reaction bounds after constraining them:
(-0.0, 1000.0)
Displaying the reaction bounds after constraining them:
(-14.476803483353759, 1000.0)
Displaying the reaction bounds after constraining them:
(-35.95174307885627, 1000.0)
Displaying the reaction bounds after constraining them:
(-0.8120280420812217, 1000.0)
Displaying the reaction bounds after constraining them:
(0.0, 1000.0)
Displaying the re