# Set up 

In [1]:
# Global seed reused for NumPy here and for ADVI fitting / sampling further down.
SEED = 1
# NOTE(review): `random` is imported but never seeded in the visible cells -- confirm whether it is needed.
import random

# Kinetic-model simulation and core numerics.
import tellurium as te
import pandas as pd
import numpy as np
# Seed NumPy's legacy global RNG as early as possible.
np.random.seed(SEED)

# Plotting and posterior summaries.
import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az

# Probabilistic-programming stack (aesara backend + PyMC).
import aesara.tensor as at
import aesara
# Float dtype string taken from the aesara configuration.
floatX = aesara.config.floatX
import pymc as pm
# NOTE(review): duplicate of the numpy import above; harmless.
import numpy as np

import cobra
import os

from scipy import stats
import scipy as sp
import math

# Used to store / reload the fitted traces as gzip-compressed pickles.
import gzip
import cloudpickle



In [2]:
# Import the project-local packages from two directories up (presumably the
# repository root -- confirm), then return to the notebook's directory.
# The try/finally guarantees the working directory is restored even when an
# import fails; otherwise a failed import would leave the kernel in '../../'
# and re-running this cell would climb two more levels, breaking every
# relative path used below.
_notebook_dir = os.getcwd()
os.chdir('../../')
try:
    from src import antemll, util
    import emll
    from emll.aesara_utils import LeastSquaresSolve
finally:
    os.chdir(_notebook_dir)

# Perturbing enzymes

In [3]:
# Sanity check: display the working directory that the relative paths in the
# following cells are resolved against (the displayed value is machine-specific).
os.getcwd()

'c:\\Users\\user\\Documents\\research\\BMCA-pipeline\\notebooks\\topologyC'

In [3]:
# Path to the Antimony model file.
# NOTE(review): the variable is called `Teusink_ant` but it points at the
# topologyC model; the name is kept because later cells reference it.
Teusink_ant ='../../models/Antimony/topologyC.ant' 
# Load the Antimony file with tellurium; `r` is used by every later cell.
r = te.loada(Teusink_ant)
# Turn on conserved-moiety analysis before solving for the steady state.
r.conservedMoietyAnalysis = True
# Last expression of the cell, so the value returned by steadyState() is displayed.
r.steadyState()

1.9044426622520472e-14

In [8]:
# Range of the scaled flux control coefficients of the reference model:
# smallest value on the first output line, largest on the second.
a = r.getScaledFluxControlCoefficientMatrix()
print(a.min(), a.max(), sep='\n')

-3.2105510468062435
5.929277831446836


In [5]:
# Perturbation levels used in this notebook and their display labels.
# The labels are derived from the levels so the two lists cannot drift apart.
pt_levels = [1.5]
pt_labels = [f'{level}x' for level in pt_levels]

In [6]:
# Every species id of the model: floating species first, then boundary species.
# Each entry is later dropped in turn from the dataset (leave-one-metabolite-out).
metabolites = r.getFloatingSpeciesIds() + r.getBoundarySpeciesIds()
# Dataset for the 1.5x perturbation level (presumably generated by an earlier
# notebook/script -- confirm provenance).
data = pd.read_csv('../../data/generated_data/topologyC/topologyC_1.5.csv')

## Running ADVI

In [None]:
def run_BayesInf(BMCA_obj, n_samp=1):
    """Fit the BMCA model for one dataset with ADVI and draw posterior samples.

    Species that exist in the notebook-level roadrunner model ``r`` but are
    absent from the columns of ``BMCA_obj.xn`` / ``BMCA_obj.yn`` are modelled
    as unobserved Normal variables; the data that are present enter the model
    as observed Normal variables.

    Parameters
    ----------
    BMCA_obj
        Object exposing ``Ex``, ``Ey``, ``xn``, ``yn``, ``en`` and ``vn``
        (presumably the pandas objects built by ``antemll.antemll`` --
        confirm against that class).
    n_samp : int, optional
        Accepted for call compatibility; it is not used in the body.

    Returns
    -------
    tuple
        ``(trace, ppc_vi)``: 1000 draws from the fitted ADVI approximation and
        the output of ``pm.sample_posterior_predictive`` for those draws.

    Notes
    -----
    Reads the notebook globals ``r``, ``SEED`` and ``util``. As a side effect
    it creates a matplotlib figure of the ADVI loss history.
    """

    def _partition(species_ids, measured_columns):
        """Split positions of ``species_ids`` into (known, omitted, restore_order)."""
        known = [pos for pos, name in enumerate(species_ids) if name in measured_columns]
        omitted = [pos for pos, name in enumerate(species_ids) if name not in measured_columns]
        # Rows are stacked as [known, omitted]; the argsort of that stacking is
        # the permutation that puts them back into the model's species order.
        return known, omitted, np.hstack([known, omitted]).argsort()

    _, omitted_chi_inds, chi_inds = _partition(r.getFloatingSpeciesIds(), BMCA_obj.xn.columns)
    _, omitted_y_inds, y_inds = _partition(r.getBoundarySpeciesIds(), BMCA_obj.yn.columns)

    N_ITERATIONS = 40000

    with pm.Model() as pymc_model:

        # Elasticity matrices (prior construction is delegated to util.initialize_elasticity)
        Ex_t = pm.Deterministic('Ex', util.initialize_elasticity(BMCA_obj.Ex.to_numpy(), name='Ex'))
        Ey_t = pm.Deterministic('Ey', util.initialize_elasticity(BMCA_obj.Ey.to_numpy(), name='Ey'))

        # Floating-species data: observed where available, latent otherwise
        if omitted_chi_inds:
            chi_measured = pm.Normal('chi_measured', mu=0, sigma=0.1, observed=BMCA_obj.xn.T)
            chi_unmeasured = pm.Normal(
                'chi_unmeasured',
                mu=0,
                sigma=10,
                shape=(len(omitted_chi_inds), len(BMCA_obj.xn)),
            )
            chi_t = at.concatenate([chi_measured, chi_unmeasured], axis=0)[chi_inds, :]
            pm.Deterministic('chi_t', chi_t)
        else:
            chi_t = pm.Normal('chi_obs', mu=0, sigma=10, observed=BMCA_obj.xn.T)

        # Boundary-species data: same treatment
        if omitted_y_inds:
            y_measured = pm.Normal('y_measured', mu=0, sigma=0.1, observed=BMCA_obj.yn.T)
            y_unmeasured = pm.Normal(
                'y_unmeasured',
                mu=0,
                sigma=5,
                shape=(len(omitted_y_inds), len(BMCA_obj.vn)),
            )
            y_t = at.concatenate([y_measured, y_unmeasured], axis=0)[y_inds, :]
            pm.Deterministic('y_t', y_t)
        else:
            y_t = pm.Normal('y_obs', mu=0, sigma=10, observed=BMCA_obj.yn.T)

        e_t = pm.Normal('e_t', mu=1, sigma=1, observed=BMCA_obj.en.T) # e_hat?

        # Predicted fluxes: e * (1 + Ex.chi + Ey.y)
        linlog_term = np.ones(BMCA_obj.en.T.shape) + pm.math.dot(Ex_t, chi_t) + pm.math.dot(Ey_t, y_t)
        v_pred = pm.Deterministic('vn', e_t * linlog_term)
        pm.Normal('v_hat_obs', mu=v_pred, sigma=0.1, observed=BMCA_obj.vn.squeeze().T)

        advi = pm.ADVI(random_seed=SEED)
        tracker = pm.callbacks.Tracker(
            mean=advi.approx.mean.eval,
            std=advi.approx.std.eval,
        )
        approx = advi.fit(
            n=N_ITERATIONS,
            callbacks=[tracker],
            obj_optimizer=pm.adagrad_window(learning_rate=5E-3),
            total_grad_norm_constraint=0.7,
            obj_n_mc=1,
        )

    # Convergence diagnostic: loss history of the fit (plotted with a +30 offset)
    with sns.plotting_context('notebook', font_scale=1.2):
        plt.figure(figsize=(5, 4))
        plt.plot(approx.hist + 30, '.', rasterized=True, ms=1)
        plt.xlim([0, N_ITERATIONS])
        sns.despine(trim=True, offset=10)

        plt.ylabel('-ELBO')
        plt.xlabel('Iteration')
        plt.title('in vitro ADVI convergence')
        plt.tight_layout()

    with pymc_model:
        trace = approx.sample(draws=1000, random_seed=SEED)
        ppc_vi = pm.sample_posterior_predictive(trace)

    return trace, ppc_vi

: 

In [None]:
# Leave-one-metabolite-out: refit the model once per species with that
# species' column removed, and store each result as a gzip-compressed pickle.
for m in metabolites:
    data15 = data.drop(columns=m)
    BMCA_obj15 = antemll.antemll(Teusink_ant, data15, '../../models/sbml/topologyC_cobra.xml')
    trace15, ppc15 = run_BayesInf(BMCA_obj15)

    with gzip.open(f"../../data/results/tracePickles/giraffe-topC_{m}.pgz", "wb") as f:
        cloudpickle.dump({"trace15": trace15, "ppc15": ppc15}, f)


../../models/Antimony/topologyC_cobra.ant


No objective coefficients in model. Unclear what should be optimized



invalid value encountered in log

Finished [100%]: Average Loss = 75,414
Sampling: [chi_measured, e_t, v_hat_obs, y_obs]


../../models/Antimony/topologyC_cobra.ant


No objective coefficients in model. Unclear what should be optimized



invalid value encountered in log

Finished [100%]: Average Loss = 75,315
Sampling: [chi_measured, e_t, v_hat_obs, y_obs]


In [None]:
# Reference run: fit with the complete dataset (no metabolite removed) and
# store the result next to the leave-one-out pickles.
BMCA_obj15 = antemll.antemll(Teusink_ant, data, '../../models/sbml/topologyC_cobra.xml')
trace15, ppc15 = run_BayesInf(BMCA_obj15)

with gzip.open(f"../../data/results/tracePickles/giraffe-topC_allMets.pgz", "wb") as f:
    cloudpickle.dump(dict(trace15=trace15, ppc15=ppc15), f)

# Analysis

In [None]:
# Reload every stored fit into one mapping:
#   omitted metabolite id -> (trace, posterior predictive); 'none' = full dataset.
# NOTE(review): unpickling executes arbitrary code -- only load files produced
# by this notebook.
metSens_dict = {}

for m in metabolites:
    with gzip.open(f"../../data/results/tracePickles/giraffe-topC_{m}.pgz", "rb") as f:
        traces = cloudpickle.load(f, encoding='bytes')
    metSens_dict[m] = tuple(traces[key] for key in ('trace15', 'ppc15'))

with gzip.open(f"../../data/results/tracePickles/giraffe-topC_allMets.pgz", "rb") as f:
    traces = cloudpickle.load(f, encoding='bytes')
metSens_dict['none'] = tuple(traces[key] for key in ('trace15', 'ppc15'))

### Elasticity check 

In [None]:
# Display which runs were loaded (one key per omitted metabolite, plus 'none').
metSens_dict.keys()

In [None]:
# Summarise the trace (first element of each stored pair) of every run with
# util.get_az_summary; keys are the same as in metSens_dict.
Ex_dict = {
    run_label: util.get_az_summary(run_result[0])
    for run_label, run_result in metSens_dict.items()
}

In [None]:
# One column per run next to the ground-truth ('gt') scaled elasticities of the
# reference model, one row per (reaction, floating species) pair.
# Assumes each util.get_az_summary result is a flat vector ordered like the
# flattened elasticity matrix -- TODO confirm.
ground_truth = r.getScaledElasticityMatrix().flatten()
elasticity_values = np.column_stack([ground_truth, *Ex_dict.values()])

row_labels = [
    f'{rxn}_{met}'
    for rxn in r.getReactionIds()
    for met in r.getFloatingSpeciesIds()
]
elasticities_df = pd.DataFrame(elasticity_values, columns=['gt', *Ex_dict], index=row_labels)
elasticities_df.head()

In [None]:
# Parity plot: predicted vs. ground-truth elasticities, one scatter series per
# run; points on the dashed diagonal are perfectly recovered.
plt.axes().set_aspect('equal', 'datalim')

for i in Ex_dict.keys():
    plt.scatter(elasticities_df['gt'], elasticities_df[i], alpha=0.4, label=i, zorder=10)

plt.axline([0, 0], [1, 1], c='gray', linestyle='dashed', zorder=1, linewidth=3)
plt.grid(True, which='both', axis='both', zorder=0)
plt.xlabel('ground truth elasticity values', size=14)
# Raw string: '\i' is not a valid Python escape sequence and triggers a
# SyntaxWarning on recent Python versions; the text passed to matplotlib is unchanged.
plt.ylabel(r'predicted elasticity values, $\it{r}$', size=14)
plt.title('allData noReg CRISPRi', size=20)
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")


In [None]:
# Root-mean-square error of every column against the ground truth
# (the 'gt' column itself therefore scores 0).
residuals = elasticities_df.sub(elasticities_df['gt'], axis=0)
RMSE = np.sqrt((residuals ** 2).mean())
RMSE

In [None]:
# Bar order: the full-data run ('none') first, then the leave-one-out runs
# from largest to smallest RMSE.
RMSE_sorted = RMSE.drop('gt', errors='ignore')  # 'gt' compares the truth with itself
omitted_order = RMSE_sorted.drop('none').sort_values(ascending=False).index
RMSE_sorted = RMSE_sorted.loc[['none', *omitted_order]]

plt.figure(figsize=(10, 6))
RMSE_sorted.plot(kind='bar', edgecolor='black')
# NOTE(review): hard-coded, truncated y-range -- bars outside 1..1.14 are clipped.
plt.ylim(1, 1.14)
plt.xlabel("metabolites omitted")
plt.ylabel("RMSE")
plt.title("TopC--RMSE in elasticity predictions by metabolite omitted")
plt.xticks(rotation=0, ha='center')
plt.tight_layout()
plt.show()
