## Download data from EDD

The data was previously saved as a CSV using the following code:

```python
!pip install --user edd-utils
from edd_utils import login, export_study
study_slug = 'pputida_wt_cj522_gb032_gb045_gb062'
edd_server = 'edd.agilebiofoundry.org'

session = login(edd_server=edd_server, user='pstjohn')
df = export_study(session, study_slug, edd_server=edd_server)
```

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline

import gzip
from tqdm import tqdm
import numpy as np
import pandas as pd
# Load the study export that was previously saved to disk as a gzipped CSV.
df = pd.read_csv(
    filepath_or_buffer='pputida_wt_cj522_gb032_gb045_gb062.csv.gz',
)

In [2]:
# List the distinct measurement protocols present in the study export.
df['Protocol'].unique()

array(['PNNL Global Proteomics', 'Targeted Proteomics',
       'PNNL Global Metabolomics (extracellular)',
       'PNNL Global Metabolomics (intracellular)'], dtype=object)

## Convert EDD identifiers to BIGG identifiers (proteins and metabolites)

In [3]:
# UniProt matches for the P. putida KT2440 proteins (provided by Jeremy Z.),
# stored as a tab-separated table.
protein_mapping = pd.read_csv(
    'P_putida_KT2440_uniprot_protein_refs.tsv',
    delimiter='\t',
)
protein_mapping.head()

Unnamed: 0,FrameId,Locus,GeneSymbol,ProductName,GenbankAccession.version,Entrez,UniprotAccession,UniprotName
0,G18UU-22920-MONOMER,PP_5387,cusA,"probable copper efflux transporter, CzcA family",AAN70952.1,24987202,Q88BZ6,Q88BZ6_PSEPK
1,G18UU-22919-MONOMER,PP_5386,cusB,Probable copper RND efflux membrane fusion pro...,AAN70951.1,24987201,Q88BZ7,Q88BZ7_PSEPK
2,G18UU-22905-MONOMER,PP_5374,PP_5374,Choline/carnitine/betaine transporter family p...,AAN70939.1,24987188,Q88C09,Q88C09_PSEPK
3,G18UU-22861-MONOMER,PP_5329,PP_5329,"putative phosphate ABC transporter, periplasmi...",AAN70894.1,24987138,Q88C54,Q88C54_PSEPK
4,G18UU-22860-MONOMER,PP_5328,PP_5328,putative phosphate transport system permease p...,AAN70893.2,1001556072,Q88C55,Q88C55_PSEPK


In [4]:
# Select every proteomics row, then attach locus / gene symbol information by
# matching the EDD 'Formal Type' against the UniProt accession.
is_proteomics = df['Protocol'].str.contains('Proteomics')
uniprot_columns = protein_mapping[['Locus', 'GeneSymbol', 'UniprotAccession']]
df_protein = pd.merge(
    df[is_proteomics], uniprot_columns,
    how='left', left_on='Formal Type', right_on='UniprotAccession')

# For AsbF and AroG enzymes, fill with name
df_protein['Locus'] = df_protein['Locus'].fillna(df_protein['Measurement Type'])
df_protein.head()

Unnamed: 0,Study ID,Study Name,Line ID,Line Name,Line Description,Protocol,Assay ID,Assay Name,Formal Type,Measurement Type,Compartment,Units,Value,Hours,Locus,GeneSymbol,UniprotAccession
0,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8163,WT-glu-R3,,PNNL Global Proteomics,8492,WT-glu-R3,Q88EQ1,,0,intensity,30.08957,24.0,PP_4402,bkdAB,Q88EQ1
1,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8163,WT-glu-R3,,PNNL Global Proteomics,8492,WT-glu-R3,Q88EQ2,2-oxoisovalerate dehydrogenase subunit alpha,0,intensity,28.55962,24.0,PP_4401,bkdAA,Q88EQ2
2,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8163,WT-glu-R3,,PNNL Global Proteomics,8492,WT-glu-R3,Q88EQ6,Flagellar brake protein YcgR,0,intensity,33.36829,24.0,PP_4397,ycgR,Q88EQ6
3,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8163,WT-glu-R3,,PNNL Global Proteomics,8492,WT-glu-R3,Q88EQ7,,0,intensity,31.72792,24.0,PP_4396,PP_4396,Q88EQ7
4,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8163,WT-glu-R3,,PNNL Global Proteomics,8492,WT-glu-R3,Q88EQ8,,0,intensity,29.86861,24.0,PP_4395,flgM,Q88EQ8


In [5]:
# Some of these may be missing the correct annotation:
# show every locus label that does not follow the 'PP_' naming scheme.
has_pp_locus = df_protein['Locus'].str.startswith('PP_')
df_protein.loc[~has_pp_locus, 'Locus'].unique()

array(['AroG-D146N', '3-hydroxyisobutyrate dehydrogenase', 'Q88GS0_PSEPK',
       'Q88QD9_PSEPK', 'AsbF', 'B3ZVR2_BACCE', 'Q88EL2_PSEPK',
       'Q88GR0_PSEPK', 'Q88HX9_PSEPK', 'Q88KP7_PSEPK'], dtype=object)

In [6]:
# These mappings were made with the help of the chemical translation service,
# https://cts.fiehnlab.ucdavis.edu/, as well as a few manual matches

metabolite_mapping = pd.read_csv('cid_to_bigg_matches.csv')
# Prefix the numeric identifiers with 'cid:' so they can be merged against
# the EDD 'Formal Type' column.
metabolite_mapping['cid'] = 'cid:' + metabolite_mapping['cid'].astype(str)
metabolite_mapping.head()

Unnamed: 0,cid,db,identifier,metabolite
0,cid:3035456,KEGG,C06473,2dhglcn_c
1,cid:72,KEGG,C00230,34dhbz_c
2,cid:22639876,KEGG,C01353,hco3_c
3,cid:5280518,KEGG,C02480,ccmuac_c
4,cid:10690,KEGG,C00257,glcn_c


In [7]:
# Keep the metabolomics rows and attach the BiGG metabolite identifier; the
# inner join discards measurements that have no entry in the mapping table.
is_metabolomics = df['Protocol'].str.contains('Metabolomics')
df_metabolite = pd.merge(
    df[is_metabolomics], metabolite_mapping,
    how='inner', left_on='Formal Type', right_on='cid')
df_metabolite.head()

Unnamed: 0,Study ID,Study Name,Line ID,Line Name,Line Description,Protocol,Assay ID,Assay Name,Formal Type,Measurement Type,Compartment,Units,Value,Hours,cid,db,identifier,metabolite
0,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8118,WT-gfg-R3,,PNNL Global Metabolomics (extracellular),9393,WT-gfg-R3,cid:3035456,"(3S,4R,5R)-3,4,5,6-tetrahydroxy-2-keto-hexanoi...",2,,3753492.0,24.0,cid:3035456,KEGG,C06473,2dhglcn_c
1,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8131,WT-gg-R1,,PNNL Global Metabolomics (extracellular),9394,WT-gg-R1,cid:3035456,"(3S,4R,5R)-3,4,5,6-tetrahydroxy-2-keto-hexanoi...",2,,5677345.0,24.0,cid:3035456,KEGG,C06473,2dhglcn_c
2,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8132,WT-gg-R2,,PNNL Global Metabolomics (extracellular),9395,WT-gg-R2,cid:3035456,"(3S,4R,5R)-3,4,5,6-tetrahydroxy-2-keto-hexanoi...",2,,3880688.0,24.0,cid:3035456,KEGG,C06473,2dhglcn_c
3,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8133,WT-gg-R3,,PNNL Global Metabolomics (extracellular),9396,WT-gg-R3,cid:3035456,"(3S,4R,5R)-3,4,5,6-tetrahydroxy-2-keto-hexanoi...",2,,4136200.0,24.0,cid:3035456,KEGG,C06473,2dhglcn_c
4,7882,P.putida_WT_CJ522_GB032_GB045_GB062,8146,WT-glc-R1,,PNNL Global Metabolomics (extracellular),9397,WT-glc-R1,cid:3035456,"(3S,4R,5R)-3,4,5,6-tetrahydroxy-2-keto-hexanoi...",2,,2187088.0,24.0,cid:3035456,KEGG,C06473,2dhglcn_c


In [8]:
# Boolean masks selecting the rows of each metabolomics protocol
is_intracellular = df_metabolite.Protocol == 'PNNL Global Metabolomics (intracellular)'
is_extracellular = df_metabolite.Protocol == 'PNNL Global Metabolomics (extracellular)'

# Correct a few metabolites that are only present in the periplasm
df_metabolite.loc[is_intracellular, 'metabolite'] = \
    df_metabolite.loc[is_intracellular, 'metabolite'].replace({
    '2dhglcn_c': '2dhglcn_p',
    'hdca_c': 'hdca_p',
    'ocdca_c': 'ocdca_p'})

# Correct the compartment for extracellular omics: rewrite a trailing '_c' or
# '_p' suffix to '_e'. `regex=True` is passed explicitly because the default
# of `Series.str.replace` changed to a literal match in pandas 2.0, which
# would silently leave every identifier unchanged.
df_metabolite.loc[is_extracellular, 'metabolite'] = \
    df_metabolite.loc[is_extracellular, 'metabolite'].str.replace(
        r'_[cp]$', '_e', regex=True)

# Split internal and external metabolomics
df_internal = df_metabolite[is_intracellular]
df_external = df_metabolite[is_extracellular]

## Load (reduced) metabolic model
* details on model reduction in `model_modifications.ipynb` and `model_compression.ipynb`

In [10]:
import cobra.io

# Load the reduced metabolic model from its JSON export.
model = cobra.io.load_json_model('cobrapy_models/reduced_iJN1463.json')
# Reference fluxes; the first CSV column is used as the row index.
reference_flux = pd.read_csv('reference_fluxes_GB032gfg.csv', index_col=0)

import pickle
# `comp_data` is read later in this notebook through the keys 'N', 'v_star',
# 'rxn_labels', 'met_labels' and 'matches'.
# NOTE(review): pickle.load can execute arbitrary code; only load this file if
# it comes from a trusted source.
with open('compression_3.p', 'rb') as f:
    comp_data = pickle.load(f)

In [11]:
# Disabled cell, kept for reference: builds a `comp_data` dict directly from
# `model` and `reference_flux` instead of loading it from 'compression_3.p'.
# Note that this dict has no 'matches' key, which a later cell reads from
# `comp_data`.

# N = cobra.util.create_stoichiometric_matrix(model)
# v_star = reference_flux.values.flatten()

# N[:, v_star < 0] = -1 * N[:, v_star < 0]
# v_star = np.abs(v_star)

# rxn_labels = pd.Series([r.id for r in model.reactions])
# met_labels = pd.Series([m.id for m in model.metabolites])

# comp_data = {
#     'N': N,
#     'v_star': v_star,
#     'rxn_labels': rxn_labels,
#     'met_labels': met_labels
# }

## Load boundary fluxes calculated from spent media
I don't believe this data made it into EDD; it was sent to me by Gayle and processed in a separate file. Essentially, I used the time and OD at collection to fit an exponential growth curve, and then calculated averaged specific uptake and secretion rates in mmol/gDCW*hr.

In [12]:
boundary_data = pd.read_csv('boundary_flux_from_external_measurements.csv')
# Split '<strain>.<media>.<replicate>' sample names into three new columns.
# The pattern is a raw string so that `\S`, `\.` and `\d` reach the regex
# engine without triggering Python's invalid-escape-sequence warning.
sample_fields = boundary_data['Sample Name'].str.extract(
    r'^(?P<strain>\S+)\.(?P<media>\S+)\.(?P<replicate>\d+)')
boundary_data = boundary_data.join(sample_fields)
# boundary_data.head()

## Normalize data and reference to model

In [13]:
# Leave out the KT2440 samples, then average the replicates of every
# (strain, media) condition.
boundary_data = boundary_data.loc[boundary_data['strain'] != 'KT2440']
measured_rates = [
    'growth_rate', 'glucose_uptake', 'fructose_uptake', 'gluconate_uptake',
    '2-ketogluconate_production', 'muconate_production']
boundary_fluxes = boundary_data.groupby(['strain', 'media'])[measured_rates].mean()

# Express every condition relative to the GB032 / gfg reference condition.
reference_condition = boundary_fluxes.loc[('GB032', 'gfg')]
normalized_fluxes = boundary_fluxes.divide(reference_condition)
# Rename the measured rates to the corresponding model reaction IDs.
normalized_fluxes.columns = [
    'BIOMASS_KT2440_WT3', 'EX_glc__D_e', 'EX_fru_e',
    'EX_glcn_e', 'EX_2dhglcn_e', 'EX_ccmuac_e']

# Every measured flux must map onto a reaction of the compressed model.
assert normalized_fluxes.columns.isin(comp_data['rxn_labels']).all()
normalized_fluxes.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,BIOMASS_KT2440_WT3,EX_glc__D_e,EX_fru_e,EX_glcn_e,EX_2dhglcn_e,EX_ccmuac_e
strain,media,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CJ522,f,0.487098,0.0,1.975299,0.0,0.0,0.881701
CJ522,fg,0.798485,7.932922,2.2269,0.0,0.0,0.960507
CJ522,gfg,1.077386,0.448648,1.377914,1.10357,1.186095,1.340728
CJ522,gg,1.364378,4.684939,0.0,2.20205,1.924476,1.720941
CJ522,glc,1.205576,0.0,0.0,3.913859,3.516521,2.177055


In [14]:
# Lookup table keyed by reaction ID; its single column holds the index of
# comp_data['rxn_labels'] (presumably positional -- TODO confirm).
rxn_indexer = comp_data['rxn_labels'].reset_index().set_index(0)
vn = normalized_fluxes
# One index per measured boundary flux, in the column order of `vn`.
v_inds = np.asarray([rxn_indexer.loc[rxn_id] for rxn_id in vn.columns]).ravel()

#### Convert extracellular concentrations into relative fluxes for other species

In [15]:
# Split '<strain>-<media>-R<n>' assay names into columns (raw string so the
# regex escapes are not treated as invalid Python string escapes).
external_fields = df_external['Assay Name'].str.extract(
    r'^(?P<strain>\S+)-(?P<media>\S+)-(?P<replicate>R\d+)')

# Average the replicates per (strain, media) and metabolite; drop the WT strain.
df_external_mean = df_external.join(external_fields).pivot_table(
    values='Value', index=['strain', 'media'], columns='metabolite').drop('WT')

# Express every condition relative to the GB032 / gfg reference condition.
normalized_external = df_external_mean.divide(df_external_mean.loc[('GB032', 'gfg')])
model_mets = {m.id for m in model.metabolites}
normalized_external = normalized_external.loc[:, normalized_external.columns.isin(model_mets)]
# Remove the species listed here, then any column with missing values.
# `axis=1` is passed by keyword: the positional form was removed in pandas 2.0.
normalized_external = normalized_external.drop(
    ['2dhglcn_e', 'ccmuac_e', 'glcn_e', 'glc__D_e', 'fru_e'], axis=1).dropna(axis=1)
normalized_external.columns = 'EX_' + normalized_external.columns

# Every remaining species must have an exchange reaction in the compressed model.
assert normalized_external.columns.isin(comp_data['rxn_labels']).all()
normalized_external.head()

Unnamed: 0_level_0,metabolite,EX_34dhbz_e,EX_glyc_e,EX_glyclt_e,EX_hdca_e,EX_lac__D_e,EX_ocdca_e,EX_pi_e,EX_pyr_e,EX_s7p_e,EX_succ_e
strain,media,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
CJ522,f,0.095441,0.90463,1.008456,0.93955,1.200807,0.850966,0.856285,1.040755,1.979376,1.171362
CJ522,fg,0.473364,0.850765,1.095477,0.907586,2.173134,0.803833,0.863932,0.831342,0.471541,0.888036
CJ522,gfg,0.723307,0.830809,1.1468,0.874794,1.167569,0.831242,0.746716,0.740932,3.002336,1.468446
CJ522,gg,1.012883,0.85951,0.899475,0.809321,1.019545,0.876662,1.078371,0.985469,0.26233,0.878468
CJ522,glc,0.554108,0.742228,0.603,0.807423,1.131504,0.818964,0.978379,0.904496,0.034789,0.779047


### Get reaction IDs associated with boundary fluxes

In [16]:
# Lookup table keyed by reaction ID (rebuilt here so the cell is self-contained).
rxn_indexer = comp_data['rxn_labels'].reset_index().set_index(0)
# Zeros are replaced by 1E-6 so that the logarithm stays finite.
nonzero_external = normalized_external.replace(0, 1E-6)
vn_rel = np.log(nonzero_external)
v_rel_inds = np.asarray(
    [rxn_indexer.loc[rxn_id] for rxn_id in vn_rel.columns]).ravel()

### Convert intracellular concentrations into relative metabolite concentrations

In [17]:
# Split '<strain>-<media>-R<n>' assay names into columns (raw string so the
# regex escapes are not treated as invalid Python string escapes).
internal_fields = df_internal['Assay Name'].str.extract(
    r'^(?P<strain>\S+)-(?P<media>\S+)-(?P<replicate>R\d+)')

# Average the replicates per (strain, media) and metabolite; drop the WT strain.
df_internal_mean = df_internal.join(internal_fields).pivot_table(
    values='Value', index=['strain', 'media'], columns='metabolite').drop('WT')

df_internal_mean += 1E-6  # Ensure nothing is zero

# Express every condition relative to the GB032 / gfg reference condition and
# keep only the metabolites that exist in the model.
normalized_internal = df_internal_mean.divide(df_internal_mean.loc[('GB032', 'gfg')])
model_mets = {m.id for m in model.metabolites}
normalized_internal = normalized_internal.loc[:, normalized_internal.columns.isin(model_mets)]

In [18]:
# Lookup table keyed by metabolite ID; its single column holds the index of
# comp_data['met_labels'] (presumably positional -- TODO confirm).
met_indexer = comp_data['met_labels'].reset_index().set_index(0)
# Log of the relative intracellular concentrations.
xn = np.log(normalized_internal)
x_inds = np.asarray([met_indexer.loc[met_id] for met_id in xn.columns]).ravel()

# pd.Series(xn.columns).to_csv('temp_data/measured_mets.csv', index=False)
print(xn.shape)

(24, 53)


## Use media measurements to establish y values

In [19]:
media_data = pd.read_csv('media.csv')  # Use quantification of the initial media

# Keep the three sugar concentrations; zeros become 0.1 so the log below is finite.
sugar_columns = ['Glucose (mM)', 'Fructose (mM)', 'Gluconic acid (mM)']
media = media_data.set_index('Media')[sugar_columns].replace(0, .1)

# One row per experiment, looked up by the media label (second index level of `xn`).
media_per_experiment = [media_label for _, media_label in xn.index]
y = media.reindex(index=media_per_experiment)
yn = np.log(y)
yn.head()

Unnamed: 0_level_0,Glucose (mM),Fructose (mM),Gluconic acid (mM)
Media,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
f,-2.302585,3.388712,-2.302585
fg,2.999362,2.808448,-2.302585
gfg,2.502261,2.285358,2.111232
gg,2.983843,-2.302585,2.586933
glc,-2.302585,-2.302585,3.305744


### Convert protein concentrations into relative enzyme expression

In [20]:
def iter_locus_matches():
    """Yield one record per (measured locus, model reaction) association.

    Walks the unique values of `df_protein.Locus`, looks each one up as a gene
    ID in `model`, and yields a `pd.Series` with the keys 'Locus' and
    'Reaction' for every reaction attached to that gene. Loci for which the
    gene lookup raises `KeyError` are skipped.
    """
    for locus_id in tqdm(df_protein.Locus.unique()):
        try:
            matched_gene = model.genes.get_by_id(locus_id)
        except KeyError:
            # The lookup failed for this locus; nothing to report for it.
            continue
        else:
            yield from (
                pd.Series({'Locus': locus_id, 'Reaction': rxn.id})
                for rxn in matched_gene.reactions)

# Table of (Locus, Reaction) pairs for every measured locus found in the model.
bigg_ids = pd.DataFrame(iter_locus_matches()).astype(str)
# Split '<strain>-<media>-R<n>' assay names into columns (raw string so the
# regex escapes are not treated as invalid Python string escapes).
df_protein_bigg = df_protein.join(df_protein['Assay Name'].str.extract(
    r'^(?P<strain>\S+)-(?P<media>\S+)-(?P<replicate>R\d+)'))

# The existing data is log-transformed, undo the log before calculating mean statistics
# NOTE(review): np.exp assumes a natural-log transform; if the reported
# intensities are log2 this should be 2 ** Value -- confirm with the data source.
df_protein_bigg['Value'] = np.exp(df_protein_bigg['Value'])

# Average the replicates and express each locus relative to GB032 / gfg.
locus_means = df_protein_bigg.groupby(['strain', 'media', 'Locus']).Value.mean()
normalized_locus = locus_means.divide(locus_means.loc['GB032', 'gfg'])

# Attach model reactions to each locus, then map them onto the compressed
# reaction IDs through comp_data['matches'] ('original' -> 'compressed').
norm_with_compressed_rxns = normalized_locus.reset_index().merge(
    bigg_ids, on='Locus', how='left').dropna(subset=['Reaction']).merge(
    comp_data['matches'], how='left', left_on='Reaction', right_on='original')

# Average only the numeric 'Value' column: the frame also carries string
# columns (Locus, Reaction, original), and DataFrameGroupBy.mean() raises a
# TypeError on those in pandas >= 2.0 instead of silently dropping them.
compressed_means = norm_with_compressed_rxns.groupby(
    ['strain', 'media', 'compressed'])['Value'].mean()

# Log relative expression per compressed reaction: drop WT, drop reactions with
# any missing condition, and clip to [-3, 3].
en = np.log(compressed_means).reset_index().pivot_table(
    values='Value', index=['strain', 'media'], columns='compressed'
    ).drop('WT').dropna(axis=1).clip(lower=-3, upper=3)

e_inds = np.array([rxn_indexer.loc[r] for r in en.columns]).flatten()

en.head()

100%|██████████| 3098/3098 [00:00<00:00, 19128.81it/s]


Unnamed: 0_level_0,compressed,2AACLPPEAT160,2AACLPPEAT180,3HAD100,3HAD121,3OAR100,3OAR141,ACCOAC,ACGS,ACODA,ACONTa,...,UAGDP,UAGPT3,UDCPDPS,UDCPPtppi,UMPK,UPP3MT,UPP3S,UREA,URIDK2r,ZNabcpp
strain,media,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CJ522,f,0.300903,0.300903,0.691456,0.647082,0.246715,0.407889,0.517677,0.445891,0.147172,0.272017,...,0.019044,1.582952,-0.349822,0.766622,-0.058837,-0.000217,0.150541,0.607042,0.03412,0.548361
CJ522,fg,-0.046415,-0.046415,0.042884,0.009719,-0.028247,-0.014911,0.205839,0.041048,-0.131777,0.213284,...,-0.139198,0.450731,0.465293,0.433926,-0.1225,-0.12184,0.088719,0.440639,0.097535,0.903112
CJ522,gfg,0.15078,0.15078,-0.022788,-0.005126,0.003637,0.099089,0.22776,0.312177,0.091987,0.13453,...,-0.067595,0.282518,0.148239,0.475487,-0.13195,0.037108,0.304774,0.477122,-0.069545,-1.170227
CJ522,gg,0.021503,0.021503,-0.067975,0.02181,0.225189,0.232341,0.258085,-0.468058,-0.001391,0.04898,...,-0.245149,-0.051038,0.67793,0.039952,-0.130533,-0.232545,0.012108,0.008758,0.055309,-1.574517
CJ522,glc,0.10061,0.10061,0.143934,0.182155,0.318642,0.325343,0.532416,0.315826,0.1821,-0.050325,...,-0.034645,-0.12321,0.676106,0.563237,-0.001695,0.059262,0.161414,0.3925,0.194533,1.646215


In [21]:
# some enzymes are unmeasured but can vary, others we want to pin at zero
e_laplace_inds = []
e_zero_inds = []

# `Series.items()` replaces `iteritems()`, which was removed in pandas 2.0.
for i, rxnid in comp_data['rxn_labels'].items():
    rxn = model.reactions.get_by_id(rxnid)
    if rxnid not in en.columns:
        # Unmeasured reactions that touch compartment 'e' are pinned at zero;
        # all other unmeasured reactions are left free to vary.
        if 'e' not in rxn.compartments:
            e_laplace_inds.append(i)
        else:
            e_zero_inds.append(i)

# Explicit integer dtype so that an empty group still yields an index array.
e_laplace_inds = np.array(e_laplace_inds, dtype=int)
e_zero_inds = np.array(e_zero_inds, dtype=int)
# Permutation that sorts the concatenated [measured, laplace, zero] indices;
# it is applied to columns concatenated in that same order
# (presumably restoring the comp_data['rxn_labels'] order -- TODO confirm).
e_indexer = np.hstack([e_inds, e_laplace_inds, e_zero_inds]).argsort()

## Construct the probability model

In [22]:
import pymc3 as pm

import theano
import theano.tensor as T
from theano import sparse

import emll
from emll.util import initialize_elasticity

In [23]:
# One row per reaction, one column per external species. Each of the three
# exchange reactions below gets a single unit entry in its own column
# (glucose, fructose, gluconate -- the same order as the columns of `yn`).
Ey = np.zeros((comp_data['N'].shape[1], 3))
Ey[rxn_indexer.loc['EX_glc__D_e'], 0] = 1
Ey[rxn_indexer.loc['EX_fru_e'], 1] = 1
Ey[rxn_indexer.loc['EX_glcn_e'], 2] = 1

m_labels = comp_data['met_labels'].values
r_labels = comp_data['rxn_labels'].values

# LaTeX label for every (reaction, metabolite) pair, flattened row by row.
# The '$\epsilon_{' fragment is a raw string so that '\e' is not parsed as an
# (invalid) Python escape sequence.
ex_labels = np.array([[r'$\epsilon_{' + '{0},{1}'.format(rlabel, mlabel) + '}$'
                       for mlabel in m_labels] for rlabel in r_labels]).flatten()

# Model objects in the same order as the compressed labels (one-shot generators).
reduced_rxns = (model.reactions.get_by_id(rxn) for rxn in comp_data['rxn_labels'])
reduced_mets = (model.metabolites.get_by_id(met) for met in comp_data['met_labels'])

# A reaction confined to one compartment gets that compartment's code;
# a reaction spanning several compartments is labelled 't'.
r_compartments = [
    list(r.compartments)[0] if len(r.compartments) == 1 else 't'
    for r in reduced_rxns
]

m_compartments = [
    m.compartment for m in reduced_mets
]

In [24]:
# Pull the two arrays stored under 'N' and 'v_star' out of the compression data.
N, v_star = comp_data['N'], comp_data['v_star']

In [25]:
# Lin-log model object built from N, its negated transpose, Ey and v_star.
ll = emll.LinLogLeastNorm(N, -N.T, Ey, v_star)

# Number of experimental conditions: one per row of `xn`.
n_exp = len(xn)
n_exp

24

In [26]:
# (boundary_fluxes.max() - boundary_fluxes.min())

In [27]:
# Standard deviation for each of the six measured boundary fluxes, in the
# column order of `vn`; the first and last entries are tighter (0.01) than
# the four in between (0.05).
flux_std = np.array([0.01] + [0.05] * 4 + [0.01])

In [28]:
# Build the pymc3 model: elasticity and enzyme-level random variables are
# pushed through the lin-log steady state (`ll`), and the resulting
# metabolite levels and fluxes are compared against the measurements.
with pm.Model() as pymc_model:
    
    # Initialize elasticities
    # (metabolite elasticities; compartment lists are passed to emll's helper)
    Ex_t = pm.Deterministic(
        'Ex', initialize_elasticity(N, 'ex', b=0.05, sd=1, alpha=None,
                                    m_compartments=m_compartments,
                                    r_compartments=r_compartments))
                                                        
    # External-species elasticities, initialized from -Ey.T
    Ey_t = pm.Deterministic('Ey', initialize_elasticity(-Ey.T, 'ey', b=0.05, sd=1, alpha=None))

    # Log media concentrations, supplied as a constant tensor
    yn_t = T.as_tensor_variable(yn.values)

    # Measured enzymes: free variables centered on the measured log relative
    # expression `en` (this is a prior, not an observed node)
    e_measured = pm.Normal('log_e_measured', mu=en.values, sd=0.2,
                           shape=(n_exp, len(e_inds)))
    
    # Unmeasured enzymes that may vary: Laplace prior centered at zero
    e_unmeasured = pm.Laplace('log_e_unmeasured', mu=0, b=0.1,
                              shape=(n_exp, len(e_laplace_inds)))
    # Concatenate [measured, unmeasured, pinned-at-zero] columns and reorder
    # them with `e_indexer`
    log_en_t = T.concatenate(
        [e_measured, e_unmeasured,
         T.zeros((n_exp, len(e_zero_inds)))], axis=1)[:, e_indexer]

    pm.Deterministic('log_en_t', log_en_t)
    
    # Steady-state outputs of the lin-log model, tracked as deterministics
    chi_ss, vn_ss = ll.steady_state_theano(Ex_t, Ey_t, T.exp(log_en_t), yn_t)
    pm.Deterministic('chi_ss', chi_ss)
    pm.Deterministic('vn_ss', vn_ss)
        
    # Clip predictions to [-3, 3]; the observed values below are clipped to
    # the same range. Fluxes are clipped to [1E-5, 1E5] before taking the log.
    chi_clip = T.clip(chi_ss[:, x_inds], -3, 3)
    vn_rel_clip = T.clip(T.log(T.clip(vn_ss[:, v_rel_inds], 1E-5, 1E5)), -3, 3)
    # NOTE(review): en_clip is only referenced by the commented-out `en_obs` below
    en_clip = T.clip(log_en_t[:, e_inds], -3, 3)


    # Likelihood terms: intracellular metabolites, measured boundary fluxes,
    # and the log relative values derived from extracellular metabolomics
    chi_obs = pm.Normal('chi_obs', mu=chi_clip, sd=0.2,
                        observed=xn.clip(lower=-3, upper=3).values)
    vn_obs = pm.Normal('vn_obs', mu=vn_ss[:, v_inds], sd=flux_std,
                       observed=vn.values)
    vn_rel_obs = pm.Normal('vn_rel_obs', mu=vn_rel_clip, sd=.1,
                           observed=vn_rel.clip(lower=-3, upper=3).values)    
#    en_obs = pm.Normal('en_obs', mu=en_clip, sd=0.5,
#                           observed=en.clip(lower=-3, upper=3).values)                              
    
# Log-probability of the model evaluated at its test point
print(pymc_model.logpt.tag.test_value)

-404655.02998282714


In [29]:
# Serialize the pymc3 model object to a gzip-compressed pickle.
with gzip.open('model_large.pz', 'wb') as model_file:
    pickle.dump(pymc_model, model_file)

In [30]:
# Save the data arrays and index vectors that accompany the pickled model.
# 'x_inds' stores the metabolite indices `x_inds`; it previously stored the
# flux indices `v_inds` by mistake, so any consumer reading 'x_inds' would
# have indexed metabolites with reaction positions.
model_data = {
    'vn': vn,
    'en': en,
    'yn': yn,
    'xn': xn,
    'x_inds': x_inds,
    'e_inds': e_inds,
    'v_inds': v_inds,
}
with gzip.open('model_data_large.pz', 'wb') as f:
    pickle.dump(model_data, f)