In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from scipy.integrate import odeint

In [23]:
def rhs(y, t, ks, P0):
    """Right-hand side of the two-species reversible Michaelis-Menten test ODE.

    Parameters
    ----------
    y : sequence of two floats
        Current state ``(pald, pCoa)``.
    t : float
        Time; unused, present because ``odeint`` passes it.
    ks : sequence of four floats
        ``(kcatf_P, Kmf_P, kcatr_P, Kmr_P)``: forward and reverse kinetic constants.
    P0 : float
        Enzyme amount used as the ``E0`` factor in both rate laws.

    Returns
    -------
    list of two floats
        ``[d(pald)/dt, d(pCoa)/dt]``; the two entries always sum to zero.
    """
    def mm_rate(substrate, kcat, Km, enzyme):
        # Michaelis-Menten rate law: kcat * E0 * s / (Km + s)
        return (kcat*enzyme*substrate)/(Km + substrate)

    pald, pCoa = y
    kcatf_P, Kmf_P, kcatr_P, Kmr_P = ks

    forward = mm_rate(pald, kcatf_P, Kmf_P, P0)   # pald -> pCoa
    backward = mm_rate(pCoa, kcatr_P, Kmr_P, P0)  # pCoa -> pald

    d_pald = -forward + backward
    d_pCoa = -backward + forward
    return [d_pald, d_pCoa]

def solveTest(tSolve, IC, ks):
    """Integrate ``rhs`` for one initial condition and append enzyme columns.

    Parameters
    ----------
    tSolve : array-like
        Time points handed to ``odeint``.
    IC : tuple
        ``(y0, E0s)``; ``y0`` is the initial ``[pald, pCoa]`` state and
        ``E0s[0]`` is the enzyme amount ``P0``.
    ks : sequence of four floats
        ``(kcatf_P, Kmf_P, kcatr_P, Kmr_P)``.

    Returns
    -------
    numpy.ndarray
        One row per time point with columns
        ``(pald, P0 - bound, bound, pCoa)``, where ``bound`` is the sum of
        the two saturation terms ``P0*s/(Km + s)`` for pald and pCoa.
    """
    y0, E0s = IC
    P0 = E0s[0]
    _, Kmf_P, _, Kmr_P = ks  # only the Km values are needed after integration

    trajectory = odeint(rhs, y0, tSolve, args=(ks, P0))
    pald = trajectory[:, 0]
    pCoa = trajectory[:, 1]

    comP_sol = P0 * pald / (Kmf_P + pald) + P0 * pCoa / (Kmr_P + pCoa)
    P_sol = P0 - comP_sol

    return np.column_stack((pald, P_sol, comP_sol, pCoa))

In [24]:
# Define all ICs.
# Every IC is (y0, E0s): y0 = [pald, pCoa] start values, E0s = [enzyme amount].
# IC1-IC5 start with only the first species present, IC6-IC10 with only the second.

ICs = [([level, 0.], [0.694]) for level in (0.5, 1., 1.5, 5., 10.)]
ICs += [([0., level], [0.694]) for level in (0.5, 1., 1.5, 5., 10.)]
IC1, IC2, IC3, IC4, IC5, IC6, IC7, IC8, IC9, IC10 = ICs

# Kinetic constants in the order (kcatf_P, Kmf_P, kcatr_P, Kmr_P)
ksAll = [55., 15., 6., 95.]

# Shared time grid, then one solution array per initial condition
tSolve = np.linspace(0, 0.6, num=5000)
outs = [solveTest(tSolve, ic, ksAll) for ic in ICs]

In [25]:
from IPython.display import display, clear_output

# Substrate view: columns 0 and 3 of each solution are pald and pCoa
# (see the column order returned by solveTest).
data = {f'IC{k}': out[:, [0, 3]] for k, out in enumerate(outs, start=1)}

# Titles are derived from the initial conditions themselves so they always
# describe what was actually simulated (y0 = [pald, pCoa]).
titles = {
    f'IC{k}': f'{y0[0]:g} mM propionaldehyde, {y0[1]:g} mM propionyl-CoA'
    for k, (y0, _) in enumerate(ICs, start=1)
}

def plotThisIC(dat, title):
    """Draw the two substrate time courses of one initial condition.

    Plots onto the current pyplot axes against the global ``tSolve`` grid.

    Parameters
    ----------
    dat : array-like
        Two series to plot, labelled propionaldehyde and propionyl-CoA
        (the entries of ``data`` are two-column slices of a solution).
    title : str
        Title placed above the axes.
    """
    ax = plt.gca()
    ax.plot(tSolve, dat, lw=2, label=['propionaldehyde', 'propionyl-CoA'])
    ax.set_xlabel('time (s)', fontsize=12)
    ax.set_ylabel('concentration (mM)', fontsize=12)
    ax.set_title(title)
    ax.legend(fontsize=12)


# Dropdown menu setup: one option per key of `data`, initially showing IC1
dropdown = widgets.Dropdown(description='IC:', value='IC1', options=list(data))

def on_change(change, _dropdown=dropdown, _data=data, _titles=titles, _plot=plotThisIC):
    """Redraw the plot when the dropdown selection changes.

    The widget, data, titles and plotting function are bound as default
    arguments when this function is defined. A later cell rebinds the
    module-level names ``dropdown``, ``data``, ``titles`` and ``plotThisIC``;
    without this binding the handler would look them up at call time and
    this dropdown would redraw the other cell's content.

    Parameters
    ----------
    change : dict
        Trait-change notification passed by ``observe``; the keys ``name``,
        ``old`` and ``new`` are read.
    """
    if change['name'] == 'value' and (change['new'] != change['old']):
        clear_output()
        display(_dropdown)
        _plot(_data[change['new']], _titles[change['new']])

# Display the dropdown menu and the plot
display(dropdown)
plotThisIC(data['IC1'], titles['IC1'])
# Only subscribe to the selection itself instead of every trait of the widget
dropdown.observe(on_change, names='value')


In [5]:
from IPython.display import display, clear_output

# Enzyme view: columns 1 and 2 of each solution are the unbound and bound
# enzyme (see the column order returned by solveTest).
data = {f'IC{k}': out[:, [1, 2]] for k, out in enumerate(outs, start=1)}

# Titles are derived from the initial conditions themselves so they always
# describe what was actually simulated (y0 = [pald, pCoa]).
titles = {
    f'IC{k}': f'{y0[0]:g} mM propionaldehyde, {y0[1]:g} mM propionyl-CoA'
    for k, (y0, _) in enumerate(ICs, start=1)
}

def plotThisIC(dat, title):
    """Draw the unbound and bound enzyme time courses of one initial condition.

    Plots onto the current pyplot axes against the global ``tSolve`` grid.

    Parameters
    ----------
    dat : array-like
        Two series to plot, labelled unbound PduP and bound PduP
        (the entries of ``data`` are two-column slices of a solution).
    title : str
        Title placed above the axes.
    """
    ax = plt.gca()
    ax.plot(tSolve, dat, lw=2, label=['unbound PduP', 'bound PduP'])
    ax.set_xlabel('time (s)', fontsize=12)
    ax.set_ylabel('concentration (mM)', fontsize=12)
    ax.set_title(title)
    ax.legend(fontsize=12)


# Dropdown menu setup: one option per key of `data`, initially showing IC1
dropdown = widgets.Dropdown(description='IC:', value='IC1', options=list(data))

def on_change(change, _dropdown=dropdown, _data=data, _titles=titles, _plot=plotThisIC):
    """Redraw the plot when the dropdown selection changes.

    The widget, data, titles and plotting function are bound as default
    arguments when this function is defined, so the handler keeps using
    this cell's objects even if another cell rebinds the module-level names
    ``dropdown``, ``data``, ``titles`` or ``plotThisIC``.

    Parameters
    ----------
    change : dict
        Trait-change notification passed by ``observe``; the keys ``name``,
        ``old`` and ``new`` are read.
    """
    if change['name'] == 'value' and (change['new'] != change['old']):
        clear_output()
        display(_dropdown)
        _plot(_data[change['new']], _titles[change['new']])

# Display the dropdown menu and the plot
display(dropdown)
plotThisIC(data['IC1'], titles['IC1'])
# Only subscribe to the selection itself instead of every trait of the widget
dropdown.observe(on_change, names='value')


In [6]:
tol = 0.0001

# If == 1 then enz is correct: unbound + bound enzyme (columns 1 and 2) must
# equal the enzyme amount of that initial condition at every time point.
# The deviation is compared in absolute value so that a shortfall is caught
# as well as an excess.
isP = int(all(
    np.all(np.abs(out[:, 1] + out[:, 2] - E0s[0]) <= tol)
    for out, (_, E0s) in zip(outs, ICs)
))

print(isP)

In [7]:
# Row-wise total over all four columns must stay at its initial value.
# Prints 1 per initial condition when it does (within `tol`), 0 otherwise.
# The drift is compared in absolute value so that a loss is caught as well
# as a gain.
for i in range(len(outs)):
    tot = np.sum(outs[i], axis=1)
    print(int(np.all(np.abs(tot - tot[0]) <= tol)))

In [8]:
def get_block_symbol(feature_list):
    """Return the feature names wrapped in square brackets.

    Parameters
    ----------
    feature_list : iterable
        Names to wrap; each item is formatted with ``str.format`` rules.

    Returns
    -------
    list of str
        ``"[name]"`` for every item, in input order.
    """
    bracketed = []
    for feat in feature_list:
        bracketed.append(f"[{feat}]")
    return bracketed

# One DataFrame per initial condition: the four solution columns followed by
# the time grid, with every column name wrapped in brackets.
data_matrix_df_list = [
    pd.DataFrame(
        np.column_stack((outs[i], tSolve)),
        columns=get_block_symbol(['Ald', 'P', 'Pc', 'CoA', 't']),
    )
    for i in range(10)
]

# All trajectories stacked into one frame with a fresh integer index
data_matrix_df = pd.concat(data_matrix_df_list, ignore_index=True)
data_matrix_df

In [9]:
from dae_finder import add_noise_to_df
# Noise level handed to add_noise_to_df (and reused by the smoothing cell).
noise_perc = 0


# Column labels of the per-trajectory frames, remembered before the frames are replaced.
data_matrix_features = data_matrix_df_list[0].columns
# Replace every trajectory frame in data_matrix_df_list with its noisy version.
# NOTE(review): the list is overwritten in place, so re-running this cell feeds
# the previous output back into add_noise_to_df.
for ind, data_matrix_ in enumerate(data_matrix_df_list):
    # Keep the exact time column so it can be restored after the call.
    t_exact = data_matrix_["[t]"]
    noisy_data_df = add_noise_to_df(data_matrix_, noise_perc=noise_perc, random_seed=111)
    noisy_data_df["[t]"] = t_exact
    data_matrix_df_list[ind] = noisy_data_df

In [10]:
from dae_finder import smooth_data

# Run the smoothing function on every trajectory, evaluated on the original time grid
data_matrix_smooth_df_list = [
    smooth_data(data_matrix, domain_var="[t]", derr_order=1,
                noise_perc=noise_perc, eval_points=tSolve)
    for data_matrix in data_matrix_df_list
]

# Stack the smoothed trajectories; a single trajectory is used as is
data_matrix_df_smooth_appended = (
    pd.concat(data_matrix_smooth_df_list, ignore_index=True)
    if len(data_matrix_df_list) > 1
    else data_matrix_smooth_df_list[0]
)

# Keep only the original feature columns and leave the time column out
data_matrix_df_smooth = data_matrix_df_smooth_appended[data_matrix_features]
if "[t]" in data_matrix_df_smooth.columns:
    data_matrix_df_smooth = data_matrix_df_smooth.drop(columns="[t]")

In [11]:
# Show the stacked output of the smoothing step.
data_matrix_df_smooth_appended

In [12]:
# NOTE(review): this rebinds data_matrix_df_smooth to the un-smoothed stacked
# data (minus the time column), replacing the value computed by the smoothing
# cell. Everything downstream therefore works on data_matrix_df. Confirm this
# is intended.
data_matrix_df_smooth = data_matrix_df.drop("[t]", axis=1)

In [13]:
from dae_finder import PolyFeatureMatrix

poly_degree = 3

poly_feature_ob = PolyFeatureMatrix(poly_degree)

# Build the polynomial candidate library and drop its "1" column right away:
# constants are handled through the fit-with-intercept option of the models
# rather than as a library column.
candidate_lib_full = poly_feature_ob.fit_transform(data_matrix_df_smooth).drop(columns=["1"])
candidate_lib_full

In [14]:
#scaling the candidate columns to set std of 1
from sklearn.preprocessing import StandardScaler
s_scaler = StandardScaler(with_mean=False, with_std=True)
scaled_cand_lib = pd.DataFrame(
    s_scaler.fit_transform(candidate_lib_full),
    columns=s_scaler.feature_names_in_,
)
# A constant column, if present, is reset to exactly 1 after scaling
if '1' in scaled_cand_lib.columns:
    scaled_cand_lib['1'] = 1

In [15]:
# Summary statistics of the scaled candidate library.
scaled_cand_lib.describe()

In [16]:
import sympy

from dae_finder import get_refined_lib, remove_paranth_from_feat

# Adding the state variables as sympy symbols.
# Each bracketed column name becomes a sympy symbol that is bound, in this
# notebook's namespace, to the name returned for it by remove_paranth_from_feat.
feat_list = list(data_matrix_df.columns)
_symbol_names = list(remove_paranth_from_feat(data_matrix_df.columns))
feat_list_str = ", ".join(_symbol_names)

# Bind the names explicitly instead of exec-ing a generated assignment, and
# refuse names that could not be used as Python variables.
_invalid_names = [name for name in _symbol_names if not str(name).isidentifier()]
if _invalid_names:
    raise ValueError("feature names are not valid Python identifiers: {}".format(_invalid_names))
globals().update(zip(_symbol_names, sympy.symbols(feat_list)))

In [17]:
from dae_finder import sequentialThLin, AlgModelFinder
# First pass: lasso-based algebraic model search directly on the state
# variables (not the polynomial library), with an intercept.
algebraic_model_lasso = AlgModelFinder(model_id='lasso',
                                       fit_intercept=True)

algebraic_model_lasso.fit(data_matrix_df_smooth, scale_columns= True)
algebraic_model_lasso.best_models(10) # Best 10 models using R2 matrix

In [18]:
# Intercepts fitted by the model search above.
algebraic_model_lasso.get_fitted_intercepts()

In [19]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit of [P] against [Pc] alone, printing the slope
# (keyed by feature name) and then the intercept.
lin_model = LinearRegression()
lin_model.fit(candidate_lib_full[["[Pc]"]], candidate_lib_full["[P]"])
print(dict(zip(lin_model.feature_names_in_, lin_model.coef_)))
print(lin_model.intercept_)

In [20]:
# `P` is not assigned literally anywhere in the notebook: it is one of the
# names created by the exec call in the sympy-symbols cell, so that cell must
# have run first.
features_to_remove = {P}

# Refine the candidate library with respect to the selected feature.
# NOTE(review): presumably drops every library term containing [P]; confirm
# against get_refined_lib.
features_to_remove, refined_candid_lib = get_refined_lib(features_to_remove, data_matrix_df,
                                                  candidate_lib_full, get_dropped_feat=True)

In [21]:
# Show the refined candidate library.
refined_candid_lib

<img src="deriv.jpeg" width="600">

In [22]:
# Second pass: rebinds algebraic_model_lasso to a new finder (no intercept,
# alpha=0.01) fitted on the refined candidate library. The first-pass model
# object is no longer reachable under this name afterwards.
algebraic_model_lasso = AlgModelFinder(model_id='lasso',
                                       fit_intercept=False, alpha=0.01)
algebraic_model_lasso.fit(refined_candid_lib, scale_columns= True)

In [181]:
# Best-models table of the most recently fitted algebraic_model_lasso.
best_models_full = algebraic_model_lasso.best_models()

In [182]:
# Show the best-models table.
best_models_full

In [152]:
from dae_finder import get_simplified_equation_list

# Intercepts of the current algebraic_model_lasso fit.
intercept_dictionary = algebraic_model_lasso.get_fitted_intercepts()

# Turn the best-models table into simplified equations.
# Missing coefficients are filled with 0 and the last row of the table is
# excluded via [:-1].
# NOTE(review): presumably that row holds a score rather than a coefficient; confirm.
simplified_equations = get_simplified_equation_list(best_model_df=best_models_full.fillna(0)[:-1],
                            coef_threshold=0.025,
                            intercept_threshold= 0.01,
                             global_feature_list=data_matrix_df.columns,
                             intercept_dict= intercept_dictionary,
                             simplified = True)

In [154]:
# Simplified equation stored under the '[Ald] [Pc] [CoA]' key.
simplified_equations['[Ald] [Pc] [CoA]']

In [155]:
from dae_finder import construct_reduced_fit_list

# Build the reduced feature lists from the same table slice (NaN -> 0, last
# row excluded) and the simplified equations.
reduced_relationship_features = construct_reduced_fit_list(best_models_full.fillna(0)[:-1], simplified_eqs=simplified_equations)

reduced_relationship_features

In [54]:
from sklearn.linear_model import LinearRegression

# Removing None from the reduced relationship features if it is present. Whenever a constant
# appears in either the LHS or the RHS of the relationship, a None is returned while converting
# the sympy symbol to the corresponding feature name of the candidate library.
# A filtered copy is used so that reduced_relationship_features itself is left untouched and
# every None entry (not only the first) is dropped.
best_relationship = [feat for feat in reduced_relationship_features[0] if feat is not None]

lin_model = LinearRegression()

# First entry is the left-hand side, the remaining entries are the right-hand-side terms
best_relation_lhs = best_relationship[0]
best_relation_rhs = best_relationship[1:]
lin_model.fit(candidate_lib_full[best_relation_rhs], candidate_lib_full[best_relation_lhs])
lin_model.score(candidate_lib_full[best_relation_rhs], candidate_lib_full[best_relation_lhs])

In [74]:
# Fitted coefficients of the current lin_model, keyed by feature name.
dict(zip(lin_model.feature_names_in_, lin_model.coef_))

In [62]:
# Entries of the '[Ald] [Pc] [CoA]' column of the best-models table whose
# magnitude exceeds 0.01
df_ = algebraic_model_lasso.best_models()
{
    key_: value_
    for key_, value_ in df_["[Ald] [Pc] [CoA]"].items()
    if abs(value_) > 0.01
}

In [167]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of the triple product on a hand-picked set of lower-order
# terms; the value of the cell is the score of that fit on the same data.
_fit_terms = ["[Ald] [CoA]", "[Ald]", "[CoA]", "[Ald] [Pc]", "[Pc] [CoA]", "[Pc]"]
_fit_target = "[Ald] [Pc] [CoA]"

lin_model = LinearRegression()
lin_model.fit(candidate_lib_full[_fit_terms], candidate_lib_full[_fit_target]).score(
    candidate_lib_full[_fit_terms], candidate_lib_full[_fit_target]
)

In [166]:
# Intercept of whichever lin_model was fitted last.
# NOTE(review): lin_model is rebound in several cells, so this value depends
# on cell execution order.
lin_model.intercept_

In [71]:
# Check the discovered relation on the first trajectory:
#     [Ald]*[Pc]  ~  0.054198*[Ald] + 23.02722*[Pc]^2
# NOTE(review): the two coefficients are hard-coded; presumably copied from an
# earlier fit output. Confirm they match the current run.
testAld = outs[0][:, 0]
testPc = outs[0][:, 2]

testLHS = np.multiply(testAld, testPc)
testRel = .054198*testAld + 23.02722*np.square(testPc)


# Left panel: the product [Ald][Pc] against the discovered expression for it.
# The labels name the quantity that is actually plotted.
plt.subplot(1, 2, 1)
plt.plot(tSolve, testLHS, lw=4, label='[Ald][Pc]', alpha=.5)
plt.plot(tSolve, testRel, '--k', lw=2, label='discovered algebraic expr for [Ald][Pc]')
plt.xlabel('time (s)', fontsize=12)
plt.ylabel('[Ald][Pc] (mM$^2$)', fontsize=12)
plt.legend(fontsize=12, loc=9, bbox_to_anchor=(0.5, -0.2))

# Right panel: absolute mismatch between the two curves on a log scale
plt.subplot(1, 2, 2)
plt.semilogy(tSolve, np.abs(np.subtract(testLHS, testRel)), '.')
plt.xlabel('time (s)', fontsize=12)
plt.ylabel('|error|', fontsize=12)

plt.tight_layout()
plt.show()