In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

import matplotlib.pyplot as plt
import seaborn as sns

import itertools
from itertools import permutations, combinations, product
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

from pynverse import inversefunc

from scipy.optimize import curve_fit
import sklearn.gaussian_process as gp
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel, RBF
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score, KFold, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

from utils.model import hill_activation, hill_activation_single, hill_activation_and

In [2]:
# Part catalogue used to enumerate candidate gate designs below.
promoters = [
    "PBAD",
    "PCin",
    "PSalTTC",
    "PTac",
]
rbs = ["B0033"]
ecfs = [
    "E11",
    "E20",
    "E38",
]
terminators = ["B0015"]

In [3]:
# Every ordered pair of distinct promoters (order matters, so (A, B) and (B, A) both appear).
p_prom = [pair for pair in permutations(promoters[:4], 2)]
len(p_prom), p_prom

(12,
 [('PBAD', 'PCin'),
  ('PBAD', 'PSalTTC'),
  ('PBAD', 'PTac'),
  ('PCin', 'PBAD'),
  ('PCin', 'PSalTTC'),
  ('PCin', 'PTac'),
  ('PSalTTC', 'PBAD'),
  ('PSalTTC', 'PCin'),
  ('PSalTTC', 'PTac'),
  ('PTac', 'PBAD'),
  ('PTac', 'PCin'),
  ('PTac', 'PSalTTC')])

In [4]:
# Cartesian product of promoter pairs x RBS x first two ECFs x terminator:
# each element is one 2-input gate design.
parts = [p_prom, rbs, ecfs[:2], terminators]
gate2 = [design for design in product(*parts)]
len(gate2), gate2

(24,
 [(('PBAD', 'PCin'), 'B0033', 'E11', 'B0015'),
  (('PBAD', 'PCin'), 'B0033', 'E20', 'B0015'),
  (('PBAD', 'PSalTTC'), 'B0033', 'E11', 'B0015'),
  (('PBAD', 'PSalTTC'), 'B0033', 'E20', 'B0015'),
  (('PBAD', 'PTac'), 'B0033', 'E11', 'B0015'),
  (('PBAD', 'PTac'), 'B0033', 'E20', 'B0015'),
  (('PCin', 'PBAD'), 'B0033', 'E11', 'B0015'),
  (('PCin', 'PBAD'), 'B0033', 'E20', 'B0015'),
  (('PCin', 'PSalTTC'), 'B0033', 'E11', 'B0015'),
  (('PCin', 'PSalTTC'), 'B0033', 'E20', 'B0015'),
  (('PCin', 'PTac'), 'B0033', 'E11', 'B0015'),
  (('PCin', 'PTac'), 'B0033', 'E20', 'B0015'),
  (('PSalTTC', 'PBAD'), 'B0033', 'E11', 'B0015'),
  (('PSalTTC', 'PBAD'), 'B0033', 'E20', 'B0015'),
  (('PSalTTC', 'PCin'), 'B0033', 'E11', 'B0015'),
  (('PSalTTC', 'PCin'), 'B0033', 'E20', 'B0015'),
  (('PSalTTC', 'PTac'), 'B0033', 'E11', 'B0015'),
  (('PSalTTC', 'PTac'), 'B0033', 'E20', 'B0015'),
  (('PTac', 'PBAD'), 'B0033', 'E11', 'B0015'),
  (('PTac', 'PBAD'), 'B0033', 'E20', 'B0015'),
  (('PTac', 'PCin'), 'B00

In [5]:
# For every 2-input design emit its two expression cassettes: the first promoter
# carries the 'N' tag, the second promoter the 'C' tag. The nested pairs are
# flattened through NumPy so the result is a flat list of name strings.
gate_part = list(
    np.array(
        [
            [
                '-'.join([proms[0], rbs_id, ecf_id, 'N', term_id]),
                '-'.join([proms[1], rbs_id, ecf_id, 'C', term_id]),
            ]
            for proms, rbs_id, ecf_id, term_id in gate2
        ]
    ).flatten()
)
len(gate_part), gate_part

(48,
 ['PBAD-B0033-E11-N-B0015',
  'PCin-B0033-E11-C-B0015',
  'PBAD-B0033-E20-N-B0015',
  'PCin-B0033-E20-C-B0015',
  'PBAD-B0033-E11-N-B0015',
  'PSalTTC-B0033-E11-C-B0015',
  'PBAD-B0033-E20-N-B0015',
  'PSalTTC-B0033-E20-C-B0015',
  'PBAD-B0033-E11-N-B0015',
  'PTac-B0033-E11-C-B0015',
  'PBAD-B0033-E20-N-B0015',
  'PTac-B0033-E20-C-B0015',
  'PCin-B0033-E11-N-B0015',
  'PBAD-B0033-E11-C-B0015',
  'PCin-B0033-E20-N-B0015',
  'PBAD-B0033-E20-C-B0015',
  'PCin-B0033-E11-N-B0015',
  'PSalTTC-B0033-E11-C-B0015',
  'PCin-B0033-E20-N-B0015',
  'PSalTTC-B0033-E20-C-B0015',
  'PCin-B0033-E11-N-B0015',
  'PTac-B0033-E11-C-B0015',
  'PCin-B0033-E20-N-B0015',
  'PTac-B0033-E20-C-B0015',
  'PSalTTC-B0033-E11-N-B0015',
  'PBAD-B0033-E11-C-B0015',
  'PSalTTC-B0033-E20-N-B0015',
  'PBAD-B0033-E20-C-B0015',
  'PSalTTC-B0033-E11-N-B0015',
  'PCin-B0033-E11-C-B0015',
  'PSalTTC-B0033-E20-N-B0015',
  'PCin-B0033-E20-C-B0015',
  'PSalTTC-B0033-E11-N-B0015',
  'PTac-B0033-E11-C-B0015',
  'PSalTTC-B0033

In [6]:
# One dash-separated name per 2-input design: promoter1-promoter2-RBS-ECF-terminator.
level2_part = [
    '-'.join([proms[0], proms[1], rbs_id, ecf_id, term_id])
    for proms, rbs_id, ecf_id, term_id in gate2
]
level2_part

['PBAD-PCin-B0033-E11-B0015',
 'PBAD-PCin-B0033-E20-B0015',
 'PBAD-PSalTTC-B0033-E11-B0015',
 'PBAD-PSalTTC-B0033-E20-B0015',
 'PBAD-PTac-B0033-E11-B0015',
 'PBAD-PTac-B0033-E20-B0015',
 'PCin-PBAD-B0033-E11-B0015',
 'PCin-PBAD-B0033-E20-B0015',
 'PCin-PSalTTC-B0033-E11-B0015',
 'PCin-PSalTTC-B0033-E20-B0015',
 'PCin-PTac-B0033-E11-B0015',
 'PCin-PTac-B0033-E20-B0015',
 'PSalTTC-PBAD-B0033-E11-B0015',
 'PSalTTC-PBAD-B0033-E20-B0015',
 'PSalTTC-PCin-B0033-E11-B0015',
 'PSalTTC-PCin-B0033-E20-B0015',
 'PSalTTC-PTac-B0033-E11-B0015',
 'PSalTTC-PTac-B0033-E20-B0015',
 'PTac-PBAD-B0033-E11-B0015',
 'PTac-PBAD-B0033-E20-B0015',
 'PTac-PCin-B0033-E11-B0015',
 'PTac-PCin-B0033-E20-B0015',
 'PTac-PSalTTC-B0033-E11-B0015',
 'PTac-PSalTTC-B0033-E20-B0015']

In [None]:
# Ordered pairs of level-2 gates. The length argument is required: without it
# permutations() enumerates all 24! full-length orderings of level2_part, which
# can never be materialised. The filter that consumes this list only looks at
# the first two elements of each tuple, so pairs are what is needed.
gate3_raw = list(permutations(level2_part, 2))
# Show the count and a short preview instead of dumping every pair.
len(gate3_raw), gate3_raw[:5]

In [None]:
# Candidate two-gate cascades: ordered pairs of level-2 gates (r=2 is required;
# full-length permutations of 24 names would be 24! tuples).
gate3_raw = list(permutations(level2_part, 2))

# Keep a pair only if the two gates share no promoter and use different ECFs.
# Names are 'prom1-prom2-RBS-ECF-terminator', so fields 0/1 are the promoters
# and field 3 is the ECF.
gate3 = []
for gate in gate3_raw:
    g1 = gate[0].split('-')
    g2 = gate[1].split('-')
    prom_gate = [g1[0], g1[1], g2[0], g2[1]]
    ecf_gate = [g1[3], g2[3]]
    promoters_distinct = len(set(prom_gate)) == len(prom_gate)
    ecfs_distinct = len(set(ecf_gate)) == len(ecf_gate)
    if promoters_distinct and ecfs_distinct:
        gate3.append(gate)
len(gate3), gate3

In [None]:
# Directory holding the cleaned CSV measurements read by later cells.
folder = 'datasets/cleansed'

# One colour per inducer level in the 2-D plots (indexed by level position).
color = [
    'red', 'green', 'blue',
    'orange', 'purple', 'cyan',
]

# Four fitted Hill parameters per promoter. They are unpacked positionally into
# hill_activation_single(...) and inverse_hill(rpu, ag, K, n, eps); inverse_hill
# raises 10 to each value, i.e. the numbers are stored as log10.
hill_params = dict(
    pbad=[0.52, -0.06, 0.15, -2.06],
    pcin=[-0.32, 0.91, 0.29, -3.11],
    psalttc=[-0.46, 1.76, 0.29, -1.71],
)

In [None]:
# Inducer levels of the 2-input experiment and their promoter activities
# obtained through the fitted Hill curves.
ara_2 = np.array([0, 0.05078125, 0.203125, 0.8125, 3.25, 13])
ohc14_2 = np.array([0, 0.078125, 0.3125, 1.25, 5, 20])
ara_rpu_2 = hill_activation_single(ara_2, *hill_params['pbad'])
ohc14_rpu_2 = hill_activation_single(ohc14_2, *hill_params['pcin'])

gates = pd.read_csv(f'{folder}/jump-and-gates.csv')

# Each frame pairs the full ara x ohc14 grid (ara varying slowest) with one
# measured column; rows are matched by position.
# assumes the CSV rows follow the same ara-major order -- TODO confirm
e20 = pd.concat(
    [
        pd.DataFrame(list(itertools.product(ara_rpu_2, ohc14_rpu_2)), columns=['ara', 'ohc14']),
        gates['e20'].rename('fluo'),
    ],
    axis=1,
)
e38 = pd.concat(
    [
        pd.DataFrame(list(itertools.product(ara_rpu_2, ohc14_rpu_2)), columns=['ara', 'ohc14']),
        gates['e38'].rename('fluo'),
    ],
    axis=1,
)

In [None]:
def inverse_hill(rpu, ag, K, n, eps):
    """Solve a Hill-type response for the input that yields ``rpu``.

    All four parameters are given as log10 values and exponentiated here.
    With A = 10**ag, k = 10**K, h = 10**n and e = 10**eps the result is

        x = (((rpu - A*e) * k**h) / (A - rpu)) ** (1/h)

    ``rpu`` may be a scalar or a NumPy array (evaluated element-wise).
    ``rpu == A`` divides by zero; values outside (A*e, A) give a negative
    base for the fractional power.
    """
    amplitude, half_point, steepness, leak = (10**p for p in (ag, K, n, eps))
    numerator = (rpu - (amplitude * leak)) * (half_point**steepness)
    denominator = amplitude - rpu
    return (numerator / denominator)**(1/steepness)

In [None]:
# Map the 2-input arabinose RPU levels back through inverse_hill with the pBAD
# parameters. Presumably this should reproduce ara_2 if hill_activation_single
# uses the same parameterisation -- TODO confirm.
inverse_hill(ara_rpu_2, *hill_params['pbad'])

3-input AND gate.

In [None]:
# Two inducer levels per input for the 3-input circuit, converted to promoter
# activity with the fitted Hill curves.
ara_3 = np.array([13, 52])
ohc14_3 = np.array([0, 0.15])
sal_3 = np.array([0, 100])
ara_rpu_3 = hill_activation_single(ara_3, *hill_params['pbad'])
ohc14_rpu_3 = hill_activation_single(ohc14_3, *hill_params['pcin'])
sal_rpu_3 = hill_activation_single(sal_3, *hill_params['psalttc'])

# Full ara x ohc14 x sal grid joined by row position with the measured
# 'e38-e20' column, then everything is moved to log10.
circuit = pd.DataFrame(
    list(itertools.product(ara_rpu_3, ohc14_rpu_3, sal_rpu_3)),
    columns=['ara', 'ohc14', 'sal'],
)
circuit = np.log10(
    pd.concat(
        [circuit, pd.read_csv(f'{folder}/3i-and-gates-jump.csv')['e38-e20']],
        axis=1,
    )
)

In [None]:
# Log-spaced activity grids between the lowest and highest 3-input level of
# each promoter (4, 7 and 9 points), and the inducer inputs that inverse_hill
# assigns to those activities.
# Note: ind1/ind2 stay in the notebook namespace and are read by later
# top-level plotting cells.
ind1, ind2, ind3 = (
    np.logspace(np.log10(levels.min()), np.log10(levels.max()), n_points)
    for levels, n_points in ((ara_rpu_3, 4), (ohc14_rpu_3, 7), (sal_rpu_3, 9))
)
r_ind1, r_ind2, r_ind3 = (
    inverse_hill(grid, *hill_params[promoter])
    for grid, promoter in ((ind1, 'pbad'), (ind2, 'pcin'), (ind3, 'psalttc'))
)
r_ind1, r_ind2, r_ind3

In [None]:
def plot_2d(model, data, title, show_interval=False, i1=ara_rpu_2, i2=ohc14_rpu_2):
    """Overlay measured 2-input gate data with model predictions (1x2 figure).

    Left panel: one curve per 'ara' level, swept over 'ohc14'.
    Right panel: one curve per 'ohc14' level, swept over 'ara'.
    All quantities are plotted as log10.

    Parameters
    ----------
    model : fitted regressor with ``predict(X)``; when ``show_interval`` is
        true it must also accept ``return_std=True``.
    data : DataFrame with columns 'ara', 'ohc14' and 'fluo' in linear units;
        log10 is applied here.
    title : figure suptitle.
    show_interval : if true, shade prediction +/- 1.96 * predicted std.
    i1, i2 : arrays of 'ara' / 'ohc14' levels in linear units. They supply
        both the fixed levels of each curve and the min/max of the swept axis.
        (Previously the fixed levels were always taken from the notebook
        globals, so non-default i1/i2 were only half honoured.)

    A prediction curve is drawn only for levels of ``data`` that match a
    level in i1/i2 exactly. Levels must be > 0 for log10 to be finite.
    """
    columns = ['ara', 'ohc14']
    levels = {'ara': np.asarray(i1), 'ohc14': np.asarray(i2)}
    act_data = np.log10(data)

    row, col = 1, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    def draw_panel(ax, fixed, varied, alpha):
        # Dense log-spaced sweep on the varied axis, measured levels on the fixed one.
        sweep = np.logspace(np.log10(levels[varied].min()), np.log10(levels[varied].max()), 1000)
        grid = {fixed: levels[fixed], varied: sweep}
        pred_data = pd.DataFrame(list(itertools.product(grid['ara'], grid['ohc14'])), columns=columns)
        pred_data = np.log10(pred_data)
        if show_interval:
            mean, std = model.predict(pred_data[columns], return_std=True)
            pred_data['prediction'], pred_data['std_prediction'] = mean, std
        else:
            pred_data['prediction'] = model.predict(pred_data[columns])

        for x, i in enumerate(act_data[fixed].unique()):
            act = act_data[act_data[fixed]==i]
            pred = pred_data[pred_data[fixed]==i]
            shade = color[x % len(color)]  # wrap around if there are more levels than colours
            ax.scatter(act[varied], act['fluo'], label=i, color=shade)
            ax.plot(pred[varied], pred['prediction'], color=shade)
            if show_interval:
                ax.fill_between(
                    pred[varied],
                    pred['prediction'] - 1.96 * pred['std_prediction'],
                    pred['prediction'] + 1.96 * pred['std_prediction'],
                    alpha=alpha,
                    label=r"95% confidence interval", color=shade
                )
        ax.set_xlabel('{} (log)'.format(varied))
        ax.set_ylabel('RPU (log)')

    draw_panel(axr[0], fixed='ara', varied='ohc14', alpha=0.5)
    draw_panel(axr[1], fixed='ohc14', varied='ara', alpha=0.4)

    plt.suptitle(title)
    plt.tight_layout()
    sns.despine()

In [None]:
def plot_3d(models, data, title, i1=ara_rpu_3, i2=ohc14_rpu_3, i3=sal_rpu_3):
    """Plot the two-stage (E38 -> E20) cascade prediction against measurements.

    One panel per 'sal' level (at most four, on a 2x2 grid); inside a panel,
    one curve per 'ara' level, swept over 'ohc14'.

    Parameters
    ----------
    models : sequence ``[first_stage, second_stage]``. The first model maps
        log10 (ara, ohc14) to the intermediate 'e38-pred'; the second maps
        (e38-pred, sal) to the final 'e20-pred'. This is the order every call
        in this notebook passes (``[model2, model1]``) and matches the inline
        ``model2`` -> ``model1`` cascade used for ``df3``. The previous body
        indexed the list the other way round.
    data : DataFrame already in log10 with columns 'ara', 'ohc14', 'sal'
        and 'e38-e20'.
    title : figure suptitle.
    i1, i2, i3 : linear-scale levels for ara, ohc14 and sal.
    """
    first_stage, second_stage = models[0], models[1]
    ohc14_sweep = np.logspace(np.log10(i2.min()), np.log10(i2.max()), 1000)

    row, col = 2, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    for x, s in enumerate(i3):
        ax = axr[x]

        for a in i1:

            pred_data = pd.DataFrame(
                list(itertools.product([a], ohc14_sweep, [s])),
                columns=['ara', 'ohc14', 'sal'],
            )
            pred_data = np.log10(pred_data)

            pred_data['e38-pred'] = first_stage.predict(pred_data[['ara', 'ohc14']])
            # Pass a plain array: the second-stage model was not fitted on
            # columns named 'e38-pred'/'sal', and recent scikit-learn rejects
            # DataFrames whose column names differ from those seen in fit.
            pred_data['e20-pred'] = second_stage.predict(pred_data[['e38-pred', 'sal']].to_numpy())

            act_data = data[(data['ara']==np.log10(a)) & (data['sal']==np.log10(s))].reset_index(drop=True)

            ax.scatter(act_data['ohc14'], act_data['e38-e20'])
            ax.plot(pred_data['ohc14'], pred_data['e20-pred'], label=np.round(a, 2))

        ax.set_xlabel('OHC14 (log)')
        ax.set_ylabel('RPU (log)')
        ax.set_title('Sal:{}'.format(np.round(s, 2)))
        ax.legend(loc=4, bbox_to_anchor=(1.1, 1.05))

    plt.suptitle(title)
    plt.tight_layout()
    sns.despine()

In [None]:
# log10 features (ara, ohc14) and log10 target (fluo) for the E20 and E38 gates.
X1, y1, X2, y2 = (
    np.log10(frame[cols])
    for frame in (e20, e38)
    for cols in (['ara', 'ohc14'], 'fluo')
)

#### Gaussian Process #1

In [None]:
# GP for the E20 gate: learnable white-noise term plus an RBF whose
# length scale is held fixed at 1.
kernel = WhiteKernel() + RBF(length_scale=1, length_scale_bounds="fixed")
model1 = gp.GaussianProcessRegressor(kernel=kernel).fit(X1, y1)
model1

In [None]:
# GP for the E38 gate with the same kernel structure: white noise plus an
# RBF whose length scale is held fixed at 1.
kernel = WhiteKernel() + RBF(length_scale=1, length_scale_bounds="fixed")
model2 = gp.GaussianProcessRegressor(kernel=kernel).fit(X2, y2)
model2

In [None]:
# E20 data vs. model1 mean prediction (show_interval left at its default, False).
plot_2d(model1, e20, 'ECF20 - GP (White + RBF Kernels)')

In [None]:
# E38 data vs. model2 mean prediction (no interval band).
plot_2d(model2, e38, 'E38 - GP (White + RBF Kernels)')

In [None]:
# Same E20 plot with show_interval=True: adds the +/- 1.96 * std band.
plot_2d(model1, e20, 'ECF20 - GP (White + RBF Kernels)', True)

In [None]:
# Same E38 plot with show_interval=True: adds the +/- 1.96 * std band.
plot_2d(model2, e38, 'ECF38 - GP (White + RBF Kernels)', True)

In [None]:
# Cascade plot for the 3-input circuit; the list is passed as
# [model fitted on e38, model fitted on e20].
plot_3d([model2, model1], circuit, 'E38-E20 - GP (White + RBF Kernels)')

#### Gaussian Process #2

In [None]:
# Refit the E20 model with scikit-learn's default GP kernel.
# This rebinds model1, replacing the White + RBF model fitted earlier.
model1 = gp.GaussianProcessRegressor().fit(X1, y1)
model1

In [None]:
# Refit the E38 model with scikit-learn's default GP kernel.
# This rebinds model2, replacing the White + RBF model fitted earlier.
model2 = gp.GaussianProcessRegressor().fit(X2, y2)
model2

In [None]:
def plot_2d_(model, data, title, show_interval=False, i1=ara_rpu_2, i2=ohc14_rpu_2):
    """Overlay 2-input gate data with model predictions and dashed guide lines.

    Left panel: one curve per 'ara' level, swept over 'ohc14'.
    Right panel: one curve per 'ohc14' level, swept over 'ara'.
    Each panel also gets four dashed vertical guides at hard-coded log10
    positions on its x axis. All quantities are plotted as log10.

    Parameters
    ----------
    model : fitted regressor with ``predict(X)``; when ``show_interval`` is
        true it must also accept ``return_std=True``.
    data : DataFrame with columns 'ara', 'ohc14' and 'fluo' in linear units.
    title : figure suptitle.
    show_interval : if true, shade prediction +/- 1.96 * predicted std.
    i1, i2 : arrays of 'ara' / 'ohc14' levels in linear units, used for both
        the fixed curve levels and the range of the swept axis. (Previously
        the fixed levels always came from the notebook globals.)
    """
    columns = ['ara', 'ohc14']
    levels = {'ara': np.asarray(i1), 'ohc14': np.asarray(i2)}
    act_data = np.log10(data)

    row, col = 1, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    def draw_panel(ax, fixed, varied, alpha, guides):
        sweep = np.logspace(np.log10(levels[varied].min()), np.log10(levels[varied].max()), 1000)
        grid = {fixed: levels[fixed], varied: sweep}
        pred_data = pd.DataFrame(list(itertools.product(grid['ara'], grid['ohc14'])), columns=columns)
        pred_data = np.log10(pred_data)
        if show_interval:
            mean, std = model.predict(pred_data[columns], return_std=True)
            pred_data['prediction'], pred_data['std_prediction'] = mean, std
        else:
            pred_data['prediction'] = model.predict(pred_data[columns])

        # Guides are drawn once per panel; they used to be redrawn for every level.
        for position in guides:
            ax.axvline(position, linestyle='--')

        for x, i in enumerate(act_data[fixed].unique()):
            act = act_data[act_data[fixed]==i]
            pred = pred_data[pred_data[fixed]==i]
            shade = color[x % len(color)]  # wrap around if there are more levels than colours
            ax.scatter(act[varied], act['fluo'], label=i, color=shade)
            ax.plot(pred[varied], pred['prediction'], color=shade)
            if show_interval:
                ax.fill_between(
                    pred[varied],
                    pred['prediction'] - 1.96 * pred['std_prediction'],
                    pred['prediction'] + 1.96 * pred['std_prediction'],
                    alpha=alpha,
                    label=r"95% confidence interval", color=shade
                )
        ax.set_xlabel('{} (log)'.format(varied))
        ax.set_ylabel('RPU (log)')

    draw_panel(axr[0], fixed='ara', varied='ohc14', alpha=0.5,
               guides=(-3.15, -2.4, -1.425, -0.625))
    draw_panel(axr[1], fixed='ohc14', varied='ara', alpha=0.4,
               guides=(-1.325, -0.75, -0.1, 0.35))

    plt.suptitle(title)
    plt.tight_layout()
    sns.despine()
    
# E20 data vs. model1 with the dashed guide lines drawn by plot_2d_.
plot_2d_(model1, e20, 'ECF20 - GP (RBF Kernel)')

In [None]:
# Convert the two sets of log10 guide positions (the same numbers plot_2d_
# draws as dashed lines) back to linear units.
aa, bb = (
    [10**exponent for exponent in exponents]
    for exponents in (
        [-3.15, -2.4, -1.425, -0.625],
        [-1.325, -0.75, -0.1, 0.35],
    )
)
aa, bb

In [None]:
# pCin activity at four OHC14 concentrations; the same four values appear as
# the intermediate levels of ohc14_22 in the "Optimized Induction" section.
hill_activation_single(np.array([0.195, 0.666, 2.285, 8.05]), *hill_params['pcin'])

In [None]:
# pBAD activity at four arabinose concentrations; the same four values appear
# as the intermediate levels of ara_22 in the "Optimized Induction" section.
hill_activation_single(np.array([0.0223, 0.1008, 0.375, 1.453]), *hill_params['pbad'])

In [None]:
# E38 data vs. model2 with the dashed guide lines drawn by plot_2d_.
plot_2d_(model2, e38, 'ECF38 - GP (RBF Kernel)')

In [None]:
# E20 plot with show_interval=True (adds the +/- 1.96 * std band).
plot_2d(model1, e20, 'ECF20 - GP (RBF Kernel)', True)

In [None]:
# E38 plot with show_interval=True (adds the +/- 1.96 * std band).
plot_2d(model2, e38, 'ECF38 - GP (RBF Kernel)', True)

In [None]:
# Cascade plot for the 3-input circuit; the list is passed as
# [model fitted on e38, model fitted on e20].
plot_3d([model2, model1], circuit, 'E38-E20 - GP (RBF Kernel)')

#### Optimized Induction

In [None]:
# Inducer levels for the optimized induction experiment and their promoter
# activities through the fitted Hill curves.
ara_22 = np.array([0, 0.0223, 0.1008, 0.375, 1.453, 13])
ohc14_22 = np.array([0, 0.195, 0.666, 2.285, 8.05, 20])
ara_rpu_22 = hill_activation_single(ara_22, *hill_params['pbad'])
ohc14_rpu_22 = hill_activation_single(ohc14_22, *hill_params['pcin'])

gates2 = pd.read_csv('{}/jump-and-gates-optimized.csv'.format(folder))

# Pair the optimized grid with the optimized measurements. The frames were
# previously built from `gates` (the original dataset), so the file loaded
# above was never used and the new grid was matched with the old readings.
# assumes the optimized CSV also has 'e20' and 'e38' columns -- TODO confirm
e20_ = pd.concat([pd.DataFrame(itertools.product(ara_rpu_22, ohc14_rpu_22)), gates2['e20']], axis=1)
e20_.columns = ['ara', 'ohc14', 'fluo']
e38_ = pd.concat([pd.DataFrame(itertools.product(ara_rpu_22, ohc14_rpu_22)), gates2['e38']], axis=1)
e38_.columns = ['ara', 'ohc14', 'fluo']

In [None]:
# log10 features (ara, ohc14) and log10 target (fluo) for the optimized frames.
X1_, y1_, X2_, y2_ = (
    np.log10(frame[cols])
    for frame in (e20_, e38_)
    for cols in (['ara', 'ohc14'], 'fluo')
)

In [None]:
# Three-level subset (lowest, one intermediate, highest) of the optimized
# inducer levels, converted to promoter activity.
ara_22_exp, ohc14_22_exp = np.array([0, 0.1008, 13]), np.array([0, 0.666, 20])
ara_rpu_22_exp, ohc14_rpu_22_exp = (
    hill_activation_single(ara_22_exp, *hill_params['pbad']),
    hill_activation_single(ohc14_22_exp, *hill_params['pcin']),
)

In [None]:
# 3 x 3 grid of the reduced levels, left-joined onto the full optimized E20
# frame to pick up the matching 'fluo' readings. The join is on exact float
# equality of the RPU values.
exp_inducers = pd.DataFrame(
    list(itertools.product(ara_rpu_22_exp, ohc14_rpu_22_exp)),
    columns=['ara', 'ohc14'],
)
e20_exp = exp_inducers.merge(e20_, on=['ara', 'ohc14'], how='left')

In [None]:
# Fit a default-kernel GP on the reduced 3 x 3 subset (log10 features and target).
X1_exp, y1_exp = (
    np.log10(e20_exp[cols]) for cols in (['ara', 'ohc14'], 'fluo')
)
model_exp = gp.GaussianProcessRegressor().fit(X1_exp, y1_exp)
model_exp

In [None]:
# NOTE(review): plot_2d2 is defined in a later cell of this notebook; on a
# fresh top-to-bottom run this call raises NameError.
plot_2d2(model_exp, e20_exp, 'ECF20', True)

In [None]:
# NOTE(review): identical to the previous cell's call, and it also precedes
# the definition of plot_2d2 (NameError on a fresh run).
plot_2d2(model_exp, e20_exp, 'ECF20', True)

In [None]:
# GP on the optimized E20 frame: white noise plus an RBF whose length scale
# is held fixed at 1.
kernel = WhiteKernel() + RBF(length_scale=1, length_scale_bounds="fixed")
model1_ = gp.GaussianProcessRegressor(kernel=kernel).fit(X1_, y1_)
model1_

In [None]:
# GP on the optimized E38 frame: white noise plus an RBF whose length scale
# is held fixed at 1.
kernel = WhiteKernel() + RBF(length_scale=1, length_scale_bounds="fixed")
model2_ = gp.GaussianProcessRegressor(kernel=kernel).fit(X2_, y2_)
model2_

In [None]:
def plot_2d2(model, data, title, show_interval=False, i1=ara_rpu_22, i2=ohc14_rpu_22):
    """Overlay optimized-induction gate data with model predictions (1x2 figure).

    Left panel: one curve per 'ara' level, swept over 'ohc14'.
    Right panel: one curve per 'ohc14' level, swept over 'ara'.
    All quantities are plotted as log10.

    Parameters
    ----------
    model : fitted regressor with ``predict(X)``; when ``show_interval`` is
        true it must also accept ``return_std=True``.
    data : DataFrame with columns 'ara', 'ohc14' and 'fluo' in linear units.
    title : figure suptitle.
    show_interval : if true, shade prediction +/- 1.96 * predicted std.
    i1, i2 : arrays of 'ara' / 'ohc14' levels in linear units, used for both
        the fixed curve levels and the range of the swept axis. (Previously
        the fixed levels always came from the notebook globals.)

    A prediction curve is drawn only for levels of ``data`` that match a
    level in i1/i2 exactly.
    """
    columns = ['ara', 'ohc14']
    levels = {'ara': np.asarray(i1), 'ohc14': np.asarray(i2)}
    act_data = np.log10(data)

    row, col = 1, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    def draw_panel(ax, fixed, varied, alpha):
        # Dense log-spaced sweep on the varied axis, measured levels on the fixed one.
        sweep = np.logspace(np.log10(levels[varied].min()), np.log10(levels[varied].max()), 1000)
        grid = {fixed: levels[fixed], varied: sweep}
        pred_data = pd.DataFrame(list(itertools.product(grid['ara'], grid['ohc14'])), columns=columns)
        pred_data = np.log10(pred_data)
        if show_interval:
            mean, std = model.predict(pred_data[columns], return_std=True)
            pred_data['prediction'], pred_data['std_prediction'] = mean, std
        else:
            pred_data['prediction'] = model.predict(pred_data[columns])

        for x, i in enumerate(act_data[fixed].unique()):
            act = act_data[act_data[fixed]==i]
            pred = pred_data[pred_data[fixed]==i]
            shade = color[x % len(color)]  # wrap around if there are more levels than colours
            ax.scatter(act[varied], act['fluo'], label=i, color=shade)
            ax.plot(pred[varied], pred['prediction'], color=shade)
            if show_interval:
                ax.fill_between(
                    pred[varied],
                    pred['prediction'] - 1.96 * pred['std_prediction'],
                    pred['prediction'] + 1.96 * pred['std_prediction'],
                    alpha=alpha,
                    label=r"95% confidence interval", color=shade
                )
        ax.set_xlabel('{} (log)'.format(varied))
        ax.set_ylabel('RPU (log)')

    draw_panel(axr[0], fixed='ara', varied='ohc14', alpha=0.5)
    draw_panel(axr[1], fixed='ohc14', varied='ara', alpha=0.4)

    plt.suptitle(title)
    plt.tight_layout()
    sns.despine()

In [None]:
# Optimized E20 data vs. model1_ with the +/- 1.96 * std band.
plot_2d2(model1_, e20_, 'ECF20 - GP (White + RBF Kernel)', True)

In [None]:
# Optimized E38 data vs. model2_ with the +/- 1.96 * std band.
plot_2d2(model2_, e38_, 'ECF38 - GP (White + RBF Kernel)', True)

In [None]:
# Cascade plot using the models fitted on the optimized frames.
# NOTE(review): model1_/model2_ use WhiteKernel + RBF, while the title only
# mentions RBF.
plot_3d([model2_, model1_], circuit, 'E38-E20 - GP (RBF Kernel)')

#### Neural Network

In [None]:
# MLP for the optimized E20 frame. random_state pins the weight initialisation
# so the fit (and every plot derived from it) is the same on each run.
# This rebinds model1, replacing the GP fitted earlier.
model1 = MLPRegressor(solver='lbfgs', random_state=0)
model1.fit(X1_, y1_)

In [None]:
# MLP for the optimized E38 frame. random_state pins the weight initialisation
# so the fit (and every plot derived from it) is the same on each run.
# This rebinds model2, replacing the GP fitted earlier.
model2 = MLPRegressor(solver='lbfgs', random_state=0)
model2.fit(X2_, y2_)

In [None]:
# Optimized E20 data vs. the MLP mean prediction.
# NOTE(review): the title says "300:300", but model1 is created without a
# hidden_layer_sizes argument -- confirm which architecture is intended.
plot_2d2(model1, e20_, 'ECF20 - MLP (300:300)')

In [None]:
# Optimized E38 data vs. the MLP mean prediction.
# NOTE(review): the title says "300:300", but model2 is created without a
# hidden_layer_sizes argument -- confirm which architecture is intended.
plot_2d2(model2, e38_, 'ECF38 - MLP (300:300)')

In [None]:
# Cascade plot using the two MLP models, passed as [e38 model, e20 model].
plot_3d([model2, model1], circuit, 'E38-E20 - MLP (300:300)')

In [None]:
# NOTE(review): circuit2 is first assigned in the next cell; on a fresh
# top-to-bottom run this line raises NameError.
circuit2

In [None]:
# Two-stage prediction on the measured 3-input grid: model2 gives the
# intermediate E38 output, model1 gives the final E20 output.
circuit2 = circuit.copy()
circuit2['e38-pred'] = model2.predict(circuit2[['ara', 'ohc14']])
# Plain array for the second stage: model1 was not fitted on columns named
# 'sal'/'e38-pred', and recent scikit-learn rejects mismatching column names.
# NOTE(review): every other cascade in this notebook feeds the second stage as
# (e38-pred, sal); here the order is (sal, e38-pred). Confirm which is intended.
circuit2['e20-pred'] = model1.predict(circuit2[['sal', 'e38-pred']].to_numpy())

# mean_absolute_percentage_error returns a fraction (1.0 == 100 %), so it is
# scaled to percent here; the old "100 - value" mixed the two scales.
mape = 100 * mean_absolute_percentage_error(circuit2['e38-e20'], circuit2['e20-pred'])
r = r2_score(circuit2['e38-e20'], circuit2['e20-pred'])
# x/y are passed by keyword: newer seaborn no longer accepts them positionally.
sns.regplot(x=circuit2['e38-e20'], y=circuit2['e20-pred'], label='mape:{}, r2:{}'.format(np.round(mape, 2), np.round(r, 2)))
plt.legend()
plt.xlabel('experiment (log)')
plt.ylabel('prediction (log)')
# NOTE(review): the title is hard-coded; check it matches the model type bound
# to model1/model2 when this cell runs.
plt.title('E38-E20 - GP')
sns.despine()

In [None]:
# Print each distinct log10 salicylate level present in circuit2, one per line.
for s in circuit2['sal'].unique():
    print(s)

In [None]:
# Rows at the higher log10 'sal' level. np.isclose is used instead of == so the
# selection does not depend on the last bits of a float pasted from printed output.
temp = circuit2[np.isclose(circuit2['sal'], -0.5843732235547442)]
for a in temp['ara'].unique():
    b = temp[temp['ara']==a].reset_index(drop=True)
    # Points: measured circuit output; line: first-stage prediction.
    # NOTE(review): the line plots 'e38-pred' while the points are 'e38-e20' --
    # confirm this is not meant to be 'e20-pred'.
    plt.scatter(b.index, b['e38-e20'], label=np.round(a, 2))
    plt.plot(b.index, b['e38-pred'])
plt.legend()

In [None]:
# Rows at the lower log10 'sal' level. -2.17 is a rounded value, so an exact ==
# can silently select nothing; match within half of the last printed digit.
temp = circuit2[np.isclose(circuit2['sal'], -2.17, rtol=0, atol=5e-3)]
for a in temp['ara'].unique():
    b = temp[temp['ara']==a].reset_index(drop=True)
    # Points: measured circuit output; line: first-stage prediction.
    # NOTE(review): the line plots 'e38-pred' while the points are 'e38-e20' --
    # confirm this is not meant to be 'e20-pred'.
    plt.scatter(b.index, b['e38-e20'], label=np.round(a, 2))
    plt.plot(b.index, b['e38-pred'])
plt.legend()

In [None]:
# For each inducer take its [min, max] level in circuit2, form all 2**3
# low/high combinations and left-join the measured and predicted columns.
_inducer_bounds = circuit2[['ara', 'ohc14', 'sal']].agg(['min', 'max'])
on_off = [_inducer_bounds[name].tolist() for name in ['ara', 'ohc14', 'sal']]
on_off_df = pd.DataFrame(
    list(itertools.product(*on_off)),
    columns=['ara', 'ohc14', 'sal'],
)
on_off_df = on_off_df.merge(circuit2, on=['ara', 'ohc14', 'sal'], how='left')
on_off_df

In [None]:
# Side-by-side bars (linear scale) of measured vs. predicted output for the
# eight on/off input combinations; the dotted line marks an output of 1.
# NOTE(review): the title is hard-coded; the bars show whatever predictions
# on_off_df currently holds.
w = 0.3
bar_series = (
    (on_off_df.index, 'e38-e20', 'experiment'),
    (on_off_df.index+w, 'e20-pred', 'prediction'),
)
for positions, column, legend_name in bar_series:
    plt.bar(positions, 10**on_off_df[column], width=w, label=legend_name)
plt.axhline(1, linestyle='dotted', color='gray', label='on-off threshold')
plt.legend()
plt.title('E38-E20 - GP')
sns.despine()

In [None]:
# Same bar chart as the previous cell with a different title.
# NOTE(review): both cells read the same on_off_df, so unless the predictions
# are recomputed in between the two figures are identical apart from the title.
w = 0.3
bar_series = (
    (on_off_df.index, 'e38-e20', 'experiment'),
    (on_off_df.index+w, 'e20-pred', 'prediction'),
)
for positions, column, legend_name in bar_series:
    plt.bar(positions, 10**on_off_df[column], width=w, label=legend_name)
plt.axhline(1, linestyle='dotted', color='gray', label='on-off threshold')
plt.legend()
plt.title('E38-E20 - NN')
sns.despine()

#### Selected 9 AND gates

In [None]:
# Column names of the nine selected AND gates in the CSV read below.
gate_names = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2',
             'e20x32gp411', 'e32x30SspGyrB', 'e34x30MjaKlbA',
             'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1']
ara_3k3 = np.array([0, 0.8125, 3.25, 13, 52, 208])
cuma_3k3 = np.array([0, 0.078125, 0.3125, 1.25, 5, 20])

# Separate Hill fits for this experiment (same four-value layout as hill_params).
hill_params_3k3 = {
    'pbad': [0.88, 0.3, 0.32, -2.85],
    'pcymrc': [0.1, 1.82, 0.49, -2.9]
}

ara_3k3_rpu = hill_activation_single(ara_3k3, *hill_params_3k3['pbad'])
cuma_3k3_rpu = hill_activation_single(cuma_3k3, *hill_params_3k3['pcymrc'])

gates_3k3 = pd.read_csv('{}/selected-9-and-gates.csv'.format(folder))


def _gate_frame(gate_name):
    """Pair the ara x cuma activity grid with one gate's measured column (by row position)."""
    grid = pd.DataFrame(itertools.product(ara_3k3_rpu, cuma_3k3_rpu))
    frame = pd.concat([grid, gates_3k3[gate_name]], axis=1)
    frame.columns = ['ara', 'cuma', 'fluo']
    return frame


# Only five of the nine gates (indices 0, 1, 2, 4 and 5) get a frame here.
e11, e15, e16, e32, e34 = (_gate_frame(gate_names[k]) for k in (0, 1, 2, 4, 5))

In [None]:
def plot_2d_3k3(model, data, title, show_interval=False, i1=ara_3k3_rpu, i2=cuma_3k3_rpu):
    """Overlay ara/cuma gate data with model predictions (1x2 figure, log10 axes).

    Left panel: one curve per 'ara' level, swept over 'cuma'.
    Right panel: one curve per 'cuma' level, swept over 'ara'.

    model must expose ``predict(X)`` (and accept ``return_std=True`` when
    ``show_interval`` is true). data holds 'ara', 'cuma' and 'fluo' in linear
    units. i1 / i2 are the linear-scale ara / cuma levels; they provide both
    the fixed curve levels and the range of the 1000-point swept axis.
    """
    sweeps = {
        'ara': np.logspace(np.log10(i1.min()), np.log10(i1.max()), 1000),
        'cuma': np.logspace(np.log10(i2.min()), np.log10(i2.max()), 1000),
    }
    measured_levels = {'ara': i1, 'cuma': i2}

    act_data = np.log10(data)

    row, col = 1, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    def draw_panel(ax, fixed, varied, band_alpha):
        # Measured levels on the fixed axis, dense sweep on the varied one.
        axes_values = {fixed: measured_levels[fixed], varied: sweeps[varied]}
        pred_data = pd.DataFrame(itertools.product(axes_values['ara'], axes_values['cuma']))
        pred_data.columns = ['ara', 'cuma']
        pred_data = np.log10(pred_data)
        if show_interval:
            pred_data['prediction'], pred_data['std_prediction'] = model.predict(pred_data, return_std=True)
        else:
            pred_data['prediction'] = model.predict(pred_data)

        for x, i in enumerate(act_data[fixed].unique()):
            act = act_data[act_data[fixed]==i]
            pred = pred_data[pred_data[fixed]==i]
            ax.scatter(act[varied], act['fluo'], label=i, color=color[x])
            ax.plot(pred[varied], pred['prediction'], color=color[x])
            if not show_interval:
                continue
            ax.fill_between(
                pred[varied],
                pred['prediction'] - 1.96 * pred['std_prediction'],
                pred['prediction'] + 1.96 * pred['std_prediction'],
                alpha=band_alpha,
                label=r"95% confidence interval", color=color[x]
            )
        ax.set_xlabel('{} (log)'.format(varied))
        ax.set_ylabel('RPU (log)')

    draw_panel(axr[0], 'ara', 'cuma', 0.5)
    draw_panel(axr[1], 'cuma', 'ara', 0.4)

    plt.suptitle(title)
    plt.tight_layout()
    sns.despine()

In [None]:
# log10 features (ara, cuma) and log10 target (fluo) for the e15 gate.
X, y = (np.log10(e15[cols]) for cols in (['ara', 'cuma'], 'fluo'))

In [None]:
# MLP for the e15 gate. random_state pins the weight initialisation so the
# fit is the same on every run.
model = MLPRegressor(solver='lbfgs', random_state=0)
model.fit(X, y)

In [None]:
# e15 data vs. the model fitted in the previous cell.
# NOTE(review): that model is an MLPRegressor, but the title says "RBF Kernel".
plot_2d_3k3(model, e15, 'ECF15 (RBF Kernel)')

In [None]:
# Per-level plot colours (same six values as the list defined near the top of the notebook).
color = [
    'red', 'green', 'blue',
    'orange', 'purple', 'cyan',
]

In [None]:
# Stand-alone version of the 2-input overlay for the E38 gate: measured points
# plus model curves, one panel per swept input, all in log10.
model = model2
data = e38.copy()
show_interval = False

act_data = np.log10(data)

row, col = 1, 2
f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
axr = axs.ravel()

# Dense sweeps over the 2-input activity ranges. The cell used to read the
# notebook-level ind1/ind2, which are the coarse 4- and 7-point grids built
# from the 3-input levels, so the curves covered the wrong range. Separate
# names are used so those globals stay intact for the cells that need them.
level_2d = {'ara': ara_rpu_2, 'ohc14': ohc14_rpu_2}
sweep_2d = {
    name: np.logspace(np.log10(values.min()), np.log10(values.max()), 1000)
    for name, values in level_2d.items()
}

# (axis, fixed input, swept input, band opacity)
panels = [
    (axr[0], 'ara', 'ohc14', 0.5),
    (axr[1], 'ohc14', 'ara', 0.4),
]
for ax, fixed, varied, band_alpha in panels:
    grid = {fixed: level_2d[fixed], varied: sweep_2d[varied]}
    pred_data = pd.DataFrame(
        list(itertools.product(grid['ara'], grid['ohc14'])),
        columns=['ara', 'ohc14'],
    )
    pred_data = np.log10(pred_data)
    if show_interval:
        mean, std = model.predict(pred_data[['ara', 'ohc14']], return_std=True)
        pred_data['prediction'], pred_data['std_prediction'] = mean, std
    else:
        pred_data['prediction'] = model.predict(pred_data[['ara', 'ohc14']])

    for x, i in enumerate(act_data[fixed].unique()):
        act = act_data[act_data[fixed]==i]
        pred = pred_data[pred_data[fixed]==i]
        ax.scatter(act[varied], act['fluo'], label=i, color=color[x])
        ax.plot(pred[varied], pred['prediction'], color=color[x])
        if show_interval:
            ax.fill_between(
                pred[varied],
                pred['prediction'] - 1.96 * pred['std_prediction'],
                pred['prediction'] + 1.96 * pred['std_prediction'],
                alpha=band_alpha,
                label=r"95% confidence interval", color=color[x]
            )
    ax.set_xlabel('{} (logscale)'.format(varied))
    ax.set_ylabel('RPU (logscale)')

plt.tight_layout()
sns.despine()

### 3-input AND gates

In [None]:
# Rebuild the log10 3-input table as df3.
# Note: this rebinds `circuit` to the raw CSV frame; cells above that expect
# the log10 table under that name will see different data if re-run afterwards.
circuit = pd.read_csv(f'{folder}/3i-and-gates-jump.csv')
inducers = [ara_rpu_3, ohc14_rpu_3, sal_rpu_3]
df3 = pd.DataFrame(list(itertools.product(*inducers)), columns=['ara', 'ohc14', 'sal'])
df3 = np.log10(pd.concat([df3, circuit['e38-e20']], axis=1))

In [None]:
# Two-stage cascade on the measured grid: model2 -> intermediate E38 output,
# model1 -> final E20 output from (E38 output, sal).
df3['e38-pred'] = model2.predict(df3[['ara', 'ohc14']])
# Plain array for the second stage: model1 was not fitted on columns named
# 'e38-pred'/'sal', and recent scikit-learn rejects mismatching column names.
df3['e20-pred'] = model1.predict(df3[['e38-pred', 'sal']].to_numpy())

In [None]:
# Agreement between measured and predicted circuit output (both log10).
# mean_absolute_percentage_error returns a fraction (1.0 == 100 %), so it is
# scaled to percent; the old "100 - value" mixed the two scales.
mape = 100 * mean_absolute_percentage_error(df3['e38-e20'], df3['e20-pred'])
r = r2_score(df3['e38-e20'], df3['e20-pred'])
# x/y are passed by keyword: newer seaborn no longer accepts them positionally.
sns.regplot(x=df3['e38-e20'], y=df3['e20-pred'], label='mape:{}, r2:{}'.format(np.round(mape, 2), np.round(r, 2)))
plt.legend()
sns.despine()

In [None]:
def plot_3d(models, data, i1=ara_rpu_3, i2=ohc14_rpu_3, i3=sal_rpu_3, title=None):
    """Plot the two-stage cascade prediction against measurements, swept over 'sal'.

    One panel per 'ara' level (at most four, on a 2x2 grid); inside a panel,
    one curve per 'ohc14' level.

    NOTE: this definition reuses the name of the earlier
    ``plot_3d(models, data, title, ...)`` and replaces it once this cell has
    run; the third positional argument here is ``i1``, not a title.

    Parameters
    ----------
    models : sequence ``[first_stage, second_stage]``; the first model maps
        log10 (ara, ohc14) to 'e38-pred', the second maps (e38-pred, sal) to
        'e20-pred'. The previous body indexed the list the other way round.
    data : DataFrame already in log10 with columns 'ara', 'ohc14', 'sal'
        and 'e38-e20'. (The body previously ignored this argument and read
        the notebook global ``df3``.)
    i1, i2, i3 : linear-scale levels for ara, ohc14 and sal.
    title : optional figure suptitle.
    """
    first_stage, second_stage = models[0], models[1]
    sal_sweep = np.logspace(np.log10(i3.min()), np.log10(i3.max()), 1000)

    row, col = 2, 2
    f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
    axr = axs.ravel()

    for x, a in enumerate(i1):
        ax = axr[x]

        for o in i2:

            pred_data = pd.DataFrame(
                list(itertools.product([a], [o], sal_sweep)),
                columns=['ara', 'ohc14', 'sal'],
            )
            pred_data = np.log10(pred_data)

            pred_data['e38-pred'] = first_stage.predict(pred_data[['ara', 'ohc14']])
            # Plain array: the second-stage model was not fitted on columns
            # named 'e38-pred'/'sal' (recent scikit-learn rejects the mismatch).
            pred_data['e20-pred'] = second_stage.predict(pred_data[['e38-pred', 'sal']].to_numpy())

            act_data = data[(data['ara']==np.log10(a)) & (data['ohc14']==np.log10(o))].reset_index(drop=True)

            ax.scatter(act_data['sal'], act_data['e38-e20'])
            ax.plot(pred_data['sal'], pred_data['e20-pred'], label=np.round(o, 2))

        ax.set_xlabel('Sal')
        ax.set_ylabel('GFP (RPU)')
        ax.set_title('Ara:{}'.format(np.round(a, 2)))
        ax.legend(loc=4, bbox_to_anchor=(1.1, 1.05))

    if title is not None:
        plt.suptitle(title)
    plt.tight_layout()
    sns.despine()

In [None]:
# Cascade prediction swept over 'ara': one panel per ohc14 level, one curve
# per sal level, with the measured points overlaid.
row, col = 2, 2
f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
axr = axs.ravel()

for x, o in enumerate(ohc14_rpu_3):

    for s in sal_rpu_3:

        # NOTE(review): ind1 is the notebook-level 4-point ara grid, so each
        # curve has only four points -- confirm a denser sweep is not wanted.
        inducers = [ind1, [o], [s]]
        pred_data = pd.DataFrame(list(itertools.product(*inducers)), columns=['ara', 'ohc14', 'sal'])
        pred_data = np.log10(pred_data)

        pred_data['e38-pred'] = model2.predict(pred_data[['ara', 'ohc14']])
        # Plain array for the second stage: model1 was not fitted on columns
        # named 'e38-pred'/'sal' (recent scikit-learn rejects the mismatch).
        pred_data['e20-pred'] = model1.predict(pred_data[['e38-pred', 'sal']].to_numpy())

        act_data = df3[(df3['ohc14']==np.log10(o)) & (df3['sal']==np.log10(s))].reset_index(drop=True)

        axr[x].scatter(act_data['ara'], act_data['e38-e20'])
        axr[x].plot(pred_data['ara'], pred_data['e20-pred'], label=np.round(s, 2))

    # Per-panel decoration only needs to happen once, after all curves exist.
    axr[x].set_xlabel('Ara')
    axr[x].set_ylabel('GFP (RPU)')
    axr[x].set_title('OHC14:{}'.format(np.round(o, 2)))
    axr[x].legend(loc=4, bbox_to_anchor=(1.1, 1.05))

plt.tight_layout()
sns.despine()

In [None]:
# Cascade prediction swept over 'ohc14': one panel per sal level, one curve
# per ara level, with the measured points overlaid.
row, col = 2, 2
f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*3))
axr = axs.ravel()

for x, s in enumerate(sal_rpu_3):

    for a in ara_rpu_3:

        # NOTE(review): ind2 is the notebook-level 7-point ohc14 grid, so each
        # curve has only seven points -- confirm a denser sweep is not wanted.
        inducers = [[a], ind2, [s]]
        pred_data = pd.DataFrame(list(itertools.product(*inducers)), columns=['ara', 'ohc14', 'sal'])
        pred_data = np.log10(pred_data)

        pred_data['e38-pred'] = model2.predict(pred_data[['ara', 'ohc14']])
        # Plain array for the second stage: model1 was not fitted on columns
        # named 'e38-pred'/'sal' (recent scikit-learn rejects the mismatch).
        pred_data['e20-pred'] = model1.predict(pred_data[['e38-pred', 'sal']].to_numpy())

        act_data = df3[(df3['ara']==np.log10(a)) & (df3['sal']==np.log10(s))].reset_index(drop=True)

        axr[x].scatter(act_data['ohc14'], act_data['e38-e20'])
        axr[x].plot(pred_data['ohc14'], pred_data['e20-pred'], label=np.round(a, 2))

    # The x axis shows ohc14 (the panel is per sal level), so label it as such;
    # it was previously labelled 'Sal'.
    axr[x].set_xlabel('OHC14')
    axr[x].set_ylabel('GFP (RPU)')
    axr[x].set_title('Sal:{}'.format(np.round(s, 2)))
    axr[x].legend(loc=4, bbox_to_anchor=(1.1, 1.05))

plt.tight_layout()
sns.despine()