In [1]:
import pandas as pd
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import json
import warnings
warnings.filterwarnings("ignore")
from scipy.optimize import curve_fit
import itertools
from sklearn.metrics import mean_squared_log_error

from utils.model import *
from utils.helper import *
from utils.plotter import *

In [None]:
# Load the stored Hill-function parameters from the JSON file under utils/.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open('utils/hill-params.json', 'r', encoding='utf-8') as f:
    HILL_PARAMS = json.load(f)
# Bare last expression: show the loaded parameters as the cell output.
HILL_PARAMS

In [None]:
# Inducer name -> promoter name.
ind_pro_pair = dict(ara='PBAD', ohc14='PCin', sal='PSalTTC')

# Promoter name -> inducer name: the exact inverse of the mapping above.
pro_ind_pair = {promoter: inducer for inducer, promoter in ind_pro_pair.items()}

#### Promoter Permutation

In [None]:
# Directory holding the promoter-permutation measurements, and the gate
# variants stored there (one CSV per name).
folder = 'datasets/cleansed/promoter-permutation/non-zero'
gates = [
    'PBAD-PCin-E20', 'PBAD-PSalTTC-E20',
    'PCin-PBAD-E20', 'PCin-PSalTTC-E20',
    'PSalTTC-PBAD-E20', 'PSalTTC-PCin-E20',
]

# Only the first gate in the list is loaded here.
gate = pd.read_csv('{}/{}.csv'.format(folder, gates[0]))

# The first two CSV columns are used as the two input axes.
column_names = gate.columns.tolist()
ind1_key, ind2_key = column_names[0], column_names[1]

# Sorted distinct levels found on each input axis.
ind1 = sorted(gate[ind1_key].unique().tolist())
ind2 = sorted(gate[ind2_key].unique().tolist())

In [None]:
# Heatmap of the measured response: rows follow ind1_key, columns ind2_key.
plt.figure(figsize=[6, 4])
# DataFrame.pivot only accepts keyword arguments from pandas 2.0 on; the old
# positional call raises TypeError there. Naming `values` also yields a flat
# (single-level) column index directly.
actual = gate.pivot(index=ind1_key, columns=ind2_key, values='fluo')
# Rounded levels are used as tick labels to keep the axes readable.
actual.index = [np.round(i, 2) for i in ind1]
actual.columns = [np.round(i, 4) for i in ind2]
sns.heatmap(actual, annot=True, fmt='.2f')
plt.xlabel('{} [RPU]'.format(ind2_key))
plt.ylabel('{} [RPU]'.format(ind1_key))
plt.title('E20')

plt.tight_layout()
sns.despine()

In [None]:
# 100 log-spaced points across the measured range of each input, used to draw
# smooth model curves between the measured levels.
ind1_range, ind2_range = (
    np.logspace(np.log10(min(levels)), np.log10(max(levels)), 100)
    for levels in (ind1, ind2)
)

In [None]:
# Stored E20 Hill parameters plus one extra value (0.5). Not used further in
# this cell; kept because later code may read it.
fixed_params = [*HILL_PARAMS['e20'], 0.5]

# Design matrix: row 0 holds the ind2 level and row 1 the ind1 level of every
# grid point, enumerated with ind1 as the outer axis and ind2 as the inner one.
x2, x1 = np.meshgrid(ind2, ind1)
x = np.vstack((x2.ravel(), x1.ravel()))
# Put the measurements in the same (ind1, ind2) order as the grid so y[k]
# belongs to x[:, k] even when the CSV rows are not already sorted that way.
y = np.abs(gate.sort_values([ind1_key, ind2_key])['fluo']).values
# Lower (first row) and upper (second row) bounds of the five fitted parameters.
gate_bounds = [[0, -5, -5, -5, 0],
               [5, 5, 2, 0, 1]]
# random_search / hill_activation_and_fixing are not defined in this notebook;
# presumably they come from the utils star-imports.
# NOTE(review): if random_search draws random numbers, seed it for reproducibility.
init_guess = random_search(50, x, y, gate_bounds, hill_activation_and_fixing)
popt, pcov = curve_fit(hill_activation_and_fixing, x, y, p0=init_guess, bounds=gate_bounds)
print(popt)

In [None]:
# Model response on a fine ind2 grid, evaluated at every measured ind1 level.
x2, x1 = np.meshgrid(ind2_range, ind1)
x = np.stack([x2.ravel(), x1.ravel()])
y_pred = hill_activation_and_fixing(x, *popt)

# Same the other way round: fine ind1 grid at every measured ind2 level.
x2_, x1_ = np.meshgrid(ind2, ind1_range)
x_ = np.stack([x2_.ravel(), x1_.ravel()])
y_pred_ = hill_activation_and_fixing(x_, *popt)

In [None]:
# Reshape the flat predictions into (ind1 level) x (fine ind2 grid).
# Column 0 of df is the ind2 value and column 1 the ind1 value (rows of x).
df = pd.DataFrame(x.T)
df['y'] = y_pred
# Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
training = df.pivot(index=1, columns=0, values='y')
training.index = ind1
training.columns = ind2_range

# Same reshape for the (fine ind1 grid) x (ind2 level) predictions.
df_ = pd.DataFrame(x_.T)
df_['y'] = y_pred_
training_ = df_.pivot(index=1, columns=0, values='y')
training_.index = ind1_range
training_.columns = ind2

In [None]:
# Two panels: measured points (markers) against fitted curves (lines).
row, col = 1, 2
f, axs = plt.subplots(row, col, sharex=False, sharey=False, figsize=(col*6, row*4))
ax = axs.ravel()

# Left panel: response versus ind2, one series per row of `actual`.
n_rows = len(actual)
for r in range(n_rows):
    ax[0].scatter(actual.columns, actual.iloc[r, :])
    ax[0].plot(training.columns, training.iloc[r, :])
ax[0].loglog()
ax[0].set_xlabel(ind2_key)

# Right panel: response versus ind1, one series per column of `actual`.
n_cols = len(actual.columns)
for c in range(n_cols):
    ax[1].scatter(actual.index, actual.iloc[:, c])
    ax[1].plot(training_.index, training_.iloc[:, c])
ax[1].loglog()
ax[1].set_xlabel(ind1_key)
plt.show()

#### Induction matrix 2-input AND gates

Generate data.

In [None]:
# Experiment folder handed to the project loader.
folder = '053-E20-E38-induction-matrix'

# Inducer levels in ascending order.
aras = [
    0.05078125/4, 0.05078125, 0.203125,
    0.8125, 3.25, 13,
]
ohc14s = [
    0.078125/4, 0.078125, 0.3125,
    1.25, 5, 20,
]
inducers = [aras, ohc14s]

# generate_data_at_t is not defined in this notebook; presumably it comes from
# the utils star-imports.
samples_t = generate_data_at_t(
    folder, 'raw', 'plate_map',
    kind='2-input induction matrix',
    pos_ctrl_map=[(0, 2), (1, 2)],
)

#### E20 & E38

In [None]:
# Side-by-side heatmaps of the '8 h ' readout: samples_t[0] is titled E20 and
# samples_t[1] E38. The two panels share one code path instead of a
# copy-pasted block per gate.
f, axs = plt.subplots(1, 2, sharex=False, sharey=False, figsize=(10, 4))
axr = axs.ravel()

for panel, title in enumerate(['E20', 'E38']):
    fluo_t = samples_t[panel].reset_index()
    # The second underscore-separated token of `name` starts with two digits:
    # the first indexes `aras`, the second indexes `ohc14s`.
    level_code = fluo_t['name'].str.split('_', expand=True)[1]
    fluo_t['ara'] = level_code.str[0].astype(int).apply(lambda k: aras[k])
    fluo_t['ohc14'] = level_code.str[1].astype(int).apply(lambda k: ohc14s[k])
    # Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
    actual = fluo_t.pivot(index='ara', columns='ohc14', values='8 h ')
    # Rounded levels are used as tick labels.
    actual.index = [np.round(i, 2) for i in aras]
    actual.columns = [np.round(i, 4) for i in ohc14s]
    sns.heatmap(actual, annot=True, fmt='.2f', ax=axr[panel])
    axr[panel].set_xlabel('OHC14 [RPU]')
    axr[panel].set_ylabel('Ara [RPU]')
    axr[panel].set_title(title)

# As before, `fluo_t` and `actual` end up holding the E38 data after this cell.
plt.tight_layout()
sns.despine()

In [None]:
# Rebuild the table for samples_t[0] with decoded inducer levels.
fluo_t = samples_t[0].reset_index()
# Second underscore-separated token of `name`: its first character indexes
# `aras`, its second character indexes `ohc14s`.
level_code = fluo_t['name'].str.split('_', expand=True)[1]
fluo_t['ara'] = level_code.str[0].astype(int).apply(lambda k: aras[k])
fluo_t['ohc14'] = level_code.str[1].astype(int).apply(lambda k: ohc14s[k])

In [None]:
# Design matrix over the full induction grid: row 0 = OHC14 level,
# row 1 = Ara level, with Ara as the outer axis.
x2, x1 = np.meshgrid(ohc14s, aras)
x = np.stack([x2.ravel(), x1.ravel()])
# Magnitude of the '8 h ' readout, taken in table row order.
# NOTE(review): this assumes fluo_t rows follow the same (ara, ohc14) order as the grid - confirm.
readout = fluo_t['8 h '].values
y = np.abs(readout)

In [None]:
# Bounds of the four fitted parameters, as [lower, upper].
lower_bounds = [0, -3, -2, -3]
upper_bounds = [1, 0, 0, -1]
gate_bounds = [lower_bounds, upper_bounds]

# Pick a starting point with the project's random_search helper, then refine
# it with a bounded least-squares fit.
init_guess = random_search(50, x, y, gate_bounds, hill_activation_and)
popt, pcov = curve_fit(hill_activation_and, x, y, bounds=gate_bounds, p0=init_guess)
popt

In [None]:
# Build the parameter vector from stored values: all E20 parameters followed by
# the PBAD and PCin parameters without their first entry.
# NOTE(review): this replaces the `popt` fitted in the previous cell - confirm that is intended.
popt = [*HILL_PARAMS['e20'], *HILL_PARAMS['PBAD'][1:], *HILL_PARAMS['PCin'][1:]]
fluo_t['pred'] = hill_activation_and(x, *popt)

In [None]:
# Measured and predicted responses as Ara x OHC14 matrices with rounded labels.
# Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
actual = fluo_t.pivot(index='ara', columns='ohc14', values='8 h ')
actual.index = [np.round(i, 2) for i in aras]
actual.columns = [np.round(i, 4) for i in ohc14s]

training = fluo_t.pivot(index='ara', columns='ohc14', values='pred')
training.index = [np.round(i, 2) for i in aras]
training.columns = [np.round(i, 4) for i in ohc14s]

In [None]:
# Measured matrix (left) next to the model output (right).
f, axs = plt.subplots(1, 2, sharex=False, sharey=False, figsize=(10, 4))
axr = axs.ravel()

panels = zip(axr, (actual, training), ('Experiment', 'Model training'))
for panel_ax, matrix, panel_title in panels:
    sns.heatmap(matrix, annot=True, fmt='.2f', ax=panel_ax)
    panel_ax.set_xlabel('OHC14 [RPU]')
    panel_ax.set_ylabel('Ara [RPU]')
    panel_ax.set_title(panel_title)

plt.suptitle('E20')
plt.tight_layout()
sns.despine()

In [None]:
# Same matrices as before, but labelled with the exact (unrounded) inducer
# levels so they can be used as x-coordinates in the line plots.
# Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
actual = fluo_t.pivot(index='ara', columns='ohc14', values='8 h ')
actual.index = aras
actual.columns = ohc14s

training = fluo_t.pivot(index='ara', columns='ohc14', values='pred')
training.index = aras
training.columns = ohc14s

#### Visualization

In [None]:
# One colour per inducer level, indexed by series position in the line plots.
colors = [
    '#cc0000',
    '#00cc00',
    '#0000cc',
    '#cccc00',
    '#cc00cc',
    '#00cccc',
]

In [None]:
# log10 plotting limits: from half the lowest to twice the highest level.
aras_lim = (np.log10(min(aras)/2), np.log10(max(aras)*2))
ohc14s_lim = (np.log10(min(ohc14s)/2), np.log10(max(ohc14s)*2))

# 100 log-spaced points inside those limits (ind1 for Ara, ind2 for OHC14).
ind1 = np.logspace(aras_lim[0], aras_lim[1], 100)
ind2 = np.logspace(ohc14s_lim[0], ohc14s_lim[1], 100)

In [None]:
# Model response on the fine OHC14 grid (ind2) at each measured Ara level.
x2, x1 = np.meshgrid(ind2, aras)
x = np.vstack((x2.ravel(), x1.ravel()))
y_pred = hill_activation_and(x, *popt)
# Column 0 of df is the OHC14 value and column 1 the Ara value (rows of x).
df = pd.DataFrame(x.T)
df['y'] = y_pred
# Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
training = df.pivot(index=1, columns=0, values='y')
training.index = aras
training.columns = ind2

In [None]:
# Response versus OHC14, one colour per Ara level: markers are measurements,
# lines are the model.
plt.figure(figsize=[6, 3])
print(popt)
for i in range(len(actual)):
    plt.scatter(actual.columns, actual.iloc[i, :], color=colors[i], label='Ara-{}'.format(np.round(aras[i], 3)))
    plt.plot(training.columns, training.iloc[i, :], color=colors[i])
plt.loglog()
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.ylabel('GFP [RPU]')
# The x-coordinates are the columns of `actual`, i.e. the OHC14 levels; the
# previous label 'Ara' named the legend variable instead of the axis variable.
plt.xlabel('OHC14')
sns.despine()

In [None]:
# Model response on the fine Ara grid (ind1) at each measured OHC14 level.
x2_, x1_ = np.meshgrid(ohc14s, ind1)
x_ = np.vstack((x2_.ravel(), x1_.ravel()))
y_pred_ = hill_activation_and(x_, *popt)
# Column 0 of df_ is the OHC14 value and column 1 the Ara value (rows of x_).
df_ = pd.DataFrame(x_.T)
df_['y'] = y_pred_
# Keyword arguments are required by pandas >= 2.0 (positional pivot raises TypeError).
training_ = df_.pivot(index=1, columns=0, values='y')
training_.index = ind1
training_.columns = ohc14s

In [None]:
# Response versus Ara, one colour per OHC14 level: markers are measurements,
# lines are the model.
plt.figure(figsize=[6, 3])
print(popt)
n_series = actual.shape[1]
for i in range(n_series):
    series_label = 'OHC14-{}'.format(np.round(ohc14s[i], 3))
    plt.scatter(actual.index, actual.iloc[:, i], color=colors[i], label=series_label)
    plt.plot(training_.index, training_.iloc[:, i], color=colors[i])
plt.loglog()
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.ylabel('GFP [RPU]')
plt.xlabel('Ara')
sns.despine()

#### 3-input AND gate

In [None]:
def hill_activation_and_3(x):
    """Evaluate the cascaded 3-input AND-gate model built from stored Hill parameters.

    ``x`` is unpacked into three inputs. The first two are passed through the
    PBAD and PCin Hill functions and their product feeds the e38 Hill function;
    that result, multiplied by the PSalTTC response of the third input, feeds
    the e20 Hill function, whose output is scaled by ``10**ag`` of e20.

    All parameters are read from the module-level ``HILL_PARAMS``; the first
    entry of every set except e20 is ignored.
    """
    in_a, in_b, in_c = x

    # Each stored set has four entries; only e20's first entry is used (as the output scale).
    _, k_bad, n_bad, eps_bad = HILL_PARAMS['PBAD']
    _, k_cin, n_cin, eps_cin = HILL_PARAMS['PCin']
    _, k_sal, n_sal, eps_sal = HILL_PARAMS['PSalTTC']
    _, k_e38, n_e38, eps_e38 = HILL_PARAMS['e38']
    ag_e20, k_e20, n_e20, eps_e20 = HILL_PARAMS['e20']

    # First stage: two promoter responses combined by the e38 gate.
    act_bad = hill_activation(in_a, k_bad, n_bad, eps_bad)
    act_cin = hill_activation(in_b, k_cin, n_cin, eps_cin)
    e38_out = hill_activation(act_bad * act_cin, k_e38, n_e38, eps_e38)

    # Second stage: e38 output combined with the third promoter by the e20 gate.
    act_sal = hill_activation(in_c, k_sal, n_sal, eps_sal)
    e20_out = hill_activation(e38_out * act_sal, k_e20, n_e20, eps_e20)
    return 10**ag_e20 * e20_out

In [None]:
# Inducer levels of the 3-input experiment (four per inducer, ascending).
aras = [0.8125/4, 0.8125, 3.25, 13]
ohc14s = [2.5/2, 2.5, 5, 10]
sals = [25/2, 25, 50, 100]
inducers = [aras, ohc14s, sals]

# Every (ara, ohc14, sal) combination, arranged as a 3 x N array.
induction_states = pd.DataFrame(list(itertools.product(*inducers)))
x = induction_states.values.T

# Measured response of the e38-e20 circuit.
circuit = pd.read_csv('datasets/experiment/900-cleansed/3i-and-gates-jump.csv')
y = circuit['e38-e20'].values

In [None]:
# One row per induction state: the three inducer levels, the model output and
# the measured value.
state_grid = pd.DataFrame(itertools.product(*inducers))
model_out = pd.DataFrame(hill_activation_and_3(x))
pred = pd.concat([state_grid, model_out], axis=1)
pred.columns = ['ara', 'ohc14', 'sal', 'prediction']
pred['actual'] = y

In [None]:
# Bare expression: display the prediction/actual table as the cell output.
pred

In [None]:
def plot_3i_responses(data, inds, inds_lbl, row=2, col=2, xlabel='RPU', ylabel='GFP'):
    """Plot measured versus predicted responses on a grid of log-log panels.

    One panel is drawn per level of the first inducer. Inside a panel there is
    one series per level of the second inducer, plotted against the third
    inducer: markers show ``data['actual']``, lines show ``data['prediction']``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the three columns named in ``inds_lbl`` plus ``'actual'`` and
        ``'prediction'``.
    inds : sequence of sequences
        Levels of each inducer, in the same order as ``inds_lbl``. Only the
        first two entries are iterated; the third is read from ``data``.
    inds_lbl : sequence of str
        Column names of the panel, series and x-axis inducers, in that order.
    row, col : int
        Grid shape. Panels beyond ``len(inds[0])`` are hidden; levels beyond
        ``row * col`` are not drawn.
    xlabel : str
        Accepted for backward compatibility but not used: the x-axis is
        labelled with ``inds_lbl[2]``.
    ylabel : str
        Label of the y-axis of every panel.
    """
    panel_col, series_col, x_col = inds_lbl[0], inds_lbl[1], inds_lbl[2]

    # squeeze=False always returns a 2-D array of axes, so a 1x1 grid works too
    # (plt.subplots would otherwise return a bare Axes without .ravel()).
    f, axs = plt.subplots(row, col, sharex=False, sharey=False,
                          figsize=(col*6, row*3), squeeze=False)
    for i, ax in enumerate(axs.ravel()):
        if i >= len(inds[0]):
            # More panels than levels: hide the unused axes.
            ax.set_visible(False)
            continue
        panel_data = data[data[panel_col] == inds[0][i]]
        for level in inds[1]:
            # Sort along x so the prediction line is drawn left to right
            # regardless of the row order of `data`.
            d = panel_data[panel_data[series_col] == level].sort_values(x_col)
            ax.scatter(d[x_col], d['actual'])
            ax.plot(d[x_col], d['prediction'], label='{}-{}'.format(series_col, level))
        ax.set_xlabel(x_col)
        ax.set_ylabel(ylabel)
        ax.loglog()
        ax.set_title('{}-{}'.format(panel_col, inds[0][i]))
        ax.legend()
    plt.tight_layout()
    sns.despine()

In [None]:
# One panel per ara level, one series per sal level, ohc14 on the x-axis.
plot_3i_responses(pred, [aras, sals, ohc14s], ['ara', 'sal', 'ohc14'])

In [None]:
# Column 0: measured value, column 1: model prediction, one row per induction state.
state_matrix = pd.DataFrame(itertools.product(*inducers)).T.values
df = pd.DataFrame(list(zip(y, hill_activation_and_3(state_matrix))))

# Bar chart of the measured values.
plt.bar(df.index, df[0])
plt.title('Actual')
plt.xlabel('Induction states')
plt.ylabel('GFP')
sns.despine()

In [None]:
# Bar chart of the model predictions (column 1 of df), one bar per induction state.
predicted = df[1]
plt.bar(df.index, predicted)
plt.title('Prediction')
plt.xlabel('Induction states')
plt.ylabel('GFP')
sns.despine()