In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import GPy, patsy, scipy

In [None]:
# Read the raw workbook with pandas' default sheet selection (the first
# worksheet). The next cell melts every non-`strain` column of this table
# into (time, OD600) pairs.
od = pd.read_excel("data/cfu-od-raw.xlsx")
od

In [None]:
# Reshape wide -> long: `strain` stays as the identifier, and each remaining
# column becomes one row with its column label in `time` and its cell value
# in `OD600`.
melt = pd.melt(od,id_vars=['strain'],var_name='time', value_name='OD600')
melt.head()

In [None]:
# Read the second worksheet (index 1) of the same workbook; it is joined to the
# melted OD table on ('strain', 'OD600') and supplies the 'CFUs/ml' column.
# `sheet_name` is the supported keyword: the older `sheetname` spelling was
# deprecated by pandas and later removed, so it fails on current versions.
cfu = pd.read_excel("data/cfu-od-raw.xlsx", sheet_name=1)
cfu

In [None]:
# Outer join on (strain, OD600): rows from both tables are kept, and a row with
# no partner gets NaN in the other table's columns.
# Alternative kept for reference (matched rows only):
# merge = pd.merge(melt, cfu, 'inner', ['strain', 'OD600'])
merge = pd.merge(melt, cfu, how='outer', on=['strain', 'OD600'])

# Log-transformed responses: base 10 for CFU counts, base 2 for OD600.
merge['logCFU'] = np.log10(merge['CFUs/ml'])
merge['logOD'] = np.log2(merge['OD600'])

# Make time numeric, then z-score it (pandas mean / sample std) for use as the
# GP input.
merge['time'] = merge['time'].astype(float)
merge['normtime'] = (merge['time'] - merge['time'].mean()) / merge['time'].std()

merge.head()

In [None]:
# Inspect the last rows of the merged table as well.
merge.tail()

In [None]:
# Raw data overview: both log-transformed measurements against time on a
# shared x-axis with separate y-axes.
plt.figure(figsize=(6,4))

# Left axis: log2(OD), drawn and labelled in colour C0.
plt.scatter(merge.time, merge.logOD,c='C0')
plt.ylabel("$log_2($OD$)$",fontsize=15,color='C0')
plt.yticks(color='C0')
plt.xticks(fontsize=12)
plt.xlabel("time (h)", fontsize=15)

# Right axis: twinx() creates a second y-axis on the same x-axis and makes it
# current, so the calls below apply to the log10(CFU) scale (colour C1).
plt.twinx()
plt.scatter(merge.time, merge.logCFU,c='C1')
plt.ylabel("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.yticks(color='C1')

# NOTE(review): assumes the figures/ directory already exists - confirm.
plt.savefig("figures/cfu-vs-od.pdf",bbox_inches='tight')

In [None]:
# Direct relationship between the two log-transformed measurements.
plt.scatter(merge.logOD, merge.logCFU)

In [None]:
# Re-display the first rows of the merged table (now including the derived
# logCFU, logOD and normtime columns) before building the model inputs.
merge.head()

In [None]:
# y1,x1 = patsy.dmatrices('standardize(logOD) ~ standardize(time) + 0', merge)

# x2 = patsy.build_design_matrices([x1.design_info], merge[~merge.logCFU.isnull()])[0]
# y2 = patsy.dmatrix('standardize(logCFU) + 0', merge[~merge.logCFU.isnull()])
# # y2,x2 = patsy.dmatrices('standardize(logCFU) ~ standardize(time) + 0', merge[~merge.logCFU.isnull()])

# x1.shape, y1.shape, x2.shape, y2.shape

In [None]:
# k = y1.design_info.factor_infos.keys()[0]
# fi = y1.design_info.factor_infos[k]
# st = fi.state['transforms']['_patsy_stobj0__standardize__']

# st.current_mean, st.current_M2

In [None]:
# Output 1 (OD): every row of the merged table, as column vectors.
x1 = merge['normtime'].values[:, None]
y1 = merge['logOD'].values[:, None]

# Output 2 (CFU): only the rows that actually have a CFU measurement.
x2 = merge['normtime'][merge['logCFU'].notnull()].values[:, None]
y2 = merge['logCFU'].dropna().values[:, None]
# x2 = merge.normtime.values[:,None]
# y2 = merge.logCFU.values[:,None]

# Remember (mean, std) of each response and of time, so predictions can be
# mapped back to the original units later.
norm = {
    'y1': (y1.mean(), y1.std()),
    'y2': (y2.mean(), y2.std()),
    'time': (merge.time.mean(), merge.time.std()),
}

# Standardize both responses with the statistics just recorded.
y1 = (y1 - norm['y1'][0]) / norm['y1'][1]
y2 = (y2 - norm['y2'][0]) / norm['y2'][1]

(x1.shape, y1.shape, x2.shape, y2.shape)

In [None]:
# Sanity check: both standardized responses against normalized time
# (first call = OD series, second call = CFU series).
plt.scatter(x1,y1)
plt.scatter(x2,y2)

In [None]:
# Build an LCM kernel from two RBF kernels; in the cells visible here it is
# only displayed for inspection and is not passed to any model.
# NOTE(review): the two leading positional 1s are presumably input_dim and
# num_outputs - confirm against the GPy signature.
lcm = GPy.util.multioutput.LCM(1,1,[GPy.kern.RBF(1) for i in range(2)])

In [None]:
# Display the LCM kernel's parameter summary.
lcm

In [None]:
# Display the `B` component of the first ICM term inside the LCM kernel.
lcm.ICM0.B

In [None]:
# Base kernel over the single (time) input, shared by both outputs.
K = GPy.kern.Matern32(1)
# K = GPy.kern.Matern52(1)
# ICM kernel: K combined with a coregionalization term for 2 outputs, W_rank=2.
kern = GPy.util.multioutput.ICM(input_dim=1,num_outputs=2,kernel=K,W_rank=2)

# kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=[GPy.kern.Matern32(1) for i in range(2)])

# Joint model: output 0 is (x1, y1) = standardized log2(OD),
# output 1 is (x2, y2) = standardized log10(CFU).
m = GPy.models.GPCoregionalizedRegression([x1,x2],[y1,y2],kernel=kern)
# Fix the parameters matched by this regex at 1 - presumably the Matern
# variance, leaving the coregionalization term to carry the output scale;
# confirm against the printed model summary below.
m['.*Mat.*.var'].constrain_fixed(1.)

# icm = GPy.util.multioutput.ICM(input_dim=1,num_outputs=2,kernel=GPy.kern.RBF(1))
# m = GPy.models.GPCoregionalizedRegression([x1,x2],[y1,y2],kernel=icm)
# m.kern.rbf.variance.constrain_fixed(1.)

# NOTE(review): randomize() picks a random starting point and no seed is set
# in the visible cells, so the fitted values may differ between runs.
m.randomize()
m.optimize()
m

In [None]:
# kern.ICM0.B.W

In [None]:
# kern.ICM0.B.kappa

In [None]:
# kern.ICM1.B.W

In [None]:
# kern.ICM1.B.kappa

In [None]:
# kern.B.W

In [None]:
# icm.B.kappa

In [None]:
# Correlation between the two outputs implied by the fitted coregionalization
# matrix: B[1,0] / (sqrt(B[0,0]) * sqrt(B[1,1])).
# The matrix is read from the fitted model's kernel (`m.kern`); the name `icm`
# only exists in commented-out code, so referring to it raises NameError on a
# fresh kernel.
coreg = m.kern.B.B
coreg[1,0]/np.sqrt(coreg[0,0])/np.sqrt(coreg[1,1])

In [None]:
def reverseTransform(z, mean, std):
    """Undo a z-score standardization.

    Parameters
    ----------
    z : scalar or array
        Standardized value(s).
    mean, std : scalar
        Location and scale that were used to standardize.

    Returns
    -------
    ``z * std + mean``, with the same shape as ``z``.
    """
    rescaled = z * std
    return rescaled + mean

In [None]:
def plotPrediction(xpred, predict_func, n, **kwargs):
    """Plot a model's predictive mean and a +/- 1.98 std band in original units.

    Parameters
    ----------
    xpred : 2-D array
        Column 0 holds normalized time; the remaining column(s) are cast to int
        and passed to ``predict_func`` as the ``output_index`` metadata.
    predict_func : callable
        Called as ``predict_func(xpred, Y_metadata=...)``; must return
        ``(mean, variance)`` arrays whose first column is used.
    n : tuple
        ``(mean, std)`` used to map the standardized prediction back to the
        response's original scale.
    **kwargs
        Forwarded to both ``plt.plot`` and ``plt.fill_between``.

    Draws on the current matplotlib axes; the x-axis is converted back to hours
    with ``norm['time']``.
    """
    metadata = {'output_index': xpred[:, 1:].astype(int)}
    pred_mean, pred_var = predict_func(xpred, Y_metadata=metadata)
    centre = pred_mean[:, 0]
    spread = np.sqrt(pred_var[:, 0])

    # Back-transform the mean curve and the band edges to original units.
    curve = reverseTransform(centre, *n)
    lower = reverseTransform(centre - 1.98*spread, *n)
    upper = reverseTransform(centre + 1.98*spread, *n)

    plt.plot(reverseTransform(xpred[:, 0], *norm['time']), curve, **kwargs)
    plt.fill_between(reverseTransform(xpred[:, 0], *norm['time']), lower, upper, alpha=.2, **kwargs)

In [None]:
# Coregionalized GP fit over the observed time range: latent (noise-free)
# prediction with the observations overlaid, one panel per output.
plt.figure(figsize=(8,4))

# Prediction grid: np.linspace's default 50 points across the observed
# normalized-time range, plus a second column of zeros selecting output 0 (OD).
xpred = np.hstack([np.linspace(x1.min(), x1.max())[:,None], [[0]]*50])

plt.subplot(121)
plt.title("$log_2($OD$)$",fontsize=15,color='C0')
plt.xlabel("time (h)", fontsize=15)
plotPrediction(xpred, m.predict_noiseless, norm['y1'])
plt.scatter(reverseTransform(x1, *norm['time']), reverseTransform(y1[:,0],*norm['y1']),c='C0')

plt.subplot(122)
plt.title("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.xlabel("time (h)", fontsize=15)
# Switch the index column to 1 so the same time grid queries output 1 (CFU).
xpred[:,1] = 1
plotPrediction(xpred, m.predict_noiseless, norm['y2'],color='C1')
plt.scatter(reverseTransform(x2, *norm['time']), reverseTransform(y2[:,0],*norm['y2']),c='C1')

plt.savefig("figures/cfu-vs-od_gp.pdf",bbox_inches='tight')

In [None]:
# Same figure over a wider range, to show how the joint model extrapolates.
plt.figure(figsize=(8,4))

# Both ends of the normalized-time range are scaled by 2.5; because normalized
# time is centred on the mean observation time, this widens the grid around
# that mean. The zeros column selects output 0 (OD).
xpred = np.hstack([np.linspace(x1.min()*2.5, x1.max()*2.5)[:,None], [[0]]*50])

plt.subplot(121)
plt.title("$log_2($OD$)$",fontsize=15,color='C0')
plt.xlabel("time (h)", fontsize=15)
plotPrediction(xpred, m.predict_noiseless, norm['y1'])
plt.scatter(reverseTransform(x1, *norm['time']), reverseTransform(y1[:,0],*norm['y1']),c='C0')

plt.subplot(122)
plt.title("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.xlabel("time (h)", fontsize=15)
# Switch the index column to 1 so the same time grid queries output 1 (CFU).
xpred[:,1] = 1
plotPrediction(xpred, m.predict_noiseless, norm['y2'],color='C1')
plt.scatter(reverseTransform(x2, *norm['time']), reverseTransform(y2[:,0],*norm['y2']),c='C1')

plt.savefig("figures/cfu-vs-od-long_gp.pdf",bbox_inches='tight')

In [None]:
# Wide-range figure again, but using m.predict instead of m.predict_noiseless,
# so the band is the predictive interval for observations (likelihood noise
# included - per GPy's predict semantics) rather than for the latent function.
plt.figure(figsize=(8,4))

# Grid widened by a factor of 2.5 at both ends in normalized time; the zeros
# column selects output 0 (OD).
xpred = np.hstack([np.linspace(x1.min()*2.5, x1.max()*2.5)[:,None], [[0]]*50])

plt.subplot(121)
plt.title("$log_2($OD$)$",fontsize=15,color='C0')
plt.xlabel("time (h)", fontsize=15)
plotPrediction(xpred, m.predict, norm['y1'])
plt.scatter(reverseTransform(x1, *norm['time']), reverseTransform(y1[:,0],*norm['y1']),c='C0')

plt.subplot(122)
plt.title("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.xlabel("time (h)", fontsize=15)
# Switch the index column to 1 so the same time grid queries output 1 (CFU).
xpred[:,1] = 1
plotPrediction(xpred, m.predict, norm['y2'],color='C1')
plt.scatter(reverseTransform(x2, *norm['time']), reverseTransform(y2[:,0],*norm['y2']),c='C1')

plt.savefig("figures/cfu-vs-od-obs-long_gp.pdf",bbox_inches='tight')

In [None]:
# Joint prediction grid: the same 50 normalized time points queried for
# output 0 (OD, rows 0-49) and for output 1 (CFU, rows 50-99).
xpred = np.column_stack([np.tile(np.linspace(x1.min(), x1.max()), 2),
                         np.repeat([0, 1], 50)])
noise_dict = {'output_index': xpred[:, 1:].astype(int)}

# The full covariance is needed because the difference of the two outputs
# depends on their cross-covariance.
mu, cov = m.predict_noiseless(xpred, Y_metadata=noise_dict, full_cov=True)
mu = mu[:, 0]

# Every step below is a linear map y = Bx + c, so the mean becomes B mu + c
# and the covariance becomes B cov B^T.

# Step 1: undo the per-output standardization.
B = np.diag(np.repeat([norm['y1'][1], norm['y2'][1]], 50))
c = np.repeat([norm['y1'][0], norm['y2'][0]], 50)

mu = B.dot(mu) + c
cov = B.dot(cov.dot(B.T))

# Step 2: put both outputs on a common log base.
# convert cfu to log2
# B[range(50,100),range(50,100)] = 1./np.log10(2)
# ..or convert OD to log10
B = np.diag(np.repeat([1./np.log2(10), 1.], 50))

mu = B.dot(mu)
cov = B.dot(cov.dot(B.T))

# Step 3: difference CFU - OD at each time point (row i has -1 at column i
# and +1 at column i + 50).
B = np.eye(50, 100, k=50) - np.eye(50, 100)

mu = B.dot(mu)
cov = B.dot(cov.dot(B.T))
std = np.sqrt(np.diag(cov))

plt.figure(figsize=(4,4))

plt.plot(reverseTransform(xpred[:50, 0], *norm['time']), mu, color='C4')
plt.fill_between(reverseTransform(xpred[:50, 0], *norm['time']),
                 mu - 1.98*std, mu + 1.98*std,
                 alpha=.4, color='C4')

plt.title("$log_{10}($CFU/OD$)$",fontsize=15)
plt.xlabel("time (h)", fontsize=15)

plt.savefig("figures/cfu-vs-od_gp-ratio.pdf",bbox_inches='tight')

In [None]:
# Baseline ("null") comparison: fit each output with its own independent GP,
# so no information is shared between OD and CFU.
K = GPy.kern.Matern32(1)

# Each model gets its own copy of the kernel so their hyperparameters are
# optimized separately.
m1 = GPy.models.GPRegression(x1,y1,kernel=K.copy())
m1.optimize()
m2 = GPy.models.GPRegression(x2,y2,kernel=K.copy())
m2.optimize()

In [None]:
# Independent-GP ("null") fits over the observed time range, laid out like the
# coregionalized figure so the two can be compared side by side.
plt.figure(figsize=(8,4))

# Prediction grid: np.linspace's default 50 points across the observed
# normalized-time range. The second column is the output index that
# plotPrediction forwards as metadata; the single-output models m1/m2
# presumably ignore it - confirm.
xpred = np.hstack([np.linspace(x1.min(), x1.max())[:,None], [[0]]*50])

plt.subplot(121)
plt.title("$log_2($OD$)$",fontsize=15,color='C0')
plt.xlabel("time (h)", fontsize=15)
plotPrediction(xpred, m1.predict_noiseless, norm['y1'])
plt.scatter(reverseTransform(x1, *norm['time']), reverseTransform(y1[:,0],*norm['y1']),c='C0')

plt.subplot(122)
plt.title("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.xlabel("time (h)", fontsize=15)
xpred[:,1] = 1
plotPrediction(xpred, m2.predict_noiseless, norm['y2'],color='C1')
plt.scatter(reverseTransform(x2, *norm['time']), reverseTransform(y2[:,0],*norm['y2']),c='C1')
# Fixed y-range for the CFU panel.
plt.ylim(6.1, 9.5)

# bbox_inches='tight' matches every other figure saved by this notebook, so
# this PDF is cropped the same way as the ones it is compared with.
plt.savefig("figures/cfu-vs-od_gp-null.pdf",bbox_inches='tight')

In [None]:
# Independent-GP ("null") fits over a wider range, to show how each
# single-output model extrapolates.
plt.figure(figsize=(8,4))

# Grid widened by a factor of 2.5 at both ends in normalized time. The second
# column is the output index forwarded by plotPrediction as metadata.
xpred = np.hstack([np.linspace(x1.min()*2.5, x1.max()*2.5)[:,None], [[0]]*50])

plt.subplot(121)
plt.title("$log_2($OD$)$",fontsize=15,color='C0')
plt.xlabel("time (h)", fontsize=15)
plotPrediction(xpred, m1.predict_noiseless, norm['y1'])
plt.scatter(reverseTransform(x1, *norm['time']), reverseTransform(y1[:,0],*norm['y1']),c='C0')

plt.subplot(122)
plt.title("$log_{10}($CFU$)$",fontsize=15,color='C1')
plt.xlabel("time (h)", fontsize=15)
xpred[:,1] = 1
plotPrediction(xpred, m2.predict_noiseless, norm['y2'],color='C1')
plt.scatter(reverseTransform(x2, *norm['time']), reverseTransform(y2[:,0],*norm['y2']),c='C1')
# Fixed y-range for the CFU panel, same limits as the observed-range null figure.
plt.ylim(6.1, 9.5)

plt.savefig("figures/cfu-vs-od-long_gp-null.pdf",bbox_inches='tight')