In [None]:
import scipy, patsy, GPy
from gpmultipy import dataset
import pandas as pd

In [None]:
%pylab

In [None]:
%matplotlib inline

In [None]:
# Load the three experimental batches and tag each with its batch number.
ds1 = dataset.DataSet("../data/normalized/ura3-pq-replicate/")
ds1.meta['batch'] = 1

# Batches 2 and 3 name the paraquat column 'mM PQ'; mirror it into 'mM_PQ'
# and treat missing concentrations as 0.  fillna() replaces the original
# chained assignment (meta.mM_PQ[mask] = 0), which triggers
# SettingWithCopyWarning and does nothing under pandas copy-on-write.
ds2 = dataset.DataSet("../data/pq-osmo-control/")
ds2.meta['mM_PQ'] = ds2.meta['mM PQ'].fillna(0)
ds2.meta['batch'] = 2

ds3 = dataset.DataSet("../data/pq-osmo-combo/")
ds3.meta['mM_PQ'] = ds3.meta['mM PQ'].fillna(0)
ds3.meta['batch'] = 3

In [None]:
# Stack the per-batch metadata tables row-wise into one frame.
meta = pd.concat([ds1.meta, ds2.meta, ds3.meta])

# Rows without a recorded 'M NaCl' value get 4.2; the osmo-stress filter
# further down keeps exactly the rows where 'M NaCl' == 4.2.
meta['M NaCl'] = meta['M NaCl'].fillna(4.2)

meta.head()

In [None]:
# Show every distinct paraquat concentration, ascending, to three decimals.
', '.join('%.3lf' % pq for pq in sorted(meta.mM_PQ.unique()))

In [None]:
# Join the per-batch measurement tables side by side (column-wise).
# `axis` is passed by keyword: the positional form is deprecated and was
# removed in pandas 2.0.
data = pd.concat((ds1.data, ds2.data, ds3.data), axis=1)
data.head()

In [None]:
# remove osmo stress
# Keep only samples whose 'M NaCl' equals 4.2, and drop the matching
# columns of `data` with the same boolean mask.

select = meta['M NaCl'].eq(4.2)
meta = meta.loc[select]
data = data.loc[:, select]

In [None]:
# remove edges
# Well ids to exclude, listed plate by plate (ids 101-200 and 201-300).
# Presumably each plate is a 10x10 grid numbered row-major, so these four
# runs are its outer rows and columns -- TODO confirm against the plate map.
# Built from lists because `range(...) + range(...)` only works on Python 2.

edge = []
for plate_offset in (100, 200):
    edge += list(range(plate_offset + 1, plate_offset + 11))       # ids x01..x10
    edge += list(range(plate_offset + 11, plate_offset + 91, 10))  # ids x11, x21, ..., x81
    edge += list(range(plate_offset + 20, plate_offset + 91, 10))  # ids x20, x30, ..., x90
    edge += list(range(plate_offset + 91, plate_offset + 101))     # ids x91..(x+1)00
edge = np.array(edge)

# Drop every sample whose well id is listed in `edge`, applying the same
# boolean mask to the metadata rows and to the columns of `data`.
is_edge_well = meta['Well'].isin(edge)
select = ~is_edge_well

meta = meta.loc[select]
data = data.loc[:, select]

In [None]:
# Load the working dataset from the local 'data/' directory.
# NOTE(review): no visible cell writes the filtered `meta`/`data` frames built
# above into 'data/'. Presumably that happened in a removed cell or outside
# this notebook -- confirm, otherwise Restart & Run All reads stale files.
ds = dataset.DataSet('data/')

In [None]:
# Wide table: one row per sample, metadata columns followed by the
# transposed measurements.  `axis` is given by keyword because the
# positional form is deprecated and was removed in pandas 2.0.
pivot = pd.concat((ds.meta, ds.data.T), axis=1, ignore_index=False)

# Round concentrations to 2 decimals so equal doses compare equal.
pivot['mM_PQ'] = pivot['mM_PQ'].round(2)

# Keep the ura3 strain and drop biological replicates labelled E through N.
pivot = pivot[pivot['Strain'] == 'ura3']
pivot = pivot[~pivot['Bio'].isin(list('EFGHIJKLMN'))]

pivot.head()

In [None]:
# Long format: one row per (sample, time point) with the reading in 'od'.
# NOTE(review): value_vars uses ds.data.columns, but `pivot` was built from
# ds.data.T, whose columns are ds.data.index -- confirm these labels agree.
melt = pd.melt(pivot,
               id_vars=ds.meta.columns.tolist(),
               value_vars=ds.data.columns.tolist(),
               var_name='time', value_name='od')

melt['time'] = melt['time'].astype(float)
melt['mM_PQ'] = melt['mM_PQ'].round(2)

# Apply the three row filters in one step and take an explicit copy, so the
# column assignments below act on an independent frame instead of a slice
# (the original per-filter reassignment triggers SettingWithCopyWarning).
keep = (
    melt['od'].notnull()
    & (melt['Strain'] == 'ura3')
    & ~melt['Bio'].isin(list('EFGHIJKLMN'))
)
melt = melt.loc[keep].copy()

# Recode the replicate labels as consecutive integers starting at 1.
melt['Bio'] = melt['Bio'].astype('category').cat.codes + 1

melt['batch'] = melt['batch'].astype(int)

# Work with log2-transformed OD from here on.
melt['od'] = np.log2(melt['od'])

# Subsample: keep rows whose truncated time is a multiple of 3.
# NOTE(review): the int cast truncates, so fractional times such as 3.25 are
# kept along with 3.0 -- confirm that is the intended thinning.
melt = melt.loc[(melt['time'].astype(int) % 3) == 0, :].copy()

melt.shape

In [None]:
# Plot every zero-paraquat growth curve, coloured by batch.
select = melt[melt.mM_PQ==0.0]
g = select.groupby(['batch','Well'])

# Batch -> line colour; any batch other than 1 or 2 is drawn in blue.
batch_colors = {1: 'g', 2: 'r'}

plt.figure(figsize=(10,6))

for k, temp in g:
    batch, well = k

    temp = temp.sort_values('time')
    color = batch_colors.get(batch, 'b')

    plt.plot(temp.time, temp.od, c=color, alpha=.6)

# The y-range spans the whole dataset and does not depend on the group, so
# set it once here instead of rescanning melt.od on every loop iteration.
plt.ylim(melt.od.min(), melt.od.max())

plt.ylabel("log(OD)",fontsize=20)
plt.xlabel("time (h)",fontsize=20)
plt.tight_layout()

In [None]:
# One panel per paraquat concentration, one line per well, coloured by batch.
g = melt.groupby(['mM_PQ','batch','Bio','Well'])
pqvals = melt.mM_PQ.unique()
pqvals.sort()

# Panel number of each concentration, built once instead of calling
# list.index() for every group.
panel_of = {pq: i + 1 for i, pq in enumerate(pqvals.tolist())}

# Five panels per row.  The row count grows with the number of
# concentrations (the original fixed 2x5 grid fails beyond ten levels) but
# never drops below 2, so the layout is unchanged for ten or fewer levels.
ncols = 5
nrows = max(2, -(-len(pqvals) // ncols))

# Batch -> line colour; any batch other than 1 or 2 is drawn in blue.
batch_colors = {1: 'g', 2: 'r'}

# Shared y-range for all panels, computed once.
ymin, ymax = melt.od.min(), melt.od.max()

plt.figure(figsize=(20,8))

for k, temp in g:
    pq, batch, bio, well = k

    plt.subplot(nrows, ncols, panel_of[pq])
    plt.title(pq)

    temp = temp.sort_values('time')
    color = batch_colors.get(batch, 'b')

    plt.plot(temp.time, temp.od, c=color, alpha=.6)
    # ylim applies to the current axes only, so it stays inside the loop.
    plt.ylim(ymin, ymax)

plt.tight_layout()

In [None]:
# Quick overview of OD against time, using every 10th observation.
plt.scatter(melt['time'].values[::10], melt['od'].values[::10])

In [None]:
# Preview the first five rows of the long-format table.
melt.head(5)

In [None]:
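# Abandoned manual construction of the GP inputs, kept for reference only;
# the patsy design matrices built in the next cell are used instead.
# xgp = melt[['time', 'mM_PQ', 'batch']].values

# ygp = (never filled in)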

In [None]:
# Build the GP response (ygp) and input (xgp) matrices.  od, time and mM_PQ
# are standardized by patsy; batch enters unchanged as the last column and
# `+ 0` drops the intercept.
ygp, xgp = patsy.dmatrices('standardize(od) ~ standardize(time) + standardize(mM_PQ) + batch + 0', melt)

# Take the FactorInfo of the first factor on the response side.
# list(...values())[0] is the same entry the original reached through
# keys()[0], which is not subscriptable on Python 3.
fi = list(ygp.design_info.factor_infos.values())[0]

# Stateful standardize transform stored under patsy's generated key.
# NOTE(review): this key is a patsy implementation detail and may change
# between versions -- confirm before upgrading patsy.
s = fi.state['transforms']['_patsy_stobj0__standardize__']

xgp

In [None]:
# Template for the shared ("main") effect: a 1-input RBF named 'time' plus
# a 2-input RBF named 'interaction', both with ARD enabled.
k_time = GPy.kern.RBF(1, ARD=True, name='time')
k_interaction = GPy.kern.RBF(2, ARD=True, name='interaction')
kmain = k_time + k_interaction
kmain.name = 'main'

# Template for the batch-specific effect: a 2-input ARD RBF named 'batch'.
kbatch = GPy.kern.RBF(2, ARD=True, name='batch')

In [None]:
# Fit a stand-alone GP to batch 1 only: rows whose last design-matrix column
# equals 1, with a fresh copy of the `kmain` template as the kernel.
select = (xgp[:, -1] == 1)
gp_b1 = GPy.models.GPRegression(X=xgp[select, :], Y=ygp[select, :], kernel=kmain.copy())
# Random restart before optimizing; no seed is set, so fits vary run to run.
gp_b1.randomize()
gp_b1.optimize()
gp_b1

In [None]:
# Fit a stand-alone GP to batch 2 only: rows whose last design-matrix column
# equals 2, with a fresh copy of the `kmain` template as the kernel.
select = (xgp[:, -1] == 2)
gp_b2 = GPy.models.GPRegression(X=xgp[select, :], Y=ygp[select, :], kernel=kmain.copy())
# Random restart before optimizing; no seed is set, so fits vary run to run.
gp_b2.randomize()
gp_b2.optimize()
gp_b2

In [None]:
# Fit a stand-alone GP to batch 3 only: rows whose last design-matrix column
# equals 3, with a fresh copy of the `kmain` template as the kernel.
select = (xgp[:, -1] == 3)
gp_b3 = GPy.models.GPRegression(X=xgp[select, :], Y=ygp[select, :], kernel=kmain.copy())
# Random restart before optimizing; no seed is set, so fits vary run to run.
gp_b3.randomize()
gp_b3.optimize()
gp_b3

In [None]:
# Hierarchical kernel built from copies of the two templates, so fitting
# this model leaves `kmain` and `kbatch` themselves untouched.
hier_parts = (kmain.copy(), kbatch.copy())
kern = GPy.kern.Hierarchical(hier_parts)

# Joint model over all batches; randomize the starting hyperparameters and
# display the (not yet optimized) model summary.
gp = GPy.models.GPRegression(X=xgp, Y=ygp, kernel=kern)
gp.randomize()
gp

In [None]:
# Optimize the joint model's hyperparameters, then display the fitted model
# summary as the cell output.
gp.optimize()
gp

In [None]:
# Plot the fitted shared ("main") effect over time at two paraquat levels,
# with a +/- 2 sd band and the matching observations behind it.
#
# Predictions use gp.kern.main, the optimized part of the fitted model.  The
# original passed the module-level `kmain` template, which was never
# optimized (only copies of it were handed to the models), so its curves
# reflected the template's initial hyperparameters rather than the fit.
n_pred = 50
meltpred = {'time': np.linspace(melt.time.min(), melt.time.max(), n_pred),
            'mM_PQ': [0]*n_pred,
            'batch': [3]*n_pred}

for i, pq in enumerate([0, 0.33]):
    meltpred['mM_PQ'] = [pq]*n_pred
    # Reuse the design info from the training matrix so new inputs are
    # standardized with the statistics memorized from the training data.
    xpred = patsy.build_design_matrices([xgp.design_info], meltpred)[0]

    # Alternatives: gp.predict_noiseless(xpred) includes the batch effect;
    # gp.predict(xpred) additionally includes observation noise.
    mu, cov = gp.predict_noiseless(xpred, kern=gp.kern.main)
    cov = cov[:,0]
    mu = mu[:,0]
    band = 2*np.sqrt(cov)

    plt.plot(xpred[:,0], mu)
    plt.fill_between(xpred[:,0], mu-band, mu+band, alpha=.3)

    # Observations at this concentration; isclose avoids exact float
    # equality on standardized values (the original mixed == and isclose).
    match = np.isclose(xgp[:,1], xpred[0,1])
    plt.scatter(xgp[match,0], ygp[match,0], color='C%d' % i, alpha=.1)



In [None]:
# Compare the fitted variance of the shared interaction kernel with that of
# the batch kernel.  Both are read from the fitted model (gp.kern); the
# original read the interaction variance from the `kmain` template, which
# was never optimized because only copies of it were passed to the models.
plt.bar(range(2),
        [gp.kern.main.interaction.variance, gp.kern.batch.variance],
        tick_label=['interaction', 'batch'])

In [None]:
# Compare the shared effect of the joint model (black) with each
# stand-alone per-batch model (batch colour), one panel per batch.
select = melt[melt.mM_PQ==0.0]
g = select.groupby(['batch','Well'])

# Prediction grid: 50 time points (np.linspace default) at mM_PQ = 0, batch = 3.
meltpred = {'time':np.linspace(melt.time.min(), melt.time.max()),'mM_PQ':[0]*50, 'batch':[3]*50}
xpred = patsy.build_design_matrices([xgp.design_info], meltpred)[0]

# Predict with the fitted model's `main` kernel part only.
mu, cov = gp.predict_noiseless(xpred, kern = gp.kern.main)
# mu, cov = gp.predict_noiseless(xpred)
# mu, cov = gp.predict(xpred)
cov = cov[:,0]
mu = mu[:,0]

plt.figure(figsize=(12,4))

# This loop draws nothing (its plot call is commented out) but it selects a
# subplot per group and leaves `temp`, `batch`, `well` and `color` in the
# namespace; a later cell reads `temp`, so removing it changes that cell.
for k, temp in g:
    # debug: print k
    batch, well = k
        
    plt.subplot(1,3,batch)
    
    temp = temp.sort_values('time')
    
    color = "C%d"%(batch-1)
    
    #plt.plot(temp.time,s.transform(temp.od),c=color,alpha=.6)
    
for batch in range(1,4):
    plt.subplot(1,3,batch)
    # Joint-model main effect: the same curve is drawn in every panel.
    plt.plot(meltpred['time'], mu, c='k')
    plt.fill_between(meltpred['time'], mu-2*np.sqrt(cov), mu+2*np.sqrt(cov), alpha=.3, color='k')
    
    # Stand-alone model for this batch, evaluated on the same grid.
    temp_gp = [gp_b1, gp_b2, gp_b3][batch-1]
    mu2,cov2 = temp_gp.predict_noiseless(xpred)
    cov2 = cov2[:,0]
    mu2 = mu2[:,0]

    plt.plot(meltpred['time'], mu2, c='C%d'%(batch-1))
    plt.fill_between(meltpred['time'], mu2-2*np.sqrt(cov2), mu2+2*np.sqrt(cov2), alpha=.3, color='C%d'%(batch-1))

# plt.ylabel("log(OD)",fontsize=20)
# plt.xlabel("time (h)",fontsize=20)
plt.tight_layout()




In [None]:
# Re-read the transform object from `fi.state` under patsy's generated key;
# this repeats the assignment made where the design matrices were built.
s = fi.state['transforms']['_patsy_stobj0__standardize__']

In [None]:
# Apply the transform `s` to the od column of `temp`.
# NOTE(review): `temp` is whatever the most recent `for k, temp in g` loop
# left behind, so this output depends on which plotting cell ran last.
s.transform(temp.od)